Actual source code: kaij.c
1: /*
2: Defines the basic matrix operations for the KAIJ matrix storage format.
3: This format is used to evaluate matrices of the form:
5: [I \otimes S + A \otimes T]
7: where
8: S is a dense (p \times q) matrix
9: T is a dense (p \times q) matrix
10: A is an AIJ (n \times n) matrix
11: I is the identity matrix
13: The resulting matrix is (np \times nq)
15: We provide:
16: MatMult()
17: MatMultAdd()
18: MatInvertBlockDiagonal()
19: and
20: MatCreateKAIJ(Mat,PetscInt,PetscInt,const PetscScalar[],const PetscScalar[],Mat*)
22: This single directory handles both the sequential and parallel codes
23: */
25: #include <../src/mat/impls/kaij/kaij.h>
26: #include <../src/mat/utils/freespace.h>
27: #include <petsc/private/vecimpl.h>
29: /*@C
30: MatKAIJGetAIJ - Get the `MATAIJ` matrix describing the blockwise action of the `MATKAIJ` matrix
32: Not Collective, but if the `MATKAIJ` matrix is parallel, the `MATAIJ` matrix is also parallel
34: Input Parameter:
35: . A - the `MATKAIJ` matrix
37: Output Parameter:
38: . B - the `MATAIJ` matrix
40: Level: advanced
42: Note:
43: The reference count on the `MATAIJ` matrix is not increased so you should not destroy it.
45: .seealso: [](ch_matrices), `Mat`, `MatCreateKAIJ()`, `MATKAIJ`, `MATAIJ`
46: @*/
47: PetscErrorCode MatKAIJGetAIJ(Mat A, Mat *B)
48: {
49: PetscBool ismpikaij, isseqkaij;
51: PetscFunctionBegin;
52: PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIKAIJ, &ismpikaij));
53: PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQKAIJ, &isseqkaij));
54: if (ismpikaij) {
55: Mat_MPIKAIJ *b = (Mat_MPIKAIJ *)A->data;
57: *B = b->A;
58: } else if (isseqkaij) {
59: Mat_SeqKAIJ *b = (Mat_SeqKAIJ *)A->data;
61: *B = b->AIJ;
62: } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Matrix passed in is not of type KAIJ");
63: PetscFunctionReturn(PETSC_SUCCESS);
64: }
66: /*@C
67: MatKAIJGetS - Get the `S` matrix describing the shift action of the `MATKAIJ` matrix
69: Not Collective; the entire `S` is stored and returned independently on all processes.
71: Input Parameter:
72: . A - the `MATKAIJ` matrix
74: Output Parameters:
75: + m - the number of rows in `S`
76: . n - the number of columns in `S`
77: - S - the S matrix, in form of a scalar array in column-major format
79: Level: advanced
81: Note:
82: All output parameters are optional (pass `NULL` if not desired)
84: .seealso: [](ch_matrices), `Mat`, `MATKAIJ`, `MatCreateKAIJ()`, `MatGetBlockSizes()`
85: @*/
86: PetscErrorCode MatKAIJGetS(Mat A, PetscInt *m, PetscInt *n, PetscScalar **S)
87: {
88: Mat_SeqKAIJ *b = (Mat_SeqKAIJ *)A->data;
89: PetscFunctionBegin;
90: if (m) *m = b->p;
91: if (n) *n = b->q;
92: if (S) *S = b->S;
93: PetscFunctionReturn(PETSC_SUCCESS);
94: }
96: /*@C
97: MatKAIJGetSRead - Get a read-only pointer to the `S` matrix describing the shift action of the `MATKAIJ` matrix
99: Not Collective; the entire `S` is stored and returned independently on all processes.
101: Input Parameter:
102: . A - the `MATKAIJ` matrix
104: Output Parameters:
105: + m - the number of rows in `S`
106: . n - the number of columns in `S`
107: - S - the S matrix, in form of a scalar array in column-major format
109: Level: advanced
111: Note:
112: All output parameters are optional (pass `NULL` if not desired)
114: .seealso: [](ch_matrices), `Mat`, `MATKAIJ`, `MatCreateKAIJ()`, `MatGetBlockSizes()`
115: @*/
116: PetscErrorCode MatKAIJGetSRead(Mat A, PetscInt *m, PetscInt *n, const PetscScalar **S)
117: {
118: Mat_SeqKAIJ *b = (Mat_SeqKAIJ *)A->data;
119: PetscFunctionBegin;
120: if (m) *m = b->p;
121: if (n) *n = b->q;
122: if (S) *S = b->S;
123: PetscFunctionReturn(PETSC_SUCCESS);
124: }
126: /*@C
127: MatKAIJRestoreS - Restore array obtained with `MatKAIJGetS()`
129: Not Collective
131: Input Parameters:
132: + A - the `MATKAIJ` matrix
133: - S - location of pointer to array obtained with `MatKAIJGetS()`
135: Level: advanced
137: Note:
138: This routine zeros the array pointer to prevent accidental reuse after it has been restored.
139: If `NULL` is passed, it will not attempt to zero the array pointer.
141: .seealso: [](ch_matrices), `Mat`, `MATKAIJ`, `MatKAIJGetS()`, `MatKAIJGetSRead()`, `MatKAIJRestoreSRead()`
142: @*/
143: PetscErrorCode MatKAIJRestoreS(Mat A, PetscScalar **S)
144: {
145: PetscFunctionBegin;
146: if (S) *S = NULL;
147: PetscCall(PetscObjectStateIncrease((PetscObject)A));
148: PetscFunctionReturn(PETSC_SUCCESS);
149: }
151: /*@C
152: MatKAIJRestoreSRead - Restore array obtained with `MatKAIJGetSRead()`
154: Not Collective
156: Input Parameters:
157: + A - the `MATKAIJ` matrix
158: - S - location of pointer to array obtained with `MatKAIJGetS()`
160: Level: advanced
162: Note:
163: This routine zeros the array pointer to prevent accidental reuse after it has been restored.
164: If `NULL` is passed, it will not attempt to zero the array pointer.
166: .seealso: [](ch_matrices), `Mat`, `MATKAIJ`, `MatKAIJGetS()`, `MatKAIJGetSRead()`
167: @*/
168: PetscErrorCode MatKAIJRestoreSRead(Mat A, const PetscScalar **S)
169: {
170: PetscFunctionBegin;
171: if (S) *S = NULL;
172: PetscFunctionReturn(PETSC_SUCCESS);
173: }
175: /*@C
176: MatKAIJGetT - Get the transformation matrix `T` associated with the `MATKAIJ` matrix
178: Not Collective; the entire `T` is stored and returned independently on all processes
180: Input Parameter:
181: . A - the `MATKAIJ` matrix
183: Output Parameters:
184: + m - the number of rows in `T`
185: . n - the number of columns in `T`
186: - T - the T matrix, in form of a scalar array in column-major format
188: Level: advanced
190: Note:
191: All output parameters are optional (pass `NULL` if not desired)
193: .seealso: [](ch_matrices), `Mat`, `MATKAIJ`, `MatCreateKAIJ()`, `MatGetBlockSizes()`
194: @*/
195: PetscErrorCode MatKAIJGetT(Mat A, PetscInt *m, PetscInt *n, PetscScalar **T)
196: {
197: Mat_SeqKAIJ *b = (Mat_SeqKAIJ *)A->data;
198: PetscFunctionBegin;
199: if (m) *m = b->p;
200: if (n) *n = b->q;
201: if (T) *T = b->T;
202: PetscFunctionReturn(PETSC_SUCCESS);
203: }
205: /*@C
206: MatKAIJGetTRead - Get a read-only pointer to the transformation matrix `T` associated with the `MATKAIJ` matrix
208: Not Collective; the entire `T` is stored and returned independently on all processes
210: Input Parameter:
211: . A - the `MATKAIJ` matrix
213: Output Parameters:
214: + m - the number of rows in `T`
215: . n - the number of columns in `T`
216: - T - the T matrix, in form of a scalar array in column-major format
218: Level: advanced
220: Note:
221: All output parameters are optional (pass `NULL` if not desired)
223: .seealso: [](ch_matrices), `Mat`, `MATKAIJ`, `MatCreateKAIJ()`, `MatGetBlockSizes()`
224: @*/
225: PetscErrorCode MatKAIJGetTRead(Mat A, PetscInt *m, PetscInt *n, const PetscScalar **T)
226: {
227: Mat_SeqKAIJ *b = (Mat_SeqKAIJ *)A->data;
228: PetscFunctionBegin;
229: if (m) *m = b->p;
230: if (n) *n = b->q;
231: if (T) *T = b->T;
232: PetscFunctionReturn(PETSC_SUCCESS);
233: }
235: /*@C
236: MatKAIJRestoreT - Restore array obtained with `MatKAIJGetT()`
238: Not Collective
240: Input Parameters:
241: + A - the `MATKAIJ` matrix
242: - T - location of pointer to array obtained with `MatKAIJGetS()`
244: Level: advanced
246: Note:
247: This routine zeros the array pointer to prevent accidental reuse after it has been restored.
248: If `NULL` is passed, it will not attempt to zero the array pointer.
250: .seealso: [](ch_matrices), `Mat`, `MATKAIJ`, `MatKAIJGetT()`, `MatKAIJGetTRead()`, `MatKAIJRestoreTRead()`
251: @*/
252: PetscErrorCode MatKAIJRestoreT(Mat A, PetscScalar **T)
253: {
254: PetscFunctionBegin;
255: if (T) *T = NULL;
256: PetscCall(PetscObjectStateIncrease((PetscObject)A));
257: PetscFunctionReturn(PETSC_SUCCESS);
258: }
260: /*@C
261: MatKAIJRestoreTRead - Restore array obtained with `MatKAIJGetTRead()`
263: Not Collective
265: Input Parameters:
266: + A - the `MATKAIJ` matrix
267: - T - location of pointer to array obtained with `MatKAIJGetS()`
269: Level: advanced
271: Note:
272: This routine zeros the array pointer to prevent accidental reuse after it has been restored.
273: If `NULL` is passed, it will not attempt to zero the array pointer.
275: .seealso: [](ch_matrices), `Mat`, `MATKAIJ`, `MatKAIJGetT()`, `MatKAIJGetTRead()`
276: @*/
277: PetscErrorCode MatKAIJRestoreTRead(Mat A, const PetscScalar **T)
278: {
279: PetscFunctionBegin;
280: if (T) *T = NULL;
281: PetscFunctionReturn(PETSC_SUCCESS);
282: }
284: /*@
285: MatKAIJSetAIJ - Set the `MATAIJ` matrix describing the blockwise action of the `MATKAIJ` matrix
287: Logically Collective; if the `MATAIJ` matrix is parallel, the `MATKAIJ` matrix is also parallel
289: Input Parameters:
290: + A - the `MATKAIJ` matrix
291: - B - the `MATAIJ` matrix
293: Level: advanced
295: Notes:
296: This function increases the reference count on the `MATAIJ` matrix, so the user is free to destroy the matrix if it is not needed.
298: Changes to the entries of the `MATAIJ` matrix will immediately affect the `MATKAIJ` matrix.
300: .seealso: [](ch_matrices), `Mat`, `MATKAIJ`, `MatKAIJGetAIJ()`, `MatKAIJSetS()`, `MatKAIJSetT()`
301: @*/
302: PetscErrorCode MatKAIJSetAIJ(Mat A, Mat B)
303: {
304: PetscMPIInt size;
305: PetscBool flg;
307: PetscFunctionBegin;
308: PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
309: if (size == 1) {
310: PetscCall(PetscObjectTypeCompare((PetscObject)B, MATSEQAIJ, &flg));
311: PetscCheck(flg, PetscObjectComm((PetscObject)B), PETSC_ERR_SUP, "MatKAIJSetAIJ() with MATSEQKAIJ does not support %s as the AIJ mat", ((PetscObject)B)->type_name);
312: Mat_SeqKAIJ *a = (Mat_SeqKAIJ *)A->data;
313: a->AIJ = B;
314: } else {
315: Mat_MPIKAIJ *a = (Mat_MPIKAIJ *)A->data;
316: a->A = B;
317: }
318: PetscCall(PetscObjectReference((PetscObject)B));
319: PetscFunctionReturn(PETSC_SUCCESS);
320: }
322: /*@C
323: MatKAIJSetS - Set the `S` matrix describing the shift action of the `MATKAIJ` matrix
325: Logically Collective; the entire `S` is stored independently on all processes.
327: Input Parameters:
328: + A - the `MATKAIJ` matrix
329: . p - the number of rows in `S`
330: . q - the number of columns in `S`
331: - S - the S matrix, in form of a scalar array in column-major format
333: Level: advanced
335: Notes:
336: The dimensions `p` and `q` must match those of the transformation matrix `T` associated with the `MATKAIJ` matrix.
338: The `S` matrix is copied, so the user can destroy this array.
340: .seealso: [](ch_matrices), `Mat`, `MATKAIJ`, `MatKAIJGetS()`, `MatKAIJSetT()`, `MatKAIJSetAIJ()`
341: @*/
342: PetscErrorCode MatKAIJSetS(Mat A, PetscInt p, PetscInt q, const PetscScalar S[])
343: {
344: Mat_SeqKAIJ *a = (Mat_SeqKAIJ *)A->data;
346: PetscFunctionBegin;
347: PetscCall(PetscFree(a->S));
348: if (S) {
349: PetscCall(PetscMalloc1(p * q, &a->S));
350: PetscCall(PetscMemcpy(a->S, S, p * q * sizeof(PetscScalar)));
351: } else a->S = NULL;
353: a->p = p;
354: a->q = q;
355: PetscFunctionReturn(PETSC_SUCCESS);
356: }
358: /*@C
359: MatKAIJGetScaledIdentity - Check if both `S` and `T` are scaled identities.
361: Logically Collective.
363: Input Parameter:
364: . A - the `MATKAIJ` matrix
366: Output Parameter:
367: . identity - the Boolean value
369: Level: advanced
371: .seealso: [](ch_matrices), `Mat`, `MATKAIJ`, `MatKAIJGetS()`, `MatKAIJGetT()`
372: @*/
373: PetscErrorCode MatKAIJGetScaledIdentity(Mat A, PetscBool *identity)
374: {
375: Mat_SeqKAIJ *a = (Mat_SeqKAIJ *)A->data;
376: PetscInt i, j;
378: PetscFunctionBegin;
379: if (a->p != a->q) {
380: *identity = PETSC_FALSE;
381: PetscFunctionReturn(PETSC_SUCCESS);
382: } else *identity = PETSC_TRUE;
383: if (!a->isTI || a->S) {
384: for (i = 0; i < a->p && *identity; i++) {
385: for (j = 0; j < a->p && *identity; j++) {
386: if (i != j) {
387: if (a->S && PetscAbsScalar(a->S[i + j * a->p]) > PETSC_SMALL) *identity = PETSC_FALSE;
388: if (a->T && PetscAbsScalar(a->T[i + j * a->p]) > PETSC_SMALL) *identity = PETSC_FALSE;
389: } else {
390: if (a->S && PetscAbsScalar(a->S[i * (a->p + 1)] - a->S[0]) > PETSC_SMALL) *identity = PETSC_FALSE;
391: if (a->T && PetscAbsScalar(a->T[i * (a->p + 1)] - a->T[0]) > PETSC_SMALL) *identity = PETSC_FALSE;
392: }
393: }
394: }
395: }
396: PetscFunctionReturn(PETSC_SUCCESS);
397: }
399: /*@C
400: MatKAIJSetT - Set the transformation matrix `T` associated with the `MATKAIJ` matrix
402: Logically Collective; the entire `T` is stored independently on all processes.
404: Input Parameters:
405: + A - the `MATKAIJ` matrix
406: . p - the number of rows in `S`
407: . q - the number of columns in `S`
408: - T - the `T` matrix, in form of a scalar array in column-major format
410: Level: advanced
412: Notes:
413: The dimensions `p` and `q` must match those of the shift matrix `S` associated with the `MATKAIJ` matrix.
415: The `T` matrix is copied, so the user can destroy this array.
417: .seealso: [](ch_matrices), `Mat`, `MATKAIJ`, `MatKAIJGetT()`, `MatKAIJSetS()`, `MatKAIJSetAIJ()`
418: @*/
419: PetscErrorCode MatKAIJSetT(Mat A, PetscInt p, PetscInt q, const PetscScalar T[])
420: {
421: PetscInt i, j;
422: Mat_SeqKAIJ *a = (Mat_SeqKAIJ *)A->data;
423: PetscBool isTI = PETSC_FALSE;
425: PetscFunctionBegin;
426: /* check if T is an identity matrix */
427: if (T && (p == q)) {
428: isTI = PETSC_TRUE;
429: for (i = 0; i < p; i++) {
430: for (j = 0; j < q; j++) {
431: if (i == j) {
432: /* diagonal term must be 1 */
433: if (T[i + j * p] != 1.0) isTI = PETSC_FALSE;
434: } else {
435: /* off-diagonal term must be 0 */
436: if (T[i + j * p] != 0.0) isTI = PETSC_FALSE;
437: }
438: }
439: }
440: }
441: a->isTI = isTI;
443: PetscCall(PetscFree(a->T));
444: if (T && (!isTI)) {
445: PetscCall(PetscMalloc1(p * q, &a->T));
446: PetscCall(PetscMemcpy(a->T, T, p * q * sizeof(PetscScalar)));
447: } else a->T = NULL;
449: a->p = p;
450: a->q = q;
451: PetscFunctionReturn(PETSC_SUCCESS);
452: }
454: static PetscErrorCode MatDestroy_SeqKAIJ(Mat A)
455: {
456: Mat_SeqKAIJ *b = (Mat_SeqKAIJ *)A->data;
458: PetscFunctionBegin;
459: PetscCall(MatDestroy(&b->AIJ));
460: PetscCall(PetscFree(b->S));
461: PetscCall(PetscFree(b->T));
462: PetscCall(PetscFree(b->ibdiag));
463: PetscCall(PetscFree5(b->sor.w, b->sor.y, b->sor.work, b->sor.t, b->sor.arr));
464: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqkaij_seqaij_C", NULL));
465: PetscCall(PetscFree(A->data));
466: PetscFunctionReturn(PETSC_SUCCESS);
467: }
469: static PetscErrorCode MatKAIJ_build_AIJ_OAIJ(Mat A)
470: {
471: Mat_MPIKAIJ *a;
472: Mat_MPIAIJ *mpiaij;
473: PetscScalar *T;
474: PetscInt i, j;
475: PetscObjectState state;
477: PetscFunctionBegin;
478: a = (Mat_MPIKAIJ *)A->data;
479: mpiaij = (Mat_MPIAIJ *)a->A->data;
481: PetscCall(PetscObjectStateGet((PetscObject)a->A, &state));
482: if (state == a->state) {
483: /* The existing AIJ and KAIJ members are up-to-date, so simply exit. */
484: PetscFunctionReturn(PETSC_SUCCESS);
485: } else {
486: PetscCall(MatDestroy(&a->AIJ));
487: PetscCall(MatDestroy(&a->OAIJ));
488: if (a->isTI) {
489: /* If the transformation matrix associated with the parallel matrix A is the identity matrix, then a->T will be NULL.
490: * In this case, if we pass a->T directly to the MatCreateKAIJ() calls to create the sequential submatrices, the routine will
491: * not be able to tell that transformation matrix should be set to the identity; thus we create a temporary identity matrix
492: * to pass in. */
493: PetscCall(PetscMalloc1(a->p * a->q, &T));
494: for (i = 0; i < a->p; i++) {
495: for (j = 0; j < a->q; j++) {
496: if (i == j) T[i + j * a->p] = 1.0;
497: else T[i + j * a->p] = 0.0;
498: }
499: }
500: } else T = a->T;
501: PetscCall(MatCreateKAIJ(mpiaij->A, a->p, a->q, a->S, T, &a->AIJ));
502: PetscCall(MatCreateKAIJ(mpiaij->B, a->p, a->q, NULL, T, &a->OAIJ));
503: if (a->isTI) PetscCall(PetscFree(T));
504: a->state = state;
505: }
507: PetscFunctionReturn(PETSC_SUCCESS);
508: }
510: static PetscErrorCode MatSetUp_KAIJ(Mat A)
511: {
512: PetscInt n;
513: PetscMPIInt size;
514: Mat_SeqKAIJ *seqkaij = (Mat_SeqKAIJ *)A->data;
516: PetscFunctionBegin;
517: PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
518: if (size == 1) {
519: PetscCall(MatSetSizes(A, seqkaij->p * seqkaij->AIJ->rmap->n, seqkaij->q * seqkaij->AIJ->cmap->n, seqkaij->p * seqkaij->AIJ->rmap->N, seqkaij->q * seqkaij->AIJ->cmap->N));
520: PetscCall(PetscLayoutSetBlockSize(A->rmap, seqkaij->p));
521: PetscCall(PetscLayoutSetBlockSize(A->cmap, seqkaij->q));
522: PetscCall(PetscLayoutSetUp(A->rmap));
523: PetscCall(PetscLayoutSetUp(A->cmap));
524: } else {
525: Mat_MPIKAIJ *a;
526: Mat_MPIAIJ *mpiaij;
527: IS from, to;
528: Vec gvec;
530: a = (Mat_MPIKAIJ *)A->data;
531: mpiaij = (Mat_MPIAIJ *)a->A->data;
532: PetscCall(MatSetSizes(A, a->p * a->A->rmap->n, a->q * a->A->cmap->n, a->p * a->A->rmap->N, a->q * a->A->cmap->N));
533: PetscCall(PetscLayoutSetBlockSize(A->rmap, seqkaij->p));
534: PetscCall(PetscLayoutSetBlockSize(A->cmap, seqkaij->q));
535: PetscCall(PetscLayoutSetUp(A->rmap));
536: PetscCall(PetscLayoutSetUp(A->cmap));
538: PetscCall(MatKAIJ_build_AIJ_OAIJ(A));
540: PetscCall(VecGetSize(mpiaij->lvec, &n));
541: PetscCall(VecCreate(PETSC_COMM_SELF, &a->w));
542: PetscCall(VecSetSizes(a->w, n * a->q, n * a->q));
543: PetscCall(VecSetBlockSize(a->w, a->q));
544: PetscCall(VecSetType(a->w, VECSEQ));
546: /* create two temporary Index sets for build scatter gather */
547: PetscCall(ISCreateBlock(PetscObjectComm((PetscObject)a->A), a->q, n, mpiaij->garray, PETSC_COPY_VALUES, &from));
548: PetscCall(ISCreateStride(PETSC_COMM_SELF, n * a->q, 0, 1, &to));
550: /* create temporary global vector to generate scatter context */
551: PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)a->A), a->q, a->q * a->A->cmap->n, a->q * a->A->cmap->N, NULL, &gvec));
553: /* generate the scatter context */
554: PetscCall(VecScatterCreate(gvec, from, a->w, to, &a->ctx));
556: PetscCall(ISDestroy(&from));
557: PetscCall(ISDestroy(&to));
558: PetscCall(VecDestroy(&gvec));
559: }
561: A->assembled = PETSC_TRUE;
562: PetscFunctionReturn(PETSC_SUCCESS);
563: }
565: static PetscErrorCode MatView_KAIJ(Mat A, PetscViewer viewer)
566: {
567: PetscViewerFormat format;
568: Mat_SeqKAIJ *a = (Mat_SeqKAIJ *)A->data;
569: Mat B;
570: PetscInt i;
571: PetscBool ismpikaij;
573: PetscFunctionBegin;
574: PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIKAIJ, &ismpikaij));
575: PetscCall(PetscViewerGetFormat(viewer, &format));
576: if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL || format == PETSC_VIEWER_ASCII_IMPL) {
577: PetscCall(PetscViewerASCIIPrintf(viewer, "S and T have %" PetscInt_FMT " rows and %" PetscInt_FMT " columns\n", a->p, a->q));
579: /* Print appropriate details for S. */
580: if (!a->S) {
581: PetscCall(PetscViewerASCIIPrintf(viewer, "S is NULL\n"));
582: } else if (format == PETSC_VIEWER_ASCII_IMPL) {
583: PetscCall(PetscViewerASCIIPrintf(viewer, "Entries of S are "));
584: for (i = 0; i < (a->p * a->q); i++) {
585: #if defined(PETSC_USE_COMPLEX)
586: PetscCall(PetscViewerASCIIPrintf(viewer, "%18.16e %18.16e ", (double)PetscRealPart(a->S[i]), (double)PetscImaginaryPart(a->S[i])));
587: #else
588: PetscCall(PetscViewerASCIIPrintf(viewer, "%18.16e ", (double)PetscRealPart(a->S[i])));
589: #endif
590: }
591: PetscCall(PetscViewerASCIIPrintf(viewer, "\n"));
592: }
594: /* Print appropriate details for T. */
595: if (a->isTI) {
596: PetscCall(PetscViewerASCIIPrintf(viewer, "T is the identity matrix\n"));
597: } else if (!a->T) {
598: PetscCall(PetscViewerASCIIPrintf(viewer, "T is NULL\n"));
599: } else if (format == PETSC_VIEWER_ASCII_IMPL) {
600: PetscCall(PetscViewerASCIIPrintf(viewer, "Entries of T are "));
601: for (i = 0; i < (a->p * a->q); i++) {
602: #if defined(PETSC_USE_COMPLEX)
603: PetscCall(PetscViewerASCIIPrintf(viewer, "%18.16e %18.16e ", (double)PetscRealPart(a->T[i]), (double)PetscImaginaryPart(a->T[i])));
604: #else
605: PetscCall(PetscViewerASCIIPrintf(viewer, "%18.16e ", (double)PetscRealPart(a->T[i])));
606: #endif
607: }
608: PetscCall(PetscViewerASCIIPrintf(viewer, "\n"));
609: }
611: /* Now print details for the AIJ matrix, using the AIJ viewer. */
612: PetscCall(PetscViewerASCIIPrintf(viewer, "Now viewing the associated AIJ matrix:\n"));
613: if (ismpikaij) {
614: Mat_MPIKAIJ *b = (Mat_MPIKAIJ *)A->data;
615: PetscCall(MatView(b->A, viewer));
616: } else {
617: PetscCall(MatView(a->AIJ, viewer));
618: }
620: } else {
621: /* For all other matrix viewer output formats, simply convert to an AIJ matrix and call MatView() on that. */
622: PetscCall(MatConvert(A, MATAIJ, MAT_INITIAL_MATRIX, &B));
623: PetscCall(MatView(B, viewer));
624: PetscCall(MatDestroy(&B));
625: }
626: PetscFunctionReturn(PETSC_SUCCESS);
627: }
629: static PetscErrorCode MatDestroy_MPIKAIJ(Mat A)
630: {
631: Mat_MPIKAIJ *b = (Mat_MPIKAIJ *)A->data;
633: PetscFunctionBegin;
634: PetscCall(MatDestroy(&b->AIJ));
635: PetscCall(MatDestroy(&b->OAIJ));
636: PetscCall(MatDestroy(&b->A));
637: PetscCall(VecScatterDestroy(&b->ctx));
638: PetscCall(VecDestroy(&b->w));
639: PetscCall(PetscFree(b->S));
640: PetscCall(PetscFree(b->T));
641: PetscCall(PetscFree(b->ibdiag));
642: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatGetDiagonalBlock_C", NULL));
643: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_mpikaij_mpiaij_C", NULL));
644: PetscCall(PetscFree(A->data));
645: PetscFunctionReturn(PETSC_SUCCESS);
646: }
648: /* zz = yy + Axx */
649: static PetscErrorCode MatMultAdd_SeqKAIJ(Mat A, Vec xx, Vec yy, Vec zz)
650: {
651: Mat_SeqKAIJ *b = (Mat_SeqKAIJ *)A->data;
652: Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data;
653: const PetscScalar *s = b->S, *t = b->T;
654: const PetscScalar *x, *v, *bx;
655: PetscScalar *y, *sums;
656: const PetscInt m = b->AIJ->rmap->n, *idx, *ii;
657: PetscInt n, i, jrow, j, l, p = b->p, q = b->q, k;
659: PetscFunctionBegin;
660: if (!yy) {
661: PetscCall(VecSet(zz, 0.0));
662: } else {
663: PetscCall(VecCopy(yy, zz));
664: }
665: if ((!s) && (!t) && (!b->isTI)) PetscFunctionReturn(PETSC_SUCCESS);
667: PetscCall(VecGetArrayRead(xx, &x));
668: PetscCall(VecGetArray(zz, &y));
669: idx = a->j;
670: v = a->a;
671: ii = a->i;
673: if (b->isTI) {
674: for (i = 0; i < m; i++) {
675: jrow = ii[i];
676: n = ii[i + 1] - jrow;
677: sums = y + p * i;
678: for (j = 0; j < n; j++) {
679: for (k = 0; k < p; k++) sums[k] += v[jrow + j] * x[q * idx[jrow + j] + k];
680: }
681: }
682: PetscCall(PetscLogFlops(3.0 * (a->nz) * p));
683: } else if (t) {
684: for (i = 0; i < m; i++) {
685: jrow = ii[i];
686: n = ii[i + 1] - jrow;
687: sums = y + p * i;
688: for (j = 0; j < n; j++) {
689: for (k = 0; k < p; k++) {
690: for (l = 0; l < q; l++) sums[k] += v[jrow + j] * t[k + l * p] * x[q * idx[jrow + j] + l];
691: }
692: }
693: }
694: /* The flop count below assumes that v[jrow+j] is hoisted out (which an optimizing compiler is likely to do),
695: * and also that T part is hoisted outside this loop (in exchange for temporary storage) as (A \otimes I) (I \otimes T),
696: * so that this multiply doesn't have to be redone for each matrix entry, but just once per column. The latter
697: * transformation is much less likely to be applied, but we nonetheless count the minimum flops required. */
698: PetscCall(PetscLogFlops((2.0 * p * q - p) * m + 2.0 * p * a->nz));
699: }
700: if (s) {
701: for (i = 0; i < m; i++) {
702: sums = y + p * i;
703: bx = x + q * i;
704: if (i < b->AIJ->cmap->n) {
705: for (j = 0; j < q; j++) {
706: for (k = 0; k < p; k++) sums[k] += s[k + j * p] * bx[j];
707: }
708: }
709: }
710: PetscCall(PetscLogFlops(2.0 * m * p * q));
711: }
713: PetscCall(VecRestoreArrayRead(xx, &x));
714: PetscCall(VecRestoreArray(zz, &y));
715: PetscFunctionReturn(PETSC_SUCCESS);
716: }
718: static PetscErrorCode MatMult_SeqKAIJ(Mat A, Vec xx, Vec yy)
719: {
720: PetscFunctionBegin;
721: PetscCall(MatMultAdd_SeqKAIJ(A, xx, NULL, yy));
722: PetscFunctionReturn(PETSC_SUCCESS);
723: }
725: #include <petsc/private/kernels/blockinvert.h>
727: static PetscErrorCode MatInvertBlockDiagonal_SeqKAIJ(Mat A, const PetscScalar **values)
728: {
729: Mat_SeqKAIJ *b = (Mat_SeqKAIJ *)A->data;
730: Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data;
731: const PetscScalar *S = b->S;
732: const PetscScalar *T = b->T;
733: const PetscScalar *v = a->a;
734: const PetscInt p = b->p, q = b->q, m = b->AIJ->rmap->n, *idx = a->j, *ii = a->i;
735: PetscInt i, j, *v_pivots, dof, dof2;
736: PetscScalar *diag, aval, *v_work;
738: PetscFunctionBegin;
739: PetscCheck(p == q, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MATKAIJ: Block size must be square to calculate inverse.");
740: PetscCheck(S || T || b->isTI, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MATKAIJ: Cannot invert a zero matrix.");
742: dof = p;
743: dof2 = dof * dof;
745: if (b->ibdiagvalid) {
746: if (values) *values = b->ibdiag;
747: PetscFunctionReturn(PETSC_SUCCESS);
748: }
749: if (!b->ibdiag) PetscCall(PetscMalloc1(dof2 * m, &b->ibdiag));
750: if (values) *values = b->ibdiag;
751: diag = b->ibdiag;
753: PetscCall(PetscMalloc2(dof, &v_work, dof, &v_pivots));
754: for (i = 0; i < m; i++) {
755: if (S) {
756: PetscCall(PetscMemcpy(diag, S, dof2 * sizeof(PetscScalar)));
757: } else {
758: PetscCall(PetscMemzero(diag, dof2 * sizeof(PetscScalar)));
759: }
760: if (b->isTI) {
761: aval = 0;
762: for (j = ii[i]; j < ii[i + 1]; j++)
763: if (idx[j] == i) aval = v[j];
764: for (j = 0; j < dof; j++) diag[j + dof * j] += aval;
765: } else if (T) {
766: aval = 0;
767: for (j = ii[i]; j < ii[i + 1]; j++)
768: if (idx[j] == i) aval = v[j];
769: for (j = 0; j < dof2; j++) diag[j] += aval * T[j];
770: }
771: PetscCall(PetscKernel_A_gets_inverse_A(dof, diag, v_pivots, v_work, PETSC_FALSE, NULL));
772: diag += dof2;
773: }
774: PetscCall(PetscFree2(v_work, v_pivots));
776: b->ibdiagvalid = PETSC_TRUE;
777: PetscFunctionReturn(PETSC_SUCCESS);
778: }
780: static PetscErrorCode MatGetDiagonalBlock_MPIKAIJ(Mat A, Mat *B)
781: {
782: Mat_MPIKAIJ *kaij = (Mat_MPIKAIJ *)A->data;
784: PetscFunctionBegin;
785: *B = kaij->AIJ;
786: PetscFunctionReturn(PETSC_SUCCESS);
787: }
789: static PetscErrorCode MatConvert_KAIJ_AIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
790: {
791: Mat_SeqKAIJ *a = (Mat_SeqKAIJ *)A->data;
792: Mat AIJ, OAIJ, B;
793: PetscInt *d_nnz, *o_nnz = NULL, nz, i, j, m, d;
794: const PetscInt p = a->p, q = a->q;
795: PetscBool ismpikaij, missing;
797: PetscFunctionBegin;
798: if (reuse != MAT_REUSE_MATRIX) {
799: PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIKAIJ, &ismpikaij));
800: if (ismpikaij) {
801: Mat_MPIKAIJ *b = (Mat_MPIKAIJ *)A->data;
802: AIJ = ((Mat_SeqKAIJ *)b->AIJ->data)->AIJ;
803: OAIJ = ((Mat_SeqKAIJ *)b->OAIJ->data)->AIJ;
804: } else {
805: AIJ = a->AIJ;
806: OAIJ = NULL;
807: }
808: PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
809: PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
810: PetscCall(MatSetType(B, MATAIJ));
811: PetscCall(MatGetSize(AIJ, &m, NULL));
812: PetscCall(MatMissingDiagonal(AIJ, &missing, &d)); /* assumption that all successive rows will have a missing diagonal */
813: if (!missing || !a->S) d = m;
814: PetscCall(PetscMalloc1(m * p, &d_nnz));
815: for (i = 0; i < m; ++i) {
816: PetscCall(MatGetRow_SeqAIJ(AIJ, i, &nz, NULL, NULL));
817: for (j = 0; j < p; ++j) d_nnz[i * p + j] = nz * q + (i >= d) * q;
818: PetscCall(MatRestoreRow_SeqAIJ(AIJ, i, &nz, NULL, NULL));
819: }
820: if (OAIJ) {
821: PetscCall(PetscMalloc1(m * p, &o_nnz));
822: for (i = 0; i < m; ++i) {
823: PetscCall(MatGetRow_SeqAIJ(OAIJ, i, &nz, NULL, NULL));
824: for (j = 0; j < p; ++j) o_nnz[i * p + j] = nz * q;
825: PetscCall(MatRestoreRow_SeqAIJ(OAIJ, i, &nz, NULL, NULL));
826: }
827: PetscCall(MatMPIAIJSetPreallocation(B, 0, d_nnz, 0, o_nnz));
828: } else {
829: PetscCall(MatSeqAIJSetPreallocation(B, 0, d_nnz));
830: }
831: PetscCall(PetscFree(d_nnz));
832: PetscCall(PetscFree(o_nnz));
833: } else B = *newmat;
834: PetscCall(MatConvert_Basic(A, newtype, MAT_REUSE_MATRIX, &B));
835: if (reuse == MAT_INPLACE_MATRIX) {
836: PetscCall(MatHeaderReplace(A, &B));
837: } else *newmat = B;
838: PetscFunctionReturn(PETSC_SUCCESS);
839: }
841: static PetscErrorCode MatSOR_SeqKAIJ(Mat A, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx)
842: {
843: Mat_SeqKAIJ *kaij = (Mat_SeqKAIJ *)A->data;
844: Mat_SeqAIJ *a = (Mat_SeqAIJ *)kaij->AIJ->data;
845: const PetscScalar *aa = a->a, *T = kaij->T, *v;
846: const PetscInt m = kaij->AIJ->rmap->n, *ai = a->i, *aj = a->j, p = kaij->p, q = kaij->q, *diag, *vi;
847: const PetscScalar *b, *xb, *idiag;
848: PetscScalar *x, *work, *workt, *w, *y, *arr, *t, *arrt;
849: PetscInt i, j, k, i2, bs, bs2, nz;
851: PetscFunctionBegin;
852: its = its * lits;
853: PetscCheck(!(flag & SOR_EISENSTAT), PETSC_COMM_SELF, PETSC_ERR_SUP, "No support yet for Eisenstat");
854: PetscCheck(its > 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Relaxation requires global its %" PetscInt_FMT " and local its %" PetscInt_FMT " both positive", its, lits);
855: PetscCheck(!fshift, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for diagonal shift");
856: PetscCheck(!(flag & SOR_APPLY_UPPER) && !(flag & SOR_APPLY_LOWER), PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for applying upper or lower triangular parts");
857: PetscCheck(p == q, PETSC_COMM_SELF, PETSC_ERR_SUP, "MatSOR for KAIJ: No support for non-square dense blocks");
858: bs = p;
859: bs2 = bs * bs;
861: if (!m) PetscFunctionReturn(PETSC_SUCCESS);
863: if (!kaij->ibdiagvalid) PetscCall(MatInvertBlockDiagonal_SeqKAIJ(A, NULL));
864: idiag = kaij->ibdiag;
865: diag = a->diag;
867: if (!kaij->sor.setup) {
868: PetscCall(PetscMalloc5(bs, &kaij->sor.w, bs, &kaij->sor.y, m * bs, &kaij->sor.work, m * bs, &kaij->sor.t, m * bs2, &kaij->sor.arr));
869: kaij->sor.setup = PETSC_TRUE;
870: }
871: y = kaij->sor.y;
872: w = kaij->sor.w;
873: work = kaij->sor.work;
874: t = kaij->sor.t;
875: arr = kaij->sor.arr;
877: PetscCall(VecGetArray(xx, &x));
878: PetscCall(VecGetArrayRead(bb, &b));
880: if (flag & SOR_ZERO_INITIAL_GUESS) {
881: if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
882: PetscKernel_w_gets_Ar_times_v(bs, bs, b, idiag, x); /* x[0:bs] <- D^{-1} b[0:bs] */
883: PetscCall(PetscMemcpy(t, b, bs * sizeof(PetscScalar)));
884: i2 = bs;
885: idiag += bs2;
886: for (i = 1; i < m; i++) {
887: v = aa + ai[i];
888: vi = aj + ai[i];
889: nz = diag[i] - ai[i];
891: if (T) { /* b - T (Arow * x) */
892: PetscCall(PetscMemzero(w, bs * sizeof(PetscScalar)));
893: for (j = 0; j < nz; j++) {
894: for (k = 0; k < bs; k++) w[k] -= v[j] * x[vi[j] * bs + k];
895: }
896: PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs, w, T, &t[i2]);
897: for (k = 0; k < bs; k++) t[i2 + k] += b[i2 + k];
898: } else if (kaij->isTI) {
899: PetscCall(PetscMemcpy(t + i2, b + i2, bs * sizeof(PetscScalar)));
900: for (j = 0; j < nz; j++) {
901: for (k = 0; k < bs; k++) t[i2 + k] -= v[j] * x[vi[j] * bs + k];
902: }
903: } else {
904: PetscCall(PetscMemcpy(t + i2, b + i2, bs * sizeof(PetscScalar)));
905: }
907: PetscKernel_w_gets_Ar_times_v(bs, bs, t + i2, idiag, y);
908: for (j = 0; j < bs; j++) x[i2 + j] = omega * y[j];
910: idiag += bs2;
911: i2 += bs;
912: }
913: /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
914: PetscCall(PetscLogFlops(1.0 * bs2 * a->nz));
915: xb = t;
916: } else xb = b;
917: if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
918: idiag = kaij->ibdiag + bs2 * (m - 1);
919: i2 = bs * (m - 1);
920: PetscCall(PetscMemcpy(w, xb + i2, bs * sizeof(PetscScalar)));
921: PetscKernel_w_gets_Ar_times_v(bs, bs, w, idiag, x + i2);
922: i2 -= bs;
923: idiag -= bs2;
924: for (i = m - 2; i >= 0; i--) {
925: v = aa + diag[i] + 1;
926: vi = aj + diag[i] + 1;
927: nz = ai[i + 1] - diag[i] - 1;
929: if (T) { /* FIXME: This branch untested */
930: PetscCall(PetscMemcpy(w, xb + i2, bs * sizeof(PetscScalar)));
931: /* copy all rows of x that are needed into contiguous space */
932: workt = work;
933: for (j = 0; j < nz; j++) {
934: PetscCall(PetscMemcpy(workt, x + bs * (*vi++), bs * sizeof(PetscScalar)));
935: workt += bs;
936: }
937: arrt = arr;
938: for (j = 0; j < nz; j++) {
939: PetscCall(PetscMemcpy(arrt, T, bs2 * sizeof(PetscScalar)));
940: for (k = 0; k < bs2; k++) arrt[k] *= v[j];
941: arrt += bs2;
942: }
943: PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work);
944: } else if (kaij->isTI) {
945: PetscCall(PetscMemcpy(w, t + i2, bs * sizeof(PetscScalar)));
946: for (j = 0; j < nz; j++) {
947: for (k = 0; k < bs; k++) w[k] -= v[j] * x[vi[j] * bs + k];
948: }
949: }
951: PetscKernel_w_gets_Ar_times_v(bs, bs, w, idiag, y); /* RHS incorrect for omega != 1.0 */
952: for (j = 0; j < bs; j++) x[i2 + j] = (1.0 - omega) * x[i2 + j] + omega * y[j];
954: idiag -= bs2;
955: i2 -= bs;
956: }
957: PetscCall(PetscLogFlops(1.0 * bs2 * (a->nz)));
958: }
959: its--;
960: }
961: while (its--) { /* FIXME: This branch not updated */
962: if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
963: i2 = 0;
964: idiag = kaij->ibdiag;
965: for (i = 0; i < m; i++) {
966: PetscCall(PetscMemcpy(w, b + i2, bs * sizeof(PetscScalar)));
968: v = aa + ai[i];
969: vi = aj + ai[i];
970: nz = diag[i] - ai[i];
971: workt = work;
972: for (j = 0; j < nz; j++) {
973: PetscCall(PetscMemcpy(workt, x + bs * (*vi++), bs * sizeof(PetscScalar)));
974: workt += bs;
975: }
976: arrt = arr;
977: if (T) {
978: for (j = 0; j < nz; j++) {
979: PetscCall(PetscMemcpy(arrt, T, bs2 * sizeof(PetscScalar)));
980: for (k = 0; k < bs2; k++) arrt[k] *= v[j];
981: arrt += bs2;
982: }
983: PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work);
984: } else if (kaij->isTI) {
985: for (j = 0; j < nz; j++) {
986: PetscCall(PetscMemzero(arrt, bs2 * sizeof(PetscScalar)));
987: for (k = 0; k < bs; k++) arrt[k + bs * k] = v[j];
988: arrt += bs2;
989: }
990: PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work);
991: }
992: PetscCall(PetscMemcpy(t + i2, w, bs * sizeof(PetscScalar)));
994: v = aa + diag[i] + 1;
995: vi = aj + diag[i] + 1;
996: nz = ai[i + 1] - diag[i] - 1;
997: workt = work;
998: for (j = 0; j < nz; j++) {
999: PetscCall(PetscMemcpy(workt, x + bs * (*vi++), bs * sizeof(PetscScalar)));
1000: workt += bs;
1001: }
1002: arrt = arr;
1003: if (T) {
1004: for (j = 0; j < nz; j++) {
1005: PetscCall(PetscMemcpy(arrt, T, bs2 * sizeof(PetscScalar)));
1006: for (k = 0; k < bs2; k++) arrt[k] *= v[j];
1007: arrt += bs2;
1008: }
1009: PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work);
1010: } else if (kaij->isTI) {
1011: for (j = 0; j < nz; j++) {
1012: PetscCall(PetscMemzero(arrt, bs2 * sizeof(PetscScalar)));
1013: for (k = 0; k < bs; k++) arrt[k + bs * k] = v[j];
1014: arrt += bs2;
1015: }
1016: PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work);
1017: }
1019: PetscKernel_w_gets_Ar_times_v(bs, bs, w, idiag, y);
1020: for (j = 0; j < bs; j++) *(x + i2 + j) = (1.0 - omega) * *(x + i2 + j) + omega * *(y + j);
1022: idiag += bs2;
1023: i2 += bs;
1024: }
1025: xb = t;
1026: } else xb = b;
1027: if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
1028: idiag = kaij->ibdiag + bs2 * (m - 1);
1029: i2 = bs * (m - 1);
1030: if (xb == b) {
1031: for (i = m - 1; i >= 0; i--) {
1032: PetscCall(PetscMemcpy(w, b + i2, bs * sizeof(PetscScalar)));
1034: v = aa + ai[i];
1035: vi = aj + ai[i];
1036: nz = diag[i] - ai[i];
1037: workt = work;
1038: for (j = 0; j < nz; j++) {
1039: PetscCall(PetscMemcpy(workt, x + bs * (*vi++), bs * sizeof(PetscScalar)));
1040: workt += bs;
1041: }
1042: arrt = arr;
1043: if (T) {
1044: for (j = 0; j < nz; j++) {
1045: PetscCall(PetscMemcpy(arrt, T, bs2 * sizeof(PetscScalar)));
1046: for (k = 0; k < bs2; k++) arrt[k] *= v[j];
1047: arrt += bs2;
1048: }
1049: PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work);
1050: } else if (kaij->isTI) {
1051: for (j = 0; j < nz; j++) {
1052: PetscCall(PetscMemzero(arrt, bs2 * sizeof(PetscScalar)));
1053: for (k = 0; k < bs; k++) arrt[k + bs * k] = v[j];
1054: arrt += bs2;
1055: }
1056: PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work);
1057: }
1059: v = aa + diag[i] + 1;
1060: vi = aj + diag[i] + 1;
1061: nz = ai[i + 1] - diag[i] - 1;
1062: workt = work;
1063: for (j = 0; j < nz; j++) {
1064: PetscCall(PetscMemcpy(workt, x + bs * (*vi++), bs * sizeof(PetscScalar)));
1065: workt += bs;
1066: }
1067: arrt = arr;
1068: if (T) {
1069: for (j = 0; j < nz; j++) {
1070: PetscCall(PetscMemcpy(arrt, T, bs2 * sizeof(PetscScalar)));
1071: for (k = 0; k < bs2; k++) arrt[k] *= v[j];
1072: arrt += bs2;
1073: }
1074: PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work);
1075: } else if (kaij->isTI) {
1076: for (j = 0; j < nz; j++) {
1077: PetscCall(PetscMemzero(arrt, bs2 * sizeof(PetscScalar)));
1078: for (k = 0; k < bs; k++) arrt[k + bs * k] = v[j];
1079: arrt += bs2;
1080: }
1081: PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work);
1082: }
1084: PetscKernel_w_gets_Ar_times_v(bs, bs, w, idiag, y);
1085: for (j = 0; j < bs; j++) *(x + i2 + j) = (1.0 - omega) * *(x + i2 + j) + omega * *(y + j);
1086: }
1087: } else {
1088: for (i = m - 1; i >= 0; i--) {
1089: PetscCall(PetscMemcpy(w, xb + i2, bs * sizeof(PetscScalar)));
1090: v = aa + diag[i] + 1;
1091: vi = aj + diag[i] + 1;
1092: nz = ai[i + 1] - diag[i] - 1;
1093: workt = work;
1094: for (j = 0; j < nz; j++) {
1095: PetscCall(PetscMemcpy(workt, x + bs * (*vi++), bs * sizeof(PetscScalar)));
1096: workt += bs;
1097: }
1098: arrt = arr;
1099: if (T) {
1100: for (j = 0; j < nz; j++) {
1101: PetscCall(PetscMemcpy(arrt, T, bs2 * sizeof(PetscScalar)));
1102: for (k = 0; k < bs2; k++) arrt[k] *= v[j];
1103: arrt += bs2;
1104: }
1105: PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work);
1106: } else if (kaij->isTI) {
1107: for (j = 0; j < nz; j++) {
1108: PetscCall(PetscMemzero(arrt, bs2 * sizeof(PetscScalar)));
1109: for (k = 0; k < bs; k++) arrt[k + bs * k] = v[j];
1110: arrt += bs2;
1111: }
1112: PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work);
1113: }
1114: PetscKernel_w_gets_Ar_times_v(bs, bs, w, idiag, y);
1115: for (j = 0; j < bs; j++) *(x + i2 + j) = (1.0 - omega) * *(x + i2 + j) + omega * *(y + j);
1116: }
1117: }
1118: PetscCall(PetscLogFlops(1.0 * bs2 * (a->nz)));
1119: }
1120: }
1122: PetscCall(VecRestoreArray(xx, &x));
1123: PetscCall(VecRestoreArrayRead(bb, &b));
1124: PetscFunctionReturn(PETSC_SUCCESS);
1125: }
1127: /*===================================================================================*/
1129: static PetscErrorCode MatMultAdd_MPIKAIJ(Mat A, Vec xx, Vec yy, Vec zz)
1130: {
1131: Mat_MPIKAIJ *b = (Mat_MPIKAIJ *)A->data;
1133: PetscFunctionBegin;
1134: if (!yy) {
1135: PetscCall(VecSet(zz, 0.0));
1136: } else {
1137: PetscCall(VecCopy(yy, zz));
1138: }
1139: PetscCall(MatKAIJ_build_AIJ_OAIJ(A)); /* Ensure b->AIJ and b->OAIJ are up to date. */
1140: /* start the scatter */
1141: PetscCall(VecScatterBegin(b->ctx, xx, b->w, INSERT_VALUES, SCATTER_FORWARD));
1142: PetscCall((*b->AIJ->ops->multadd)(b->AIJ, xx, zz, zz));
1143: PetscCall(VecScatterEnd(b->ctx, xx, b->w, INSERT_VALUES, SCATTER_FORWARD));
1144: PetscCall((*b->OAIJ->ops->multadd)(b->OAIJ, b->w, zz, zz));
1145: PetscFunctionReturn(PETSC_SUCCESS);
1146: }
1148: static PetscErrorCode MatMult_MPIKAIJ(Mat A, Vec xx, Vec yy)
1149: {
1150: PetscFunctionBegin;
1151: PetscCall(MatMultAdd_MPIKAIJ(A, xx, NULL, yy));
1152: PetscFunctionReturn(PETSC_SUCCESS);
1153: }
1155: static PetscErrorCode MatInvertBlockDiagonal_MPIKAIJ(Mat A, const PetscScalar **values)
1156: {
1157: Mat_MPIKAIJ *b = (Mat_MPIKAIJ *)A->data;
1159: PetscFunctionBegin;
1160: PetscCall(MatKAIJ_build_AIJ_OAIJ(A)); /* Ensure b->AIJ is up to date. */
1161: PetscCall((*b->AIJ->ops->invertblockdiagonal)(b->AIJ, values));
1162: PetscFunctionReturn(PETSC_SUCCESS);
1163: }
1165: static PetscErrorCode MatGetRow_SeqKAIJ(Mat A, PetscInt row, PetscInt *ncols, PetscInt **cols, PetscScalar **values)
1166: {
1167: Mat_SeqKAIJ *b = (Mat_SeqKAIJ *)A->data;
1168: PetscBool diag = PETSC_FALSE;
1169: PetscInt nzaij, nz, *colsaij, *idx, i, j, p = b->p, q = b->q, r = row / p, s = row % p, c;
1170: PetscScalar *vaij, *v, *S = b->S, *T = b->T;
1172: PetscFunctionBegin;
1173: PetscCheck(!b->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Already active");
1174: b->getrowactive = PETSC_TRUE;
1175: PetscCheck(row >= 0 && row < A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " out of range", row);
1177: if ((!S) && (!T) && (!b->isTI)) {
1178: if (ncols) *ncols = 0;
1179: if (cols) *cols = NULL;
1180: if (values) *values = NULL;
1181: PetscFunctionReturn(PETSC_SUCCESS);
1182: }
1184: if (T || b->isTI) {
1185: PetscCall(MatGetRow_SeqAIJ(b->AIJ, r, &nzaij, &colsaij, &vaij));
1186: c = nzaij;
1187: for (i = 0; i < nzaij; i++) {
1188: /* check if this row contains a diagonal entry */
1189: if (colsaij[i] == r) {
1190: diag = PETSC_TRUE;
1191: c = i;
1192: }
1193: }
1194: } else nzaij = c = 0;
1196: /* calculate size of row */
1197: nz = 0;
1198: if (S) nz += q;
1199: if (T || b->isTI) nz += (diag && S ? (nzaij - 1) * q : nzaij * q);
1201: if (cols || values) {
1202: PetscCall(PetscMalloc2(nz, &idx, nz, &v));
1203: for (i = 0; i < q; i++) {
1204: /* We need to initialize the v[i] to zero to handle the case in which T is NULL (not the identity matrix). */
1205: v[i] = 0.0;
1206: }
1207: if (b->isTI) {
1208: for (i = 0; i < nzaij; i++) {
1209: for (j = 0; j < q; j++) {
1210: idx[i * q + j] = colsaij[i] * q + j;
1211: v[i * q + j] = (j == s ? vaij[i] : 0);
1212: }
1213: }
1214: } else if (T) {
1215: for (i = 0; i < nzaij; i++) {
1216: for (j = 0; j < q; j++) {
1217: idx[i * q + j] = colsaij[i] * q + j;
1218: v[i * q + j] = vaij[i] * T[s + j * p];
1219: }
1220: }
1221: }
1222: if (S) {
1223: for (j = 0; j < q; j++) {
1224: idx[c * q + j] = r * q + j;
1225: v[c * q + j] += S[s + j * p];
1226: }
1227: }
1228: }
1230: if (ncols) *ncols = nz;
1231: if (cols) *cols = idx;
1232: if (values) *values = v;
1233: PetscFunctionReturn(PETSC_SUCCESS);
1234: }
1236: static PetscErrorCode MatRestoreRow_SeqKAIJ(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
1237: {
1238: PetscFunctionBegin;
1239: PetscCall(PetscFree2(*idx, *v));
1240: ((Mat_SeqKAIJ *)A->data)->getrowactive = PETSC_FALSE;
1241: PetscFunctionReturn(PETSC_SUCCESS);
1242: }
1244: static PetscErrorCode MatGetRow_MPIKAIJ(Mat A, PetscInt row, PetscInt *ncols, PetscInt **cols, PetscScalar **values)
1245: {
1246: Mat_MPIKAIJ *b = (Mat_MPIKAIJ *)A->data;
1247: Mat AIJ = b->A;
1248: PetscBool diag = PETSC_FALSE;
1249: Mat MatAIJ, MatOAIJ;
1250: const PetscInt rstart = A->rmap->rstart, rend = A->rmap->rend, p = b->p, q = b->q, *garray;
1251: PetscInt nz, *idx, ncolsaij = 0, ncolsoaij = 0, *colsaij, *colsoaij, r, s, c, i, j, lrow;
1252: PetscScalar *v, *vals, *ovals, *S = b->S, *T = b->T;
1254: PetscFunctionBegin;
1255: PetscCall(MatKAIJ_build_AIJ_OAIJ(A)); /* Ensure b->AIJ and b->OAIJ are up to date. */
1256: MatAIJ = ((Mat_SeqKAIJ *)b->AIJ->data)->AIJ;
1257: MatOAIJ = ((Mat_SeqKAIJ *)b->OAIJ->data)->AIJ;
1258: PetscCheck(!b->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Already active");
1259: b->getrowactive = PETSC_TRUE;
1260: PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Only local rows");
1261: lrow = row - rstart;
1263: if ((!S) && (!T) && (!b->isTI)) {
1264: if (ncols) *ncols = 0;
1265: if (cols) *cols = NULL;
1266: if (values) *values = NULL;
1267: PetscFunctionReturn(PETSC_SUCCESS);
1268: }
1270: r = lrow / p;
1271: s = lrow % p;
1273: if (T || b->isTI) {
1274: PetscCall(MatMPIAIJGetSeqAIJ(AIJ, NULL, NULL, &garray));
1275: PetscCall(MatGetRow_SeqAIJ(MatAIJ, lrow / p, &ncolsaij, &colsaij, &vals));
1276: PetscCall(MatGetRow_SeqAIJ(MatOAIJ, lrow / p, &ncolsoaij, &colsoaij, &ovals));
1278: c = ncolsaij + ncolsoaij;
1279: for (i = 0; i < ncolsaij; i++) {
1280: /* check if this row contains a diagonal entry */
1281: if (colsaij[i] == r) {
1282: diag = PETSC_TRUE;
1283: c = i;
1284: }
1285: }
1286: } else c = 0;
1288: /* calculate size of row */
1289: nz = 0;
1290: if (S) nz += q;
1291: if (T || b->isTI) nz += (diag && S ? (ncolsaij + ncolsoaij - 1) * q : (ncolsaij + ncolsoaij) * q);
1293: if (cols || values) {
1294: PetscCall(PetscMalloc2(nz, &idx, nz, &v));
1295: for (i = 0; i < q; i++) {
1296: /* We need to initialize the v[i] to zero to handle the case in which T is NULL (not the identity matrix). */
1297: v[i] = 0.0;
1298: }
1299: if (b->isTI) {
1300: for (i = 0; i < ncolsaij; i++) {
1301: for (j = 0; j < q; j++) {
1302: idx[i * q + j] = (colsaij[i] + rstart / p) * q + j;
1303: v[i * q + j] = (j == s ? vals[i] : 0.0);
1304: }
1305: }
1306: for (i = 0; i < ncolsoaij; i++) {
1307: for (j = 0; j < q; j++) {
1308: idx[(i + ncolsaij) * q + j] = garray[colsoaij[i]] * q + j;
1309: v[(i + ncolsaij) * q + j] = (j == s ? ovals[i] : 0.0);
1310: }
1311: }
1312: } else if (T) {
1313: for (i = 0; i < ncolsaij; i++) {
1314: for (j = 0; j < q; j++) {
1315: idx[i * q + j] = (colsaij[i] + rstart / p) * q + j;
1316: v[i * q + j] = vals[i] * T[s + j * p];
1317: }
1318: }
1319: for (i = 0; i < ncolsoaij; i++) {
1320: for (j = 0; j < q; j++) {
1321: idx[(i + ncolsaij) * q + j] = garray[colsoaij[i]] * q + j;
1322: v[(i + ncolsaij) * q + j] = ovals[i] * T[s + j * p];
1323: }
1324: }
1325: }
1326: if (S) {
1327: for (j = 0; j < q; j++) {
1328: idx[c * q + j] = (r + rstart / p) * q + j;
1329: v[c * q + j] += S[s + j * p];
1330: }
1331: }
1332: }
1334: if (ncols) *ncols = nz;
1335: if (cols) *cols = idx;
1336: if (values) *values = v;
1337: PetscFunctionReturn(PETSC_SUCCESS);
1338: }
1340: static PetscErrorCode MatRestoreRow_MPIKAIJ(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
1341: {
1342: PetscFunctionBegin;
1343: PetscCall(PetscFree2(*idx, *v));
1344: ((Mat_SeqKAIJ *)A->data)->getrowactive = PETSC_FALSE;
1345: PetscFunctionReturn(PETSC_SUCCESS);
1346: }
1348: static PetscErrorCode MatCreateSubMatrix_KAIJ(Mat mat, IS isrow, IS iscol, MatReuse cll, Mat *newmat)
1349: {
1350: Mat A;
1352: PetscFunctionBegin;
1353: PetscCall(MatConvert(mat, MATAIJ, MAT_INITIAL_MATRIX, &A));
1354: PetscCall(MatCreateSubMatrix(A, isrow, iscol, cll, newmat));
1355: PetscCall(MatDestroy(&A));
1356: PetscFunctionReturn(PETSC_SUCCESS);
1357: }
1359: /*@C
1360: MatCreateKAIJ - Creates a matrix of type `MATKAIJ`.
1362: Collective
1364: Input Parameters:
1365: + A - the `MATAIJ` matrix
1366: . p - number of rows in `S` and `T`
1367: . q - number of columns in `S` and `T`
1368: . S - the `S` matrix (can be `NULL`), stored as a `PetscScalar` array (column-major)
1369: - T - the `T` matrix (can be `NULL`), stored as a `PetscScalar` array (column-major)
1371: Output Parameter:
1372: . kaij - the new `MATKAIJ` matrix
1374: Level: advanced
1376: Notes:
1377: The created matrix is of the following form\:
1378: .vb
1379: [I \otimes S + A \otimes T]
1380: .ve
1381: where
1382: .vb
1383: S is a dense (p \times q) matrix
1384: T is a dense (p \times q) matrix
1385: A is a `MATAIJ` (n \times n) matrix
1386: I is the identity matrix
1387: .ve
1388: The resulting matrix is (np \times nq)
1390: `S` and `T` are always stored independently on all processes as `PetscScalar` arrays in
1391: column-major format.
1393: This function increases the reference count on the `MATAIJ` matrix, so the user is free to destroy the matrix if it is not needed.
1395: Changes to the entries of the `MATAIJ` matrix will immediately affect the `MATKAIJ` matrix.
1397: Developer Notes:
1398: In the `MATMPIKAIJ` case, the internal 'AIJ' and 'OAIJ' sequential KAIJ matrices are kept up to date by tracking the object state
1399: of the AIJ matrix 'A' that describes the blockwise action of the `MATMPIKAIJ` matrix and, if the object state has changed, lazily
1400: rebuilding 'AIJ' and 'OAIJ' just before executing operations with the `MATMPIKAIJ` matrix. If new types of operations are added,
1401: routines implementing those must also ensure these are rebuilt when needed (by calling the internal MatKAIJ_build_AIJ_OAIJ() routine).
1403: .seealso: [](ch_matrices), `Mat`, `MatKAIJSetAIJ()`, `MatKAIJSetS()`, `MatKAIJSetT()`, `MatKAIJGetAIJ()`, `MatKAIJGetS()`, `MatKAIJGetT()`, `MATKAIJ`
1404: @*/
1405: PetscErrorCode MatCreateKAIJ(Mat A, PetscInt p, PetscInt q, const PetscScalar S[], const PetscScalar T[], Mat *kaij)
1406: {
1407: PetscFunctionBegin;
1408: PetscCall(MatCreate(PetscObjectComm((PetscObject)A), kaij));
1409: PetscCall(MatSetType(*kaij, MATKAIJ));
1410: PetscCall(MatKAIJSetAIJ(*kaij, A));
1411: PetscCall(MatKAIJSetS(*kaij, p, q, S));
1412: PetscCall(MatKAIJSetT(*kaij, p, q, T));
1413: PetscCall(MatSetUp(*kaij));
1414: PetscFunctionReturn(PETSC_SUCCESS);
1415: }
1417: /*MC
1418: MATKAIJ - MATKAIJ = "kaij" - A matrix type to be used to evaluate matrices of form
1419: [I \otimes S + A \otimes T],
1420: where
1421: .vb
1422: S is a dense (p \times q) matrix,
1423: T is a dense (p \times q) matrix,
1424: A is an AIJ (n \times n) matrix,
1425: and I is the identity matrix.
1426: .ve
1427: The resulting matrix is (np \times nq).
1429: S and T are always stored independently on all processes as `PetscScalar` arrays in column-major format.
1431: Level: advanced
1433: Note:
1434: A linear system with multiple right-hand sides, AX = B, can be expressed in the KAIJ-friendly form of (A \otimes I) x = b,
1435: where x and b are column vectors containing the row-major representations of X and B.
1437: .seealso: [](ch_matrices), `Mat`, `MatKAIJSetAIJ()`, `MatKAIJSetS()`, `MatKAIJSetT()`, `MatKAIJGetAIJ()`, `MatKAIJGetS()`, `MatKAIJGetT()`, `MatCreateKAIJ()`
1438: M*/
1440: PETSC_EXTERN PetscErrorCode MatCreate_KAIJ(Mat A)
1441: {
1442: Mat_MPIKAIJ *b;
1443: PetscMPIInt size;
1445: PetscFunctionBegin;
1446: PetscCall(PetscNew(&b));
1447: A->data = (void *)b;
1449: PetscCall(PetscMemzero(A->ops, sizeof(struct _MatOps)));
1451: b->w = NULL;
1452: PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
1453: if (size == 1) {
1454: PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATSEQKAIJ));
1455: A->ops->destroy = MatDestroy_SeqKAIJ;
1456: A->ops->mult = MatMult_SeqKAIJ;
1457: A->ops->multadd = MatMultAdd_SeqKAIJ;
1458: A->ops->invertblockdiagonal = MatInvertBlockDiagonal_SeqKAIJ;
1459: A->ops->getrow = MatGetRow_SeqKAIJ;
1460: A->ops->restorerow = MatRestoreRow_SeqKAIJ;
1461: A->ops->sor = MatSOR_SeqKAIJ;
1462: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqkaij_seqaij_C", MatConvert_KAIJ_AIJ));
1463: } else {
1464: PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATMPIKAIJ));
1465: A->ops->destroy = MatDestroy_MPIKAIJ;
1466: A->ops->mult = MatMult_MPIKAIJ;
1467: A->ops->multadd = MatMultAdd_MPIKAIJ;
1468: A->ops->invertblockdiagonal = MatInvertBlockDiagonal_MPIKAIJ;
1469: A->ops->getrow = MatGetRow_MPIKAIJ;
1470: A->ops->restorerow = MatRestoreRow_MPIKAIJ;
1471: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatGetDiagonalBlock_C", MatGetDiagonalBlock_MPIKAIJ));
1472: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_mpikaij_mpiaij_C", MatConvert_KAIJ_AIJ));
1473: }
1474: A->ops->setup = MatSetUp_KAIJ;
1475: A->ops->view = MatView_KAIJ;
1476: A->ops->createsubmatrix = MatCreateSubMatrix_KAIJ;
1477: PetscFunctionReturn(PETSC_SUCCESS);
1478: }