Actual source code: kaij.c

  1: /*
  2:   Defines the basic matrix operations for the KAIJ  matrix storage format.
  3:   This format is used to evaluate matrices of the form:

  5:     [I \otimes S + A \otimes T]

  7:   where
  8:     S is a dense (p \times q) matrix
  9:     T is a dense (p \times q) matrix
 10:     A is an AIJ  (n \times n) matrix
 11:     I is the identity matrix

 13:   The resulting matrix is (np \times nq)

 15:   We provide:
 16:      MatMult()
 17:      MatMultAdd()
 18:      MatInvertBlockDiagonal()
 19:   and
 20:      MatCreateKAIJ(Mat,PetscInt,PetscInt,const PetscScalar[],const PetscScalar[],Mat*)

 22:   This single directory handles both the sequential and parallel codes
 23: */

 25: #include <../src/mat/impls/kaij/kaij.h>
 26: #include <../src/mat/utils/freespace.h>
 27: #include <petsc/private/vecimpl.h>

 29: /*@C
 30:   MatKAIJGetAIJ - Get the `MATAIJ` matrix describing the blockwise action of the `MATKAIJ` matrix

 32:   Not Collective, but if the `MATKAIJ` matrix is parallel, the `MATAIJ` matrix is also parallel

 34:   Input Parameter:
 35: . A - the `MATKAIJ` matrix

 37:   Output Parameter:
 38: . B - the `MATAIJ` matrix

 40:   Level: advanced

 42:   Note:
 43:   The reference count on the `MATAIJ` matrix is not increased so you should not destroy it.

 45: .seealso: [](ch_matrices), `Mat`, `MatCreateKAIJ()`, `MATKAIJ`, `MATAIJ`
 46: @*/
 47: PetscErrorCode MatKAIJGetAIJ(Mat A, Mat *B)
 48: {
 49:   PetscBool ismpikaij, isseqkaij;

 51:   PetscFunctionBegin;
 52:   PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIKAIJ, &ismpikaij));
 53:   PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQKAIJ, &isseqkaij));
 54:   if (ismpikaij) {
 55:     Mat_MPIKAIJ *b = (Mat_MPIKAIJ *)A->data;

 57:     *B = b->A;
 58:   } else if (isseqkaij) {
 59:     Mat_SeqKAIJ *b = (Mat_SeqKAIJ *)A->data;

 61:     *B = b->AIJ;
 62:   } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Matrix passed in is not of type KAIJ");
 63:   PetscFunctionReturn(PETSC_SUCCESS);
 64: }

 66: /*@C
 67:   MatKAIJGetS - Get the `S` matrix describing the shift action of the `MATKAIJ` matrix

 69:   Not Collective; the entire `S` is stored and returned independently on all processes.

 71:   Input Parameter:
 72: . A - the `MATKAIJ` matrix

 74:   Output Parameters:
 75: + m - the number of rows in `S`
 76: . n - the number of columns in `S`
 77: - S - the S matrix, in form of a scalar array in column-major format

 79:   Level: advanced

 81:   Note:
 82:   All output parameters are optional (pass `NULL` if not desired)

 84: .seealso: [](ch_matrices), `Mat`, `MATKAIJ`, `MatCreateKAIJ()`, `MatGetBlockSizes()`
 85: @*/
 86: PetscErrorCode MatKAIJGetS(Mat A, PetscInt *m, PetscInt *n, PetscScalar **S)
 87: {
 88:   Mat_SeqKAIJ *b = (Mat_SeqKAIJ *)A->data;
 89:   PetscFunctionBegin;
 90:   if (m) *m = b->p;
 91:   if (n) *n = b->q;
 92:   if (S) *S = b->S;
 93:   PetscFunctionReturn(PETSC_SUCCESS);
 94: }

 96: /*@C
 97:   MatKAIJGetSRead - Get a read-only pointer to the `S` matrix describing the shift action of the `MATKAIJ` matrix

 99:   Not Collective; the entire `S` is stored and returned independently on all processes.

101:   Input Parameter:
102: . A - the `MATKAIJ` matrix

104:   Output Parameters:
105: + m - the number of rows in `S`
106: . n - the number of columns in `S`
107: - S - the S matrix, in form of a scalar array in column-major format

109:   Level: advanced

111:   Note:
112:   All output parameters are optional (pass `NULL` if not desired)

114: .seealso: [](ch_matrices), `Mat`, `MATKAIJ`, `MatCreateKAIJ()`, `MatGetBlockSizes()`
115: @*/
116: PetscErrorCode MatKAIJGetSRead(Mat A, PetscInt *m, PetscInt *n, const PetscScalar **S)
117: {
118:   Mat_SeqKAIJ *b = (Mat_SeqKAIJ *)A->data;
119:   PetscFunctionBegin;
120:   if (m) *m = b->p;
121:   if (n) *n = b->q;
122:   if (S) *S = b->S;
123:   PetscFunctionReturn(PETSC_SUCCESS);
124: }

126: /*@C
127:   MatKAIJRestoreS - Restore array obtained with `MatKAIJGetS()`

129:   Not Collective

131:   Input Parameters:
132: + A - the `MATKAIJ` matrix
133: - S - location of pointer to array obtained with `MatKAIJGetS()`

135:   Level: advanced

137:   Note:
138:   This routine zeros the array pointer to prevent accidental reuse after it has been restored.
139:   If `NULL` is passed, it will not attempt to zero the array pointer.

141: .seealso: [](ch_matrices), `Mat`, `MATKAIJ`, `MatKAIJGetS()`, `MatKAIJGetSRead()`, `MatKAIJRestoreSRead()`
142: @*/
143: PetscErrorCode MatKAIJRestoreS(Mat A, PetscScalar **S)
144: {
145:   PetscFunctionBegin;
146:   if (S) *S = NULL;
147:   PetscCall(PetscObjectStateIncrease((PetscObject)A));
148:   PetscFunctionReturn(PETSC_SUCCESS);
149: }

151: /*@C
152:   MatKAIJRestoreSRead - Restore array obtained with `MatKAIJGetSRead()`

154:   Not Collective

156:   Input Parameters:
157: + A - the `MATKAIJ` matrix
158: - S - location of pointer to array obtained with `MatKAIJGetS()`

160:   Level: advanced

162:   Note:
163:   This routine zeros the array pointer to prevent accidental reuse after it has been restored.
164:   If `NULL` is passed, it will not attempt to zero the array pointer.

166: .seealso: [](ch_matrices), `Mat`, `MATKAIJ`, `MatKAIJGetS()`, `MatKAIJGetSRead()`
167: @*/
168: PetscErrorCode MatKAIJRestoreSRead(Mat A, const PetscScalar **S)
169: {
170:   PetscFunctionBegin;
171:   if (S) *S = NULL;
172:   PetscFunctionReturn(PETSC_SUCCESS);
173: }

175: /*@C
176:   MatKAIJGetT - Get the transformation matrix `T` associated with the `MATKAIJ` matrix

178:   Not Collective; the entire `T` is stored and returned independently on all processes

180:   Input Parameter:
181: . A - the `MATKAIJ` matrix

183:   Output Parameters:
184: + m - the number of rows in `T`
185: . n - the number of columns in `T`
186: - T - the T matrix, in form of a scalar array in column-major format

188:   Level: advanced

190:   Note:
191:   All output parameters are optional (pass `NULL` if not desired)

193: .seealso: [](ch_matrices), `Mat`, `MATKAIJ`, `MatCreateKAIJ()`, `MatGetBlockSizes()`
194: @*/
195: PetscErrorCode MatKAIJGetT(Mat A, PetscInt *m, PetscInt *n, PetscScalar **T)
196: {
197:   Mat_SeqKAIJ *b = (Mat_SeqKAIJ *)A->data;
198:   PetscFunctionBegin;
199:   if (m) *m = b->p;
200:   if (n) *n = b->q;
201:   if (T) *T = b->T;
202:   PetscFunctionReturn(PETSC_SUCCESS);
203: }

205: /*@C
206:   MatKAIJGetTRead - Get a read-only pointer to the transformation matrix `T` associated with the `MATKAIJ` matrix

208:   Not Collective; the entire `T` is stored and returned independently on all processes

210:   Input Parameter:
211: . A - the `MATKAIJ` matrix

213:   Output Parameters:
214: + m - the number of rows in `T`
215: . n - the number of columns in `T`
216: - T - the T matrix, in form of a scalar array in column-major format

218:   Level: advanced

220:   Note:
221:   All output parameters are optional (pass `NULL` if not desired)

223: .seealso: [](ch_matrices), `Mat`, `MATKAIJ`, `MatCreateKAIJ()`, `MatGetBlockSizes()`
224: @*/
225: PetscErrorCode MatKAIJGetTRead(Mat A, PetscInt *m, PetscInt *n, const PetscScalar **T)
226: {
227:   Mat_SeqKAIJ *b = (Mat_SeqKAIJ *)A->data;
228:   PetscFunctionBegin;
229:   if (m) *m = b->p;
230:   if (n) *n = b->q;
231:   if (T) *T = b->T;
232:   PetscFunctionReturn(PETSC_SUCCESS);
233: }

235: /*@C
236:   MatKAIJRestoreT - Restore array obtained with `MatKAIJGetT()`

238:   Not Collective

240:   Input Parameters:
241: + A - the `MATKAIJ` matrix
242: - T - location of pointer to array obtained with `MatKAIJGetS()`

244:   Level: advanced

246:   Note:
247:   This routine zeros the array pointer to prevent accidental reuse after it has been restored.
248:   If `NULL` is passed, it will not attempt to zero the array pointer.

250: .seealso: [](ch_matrices), `Mat`, `MATKAIJ`, `MatKAIJGetT()`, `MatKAIJGetTRead()`, `MatKAIJRestoreTRead()`
251: @*/
252: PetscErrorCode MatKAIJRestoreT(Mat A, PetscScalar **T)
253: {
254:   PetscFunctionBegin;
255:   if (T) *T = NULL;
256:   PetscCall(PetscObjectStateIncrease((PetscObject)A));
257:   PetscFunctionReturn(PETSC_SUCCESS);
258: }

260: /*@C
261:   MatKAIJRestoreTRead - Restore array obtained with `MatKAIJGetTRead()`

263:   Not Collective

265:   Input Parameters:
266: + A - the `MATKAIJ` matrix
267: - T - location of pointer to array obtained with `MatKAIJGetS()`

269:   Level: advanced

271:   Note:
272:   This routine zeros the array pointer to prevent accidental reuse after it has been restored.
273:   If `NULL` is passed, it will not attempt to zero the array pointer.

275: .seealso: [](ch_matrices), `Mat`, `MATKAIJ`, `MatKAIJGetT()`, `MatKAIJGetTRead()`
276: @*/
277: PetscErrorCode MatKAIJRestoreTRead(Mat A, const PetscScalar **T)
278: {
279:   PetscFunctionBegin;
280:   if (T) *T = NULL;
281:   PetscFunctionReturn(PETSC_SUCCESS);
282: }

284: /*@
285:   MatKAIJSetAIJ - Set the `MATAIJ` matrix describing the blockwise action of the `MATKAIJ` matrix

287:   Logically Collective; if the `MATAIJ` matrix is parallel, the `MATKAIJ` matrix is also parallel

289:   Input Parameters:
290: + A - the `MATKAIJ` matrix
291: - B - the `MATAIJ` matrix

293:   Level: advanced

295:   Notes:
296:   This function increases the reference count on the `MATAIJ` matrix, so the user is free to destroy the matrix if it is not needed.

298:   Changes to the entries of the `MATAIJ` matrix will immediately affect the `MATKAIJ` matrix.

300: .seealso: [](ch_matrices), `Mat`, `MATKAIJ`, `MatKAIJGetAIJ()`, `MatKAIJSetS()`, `MatKAIJSetT()`
301: @*/
302: PetscErrorCode MatKAIJSetAIJ(Mat A, Mat B)
303: {
304:   PetscMPIInt size;
305:   PetscBool   flg;

307:   PetscFunctionBegin;
308:   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
309:   if (size == 1) {
310:     PetscCall(PetscObjectTypeCompare((PetscObject)B, MATSEQAIJ, &flg));
311:     PetscCheck(flg, PetscObjectComm((PetscObject)B), PETSC_ERR_SUP, "MatKAIJSetAIJ() with MATSEQKAIJ does not support %s as the AIJ mat", ((PetscObject)B)->type_name);
312:     Mat_SeqKAIJ *a = (Mat_SeqKAIJ *)A->data;
313:     a->AIJ         = B;
314:   } else {
315:     Mat_MPIKAIJ *a = (Mat_MPIKAIJ *)A->data;
316:     a->A           = B;
317:   }
318:   PetscCall(PetscObjectReference((PetscObject)B));
319:   PetscFunctionReturn(PETSC_SUCCESS);
320: }

322: /*@C
323:   MatKAIJSetS - Set the `S` matrix describing the shift action of the `MATKAIJ` matrix

325:   Logically Collective; the entire `S` is stored independently on all processes.

327:   Input Parameters:
328: + A - the `MATKAIJ` matrix
329: . p - the number of rows in `S`
330: . q - the number of columns in `S`
331: - S - the S matrix, in form of a scalar array in column-major format

333:   Level: advanced

335:   Notes:
336:   The dimensions `p` and `q` must match those of the transformation matrix `T` associated with the `MATKAIJ` matrix.

338:   The `S` matrix is copied, so the user can destroy this array.

340: .seealso: [](ch_matrices), `Mat`, `MATKAIJ`, `MatKAIJGetS()`, `MatKAIJSetT()`, `MatKAIJSetAIJ()`
341: @*/
342: PetscErrorCode MatKAIJSetS(Mat A, PetscInt p, PetscInt q, const PetscScalar S[])
343: {
344:   Mat_SeqKAIJ *a = (Mat_SeqKAIJ *)A->data;

346:   PetscFunctionBegin;
347:   PetscCall(PetscFree(a->S));
348:   if (S) {
349:     PetscCall(PetscMalloc1(p * q, &a->S));
350:     PetscCall(PetscMemcpy(a->S, S, p * q * sizeof(PetscScalar)));
351:   } else a->S = NULL;

353:   a->p = p;
354:   a->q = q;
355:   PetscFunctionReturn(PETSC_SUCCESS);
356: }

358: /*@C
359:   MatKAIJGetScaledIdentity - Check if both `S` and `T` are scaled identities.

361:   Logically Collective.

363:   Input Parameter:
364: . A - the `MATKAIJ` matrix

366:   Output Parameter:
367: . identity - the Boolean value

369:   Level: advanced

371: .seealso: [](ch_matrices), `Mat`, `MATKAIJ`, `MatKAIJGetS()`, `MatKAIJGetT()`
372: @*/
373: PetscErrorCode MatKAIJGetScaledIdentity(Mat A, PetscBool *identity)
374: {
375:   Mat_SeqKAIJ *a = (Mat_SeqKAIJ *)A->data;
376:   PetscInt     i, j;

378:   PetscFunctionBegin;
379:   if (a->p != a->q) {
380:     *identity = PETSC_FALSE;
381:     PetscFunctionReturn(PETSC_SUCCESS);
382:   } else *identity = PETSC_TRUE;
383:   if (!a->isTI || a->S) {
384:     for (i = 0; i < a->p && *identity; i++) {
385:       for (j = 0; j < a->p && *identity; j++) {
386:         if (i != j) {
387:           if (a->S && PetscAbsScalar(a->S[i + j * a->p]) > PETSC_SMALL) *identity = PETSC_FALSE;
388:           if (a->T && PetscAbsScalar(a->T[i + j * a->p]) > PETSC_SMALL) *identity = PETSC_FALSE;
389:         } else {
390:           if (a->S && PetscAbsScalar(a->S[i * (a->p + 1)] - a->S[0]) > PETSC_SMALL) *identity = PETSC_FALSE;
391:           if (a->T && PetscAbsScalar(a->T[i * (a->p + 1)] - a->T[0]) > PETSC_SMALL) *identity = PETSC_FALSE;
392:         }
393:       }
394:     }
395:   }
396:   PetscFunctionReturn(PETSC_SUCCESS);
397: }

399: /*@C
400:   MatKAIJSetT - Set the transformation matrix `T` associated with the `MATKAIJ` matrix

402:   Logically Collective; the entire `T` is stored independently on all processes.

404:   Input Parameters:
405: + A - the `MATKAIJ` matrix
406: . p - the number of rows in `S`
407: . q - the number of columns in `S`
408: - T - the `T` matrix, in form of a scalar array in column-major format

410:   Level: advanced

412:   Notes:
413:   The dimensions `p` and `q` must match those of the shift matrix `S` associated with the `MATKAIJ` matrix.

415:   The `T` matrix is copied, so the user can destroy this array.

417: .seealso: [](ch_matrices), `Mat`, `MATKAIJ`, `MatKAIJGetT()`, `MatKAIJSetS()`, `MatKAIJSetAIJ()`
418: @*/
419: PetscErrorCode MatKAIJSetT(Mat A, PetscInt p, PetscInt q, const PetscScalar T[])
420: {
421:   PetscInt     i, j;
422:   Mat_SeqKAIJ *a    = (Mat_SeqKAIJ *)A->data;
423:   PetscBool    isTI = PETSC_FALSE;

425:   PetscFunctionBegin;
426:   /* check if T is an identity matrix */
427:   if (T && (p == q)) {
428:     isTI = PETSC_TRUE;
429:     for (i = 0; i < p; i++) {
430:       for (j = 0; j < q; j++) {
431:         if (i == j) {
432:           /* diagonal term must be 1 */
433:           if (T[i + j * p] != 1.0) isTI = PETSC_FALSE;
434:         } else {
435:           /* off-diagonal term must be 0 */
436:           if (T[i + j * p] != 0.0) isTI = PETSC_FALSE;
437:         }
438:       }
439:     }
440:   }
441:   a->isTI = isTI;

443:   PetscCall(PetscFree(a->T));
444:   if (T && (!isTI)) {
445:     PetscCall(PetscMalloc1(p * q, &a->T));
446:     PetscCall(PetscMemcpy(a->T, T, p * q * sizeof(PetscScalar)));
447:   } else a->T = NULL;

449:   a->p = p;
450:   a->q = q;
451:   PetscFunctionReturn(PETSC_SUCCESS);
452: }

454: static PetscErrorCode MatDestroy_SeqKAIJ(Mat A)
455: {
456:   Mat_SeqKAIJ *b = (Mat_SeqKAIJ *)A->data;

458:   PetscFunctionBegin;
459:   PetscCall(MatDestroy(&b->AIJ));
460:   PetscCall(PetscFree(b->S));
461:   PetscCall(PetscFree(b->T));
462:   PetscCall(PetscFree(b->ibdiag));
463:   PetscCall(PetscFree5(b->sor.w, b->sor.y, b->sor.work, b->sor.t, b->sor.arr));
464:   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqkaij_seqaij_C", NULL));
465:   PetscCall(PetscFree(A->data));
466:   PetscFunctionReturn(PETSC_SUCCESS);
467: }

469: static PetscErrorCode MatKAIJ_build_AIJ_OAIJ(Mat A)
470: {
471:   Mat_MPIKAIJ     *a;
472:   Mat_MPIAIJ      *mpiaij;
473:   PetscScalar     *T;
474:   PetscInt         i, j;
475:   PetscObjectState state;

477:   PetscFunctionBegin;
478:   a      = (Mat_MPIKAIJ *)A->data;
479:   mpiaij = (Mat_MPIAIJ *)a->A->data;

481:   PetscCall(PetscObjectStateGet((PetscObject)a->A, &state));
482:   if (state == a->state) {
483:     /* The existing AIJ and KAIJ members are up-to-date, so simply exit. */
484:     PetscFunctionReturn(PETSC_SUCCESS);
485:   } else {
486:     PetscCall(MatDestroy(&a->AIJ));
487:     PetscCall(MatDestroy(&a->OAIJ));
488:     if (a->isTI) {
489:       /* If the transformation matrix associated with the parallel matrix A is the identity matrix, then a->T will be NULL.
490:        * In this case, if we pass a->T directly to the MatCreateKAIJ() calls to create the sequential submatrices, the routine will
491:        * not be able to tell that transformation matrix should be set to the identity; thus we create a temporary identity matrix
492:        * to pass in. */
493:       PetscCall(PetscMalloc1(a->p * a->q, &T));
494:       for (i = 0; i < a->p; i++) {
495:         for (j = 0; j < a->q; j++) {
496:           if (i == j) T[i + j * a->p] = 1.0;
497:           else T[i + j * a->p] = 0.0;
498:         }
499:       }
500:     } else T = a->T;
501:     PetscCall(MatCreateKAIJ(mpiaij->A, a->p, a->q, a->S, T, &a->AIJ));
502:     PetscCall(MatCreateKAIJ(mpiaij->B, a->p, a->q, NULL, T, &a->OAIJ));
503:     if (a->isTI) PetscCall(PetscFree(T));
504:     a->state = state;
505:   }

507:   PetscFunctionReturn(PETSC_SUCCESS);
508: }

510: static PetscErrorCode MatSetUp_KAIJ(Mat A)
511: {
512:   PetscInt     n;
513:   PetscMPIInt  size;
514:   Mat_SeqKAIJ *seqkaij = (Mat_SeqKAIJ *)A->data;

516:   PetscFunctionBegin;
517:   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
518:   if (size == 1) {
519:     PetscCall(MatSetSizes(A, seqkaij->p * seqkaij->AIJ->rmap->n, seqkaij->q * seqkaij->AIJ->cmap->n, seqkaij->p * seqkaij->AIJ->rmap->N, seqkaij->q * seqkaij->AIJ->cmap->N));
520:     PetscCall(PetscLayoutSetBlockSize(A->rmap, seqkaij->p));
521:     PetscCall(PetscLayoutSetBlockSize(A->cmap, seqkaij->q));
522:     PetscCall(PetscLayoutSetUp(A->rmap));
523:     PetscCall(PetscLayoutSetUp(A->cmap));
524:   } else {
525:     Mat_MPIKAIJ *a;
526:     Mat_MPIAIJ  *mpiaij;
527:     IS           from, to;
528:     Vec          gvec;

530:     a      = (Mat_MPIKAIJ *)A->data;
531:     mpiaij = (Mat_MPIAIJ *)a->A->data;
532:     PetscCall(MatSetSizes(A, a->p * a->A->rmap->n, a->q * a->A->cmap->n, a->p * a->A->rmap->N, a->q * a->A->cmap->N));
533:     PetscCall(PetscLayoutSetBlockSize(A->rmap, seqkaij->p));
534:     PetscCall(PetscLayoutSetBlockSize(A->cmap, seqkaij->q));
535:     PetscCall(PetscLayoutSetUp(A->rmap));
536:     PetscCall(PetscLayoutSetUp(A->cmap));

538:     PetscCall(MatKAIJ_build_AIJ_OAIJ(A));

540:     PetscCall(VecGetSize(mpiaij->lvec, &n));
541:     PetscCall(VecCreate(PETSC_COMM_SELF, &a->w));
542:     PetscCall(VecSetSizes(a->w, n * a->q, n * a->q));
543:     PetscCall(VecSetBlockSize(a->w, a->q));
544:     PetscCall(VecSetType(a->w, VECSEQ));

546:     /* create two temporary Index sets for build scatter gather */
547:     PetscCall(ISCreateBlock(PetscObjectComm((PetscObject)a->A), a->q, n, mpiaij->garray, PETSC_COPY_VALUES, &from));
548:     PetscCall(ISCreateStride(PETSC_COMM_SELF, n * a->q, 0, 1, &to));

550:     /* create temporary global vector to generate scatter context */
551:     PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)a->A), a->q, a->q * a->A->cmap->n, a->q * a->A->cmap->N, NULL, &gvec));

553:     /* generate the scatter context */
554:     PetscCall(VecScatterCreate(gvec, from, a->w, to, &a->ctx));

556:     PetscCall(ISDestroy(&from));
557:     PetscCall(ISDestroy(&to));
558:     PetscCall(VecDestroy(&gvec));
559:   }

561:   A->assembled = PETSC_TRUE;
562:   PetscFunctionReturn(PETSC_SUCCESS);
563: }

565: static PetscErrorCode MatView_KAIJ(Mat A, PetscViewer viewer)
566: {
567:   PetscViewerFormat format;
568:   Mat_SeqKAIJ      *a = (Mat_SeqKAIJ *)A->data;
569:   Mat               B;
570:   PetscInt          i;
571:   PetscBool         ismpikaij;

573:   PetscFunctionBegin;
574:   PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIKAIJ, &ismpikaij));
575:   PetscCall(PetscViewerGetFormat(viewer, &format));
576:   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL || format == PETSC_VIEWER_ASCII_IMPL) {
577:     PetscCall(PetscViewerASCIIPrintf(viewer, "S and T have %" PetscInt_FMT " rows and %" PetscInt_FMT " columns\n", a->p, a->q));

579:     /* Print appropriate details for S. */
580:     if (!a->S) {
581:       PetscCall(PetscViewerASCIIPrintf(viewer, "S is NULL\n"));
582:     } else if (format == PETSC_VIEWER_ASCII_IMPL) {
583:       PetscCall(PetscViewerASCIIPrintf(viewer, "Entries of S are "));
584:       for (i = 0; i < (a->p * a->q); i++) {
585: #if defined(PETSC_USE_COMPLEX)
586:         PetscCall(PetscViewerASCIIPrintf(viewer, "%18.16e %18.16e ", (double)PetscRealPart(a->S[i]), (double)PetscImaginaryPart(a->S[i])));
587: #else
588:         PetscCall(PetscViewerASCIIPrintf(viewer, "%18.16e ", (double)PetscRealPart(a->S[i])));
589: #endif
590:       }
591:       PetscCall(PetscViewerASCIIPrintf(viewer, "\n"));
592:     }

594:     /* Print appropriate details for T. */
595:     if (a->isTI) {
596:       PetscCall(PetscViewerASCIIPrintf(viewer, "T is the identity matrix\n"));
597:     } else if (!a->T) {
598:       PetscCall(PetscViewerASCIIPrintf(viewer, "T is NULL\n"));
599:     } else if (format == PETSC_VIEWER_ASCII_IMPL) {
600:       PetscCall(PetscViewerASCIIPrintf(viewer, "Entries of T are "));
601:       for (i = 0; i < (a->p * a->q); i++) {
602: #if defined(PETSC_USE_COMPLEX)
603:         PetscCall(PetscViewerASCIIPrintf(viewer, "%18.16e %18.16e ", (double)PetscRealPart(a->T[i]), (double)PetscImaginaryPart(a->T[i])));
604: #else
605:         PetscCall(PetscViewerASCIIPrintf(viewer, "%18.16e ", (double)PetscRealPart(a->T[i])));
606: #endif
607:       }
608:       PetscCall(PetscViewerASCIIPrintf(viewer, "\n"));
609:     }

611:     /* Now print details for the AIJ matrix, using the AIJ viewer. */
612:     PetscCall(PetscViewerASCIIPrintf(viewer, "Now viewing the associated AIJ matrix:\n"));
613:     if (ismpikaij) {
614:       Mat_MPIKAIJ *b = (Mat_MPIKAIJ *)A->data;
615:       PetscCall(MatView(b->A, viewer));
616:     } else {
617:       PetscCall(MatView(a->AIJ, viewer));
618:     }

620:   } else {
621:     /* For all other matrix viewer output formats, simply convert to an AIJ matrix and call MatView() on that. */
622:     PetscCall(MatConvert(A, MATAIJ, MAT_INITIAL_MATRIX, &B));
623:     PetscCall(MatView(B, viewer));
624:     PetscCall(MatDestroy(&B));
625:   }
626:   PetscFunctionReturn(PETSC_SUCCESS);
627: }

629: static PetscErrorCode MatDestroy_MPIKAIJ(Mat A)
630: {
631:   Mat_MPIKAIJ *b = (Mat_MPIKAIJ *)A->data;

633:   PetscFunctionBegin;
634:   PetscCall(MatDestroy(&b->AIJ));
635:   PetscCall(MatDestroy(&b->OAIJ));
636:   PetscCall(MatDestroy(&b->A));
637:   PetscCall(VecScatterDestroy(&b->ctx));
638:   PetscCall(VecDestroy(&b->w));
639:   PetscCall(PetscFree(b->S));
640:   PetscCall(PetscFree(b->T));
641:   PetscCall(PetscFree(b->ibdiag));
642:   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatGetDiagonalBlock_C", NULL));
643:   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_mpikaij_mpiaij_C", NULL));
644:   PetscCall(PetscFree(A->data));
645:   PetscFunctionReturn(PETSC_SUCCESS);
646: }

648: /* zz = yy + Axx */
649: static PetscErrorCode MatMultAdd_SeqKAIJ(Mat A, Vec xx, Vec yy, Vec zz)
650: {
651:   Mat_SeqKAIJ       *b = (Mat_SeqKAIJ *)A->data;
652:   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
653:   const PetscScalar *s = b->S, *t = b->T;
654:   const PetscScalar *x, *v, *bx;
655:   PetscScalar       *y, *sums;
656:   const PetscInt     m = b->AIJ->rmap->n, *idx, *ii;
657:   PetscInt           n, i, jrow, j, l, p = b->p, q = b->q, k;

659:   PetscFunctionBegin;
660:   if (!yy) {
661:     PetscCall(VecSet(zz, 0.0));
662:   } else {
663:     PetscCall(VecCopy(yy, zz));
664:   }
665:   if ((!s) && (!t) && (!b->isTI)) PetscFunctionReturn(PETSC_SUCCESS);

667:   PetscCall(VecGetArrayRead(xx, &x));
668:   PetscCall(VecGetArray(zz, &y));
669:   idx = a->j;
670:   v   = a->a;
671:   ii  = a->i;

673:   if (b->isTI) {
674:     for (i = 0; i < m; i++) {
675:       jrow = ii[i];
676:       n    = ii[i + 1] - jrow;
677:       sums = y + p * i;
678:       for (j = 0; j < n; j++) {
679:         for (k = 0; k < p; k++) sums[k] += v[jrow + j] * x[q * idx[jrow + j] + k];
680:       }
681:     }
682:     PetscCall(PetscLogFlops(3.0 * (a->nz) * p));
683:   } else if (t) {
684:     for (i = 0; i < m; i++) {
685:       jrow = ii[i];
686:       n    = ii[i + 1] - jrow;
687:       sums = y + p * i;
688:       for (j = 0; j < n; j++) {
689:         for (k = 0; k < p; k++) {
690:           for (l = 0; l < q; l++) sums[k] += v[jrow + j] * t[k + l * p] * x[q * idx[jrow + j] + l];
691:         }
692:       }
693:     }
694:     /* The flop count below assumes that v[jrow+j] is hoisted out (which an optimizing compiler is likely to do),
695:      * and also that T part is hoisted outside this loop (in exchange for temporary storage) as (A \otimes I) (I \otimes T),
696:      * so that this multiply doesn't have to be redone for each matrix entry, but just once per column. The latter
697:      * transformation is much less likely to be applied, but we nonetheless count the minimum flops required. */
698:     PetscCall(PetscLogFlops((2.0 * p * q - p) * m + 2.0 * p * a->nz));
699:   }
700:   if (s) {
701:     for (i = 0; i < m; i++) {
702:       sums = y + p * i;
703:       bx   = x + q * i;
704:       if (i < b->AIJ->cmap->n) {
705:         for (j = 0; j < q; j++) {
706:           for (k = 0; k < p; k++) sums[k] += s[k + j * p] * bx[j];
707:         }
708:       }
709:     }
710:     PetscCall(PetscLogFlops(2.0 * m * p * q));
711:   }

713:   PetscCall(VecRestoreArrayRead(xx, &x));
714:   PetscCall(VecRestoreArray(zz, &y));
715:   PetscFunctionReturn(PETSC_SUCCESS);
716: }

718: static PetscErrorCode MatMult_SeqKAIJ(Mat A, Vec xx, Vec yy)
719: {
720:   PetscFunctionBegin;
721:   PetscCall(MatMultAdd_SeqKAIJ(A, xx, NULL, yy));
722:   PetscFunctionReturn(PETSC_SUCCESS);
723: }

725: #include <petsc/private/kernels/blockinvert.h>

727: static PetscErrorCode MatInvertBlockDiagonal_SeqKAIJ(Mat A, const PetscScalar **values)
728: {
729:   Mat_SeqKAIJ       *b = (Mat_SeqKAIJ *)A->data;
730:   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
731:   const PetscScalar *S = b->S;
732:   const PetscScalar *T = b->T;
733:   const PetscScalar *v = a->a;
734:   const PetscInt     p = b->p, q = b->q, m = b->AIJ->rmap->n, *idx = a->j, *ii = a->i;
735:   PetscInt           i, j, *v_pivots, dof, dof2;
736:   PetscScalar       *diag, aval, *v_work;

738:   PetscFunctionBegin;
739:   PetscCheck(p == q, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MATKAIJ: Block size must be square to calculate inverse.");
740:   PetscCheck(S || T || b->isTI, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MATKAIJ: Cannot invert a zero matrix.");

742:   dof  = p;
743:   dof2 = dof * dof;

745:   if (b->ibdiagvalid) {
746:     if (values) *values = b->ibdiag;
747:     PetscFunctionReturn(PETSC_SUCCESS);
748:   }
749:   if (!b->ibdiag) PetscCall(PetscMalloc1(dof2 * m, &b->ibdiag));
750:   if (values) *values = b->ibdiag;
751:   diag = b->ibdiag;

753:   PetscCall(PetscMalloc2(dof, &v_work, dof, &v_pivots));
754:   for (i = 0; i < m; i++) {
755:     if (S) {
756:       PetscCall(PetscMemcpy(diag, S, dof2 * sizeof(PetscScalar)));
757:     } else {
758:       PetscCall(PetscMemzero(diag, dof2 * sizeof(PetscScalar)));
759:     }
760:     if (b->isTI) {
761:       aval = 0;
762:       for (j = ii[i]; j < ii[i + 1]; j++)
763:         if (idx[j] == i) aval = v[j];
764:       for (j = 0; j < dof; j++) diag[j + dof * j] += aval;
765:     } else if (T) {
766:       aval = 0;
767:       for (j = ii[i]; j < ii[i + 1]; j++)
768:         if (idx[j] == i) aval = v[j];
769:       for (j = 0; j < dof2; j++) diag[j] += aval * T[j];
770:     }
771:     PetscCall(PetscKernel_A_gets_inverse_A(dof, diag, v_pivots, v_work, PETSC_FALSE, NULL));
772:     diag += dof2;
773:   }
774:   PetscCall(PetscFree2(v_work, v_pivots));

776:   b->ibdiagvalid = PETSC_TRUE;
777:   PetscFunctionReturn(PETSC_SUCCESS);
778: }

780: static PetscErrorCode MatGetDiagonalBlock_MPIKAIJ(Mat A, Mat *B)
781: {
782:   Mat_MPIKAIJ *kaij = (Mat_MPIKAIJ *)A->data;

784:   PetscFunctionBegin;
785:   *B = kaij->AIJ;
786:   PetscFunctionReturn(PETSC_SUCCESS);
787: }

789: static PetscErrorCode MatConvert_KAIJ_AIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
790: {
791:   Mat_SeqKAIJ   *a = (Mat_SeqKAIJ *)A->data;
792:   Mat            AIJ, OAIJ, B;
793:   PetscInt      *d_nnz, *o_nnz = NULL, nz, i, j, m, d;
794:   const PetscInt p = a->p, q = a->q;
795:   PetscBool      ismpikaij, missing;

797:   PetscFunctionBegin;
798:   if (reuse != MAT_REUSE_MATRIX) {
799:     PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIKAIJ, &ismpikaij));
800:     if (ismpikaij) {
801:       Mat_MPIKAIJ *b = (Mat_MPIKAIJ *)A->data;
802:       AIJ            = ((Mat_SeqKAIJ *)b->AIJ->data)->AIJ;
803:       OAIJ           = ((Mat_SeqKAIJ *)b->OAIJ->data)->AIJ;
804:     } else {
805:       AIJ  = a->AIJ;
806:       OAIJ = NULL;
807:     }
808:     PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
809:     PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
810:     PetscCall(MatSetType(B, MATAIJ));
811:     PetscCall(MatGetSize(AIJ, &m, NULL));
812:     PetscCall(MatMissingDiagonal(AIJ, &missing, &d)); /* assumption that all successive rows will have a missing diagonal */
813:     if (!missing || !a->S) d = m;
814:     PetscCall(PetscMalloc1(m * p, &d_nnz));
815:     for (i = 0; i < m; ++i) {
816:       PetscCall(MatGetRow_SeqAIJ(AIJ, i, &nz, NULL, NULL));
817:       for (j = 0; j < p; ++j) d_nnz[i * p + j] = nz * q + (i >= d) * q;
818:       PetscCall(MatRestoreRow_SeqAIJ(AIJ, i, &nz, NULL, NULL));
819:     }
820:     if (OAIJ) {
821:       PetscCall(PetscMalloc1(m * p, &o_nnz));
822:       for (i = 0; i < m; ++i) {
823:         PetscCall(MatGetRow_SeqAIJ(OAIJ, i, &nz, NULL, NULL));
824:         for (j = 0; j < p; ++j) o_nnz[i * p + j] = nz * q;
825:         PetscCall(MatRestoreRow_SeqAIJ(OAIJ, i, &nz, NULL, NULL));
826:       }
827:       PetscCall(MatMPIAIJSetPreallocation(B, 0, d_nnz, 0, o_nnz));
828:     } else {
829:       PetscCall(MatSeqAIJSetPreallocation(B, 0, d_nnz));
830:     }
831:     PetscCall(PetscFree(d_nnz));
832:     PetscCall(PetscFree(o_nnz));
833:   } else B = *newmat;
834:   PetscCall(MatConvert_Basic(A, newtype, MAT_REUSE_MATRIX, &B));
835:   if (reuse == MAT_INPLACE_MATRIX) {
836:     PetscCall(MatHeaderReplace(A, &B));
837:   } else *newmat = B;
838:   PetscFunctionReturn(PETSC_SUCCESS);
839: }

841: static PetscErrorCode MatSOR_SeqKAIJ(Mat A, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx)
842: {
843:   Mat_SeqKAIJ       *kaij = (Mat_SeqKAIJ *)A->data;
844:   Mat_SeqAIJ        *a    = (Mat_SeqAIJ *)kaij->AIJ->data;
845:   const PetscScalar *aa = a->a, *T = kaij->T, *v;
846:   const PetscInt     m = kaij->AIJ->rmap->n, *ai = a->i, *aj = a->j, p = kaij->p, q = kaij->q, *diag, *vi;
847:   const PetscScalar *b, *xb, *idiag;
848:   PetscScalar       *x, *work, *workt, *w, *y, *arr, *t, *arrt;
849:   PetscInt           i, j, k, i2, bs, bs2, nz;

851:   PetscFunctionBegin;
852:   its = its * lits;
853:   PetscCheck(!(flag & SOR_EISENSTAT), PETSC_COMM_SELF, PETSC_ERR_SUP, "No support yet for Eisenstat");
854:   PetscCheck(its > 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Relaxation requires global its %" PetscInt_FMT " and local its %" PetscInt_FMT " both positive", its, lits);
855:   PetscCheck(!fshift, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for diagonal shift");
856:   PetscCheck(!(flag & SOR_APPLY_UPPER) && !(flag & SOR_APPLY_LOWER), PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for applying upper or lower triangular parts");
857:   PetscCheck(p == q, PETSC_COMM_SELF, PETSC_ERR_SUP, "MatSOR for KAIJ: No support for non-square dense blocks");
858:   bs  = p;
859:   bs2 = bs * bs;

861:   if (!m) PetscFunctionReturn(PETSC_SUCCESS);

863:   if (!kaij->ibdiagvalid) PetscCall(MatInvertBlockDiagonal_SeqKAIJ(A, NULL));
864:   idiag = kaij->ibdiag;
865:   diag  = a->diag;

867:   if (!kaij->sor.setup) {
868:     PetscCall(PetscMalloc5(bs, &kaij->sor.w, bs, &kaij->sor.y, m * bs, &kaij->sor.work, m * bs, &kaij->sor.t, m * bs2, &kaij->sor.arr));
869:     kaij->sor.setup = PETSC_TRUE;
870:   }
871:   y    = kaij->sor.y;
872:   w    = kaij->sor.w;
873:   work = kaij->sor.work;
874:   t    = kaij->sor.t;
875:   arr  = kaij->sor.arr;

877:   PetscCall(VecGetArray(xx, &x));
878:   PetscCall(VecGetArrayRead(bb, &b));

880:   if (flag & SOR_ZERO_INITIAL_GUESS) {
881:     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
882:       PetscKernel_w_gets_Ar_times_v(bs, bs, b, idiag, x); /* x[0:bs] <- D^{-1} b[0:bs] */
883:       PetscCall(PetscMemcpy(t, b, bs * sizeof(PetscScalar)));
884:       i2 = bs;
885:       idiag += bs2;
886:       for (i = 1; i < m; i++) {
887:         v  = aa + ai[i];
888:         vi = aj + ai[i];
889:         nz = diag[i] - ai[i];

891:         if (T) { /* b - T (Arow * x) */
892:           PetscCall(PetscMemzero(w, bs * sizeof(PetscScalar)));
893:           for (j = 0; j < nz; j++) {
894:             for (k = 0; k < bs; k++) w[k] -= v[j] * x[vi[j] * bs + k];
895:           }
896:           PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs, w, T, &t[i2]);
897:           for (k = 0; k < bs; k++) t[i2 + k] += b[i2 + k];
898:         } else if (kaij->isTI) {
899:           PetscCall(PetscMemcpy(t + i2, b + i2, bs * sizeof(PetscScalar)));
900:           for (j = 0; j < nz; j++) {
901:             for (k = 0; k < bs; k++) t[i2 + k] -= v[j] * x[vi[j] * bs + k];
902:           }
903:         } else {
904:           PetscCall(PetscMemcpy(t + i2, b + i2, bs * sizeof(PetscScalar)));
905:         }

907:         PetscKernel_w_gets_Ar_times_v(bs, bs, t + i2, idiag, y);
908:         for (j = 0; j < bs; j++) x[i2 + j] = omega * y[j];

910:         idiag += bs2;
911:         i2 += bs;
912:       }
913:       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
914:       PetscCall(PetscLogFlops(1.0 * bs2 * a->nz));
915:       xb = t;
916:     } else xb = b;
917:     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
918:       idiag = kaij->ibdiag + bs2 * (m - 1);
919:       i2    = bs * (m - 1);
920:       PetscCall(PetscMemcpy(w, xb + i2, bs * sizeof(PetscScalar)));
921:       PetscKernel_w_gets_Ar_times_v(bs, bs, w, idiag, x + i2);
922:       i2 -= bs;
923:       idiag -= bs2;
924:       for (i = m - 2; i >= 0; i--) {
925:         v  = aa + diag[i] + 1;
926:         vi = aj + diag[i] + 1;
927:         nz = ai[i + 1] - diag[i] - 1;

929:         if (T) { /* FIXME: This branch untested */
930:           PetscCall(PetscMemcpy(w, xb + i2, bs * sizeof(PetscScalar)));
931:           /* copy all rows of x that are needed into contiguous space */
932:           workt = work;
933:           for (j = 0; j < nz; j++) {
934:             PetscCall(PetscMemcpy(workt, x + bs * (*vi++), bs * sizeof(PetscScalar)));
935:             workt += bs;
936:           }
937:           arrt = arr;
938:           for (j = 0; j < nz; j++) {
939:             PetscCall(PetscMemcpy(arrt, T, bs2 * sizeof(PetscScalar)));
940:             for (k = 0; k < bs2; k++) arrt[k] *= v[j];
941:             arrt += bs2;
942:           }
943:           PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work);
944:         } else if (kaij->isTI) {
945:           PetscCall(PetscMemcpy(w, t + i2, bs * sizeof(PetscScalar)));
946:           for (j = 0; j < nz; j++) {
947:             for (k = 0; k < bs; k++) w[k] -= v[j] * x[vi[j] * bs + k];
948:           }
949:         }

951:         PetscKernel_w_gets_Ar_times_v(bs, bs, w, idiag, y); /* RHS incorrect for omega != 1.0 */
952:         for (j = 0; j < bs; j++) x[i2 + j] = (1.0 - omega) * x[i2 + j] + omega * y[j];

954:         idiag -= bs2;
955:         i2 -= bs;
956:       }
957:       PetscCall(PetscLogFlops(1.0 * bs2 * (a->nz)));
958:     }
959:     its--;
960:   }
961:   while (its--) { /* FIXME: This branch not updated */
962:     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
963:       i2    = 0;
964:       idiag = kaij->ibdiag;
965:       for (i = 0; i < m; i++) {
966:         PetscCall(PetscMemcpy(w, b + i2, bs * sizeof(PetscScalar)));

968:         v     = aa + ai[i];
969:         vi    = aj + ai[i];
970:         nz    = diag[i] - ai[i];
971:         workt = work;
972:         for (j = 0; j < nz; j++) {
973:           PetscCall(PetscMemcpy(workt, x + bs * (*vi++), bs * sizeof(PetscScalar)));
974:           workt += bs;
975:         }
976:         arrt = arr;
977:         if (T) {
978:           for (j = 0; j < nz; j++) {
979:             PetscCall(PetscMemcpy(arrt, T, bs2 * sizeof(PetscScalar)));
980:             for (k = 0; k < bs2; k++) arrt[k] *= v[j];
981:             arrt += bs2;
982:           }
983:           PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work);
984:         } else if (kaij->isTI) {
985:           for (j = 0; j < nz; j++) {
986:             PetscCall(PetscMemzero(arrt, bs2 * sizeof(PetscScalar)));
987:             for (k = 0; k < bs; k++) arrt[k + bs * k] = v[j];
988:             arrt += bs2;
989:           }
990:           PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work);
991:         }
992:         PetscCall(PetscMemcpy(t + i2, w, bs * sizeof(PetscScalar)));

994:         v     = aa + diag[i] + 1;
995:         vi    = aj + diag[i] + 1;
996:         nz    = ai[i + 1] - diag[i] - 1;
997:         workt = work;
998:         for (j = 0; j < nz; j++) {
999:           PetscCall(PetscMemcpy(workt, x + bs * (*vi++), bs * sizeof(PetscScalar)));
1000:           workt += bs;
1001:         }
1002:         arrt = arr;
1003:         if (T) {
1004:           for (j = 0; j < nz; j++) {
1005:             PetscCall(PetscMemcpy(arrt, T, bs2 * sizeof(PetscScalar)));
1006:             for (k = 0; k < bs2; k++) arrt[k] *= v[j];
1007:             arrt += bs2;
1008:           }
1009:           PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work);
1010:         } else if (kaij->isTI) {
1011:           for (j = 0; j < nz; j++) {
1012:             PetscCall(PetscMemzero(arrt, bs2 * sizeof(PetscScalar)));
1013:             for (k = 0; k < bs; k++) arrt[k + bs * k] = v[j];
1014:             arrt += bs2;
1015:           }
1016:           PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work);
1017:         }

1019:         PetscKernel_w_gets_Ar_times_v(bs, bs, w, idiag, y);
1020:         for (j = 0; j < bs; j++) *(x + i2 + j) = (1.0 - omega) * *(x + i2 + j) + omega * *(y + j);

1022:         idiag += bs2;
1023:         i2 += bs;
1024:       }
1025:       xb = t;
1026:     } else xb = b;
1027:     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
1028:       idiag = kaij->ibdiag + bs2 * (m - 1);
1029:       i2    = bs * (m - 1);
1030:       if (xb == b) {
1031:         for (i = m - 1; i >= 0; i--) {
1032:           PetscCall(PetscMemcpy(w, b + i2, bs * sizeof(PetscScalar)));

1034:           v     = aa + ai[i];
1035:           vi    = aj + ai[i];
1036:           nz    = diag[i] - ai[i];
1037:           workt = work;
1038:           for (j = 0; j < nz; j++) {
1039:             PetscCall(PetscMemcpy(workt, x + bs * (*vi++), bs * sizeof(PetscScalar)));
1040:             workt += bs;
1041:           }
1042:           arrt = arr;
1043:           if (T) {
1044:             for (j = 0; j < nz; j++) {
1045:               PetscCall(PetscMemcpy(arrt, T, bs2 * sizeof(PetscScalar)));
1046:               for (k = 0; k < bs2; k++) arrt[k] *= v[j];
1047:               arrt += bs2;
1048:             }
1049:             PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work);
1050:           } else if (kaij->isTI) {
1051:             for (j = 0; j < nz; j++) {
1052:               PetscCall(PetscMemzero(arrt, bs2 * sizeof(PetscScalar)));
1053:               for (k = 0; k < bs; k++) arrt[k + bs * k] = v[j];
1054:               arrt += bs2;
1055:             }
1056:             PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work);
1057:           }

1059:           v     = aa + diag[i] + 1;
1060:           vi    = aj + diag[i] + 1;
1061:           nz    = ai[i + 1] - diag[i] - 1;
1062:           workt = work;
1063:           for (j = 0; j < nz; j++) {
1064:             PetscCall(PetscMemcpy(workt, x + bs * (*vi++), bs * sizeof(PetscScalar)));
1065:             workt += bs;
1066:           }
1067:           arrt = arr;
1068:           if (T) {
1069:             for (j = 0; j < nz; j++) {
1070:               PetscCall(PetscMemcpy(arrt, T, bs2 * sizeof(PetscScalar)));
1071:               for (k = 0; k < bs2; k++) arrt[k] *= v[j];
1072:               arrt += bs2;
1073:             }
1074:             PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work);
1075:           } else if (kaij->isTI) {
1076:             for (j = 0; j < nz; j++) {
1077:               PetscCall(PetscMemzero(arrt, bs2 * sizeof(PetscScalar)));
1078:               for (k = 0; k < bs; k++) arrt[k + bs * k] = v[j];
1079:               arrt += bs2;
1080:             }
1081:             PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work);
1082:           }

1084:           PetscKernel_w_gets_Ar_times_v(bs, bs, w, idiag, y);
1085:           for (j = 0; j < bs; j++) *(x + i2 + j) = (1.0 - omega) * *(x + i2 + j) + omega * *(y + j);
1086:         }
1087:       } else {
1088:         for (i = m - 1; i >= 0; i--) {
1089:           PetscCall(PetscMemcpy(w, xb + i2, bs * sizeof(PetscScalar)));
1090:           v     = aa + diag[i] + 1;
1091:           vi    = aj + diag[i] + 1;
1092:           nz    = ai[i + 1] - diag[i] - 1;
1093:           workt = work;
1094:           for (j = 0; j < nz; j++) {
1095:             PetscCall(PetscMemcpy(workt, x + bs * (*vi++), bs * sizeof(PetscScalar)));
1096:             workt += bs;
1097:           }
1098:           arrt = arr;
1099:           if (T) {
1100:             for (j = 0; j < nz; j++) {
1101:               PetscCall(PetscMemcpy(arrt, T, bs2 * sizeof(PetscScalar)));
1102:               for (k = 0; k < bs2; k++) arrt[k] *= v[j];
1103:               arrt += bs2;
1104:             }
1105:             PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work);
1106:           } else if (kaij->isTI) {
1107:             for (j = 0; j < nz; j++) {
1108:               PetscCall(PetscMemzero(arrt, bs2 * sizeof(PetscScalar)));
1109:               for (k = 0; k < bs; k++) arrt[k + bs * k] = v[j];
1110:               arrt += bs2;
1111:             }
1112:             PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work);
1113:           }
1114:           PetscKernel_w_gets_Ar_times_v(bs, bs, w, idiag, y);
1115:           for (j = 0; j < bs; j++) *(x + i2 + j) = (1.0 - omega) * *(x + i2 + j) + omega * *(y + j);
1116:         }
1117:       }
1118:       PetscCall(PetscLogFlops(1.0 * bs2 * (a->nz)));
1119:     }
1120:   }

1122:   PetscCall(VecRestoreArray(xx, &x));
1123:   PetscCall(VecRestoreArrayRead(bb, &b));
1124:   PetscFunctionReturn(PETSC_SUCCESS);
1125: }

1127: /*===================================================================================*/

1129: static PetscErrorCode MatMultAdd_MPIKAIJ(Mat A, Vec xx, Vec yy, Vec zz)
1130: {
1131:   Mat_MPIKAIJ *b = (Mat_MPIKAIJ *)A->data;

1133:   PetscFunctionBegin;
1134:   if (!yy) {
1135:     PetscCall(VecSet(zz, 0.0));
1136:   } else {
1137:     PetscCall(VecCopy(yy, zz));
1138:   }
1139:   PetscCall(MatKAIJ_build_AIJ_OAIJ(A)); /* Ensure b->AIJ and b->OAIJ are up to date. */
1140:   /* start the scatter */
1141:   PetscCall(VecScatterBegin(b->ctx, xx, b->w, INSERT_VALUES, SCATTER_FORWARD));
1142:   PetscCall((*b->AIJ->ops->multadd)(b->AIJ, xx, zz, zz));
1143:   PetscCall(VecScatterEnd(b->ctx, xx, b->w, INSERT_VALUES, SCATTER_FORWARD));
1144:   PetscCall((*b->OAIJ->ops->multadd)(b->OAIJ, b->w, zz, zz));
1145:   PetscFunctionReturn(PETSC_SUCCESS);
1146: }

1148: static PetscErrorCode MatMult_MPIKAIJ(Mat A, Vec xx, Vec yy)
1149: {
1150:   PetscFunctionBegin;
1151:   PetscCall(MatMultAdd_MPIKAIJ(A, xx, NULL, yy));
1152:   PetscFunctionReturn(PETSC_SUCCESS);
1153: }

1155: static PetscErrorCode MatInvertBlockDiagonal_MPIKAIJ(Mat A, const PetscScalar **values)
1156: {
1157:   Mat_MPIKAIJ *b = (Mat_MPIKAIJ *)A->data;

1159:   PetscFunctionBegin;
1160:   PetscCall(MatKAIJ_build_AIJ_OAIJ(A)); /* Ensure b->AIJ is up to date. */
1161:   PetscCall((*b->AIJ->ops->invertblockdiagonal)(b->AIJ, values));
1162:   PetscFunctionReturn(PETSC_SUCCESS);
1163: }

1165: static PetscErrorCode MatGetRow_SeqKAIJ(Mat A, PetscInt row, PetscInt *ncols, PetscInt **cols, PetscScalar **values)
1166: {
1167:   Mat_SeqKAIJ *b    = (Mat_SeqKAIJ *)A->data;
1168:   PetscBool    diag = PETSC_FALSE;
1169:   PetscInt     nzaij, nz, *colsaij, *idx, i, j, p = b->p, q = b->q, r = row / p, s = row % p, c;
1170:   PetscScalar *vaij, *v, *S = b->S, *T = b->T;

1172:   PetscFunctionBegin;
1173:   PetscCheck(!b->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Already active");
1174:   b->getrowactive = PETSC_TRUE;
1175:   PetscCheck(row >= 0 && row < A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " out of range", row);

1177:   if ((!S) && (!T) && (!b->isTI)) {
1178:     if (ncols) *ncols = 0;
1179:     if (cols) *cols = NULL;
1180:     if (values) *values = NULL;
1181:     PetscFunctionReturn(PETSC_SUCCESS);
1182:   }

1184:   if (T || b->isTI) {
1185:     PetscCall(MatGetRow_SeqAIJ(b->AIJ, r, &nzaij, &colsaij, &vaij));
1186:     c = nzaij;
1187:     for (i = 0; i < nzaij; i++) {
1188:       /* check if this row contains a diagonal entry */
1189:       if (colsaij[i] == r) {
1190:         diag = PETSC_TRUE;
1191:         c    = i;
1192:       }
1193:     }
1194:   } else nzaij = c = 0;

1196:   /* calculate size of row */
1197:   nz = 0;
1198:   if (S) nz += q;
1199:   if (T || b->isTI) nz += (diag && S ? (nzaij - 1) * q : nzaij * q);

1201:   if (cols || values) {
1202:     PetscCall(PetscMalloc2(nz, &idx, nz, &v));
1203:     for (i = 0; i < q; i++) {
1204:       /* We need to initialize the v[i] to zero to handle the case in which T is NULL (not the identity matrix). */
1205:       v[i] = 0.0;
1206:     }
1207:     if (b->isTI) {
1208:       for (i = 0; i < nzaij; i++) {
1209:         for (j = 0; j < q; j++) {
1210:           idx[i * q + j] = colsaij[i] * q + j;
1211:           v[i * q + j]   = (j == s ? vaij[i] : 0);
1212:         }
1213:       }
1214:     } else if (T) {
1215:       for (i = 0; i < nzaij; i++) {
1216:         for (j = 0; j < q; j++) {
1217:           idx[i * q + j] = colsaij[i] * q + j;
1218:           v[i * q + j]   = vaij[i] * T[s + j * p];
1219:         }
1220:       }
1221:     }
1222:     if (S) {
1223:       for (j = 0; j < q; j++) {
1224:         idx[c * q + j] = r * q + j;
1225:         v[c * q + j] += S[s + j * p];
1226:       }
1227:     }
1228:   }

1230:   if (ncols) *ncols = nz;
1231:   if (cols) *cols = idx;
1232:   if (values) *values = v;
1233:   PetscFunctionReturn(PETSC_SUCCESS);
1234: }

1236: static PetscErrorCode MatRestoreRow_SeqKAIJ(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
1237: {
1238:   PetscFunctionBegin;
1239:   PetscCall(PetscFree2(*idx, *v));
1240:   ((Mat_SeqKAIJ *)A->data)->getrowactive = PETSC_FALSE;
1241:   PetscFunctionReturn(PETSC_SUCCESS);
1242: }

1244: static PetscErrorCode MatGetRow_MPIKAIJ(Mat A, PetscInt row, PetscInt *ncols, PetscInt **cols, PetscScalar **values)
1245: {
1246:   Mat_MPIKAIJ   *b    = (Mat_MPIKAIJ *)A->data;
1247:   Mat            AIJ  = b->A;
1248:   PetscBool      diag = PETSC_FALSE;
1249:   Mat            MatAIJ, MatOAIJ;
1250:   const PetscInt rstart = A->rmap->rstart, rend = A->rmap->rend, p = b->p, q = b->q, *garray;
1251:   PetscInt       nz, *idx, ncolsaij = 0, ncolsoaij = 0, *colsaij, *colsoaij, r, s, c, i, j, lrow;
1252:   PetscScalar   *v, *vals, *ovals, *S = b->S, *T = b->T;

1254:   PetscFunctionBegin;
1255:   PetscCall(MatKAIJ_build_AIJ_OAIJ(A)); /* Ensure b->AIJ and b->OAIJ are up to date. */
1256:   MatAIJ  = ((Mat_SeqKAIJ *)b->AIJ->data)->AIJ;
1257:   MatOAIJ = ((Mat_SeqKAIJ *)b->OAIJ->data)->AIJ;
1258:   PetscCheck(!b->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Already active");
1259:   b->getrowactive = PETSC_TRUE;
1260:   PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Only local rows");
1261:   lrow = row - rstart;

1263:   if ((!S) && (!T) && (!b->isTI)) {
1264:     if (ncols) *ncols = 0;
1265:     if (cols) *cols = NULL;
1266:     if (values) *values = NULL;
1267:     PetscFunctionReturn(PETSC_SUCCESS);
1268:   }

1270:   r = lrow / p;
1271:   s = lrow % p;

1273:   if (T || b->isTI) {
1274:     PetscCall(MatMPIAIJGetSeqAIJ(AIJ, NULL, NULL, &garray));
1275:     PetscCall(MatGetRow_SeqAIJ(MatAIJ, lrow / p, &ncolsaij, &colsaij, &vals));
1276:     PetscCall(MatGetRow_SeqAIJ(MatOAIJ, lrow / p, &ncolsoaij, &colsoaij, &ovals));

1278:     c = ncolsaij + ncolsoaij;
1279:     for (i = 0; i < ncolsaij; i++) {
1280:       /* check if this row contains a diagonal entry */
1281:       if (colsaij[i] == r) {
1282:         diag = PETSC_TRUE;
1283:         c    = i;
1284:       }
1285:     }
1286:   } else c = 0;

1288:   /* calculate size of row */
1289:   nz = 0;
1290:   if (S) nz += q;
1291:   if (T || b->isTI) nz += (diag && S ? (ncolsaij + ncolsoaij - 1) * q : (ncolsaij + ncolsoaij) * q);

1293:   if (cols || values) {
1294:     PetscCall(PetscMalloc2(nz, &idx, nz, &v));
1295:     for (i = 0; i < q; i++) {
1296:       /* We need to initialize the v[i] to zero to handle the case in which T is NULL (not the identity matrix). */
1297:       v[i] = 0.0;
1298:     }
1299:     if (b->isTI) {
1300:       for (i = 0; i < ncolsaij; i++) {
1301:         for (j = 0; j < q; j++) {
1302:           idx[i * q + j] = (colsaij[i] + rstart / p) * q + j;
1303:           v[i * q + j]   = (j == s ? vals[i] : 0.0);
1304:         }
1305:       }
1306:       for (i = 0; i < ncolsoaij; i++) {
1307:         for (j = 0; j < q; j++) {
1308:           idx[(i + ncolsaij) * q + j] = garray[colsoaij[i]] * q + j;
1309:           v[(i + ncolsaij) * q + j]   = (j == s ? ovals[i] : 0.0);
1310:         }
1311:       }
1312:     } else if (T) {
1313:       for (i = 0; i < ncolsaij; i++) {
1314:         for (j = 0; j < q; j++) {
1315:           idx[i * q + j] = (colsaij[i] + rstart / p) * q + j;
1316:           v[i * q + j]   = vals[i] * T[s + j * p];
1317:         }
1318:       }
1319:       for (i = 0; i < ncolsoaij; i++) {
1320:         for (j = 0; j < q; j++) {
1321:           idx[(i + ncolsaij) * q + j] = garray[colsoaij[i]] * q + j;
1322:           v[(i + ncolsaij) * q + j]   = ovals[i] * T[s + j * p];
1323:         }
1324:       }
1325:     }
1326:     if (S) {
1327:       for (j = 0; j < q; j++) {
1328:         idx[c * q + j] = (r + rstart / p) * q + j;
1329:         v[c * q + j] += S[s + j * p];
1330:       }
1331:     }
1332:   }

1334:   if (ncols) *ncols = nz;
1335:   if (cols) *cols = idx;
1336:   if (values) *values = v;
1337:   PetscFunctionReturn(PETSC_SUCCESS);
1338: }

1340: static PetscErrorCode MatRestoreRow_MPIKAIJ(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
1341: {
1342:   PetscFunctionBegin;
1343:   PetscCall(PetscFree2(*idx, *v));
1344:   ((Mat_SeqKAIJ *)A->data)->getrowactive = PETSC_FALSE;
1345:   PetscFunctionReturn(PETSC_SUCCESS);
1346: }

1348: static PetscErrorCode MatCreateSubMatrix_KAIJ(Mat mat, IS isrow, IS iscol, MatReuse cll, Mat *newmat)
1349: {
1350:   Mat A;

1352:   PetscFunctionBegin;
1353:   PetscCall(MatConvert(mat, MATAIJ, MAT_INITIAL_MATRIX, &A));
1354:   PetscCall(MatCreateSubMatrix(A, isrow, iscol, cll, newmat));
1355:   PetscCall(MatDestroy(&A));
1356:   PetscFunctionReturn(PETSC_SUCCESS);
1357: }

1359: /*@C
1360:   MatCreateKAIJ - Creates a matrix of type `MATKAIJ`.

1362:   Collective

1364:   Input Parameters:
1365: + A - the `MATAIJ` matrix
1366: . p - number of rows in `S` and `T`
1367: . q - number of columns in `S` and `T`
1368: . S - the `S` matrix (can be `NULL`), stored as a `PetscScalar` array (column-major)
1369: - T - the `T` matrix (can be `NULL`), stored as a `PetscScalar` array (column-major)

1371:   Output Parameter:
1372: . kaij - the new `MATKAIJ` matrix

1374:   Level: advanced

1376:   Notes:
1377:   The created matrix is of the following form\:
1378: .vb
1379:     [I \otimes S + A \otimes T]
1380: .ve
1381:   where
1382: .vb
1383:   S is a dense (p \times q) matrix
1384:   T is a dense (p \times q) matrix
1385:   A is a `MATAIJ`  (n \times n) matrix
1386:   I is the identity matrix
1387: .ve
1388:   The resulting matrix is (np \times nq)

1390:   `S` and `T` are always stored independently on all processes as `PetscScalar` arrays in
1391:   column-major format.

1393:   This function increases the reference count on the `MATAIJ` matrix, so the user is free to destroy the matrix if it is not needed.

1395:   Changes to the entries of the `MATAIJ` matrix will immediately affect the `MATKAIJ` matrix.

1397:   Developer Notes:
1398:   In the `MATMPIKAIJ` case, the internal 'AIJ' and 'OAIJ' sequential KAIJ matrices are kept up to date by tracking the object state
1399:   of the AIJ matrix 'A' that describes the blockwise action of the `MATMPIKAIJ` matrix and, if the object state has changed, lazily
1400:   rebuilding 'AIJ' and 'OAIJ' just before executing operations with the `MATMPIKAIJ` matrix. If new types of operations are added,
1401:   routines implementing those must also ensure these are rebuilt when needed (by calling the internal MatKAIJ_build_AIJ_OAIJ() routine).

1403: .seealso: [](ch_matrices), `Mat`, `MatKAIJSetAIJ()`, `MatKAIJSetS()`, `MatKAIJSetT()`, `MatKAIJGetAIJ()`, `MatKAIJGetS()`, `MatKAIJGetT()`, `MATKAIJ`
1404: @*/
1405: PetscErrorCode MatCreateKAIJ(Mat A, PetscInt p, PetscInt q, const PetscScalar S[], const PetscScalar T[], Mat *kaij)
1406: {
1407:   PetscFunctionBegin;
1408:   PetscCall(MatCreate(PetscObjectComm((PetscObject)A), kaij));
1409:   PetscCall(MatSetType(*kaij, MATKAIJ));
1410:   PetscCall(MatKAIJSetAIJ(*kaij, A));
1411:   PetscCall(MatKAIJSetS(*kaij, p, q, S));
1412:   PetscCall(MatKAIJSetT(*kaij, p, q, T));
1413:   PetscCall(MatSetUp(*kaij));
1414:   PetscFunctionReturn(PETSC_SUCCESS);
1415: }

1417: /*MC
1418:   MATKAIJ - MATKAIJ = "kaij" - A matrix type to be used to evaluate matrices of form
1419:     [I \otimes S + A \otimes T],
1420:   where
1421: .vb
1422:     S is a dense (p \times q) matrix,
1423:     T is a dense (p \times q) matrix,
1424:     A is an AIJ  (n \times n) matrix,
1425:     and I is the identity matrix.
1426: .ve
1427:   The resulting matrix is (np \times nq).

1429:   S and T are always stored independently on all processes as `PetscScalar` arrays in column-major format.

1431:   Level: advanced

1433:   Note:
1434:   A linear system with multiple right-hand sides, AX = B, can be expressed in the KAIJ-friendly form of (A \otimes I) x = b,
1435:   where x and b are column vectors containing the row-major representations of X and B.

1437: .seealso: [](ch_matrices), `Mat`, `MatKAIJSetAIJ()`, `MatKAIJSetS()`, `MatKAIJSetT()`, `MatKAIJGetAIJ()`, `MatKAIJGetS()`, `MatKAIJGetT()`, `MatCreateKAIJ()`
1438: M*/

1440: PETSC_EXTERN PetscErrorCode MatCreate_KAIJ(Mat A)
1441: {
1442:   Mat_MPIKAIJ *b;
1443:   PetscMPIInt  size;

1445:   PetscFunctionBegin;
1446:   PetscCall(PetscNew(&b));
1447:   A->data = (void *)b;

1449:   PetscCall(PetscMemzero(A->ops, sizeof(struct _MatOps)));

1451:   b->w = NULL;
1452:   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
1453:   if (size == 1) {
1454:     PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATSEQKAIJ));
1455:     A->ops->destroy             = MatDestroy_SeqKAIJ;
1456:     A->ops->mult                = MatMult_SeqKAIJ;
1457:     A->ops->multadd             = MatMultAdd_SeqKAIJ;
1458:     A->ops->invertblockdiagonal = MatInvertBlockDiagonal_SeqKAIJ;
1459:     A->ops->getrow              = MatGetRow_SeqKAIJ;
1460:     A->ops->restorerow          = MatRestoreRow_SeqKAIJ;
1461:     A->ops->sor                 = MatSOR_SeqKAIJ;
1462:     PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqkaij_seqaij_C", MatConvert_KAIJ_AIJ));
1463:   } else {
1464:     PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATMPIKAIJ));
1465:     A->ops->destroy             = MatDestroy_MPIKAIJ;
1466:     A->ops->mult                = MatMult_MPIKAIJ;
1467:     A->ops->multadd             = MatMultAdd_MPIKAIJ;
1468:     A->ops->invertblockdiagonal = MatInvertBlockDiagonal_MPIKAIJ;
1469:     A->ops->getrow              = MatGetRow_MPIKAIJ;
1470:     A->ops->restorerow          = MatRestoreRow_MPIKAIJ;
1471:     PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatGetDiagonalBlock_C", MatGetDiagonalBlock_MPIKAIJ));
1472:     PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_mpikaij_mpiaij_C", MatConvert_KAIJ_AIJ));
1473:   }
1474:   A->ops->setup           = MatSetUp_KAIJ;
1475:   A->ops->view            = MatView_KAIJ;
1476:   A->ops->createsubmatrix = MatCreateSubMatrix_KAIJ;
1477:   PetscFunctionReturn(PETSC_SUCCESS);
1478: }