Actual source code: kaij.c


  2: /*
  3:   Defines the basic matrix operations for the KAIJ  matrix storage format.
  4:   This format is used to evaluate matrices of the form:

  6:     [I \otimes S + A \otimes T]

  8:   where
  9:     S is a dense (p \times q) matrix
 10:     T is a dense (p \times q) matrix
 11:     A is an AIJ  (n \times n) matrix
 12:     I is the identity matrix

 14:   The resulting matrix is (np \times nq)

 16:   We provide:
 17:      MatMult()
 18:      MatMultAdd()
 19:      MatInvertBlockDiagonal()
 20:   and
 21:      MatCreateKAIJ(Mat,PetscInt,PetscInt,const PetscScalar[],const PetscScalar[],Mat*)

 23:   This single directory handles both the sequential and parallel codes
 24: */

 26: #include <../src/mat/impls/kaij/kaij.h>
 27: #include <../src/mat/utils/freespace.h>
 28: #include <petsc/private/vecimpl.h>

 30: /*@C
 31:    MatKAIJGetAIJ - Get the AIJ matrix describing the blockwise action of the KAIJ matrix

 33:    Not Collective, but if the KAIJ matrix is parallel, the AIJ matrix is also parallel

 35:    Input Parameter:
 36: .  A - the KAIJ matrix

 38:    Output Parameter:
 39: .  B - the AIJ matrix

 41:    Level: advanced

 43:    Notes: The reference count on the AIJ matrix is not increased so you should not destroy it.

 45: .seealso: MatCreateKAIJ()
 46: @*/
 47: PetscErrorCode  MatKAIJGetAIJ(Mat A,Mat *B)
 48: {
 49:   PetscBool      ismpikaij,isseqkaij;

 51:   PetscObjectTypeCompare((PetscObject)A,MATMPIKAIJ,&ismpikaij);
 52:   PetscObjectTypeCompare((PetscObject)A,MATSEQKAIJ,&isseqkaij);
 53:   if (ismpikaij) {
 54:     Mat_MPIKAIJ *b = (Mat_MPIKAIJ*)A->data;

 56:     *B = b->A;
 57:   } else if (isseqkaij) {
 58:     Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data;

 60:     *B = b->AIJ;
 61:   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix passed in is not of type KAIJ");
 62:   return 0;
 63: }

 65: /*@C
 66:    MatKAIJGetS - Get the S matrix describing the shift action of the KAIJ matrix

 68:    Not Collective; the entire S is stored and returned independently on all processes.

 70:    Input Parameter:
 71: .  A - the KAIJ matrix

 73:    Output Parameters:
 74: +  m - the number of rows in S
 75: .  n - the number of columns in S
 76: -  S - the S matrix, in form of a scalar array in column-major format

 78:    Note: All output parameters are optional (pass NULL or PETSC_IGNORE if not desired)

 80:    Level: advanced

 82: .seealso: MatCreateKAIJ(), MatGetBlockSizes()
 83: @*/
 84: PetscErrorCode MatKAIJGetS(Mat A,PetscInt *m,PetscInt *n,PetscScalar **S)
 85: {
 86:   Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data;
 87:   if (m) *m = b->p;
 88:   if (n) *n = b->q;
 89:   if (S) *S = b->S;
 90:   return 0;
 91: }

 93: /*@C
 94:    MatKAIJGetSRead - Get a read-only pointer to the S matrix describing the shift action of the KAIJ matrix

 96:    Not Collective; the entire S is stored and returned independently on all processes.

 98:    Input Parameter:
 99: .  A - the KAIJ matrix

101:    Output Parameters:
102: +  m - the number of rows in S
103: .  n - the number of columns in S
104: -  S - the S matrix, in form of a scalar array in column-major format

106:    Note: All output parameters are optional (pass NULL or PETSC_IGNORE if not desired)

108:    Level: advanced

110: .seealso: MatCreateKAIJ(), MatGetBlockSizes()
111: @*/
112: PetscErrorCode MatKAIJGetSRead(Mat A,PetscInt *m,PetscInt *n,const PetscScalar **S)
113: {
114:   Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data;
115:   if (m) *m = b->p;
116:   if (n) *n = b->q;
117:   if (S) *S = b->S;
118:   return 0;
119: }

121: /*@C
122:   MatKAIJRestoreS - Restore array obtained with MatKAIJGetS()

124:   Not collective

126:   Input Parameter:
127: . A - the KAIJ matrix

129:   Output Parameter:
130: . S - location of pointer to array obtained with MatKAIJGetS()

132:   Note: This routine zeros the array pointer to prevent accidental reuse after it has been restored.
133:   If NULL is passed, it will not attempt to zero the array pointer.

135:   Level: advanced
136: .seealso: MatKAIJGetS(), MatKAIJGetSRead(), MatKAIJRestoreSRead()
137: @*/
138: PetscErrorCode MatKAIJRestoreS(Mat A,PetscScalar **S)
139: {
140:   if (S) *S = NULL;
141:   PetscObjectStateIncrease((PetscObject)A);
142:   return 0;
143: }

145: /*@C
146:   MatKAIJRestoreSRead - Restore array obtained with MatKAIJGetSRead()

148:   Not collective

150:   Input Parameter:
151: . A - the KAIJ matrix

153:   Output Parameter:
154: . S - location of pointer to array obtained with MatKAIJGetS()

156:   Note: This routine zeros the array pointer to prevent accidental reuse after it has been restored.
157:   If NULL is passed, it will not attempt to zero the array pointer.

159:   Level: advanced
160: .seealso: MatKAIJGetS(), MatKAIJGetSRead(), MatKAIJRestoreSRead()
161: @*/
162: PetscErrorCode MatKAIJRestoreSRead(Mat A,const PetscScalar **S)
163: {
164:   if (S) *S = NULL;
165:   return 0;
166: }

168: /*@C
169:    MatKAIJGetT - Get the transformation matrix T associated with the KAIJ matrix

171:    Not Collective; the entire T is stored and returned independently on all processes

173:    Input Parameter:
174: .  A - the KAIJ matrix

176:    Output Parameters:
177: +  m - the number of rows in T
178: .  n - the number of columns in T
179: -  T - the T matrix, in form of a scalar array in column-major format

181:    Note: All output parameters are optional (pass NULL or PETSC_IGNORE if not desired)

183:    Level: advanced

185: .seealso: MatCreateKAIJ(), MatGetBlockSizes()
186: @*/
187: PetscErrorCode MatKAIJGetT(Mat A,PetscInt *m,PetscInt *n,PetscScalar **T)
188: {
189:   Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data;
190:   if (m) *m = b->p;
191:   if (n) *n = b->q;
192:   if (T) *T = b->T;
193:   return 0;
194: }

196: /*@C
197:    MatKAIJGetTRead - Get a read-only pointer to the transformation matrix T associated with the KAIJ matrix

199:    Not Collective; the entire T is stored and returned independently on all processes

201:    Input Parameter:
202: .  A - the KAIJ matrix

204:    Output Parameters:
205: +  m - the number of rows in T
206: .  n - the number of columns in T
207: -  T - the T matrix, in form of a scalar array in column-major format

209:    Note: All output parameters are optional (pass NULL or PETSC_IGNORE if not desired)

211:    Level: advanced

213: .seealso: MatCreateKAIJ(), MatGetBlockSizes()
214: @*/
215: PetscErrorCode MatKAIJGetTRead(Mat A,PetscInt *m,PetscInt *n,const PetscScalar **T)
216: {
217:   Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data;
218:   if (m) *m = b->p;
219:   if (n) *n = b->q;
220:   if (T) *T = b->T;
221:   return 0;
222: }

224: /*@C
225:   MatKAIJRestoreT - Restore array obtained with MatKAIJGetT()

227:   Not collective

229:   Input Parameter:
230: . A - the KAIJ matrix

232:   Output Parameter:
233: . T - location of pointer to array obtained with MatKAIJGetS()

235:   Note: This routine zeros the array pointer to prevent accidental reuse after it has been restored.
236:   If NULL is passed, it will not attempt to zero the array pointer.

238:   Level: advanced
239: .seealso: MatKAIJGetT(), MatKAIJGetTRead(), MatKAIJRestoreTRead()
240: @*/
241: PetscErrorCode MatKAIJRestoreT(Mat A,PetscScalar **T)
242: {
243:   if (T) *T = NULL;
244:   PetscObjectStateIncrease((PetscObject)A);
245:   return 0;
246: }

248: /*@C
249:   MatKAIJRestoreTRead - Restore array obtained with MatKAIJGetTRead()

251:   Not collective

253:   Input Parameter:
254: . A - the KAIJ matrix

256:   Output Parameter:
257: . T - location of pointer to array obtained with MatKAIJGetS()

259:   Note: This routine zeros the array pointer to prevent accidental reuse after it has been restored.
260:   If NULL is passed, it will not attempt to zero the array pointer.

262:   Level: advanced
263: .seealso: MatKAIJGetT(), MatKAIJGetTRead(), MatKAIJRestoreTRead()
264: @*/
265: PetscErrorCode MatKAIJRestoreTRead(Mat A,const PetscScalar **T)
266: {
267:   if (T) *T = NULL;
268:   return 0;
269: }

271: /*@
272:    MatKAIJSetAIJ - Set the AIJ matrix describing the blockwise action of the KAIJ matrix

274:    Logically Collective; if the AIJ matrix is parallel, the KAIJ matrix is also parallel

276:    Input Parameters:
277: +  A - the KAIJ matrix
278: -  B - the AIJ matrix

280:    Notes:
281:    This function increases the reference count on the AIJ matrix, so the user is free to destroy the matrix if it is not needed.
282:    Changes to the entries of the AIJ matrix will immediately affect the KAIJ matrix.

284:    Level: advanced

286: .seealso: MatKAIJGetAIJ(), MatKAIJSetS(), MatKAIJSetT()
287: @*/
288: PetscErrorCode MatKAIJSetAIJ(Mat A,Mat B)
289: {
290:   PetscMPIInt    size;
291:   PetscBool      flg;

293:   MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);
294:   if (size == 1) {
295:     PetscObjectTypeCompare((PetscObject)B,MATSEQAIJ,&flg);
297:     Mat_SeqKAIJ *a = (Mat_SeqKAIJ*)A->data;
298:     a->AIJ = B;
299:   } else {
300:     Mat_MPIKAIJ *a = (Mat_MPIKAIJ*)A->data;
301:     a->A = B;
302:   }
303:   PetscObjectReference((PetscObject)B);
304:   return 0;
305: }

307: /*@C
308:    MatKAIJSetS - Set the S matrix describing the shift action of the KAIJ matrix

310:    Logically Collective; the entire S is stored independently on all processes.

312:    Input Parameters:
313: +  A - the KAIJ matrix
314: .  p - the number of rows in S
315: .  q - the number of columns in S
316: -  S - the S matrix, in form of a scalar array in column-major format

318:    Notes: The dimensions p and q must match those of the transformation matrix T associated with the KAIJ matrix.
319:    The S matrix is copied, so the user can destroy this array.

321:    Level: Advanced

323: .seealso: MatKAIJGetS(), MatKAIJSetT(), MatKAIJSetAIJ()
324: @*/
325: PetscErrorCode MatKAIJSetS(Mat A,PetscInt p,PetscInt q,const PetscScalar S[])
326: {
327:   Mat_SeqKAIJ    *a = (Mat_SeqKAIJ*)A->data;

329:   PetscFree(a->S);
330:   if (S) {
331:     PetscMalloc1(p*q,&a->S);
332:     PetscMemcpy(a->S,S,p*q*sizeof(PetscScalar));
333:   } else  a->S = NULL;

335:   a->p = p;
336:   a->q = q;
337:   return 0;
338: }

340: /*@C
341:    MatKAIJGetScaledIdentity - Check if both S and T are scaled identities.

343:    Logically Collective.

345:    Input Parameter:
346: .  A - the KAIJ matrix

348:   Output Parameter:
349: .  identity - the Boolean value

351:    Level: Advanced

353: .seealso: MatKAIJGetS(), MatKAIJGetT()
354: @*/
355: PetscErrorCode MatKAIJGetScaledIdentity(Mat A,PetscBool* identity)
356: {
357:   Mat_SeqKAIJ *a = (Mat_SeqKAIJ*)A->data;
358:   PetscInt    i,j;

360:   if (a->p != a->q) {
361:     *identity = PETSC_FALSE;
362:     return 0;
363:   } else *identity = PETSC_TRUE;
364:   if (!a->isTI || a->S) {
365:     for (i=0; i<a->p && *identity; i++) {
366:       for (j=0; j<a->p && *identity; j++) {
367:         if (i != j) {
368:           if (a->S && PetscAbsScalar(a->S[i+j*a->p]) > PETSC_SMALL) *identity = PETSC_FALSE;
369:           if (a->T && PetscAbsScalar(a->T[i+j*a->p]) > PETSC_SMALL) *identity = PETSC_FALSE;
370:         } else {
371:           if (a->S && PetscAbsScalar(a->S[i*(a->p+1)]-a->S[0]) > PETSC_SMALL) *identity = PETSC_FALSE;
372:           if (a->T && PetscAbsScalar(a->T[i*(a->p+1)]-a->T[0]) > PETSC_SMALL) *identity = PETSC_FALSE;
373:         }
374:       }
375:     }
376:   }
377:   return 0;
378: }

380: /*@C
381:    MatKAIJSetT - Set the transformation matrix T associated with the KAIJ matrix

383:    Logically Collective; the entire T is stored independently on all processes.

385:    Input Parameters:
386: +  A - the KAIJ matrix
387: .  p - the number of rows in S
388: .  q - the number of columns in S
389: -  T - the T matrix, in form of a scalar array in column-major format

391:    Notes: The dimensions p and q must match those of the shift matrix S associated with the KAIJ matrix.
392:    The T matrix is copied, so the user can destroy this array.

394:    Level: Advanced

396: .seealso: MatKAIJGetT(), MatKAIJSetS(), MatKAIJSetAIJ()
397: @*/
398: PetscErrorCode MatKAIJSetT(Mat A,PetscInt p,PetscInt q,const PetscScalar T[])
399: {
400:   PetscInt       i,j;
401:   Mat_SeqKAIJ    *a = (Mat_SeqKAIJ*)A->data;
402:   PetscBool      isTI = PETSC_FALSE;

404:   /* check if T is an identity matrix */
405:   if (T && (p == q)) {
406:     isTI = PETSC_TRUE;
407:     for (i=0; i<p; i++) {
408:       for (j=0; j<q; j++) {
409:         if (i == j) {
410:           /* diagonal term must be 1 */
411:           if (T[i+j*p] != 1.0) isTI = PETSC_FALSE;
412:         } else {
413:           /* off-diagonal term must be 0 */
414:           if (T[i+j*p] != 0.0) isTI = PETSC_FALSE;
415:         }
416:       }
417:     }
418:   }
419:   a->isTI = isTI;

421:   PetscFree(a->T);
422:   if (T && (!isTI)) {
423:     PetscMalloc1(p*q,&a->T);
424:     PetscMemcpy(a->T,T,p*q*sizeof(PetscScalar));
425:   } else a->T = NULL;

427:   a->p = p;
428:   a->q = q;
429:   return 0;
430: }

432: PetscErrorCode MatDestroy_SeqKAIJ(Mat A)
433: {
434:   Mat_SeqKAIJ    *b = (Mat_SeqKAIJ*)A->data;

436:   MatDestroy(&b->AIJ);
437:   PetscFree(b->S);
438:   PetscFree(b->T);
439:   PetscFree(b->ibdiag);
440:   PetscFree5(b->sor.w,b->sor.y,b->sor.work,b->sor.t,b->sor.arr);
441:   PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqkaij_seqaij_C",NULL);
442:   PetscFree(A->data);
443:   return 0;
444: }

446: PETSC_INTERN PetscErrorCode MatKAIJ_build_AIJ_OAIJ(Mat A)
447: {
448:   Mat_MPIKAIJ      *a;
449:   Mat_MPIAIJ       *mpiaij;
450:   PetscScalar      *T;
451:   PetscInt         i,j;
452:   PetscObjectState state;

454:   a = (Mat_MPIKAIJ*)A->data;
455:   mpiaij = (Mat_MPIAIJ*)a->A->data;

457:   PetscObjectStateGet((PetscObject)a->A,&state);
458:   if (state == a->state) {
459:     /* The existing AIJ and KAIJ members are up-to-date, so simply exit. */
460:     return 0;
461:   } else {
462:     MatDestroy(&a->AIJ);
463:     MatDestroy(&a->OAIJ);
464:     if (a->isTI) {
465:       /* If the transformation matrix associated with the parallel matrix A is the identity matrix, then a->T will be NULL.
466:        * In this case, if we pass a->T directly to the MatCreateKAIJ() calls to create the sequential submatrices, the routine will
467:        * not be able to tell that transformation matrix should be set to the identity; thus we create a temporary identity matrix
468:        * to pass in. */
469:       PetscMalloc1(a->p*a->q,&T);
470:       for (i=0; i<a->p; i++) {
471:         for (j=0; j<a->q; j++) {
472:           if (i==j) T[i+j*a->p] = 1.0;
473:           else      T[i+j*a->p] = 0.0;
474:         }
475:       }
476:     } else T = a->T;
477:     MatCreateKAIJ(mpiaij->A,a->p,a->q,a->S,T,&a->AIJ);
478:     MatCreateKAIJ(mpiaij->B,a->p,a->q,NULL,T,&a->OAIJ);
479:     if (a->isTI) {
480:       PetscFree(T);
481:     }
482:     a->state = state;
483:   }

485:   return 0;
486: }

488: PetscErrorCode MatSetUp_KAIJ(Mat A)
489: {
490:   PetscInt       n;
491:   PetscMPIInt    size;
492:   Mat_SeqKAIJ    *seqkaij = (Mat_SeqKAIJ*)A->data;

494:   MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);
495:   if (size == 1) {
496:     MatSetSizes(A,seqkaij->p*seqkaij->AIJ->rmap->n,seqkaij->q*seqkaij->AIJ->cmap->n,seqkaij->p*seqkaij->AIJ->rmap->N,seqkaij->q*seqkaij->AIJ->cmap->N);
497:     PetscLayoutSetBlockSize(A->rmap,seqkaij->p);
498:     PetscLayoutSetBlockSize(A->cmap,seqkaij->q);
499:     PetscLayoutSetUp(A->rmap);
500:     PetscLayoutSetUp(A->cmap);
501:   } else {
502:     Mat_MPIKAIJ *a;
503:     Mat_MPIAIJ  *mpiaij;
504:     IS          from,to;
505:     Vec         gvec;

507:     a = (Mat_MPIKAIJ*)A->data;
508:     mpiaij = (Mat_MPIAIJ*)a->A->data;
509:     MatSetSizes(A,a->p*a->A->rmap->n,a->q*a->A->cmap->n,a->p*a->A->rmap->N,a->q*a->A->cmap->N);
510:     PetscLayoutSetBlockSize(A->rmap,seqkaij->p);
511:     PetscLayoutSetBlockSize(A->cmap,seqkaij->q);
512:     PetscLayoutSetUp(A->rmap);
513:     PetscLayoutSetUp(A->cmap);

515:     MatKAIJ_build_AIJ_OAIJ(A);

517:     VecGetSize(mpiaij->lvec,&n);
518:     VecCreate(PETSC_COMM_SELF,&a->w);
519:     VecSetSizes(a->w,n*a->q,n*a->q);
520:     VecSetBlockSize(a->w,a->q);
521:     VecSetType(a->w,VECSEQ);

523:     /* create two temporary Index sets for build scatter gather */
524:     ISCreateBlock(PetscObjectComm((PetscObject)a->A),a->q,n,mpiaij->garray,PETSC_COPY_VALUES,&from);
525:     ISCreateStride(PETSC_COMM_SELF,n*a->q,0,1,&to);

527:     /* create temporary global vector to generate scatter context */
528:     VecCreateMPIWithArray(PetscObjectComm((PetscObject)a->A),a->q,a->q*a->A->cmap->n,a->q*a->A->cmap->N,NULL,&gvec);

530:     /* generate the scatter context */
531:     VecScatterCreate(gvec,from,a->w,to,&a->ctx);

533:     ISDestroy(&from);
534:     ISDestroy(&to);
535:     VecDestroy(&gvec);
536:   }

538:   A->assembled = PETSC_TRUE;
539:   return 0;
540: }

542: PetscErrorCode MatView_KAIJ(Mat A,PetscViewer viewer)
543: {
544:   PetscViewerFormat format;
545:   Mat_SeqKAIJ       *a = (Mat_SeqKAIJ*)A->data;
546:   Mat               B;
547:   PetscInt          i;
548:   PetscBool         ismpikaij;

550:   PetscObjectTypeCompare((PetscObject)A,MATMPIKAIJ,&ismpikaij);
551:   PetscViewerGetFormat(viewer,&format);
552:   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL || format == PETSC_VIEWER_ASCII_IMPL) {
553:     PetscViewerASCIIPrintf(viewer,"S and T have %" PetscInt_FMT " rows and %" PetscInt_FMT " columns\n",a->p,a->q);

555:     /* Print appropriate details for S. */
556:     if (!a->S) {
557:       PetscViewerASCIIPrintf(viewer,"S is NULL\n");
558:     } else if (format == PETSC_VIEWER_ASCII_IMPL) {
559:       PetscViewerASCIIPrintf(viewer,"Entries of S are ");
560:       for (i=0; i<(a->p * a->q); i++) {
561: #if defined(PETSC_USE_COMPLEX)
562:         PetscViewerASCIIPrintf(viewer,"%18.16e %18.16e ",(double)PetscRealPart(a->S[i]),(double)PetscImaginaryPart(a->S[i]));
563: #else
564:         PetscViewerASCIIPrintf(viewer,"%18.16e ",(double)PetscRealPart(a->S[i]));
565: #endif
566:       }
567:       PetscViewerASCIIPrintf(viewer,"\n");
568:     }

570:     /* Print appropriate details for T. */
571:     if (a->isTI) {
572:       PetscViewerASCIIPrintf(viewer,"T is the identity matrix\n");
573:     } else if (!a->T) {
574:       PetscViewerASCIIPrintf(viewer,"T is NULL\n");
575:     } else if (format == PETSC_VIEWER_ASCII_IMPL) {
576:       PetscViewerASCIIPrintf(viewer,"Entries of T are ");
577:       for (i=0; i<(a->p * a->q); i++) {
578: #if defined(PETSC_USE_COMPLEX)
579:         PetscViewerASCIIPrintf(viewer,"%18.16e %18.16e ",(double)PetscRealPart(a->T[i]),(double)PetscImaginaryPart(a->T[i]));
580: #else
581:         PetscViewerASCIIPrintf(viewer,"%18.16e ",(double)PetscRealPart(a->T[i]));
582: #endif
583:       }
584:       PetscViewerASCIIPrintf(viewer,"\n");
585:     }

587:     /* Now print details for the AIJ matrix, using the AIJ viewer. */
588:     PetscViewerASCIIPrintf(viewer,"Now viewing the associated AIJ matrix:\n");
589:     if (ismpikaij) {
590:       Mat_MPIKAIJ *b = (Mat_MPIKAIJ*)A->data;
591:       MatView(b->A,viewer);
592:     } else {
593:       MatView(a->AIJ,viewer);
594:     }

596:   } else {
597:     /* For all other matrix viewer output formats, simply convert to an AIJ matrix and call MatView() on that. */
598:     MatConvert(A,MATAIJ,MAT_INITIAL_MATRIX,&B);
599:     MatView(B,viewer);
600:     MatDestroy(&B);
601:   }
602:   return 0;
603: }

605: PetscErrorCode MatDestroy_MPIKAIJ(Mat A)
606: {
607:   Mat_MPIKAIJ    *b = (Mat_MPIKAIJ*)A->data;

609:   MatDestroy(&b->AIJ);
610:   MatDestroy(&b->OAIJ);
611:   MatDestroy(&b->A);
612:   VecScatterDestroy(&b->ctx);
613:   VecDestroy(&b->w);
614:   PetscFree(b->S);
615:   PetscFree(b->T);
616:   PetscFree(b->ibdiag);
617:   PetscObjectComposeFunction((PetscObject)A,"MatGetDiagonalBlock_C",NULL);
618:   PetscObjectComposeFunction((PetscObject)A,"MatConvert_mpikaij_mpiaij_C",NULL);
619:   PetscFree(A->data);
620:   return 0;
621: }

623: /* --------------------------------------------------------------------------------------*/

625: /* zz = yy + Axx */
626: PetscErrorCode MatMultAdd_SeqKAIJ(Mat A,Vec xx,Vec yy,Vec zz)
627: {
628:   Mat_SeqKAIJ       *b = (Mat_SeqKAIJ*)A->data;
629:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
630:   const PetscScalar *s = b->S, *t = b->T;
631:   const PetscScalar *x,*v,*bx;
632:   PetscScalar       *y,*sums;
633:   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
634:   PetscInt          n,i,jrow,j,l,p=b->p,q=b->q,k;

636:   if (!yy) {
637:     VecSet(zz,0.0);
638:   } else {
639:     VecCopy(yy,zz);
640:   }
641:   if ((!s) && (!t) && (!b->isTI)) return 0;

643:   VecGetArrayRead(xx,&x);
644:   VecGetArray(zz,&y);
645:   idx  = a->j;
646:   v    = a->a;
647:   ii   = a->i;

649:   if (b->isTI) {
650:     for (i=0; i<m; i++) {
651:       jrow = ii[i];
652:       n    = ii[i+1] - jrow;
653:       sums = y + p*i;
654:       for (j=0; j<n; j++) {
655:         for (k=0; k<p; k++) {
656:           sums[k] += v[jrow+j]*x[q*idx[jrow+j]+k];
657:         }
658:       }
659:     }
660:     PetscLogFlops(3.0*(a->nz)*p);
661:   } else if (t) {
662:     for (i=0; i<m; i++) {
663:       jrow = ii[i];
664:       n    = ii[i+1] - jrow;
665:       sums = y + p*i;
666:       for (j=0; j<n; j++) {
667:         for (k=0; k<p; k++) {
668:           for (l=0; l<q; l++) {
669:             sums[k] += v[jrow+j]*t[k+l*p]*x[q*idx[jrow+j]+l];
670:           }
671:         }
672:       }
673:     }
674:     /* The flop count below assumes that v[jrow+j] is hoisted out (which an optimizing compiler is likely to do),
675:      * and also that T part is hoisted outside this loop (in exchange for temporary storage) as (A \otimes I) (I \otimes T),
676:      * so that this multiply doesn't have to be redone for each matrix entry, but just once per column. The latter
677:      * transformation is much less likely to be applied, but we nonetheless count the minimum flops required. */
678:     PetscLogFlops((2.0*p*q-p)*m+2.0*p*a->nz);
679:   }
680:   if (s) {
681:     for (i=0; i<m; i++) {
682:       sums = y + p*i;
683:       bx   = x + q*i;
684:       if (i < b->AIJ->cmap->n) {
685:         for (j=0; j<q; j++) {
686:           for (k=0; k<p; k++) {
687:             sums[k] += s[k+j*p]*bx[j];
688:           }
689:         }
690:       }
691:     }
692:     PetscLogFlops(2.0*m*p*q);
693:   }

695:   VecRestoreArrayRead(xx,&x);
696:   VecRestoreArray(zz,&y);
697:   return 0;
698: }

700: PetscErrorCode MatMult_SeqKAIJ(Mat A,Vec xx,Vec yy)
701: {
702:   MatMultAdd_SeqKAIJ(A,xx,PETSC_NULL,yy);
703:   return 0;
704: }

706: #include <petsc/private/kernels/blockinvert.h>

708: PetscErrorCode MatInvertBlockDiagonal_SeqKAIJ(Mat A,const PetscScalar **values)
709: {
710:   Mat_SeqKAIJ       *b  = (Mat_SeqKAIJ*)A->data;
711:   Mat_SeqAIJ        *a  = (Mat_SeqAIJ*)b->AIJ->data;
712:   const PetscScalar *S  = b->S;
713:   const PetscScalar *T  = b->T;
714:   const PetscScalar *v  = a->a;
715:   const PetscInt     p  = b->p, q = b->q, m = b->AIJ->rmap->n, *idx = a->j, *ii = a->i;
716:   PetscInt          i,j,*v_pivots,dof,dof2;
717:   PetscScalar       *diag,aval,*v_work;


722:   dof  = p;
723:   dof2 = dof*dof;

725:   if (b->ibdiagvalid) {
726:     if (values) *values = b->ibdiag;
727:     return 0;
728:   }
729:   if (!b->ibdiag) {
730:     PetscMalloc1(dof2*m,&b->ibdiag);
731:     PetscLogObjectMemory((PetscObject)A,dof2*m*sizeof(PetscScalar));
732:   }
733:   if (values) *values = b->ibdiag;
734:   diag = b->ibdiag;

736:   PetscMalloc2(dof,&v_work,dof,&v_pivots);
737:   for (i=0; i<m; i++) {
738:     if (S) {
739:       PetscMemcpy(diag,S,dof2*sizeof(PetscScalar));
740:     } else {
741:       PetscMemzero(diag,dof2*sizeof(PetscScalar));
742:     }
743:     if (b->isTI) {
744:       aval = 0;
745:       for (j=ii[i]; j<ii[i+1]; j++) if (idx[j] == i) aval = v[j];
746:       for (j=0; j<dof; j++) diag[j+dof*j] += aval;
747:     } else if (T) {
748:       aval = 0;
749:       for (j=ii[i]; j<ii[i+1]; j++) if (idx[j] == i) aval = v[j];
750:       for (j=0; j<dof2; j++) diag[j] += aval*T[j];
751:     }
752:     PetscKernel_A_gets_inverse_A(dof,diag,v_pivots,v_work,PETSC_FALSE,NULL);
753:     diag += dof2;
754:   }
755:   PetscFree2(v_work,v_pivots);

757:   b->ibdiagvalid = PETSC_TRUE;
758:   return 0;
759: }

761: static PetscErrorCode MatGetDiagonalBlock_MPIKAIJ(Mat A,Mat *B)
762: {
763:   Mat_MPIKAIJ *kaij = (Mat_MPIKAIJ*) A->data;

765:   *B = kaij->AIJ;
766:   return 0;
767: }

769: static PetscErrorCode MatConvert_KAIJ_AIJ(Mat A,MatType newtype,MatReuse reuse,Mat *newmat)
770: {
771:   Mat_SeqKAIJ    *a = (Mat_SeqKAIJ*)A->data;
772:   Mat            AIJ,OAIJ,B;
773:   PetscInt       *d_nnz,*o_nnz = NULL,nz,i,j,m,d;
774:   const PetscInt p = a->p,q = a->q;
775:   PetscBool      ismpikaij,missing;

777:   if (reuse != MAT_REUSE_MATRIX) {
778:     PetscObjectTypeCompare((PetscObject)A,MATMPIKAIJ,&ismpikaij);
779:     if (ismpikaij) {
780:       Mat_MPIKAIJ *b = (Mat_MPIKAIJ*)A->data;
781:       AIJ = ((Mat_SeqKAIJ*)b->AIJ->data)->AIJ;
782:       OAIJ = ((Mat_SeqKAIJ*)b->OAIJ->data)->AIJ;
783:     } else {
784:       AIJ = a->AIJ;
785:       OAIJ = NULL;
786:     }
787:     MatCreate(PetscObjectComm((PetscObject)A),&B);
788:     MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);
789:     MatSetType(B,MATAIJ);
790:     MatGetSize(AIJ,&m,NULL);
791:     MatMissingDiagonal(AIJ,&missing,&d); /* assumption that all successive rows will have a missing diagonal */
792:     if (!missing || !a->S) d = m;
793:     PetscMalloc1(m*p,&d_nnz);
794:     for (i = 0; i < m; ++i) {
795:       MatGetRow_SeqAIJ(AIJ,i,&nz,NULL,NULL);
796:       for (j = 0; j < p; ++j) d_nnz[i*p + j] = nz*q + (i >= d)*q;
797:       MatRestoreRow_SeqAIJ(AIJ,i,&nz,NULL,NULL);
798:     }
799:     if (OAIJ) {
800:       PetscMalloc1(m*p,&o_nnz);
801:       for (i = 0; i < m; ++i) {
802:         MatGetRow_SeqAIJ(OAIJ,i,&nz,NULL,NULL);
803:         for (j = 0; j < p; ++j) o_nnz[i*p + j] = nz*q;
804:         MatRestoreRow_SeqAIJ(OAIJ,i,&nz,NULL,NULL);
805:       }
806:       MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);
807:     } else {
808:       MatSeqAIJSetPreallocation(B,0,d_nnz);
809:     }
810:     PetscFree(d_nnz);
811:     PetscFree(o_nnz);
812:   } else B = *newmat;
813:   MatConvert_Basic(A,newtype,MAT_REUSE_MATRIX,&B);
814:   if (reuse == MAT_INPLACE_MATRIX) {
815:     MatHeaderReplace(A,&B);
816:   } else *newmat = B;
817:   return 0;
818: }

820: PetscErrorCode MatSOR_SeqKAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
821: {
822:   PetscErrorCode    ierr;
823:   Mat_SeqKAIJ       *kaij = (Mat_SeqKAIJ*) A->data;
824:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)kaij->AIJ->data;
825:   const PetscScalar *aa = a->a, *T = kaij->T, *v;
826:   const PetscInt    m  = kaij->AIJ->rmap->n, *ai=a->i, *aj=a->j, p = kaij->p, q = kaij->q, *diag, *vi;
827:   const PetscScalar *b, *xb, *idiag;
828:   PetscScalar       *x, *work, *workt, *w, *y, *arr, *t, *arrt;
829:   PetscInt          i, j, k, i2, bs, bs2, nz;

831:   its = its*lits;
837:   else        {bs = p; bs2 = bs*bs; }

839:   if (!m) return 0;

841:   if (!kaij->ibdiagvalid) MatInvertBlockDiagonal_SeqKAIJ(A,NULL);
842:   idiag = kaij->ibdiag;
843:   diag  = a->diag;

845:   if (!kaij->sor.setup) {
846:     PetscMalloc5(bs,&kaij->sor.w,bs,&kaij->sor.y,m*bs,&kaij->sor.work,m*bs,&kaij->sor.t,m*bs2,&kaij->sor.arr);
847:     kaij->sor.setup = PETSC_TRUE;
848:   }
849:   y     = kaij->sor.y;
850:   w     = kaij->sor.w;
851:   work  = kaij->sor.work;
852:   t     = kaij->sor.t;
853:   arr   = kaij->sor.arr;

855:   VecGetArray(xx,&x);    ierr;
856:   VecGetArrayRead(bb,&b);

858:   if (flag & SOR_ZERO_INITIAL_GUESS) {
859:     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
860:       PetscKernel_w_gets_Ar_times_v(bs,bs,b,idiag,x);                            /* x[0:bs] <- D^{-1} b[0:bs] */
861:       PetscMemcpy(t,b,bs*sizeof(PetscScalar));
862:       i2     =  bs;
863:       idiag  += bs2;
864:       for (i=1; i<m; i++) {
865:         v  = aa + ai[i];
866:         vi = aj + ai[i];
867:         nz = diag[i] - ai[i];

869:         if (T) {                /* b - T (Arow * x) */
870:           PetscMemzero(w,bs*sizeof(PetscScalar));
871:           for (j=0; j<nz; j++) {
872:             for (k=0; k<bs; k++) w[k] -= v[j] * x[vi[j]*bs+k];
873:           }
874:           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs,w,T,&t[i2]);
875:           for (k=0; k<bs; k++) t[i2+k] += b[i2+k];
876:         } else if (kaij->isTI) {
877:           PetscMemcpy(t+i2,b+i2,bs*sizeof(PetscScalar));
878:           for (j=0; j<nz; j++) {
879:             for (k=0; k<bs; k++) t[i2+k] -= v[j] * x[vi[j]*bs+k];
880:           }
881:         } else {
882:           PetscMemcpy(t+i2,b+i2,bs*sizeof(PetscScalar));
883:         }

885:         PetscKernel_w_gets_Ar_times_v(bs,bs,t+i2,idiag,y);
886:         for (j=0; j<bs; j++) x[i2+j] = omega * y[j];

888:         idiag += bs2;
889:         i2    += bs;
890:       }
891:       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
892:       PetscLogFlops(1.0*bs2*a->nz);
893:       xb = t;
894:     } else xb = b;
895:     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
896:       idiag = kaij->ibdiag+bs2*(m-1);
897:       i2    = bs * (m-1);
898:       PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));
899:       PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
900:       i2    -= bs;
901:       idiag -= bs2;
902:       for (i=m-2; i>=0; i--) {
903:         v  = aa + diag[i] + 1 ;
904:         vi = aj + diag[i] + 1;
905:         nz = ai[i+1] - diag[i] - 1;

907:         if (T) {                /* FIXME: This branch untested */
908:           PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));
909:           /* copy all rows of x that are needed into contiguous space */
910:           workt = work;
911:           for (j=0; j<nz; j++) {
912:             PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));
913:             workt += bs;
914:           }
915:           arrt = arr;
916:           for (j=0; j<nz; j++) {
917:             PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));
918:             for (k=0; k<bs2; k++) arrt[k] *= v[j];
919:             arrt += bs2;
920:           }
921:           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
922:         } else if (kaij->isTI) {
923:           PetscMemcpy(w,t+i2,bs*sizeof(PetscScalar));
924:           for (j=0; j<nz; j++) {
925:             for (k=0; k<bs; k++) w[k] -= v[j] * x[vi[j]*bs+k];
926:           }
927:         }

929:         PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,y); /* RHS incorrect for omega != 1.0 */
930:         for (j=0; j<bs; j++) x[i2+j] = (1.0-omega) * x[i2+j] + omega * y[j];

932:         idiag -= bs2;
933:         i2    -= bs;
934:       }
935:       PetscLogFlops(1.0*bs2*(a->nz));
936:     }
937:     its--;
938:   }
939:   while (its--) {               /* FIXME: This branch not updated */
940:     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
941:       i2     =  0;
942:       idiag  = kaij->ibdiag;
943:       for (i=0; i<m; i++) {
944:         PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));

946:         v  = aa + ai[i];
947:         vi = aj + ai[i];
948:         nz = diag[i] - ai[i];
949:         workt = work;
950:         for (j=0; j<nz; j++) {
951:           PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));
952:           workt += bs;
953:         }
954:         arrt = arr;
955:         if (T) {
956:           for (j=0; j<nz; j++) {
957:             PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));
958:             for (k=0; k<bs2; k++) arrt[k] *= v[j];
959:             arrt += bs2;
960:           }
961:           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
962:         } else if (kaij->isTI) {
963:           for (j=0; j<nz; j++) {
964:             PetscMemzero(arrt,bs2*sizeof(PetscScalar));
965:             for (k=0; k<bs; k++) arrt[k+bs*k] = v[j];
966:             arrt += bs2;
967:           }
968:           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
969:         }
970:         PetscMemcpy(t+i2,w,bs*sizeof(PetscScalar));

972:         v  = aa + diag[i] + 1;
973:         vi = aj + diag[i] + 1;
974:         nz = ai[i+1] - diag[i] - 1;
975:         workt = work;
976:         for (j=0; j<nz; j++) {
977:           PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));
978:           workt += bs;
979:         }
980:         arrt = arr;
981:         if (T) {
982:           for (j=0; j<nz; j++) {
983:             PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));
984:             for (k=0; k<bs2; k++) arrt[k] *= v[j];
985:             arrt += bs2;
986:           }
987:           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
988:         } else if (kaij->isTI) {
989:           for (j=0; j<nz; j++) {
990:             PetscMemzero(arrt,bs2*sizeof(PetscScalar));
991:             for (k=0; k<bs; k++) arrt[k+bs*k] = v[j];
992:             arrt += bs2;
993:           }
994:           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
995:         }

997:         PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,y);
998:         for (j=0; j<bs; j++) *(x+i2+j) = (1.0-omega) * *(x+i2+j) + omega * *(y+j);

1000:         idiag += bs2;
1001:         i2    += bs;
1002:       }
1003:       xb = t;
1004:     }
1005:     else xb = b;
1006:     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
1007:       idiag = kaij->ibdiag+bs2*(m-1);
1008:       i2    = bs * (m-1);
1009:       if (xb == b) {
1010:         for (i=m-1; i>=0; i--) {
1011:           PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));

1013:           v  = aa + ai[i];
1014:           vi = aj + ai[i];
1015:           nz = diag[i] - ai[i];
1016:           workt = work;
1017:           for (j=0; j<nz; j++) {
1018:             PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));
1019:             workt += bs;
1020:           }
1021:           arrt = arr;
1022:           if (T) {
1023:             for (j=0; j<nz; j++) {
1024:               PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));
1025:               for (k=0; k<bs2; k++) arrt[k] *= v[j];
1026:               arrt += bs2;
1027:             }
1028:             PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
1029:           } else if (kaij->isTI) {
1030:             for (j=0; j<nz; j++) {
1031:               PetscMemzero(arrt,bs2*sizeof(PetscScalar));
1032:               for (k=0; k<bs; k++) arrt[k+bs*k] = v[j];
1033:               arrt += bs2;
1034:             }
1035:             PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
1036:           }

1038:           v  = aa + diag[i] + 1;
1039:           vi = aj + diag[i] + 1;
1040:           nz = ai[i+1] - diag[i] - 1;
1041:           workt = work;
1042:           for (j=0; j<nz; j++) {
1043:             PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));
1044:             workt += bs;
1045:           }
1046:           arrt = arr;
1047:           if (T) {
1048:             for (j=0; j<nz; j++) {
1049:               PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));
1050:               for (k=0; k<bs2; k++) arrt[k] *= v[j];
1051:               arrt += bs2;
1052:             }
1053:             PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
1054:           } else if (kaij->isTI) {
1055:             for (j=0; j<nz; j++) {
1056:               PetscMemzero(arrt,bs2*sizeof(PetscScalar));
1057:               for (k=0; k<bs; k++) arrt[k+bs*k] = v[j];
1058:               arrt += bs2;
1059:             }
1060:             PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
1061:           }

1063:           PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,y);
1064:           for (j=0; j<bs; j++) *(x+i2+j) = (1.0-omega) * *(x+i2+j) + omega * *(y+j);
1065:         }
1066:       } else {
1067:         for (i=m-1; i>=0; i--) {
1068:           PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));
1069:           v  = aa + diag[i] + 1;
1070:           vi = aj + diag[i] + 1;
1071:           nz = ai[i+1] - diag[i] - 1;
1072:           workt = work;
1073:           for (j=0; j<nz; j++) {
1074:             PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));
1075:             workt += bs;
1076:           }
1077:           arrt = arr;
1078:           if (T) {
1079:             for (j=0; j<nz; j++) {
1080:               PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));
1081:               for (k=0; k<bs2; k++) arrt[k] *= v[j];
1082:               arrt += bs2;
1083:             }
1084:             PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
1085:           } else if (kaij->isTI) {
1086:             for (j=0; j<nz; j++) {
1087:               PetscMemzero(arrt,bs2*sizeof(PetscScalar));
1088:               for (k=0; k<bs; k++) arrt[k+bs*k] = v[j];
1089:               arrt += bs2;
1090:             }
1091:             PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
1092:           }
1093:           PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,y);
1094:           for (j=0; j<bs; j++) *(x+i2+j) = (1.0-omega) * *(x+i2+j) + omega * *(y+j);
1095:         }
1096:       }
1097:       PetscLogFlops(1.0*bs2*(a->nz));
1098:     }
1099:   }

1101:   VecRestoreArray(xx,&x);    ierr;
1102:   VecRestoreArrayRead(bb,&b);
1103:   return 0;
1104: }

1106: /*===================================================================================*/

1108: PetscErrorCode MatMultAdd_MPIKAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1109: {
1110:   Mat_MPIKAIJ    *b = (Mat_MPIKAIJ*)A->data;

1112:   if (!yy) {
1113:     VecSet(zz,0.0);
1114:   } else {
1115:     VecCopy(yy,zz);
1116:   }
1117:   MatKAIJ_build_AIJ_OAIJ(A); /* Ensure b->AIJ and b->OAIJ are up to date. */
1118:   /* start the scatter */
1119:   VecScatterBegin(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);
1120:   (*b->AIJ->ops->multadd)(b->AIJ,xx,zz,zz);
1121:   VecScatterEnd(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);
1122:   (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,zz,zz);
1123:   return 0;
1124: }

1126: PetscErrorCode MatMult_MPIKAIJ(Mat A,Vec xx,Vec yy)
1127: {
1128:   MatMultAdd_MPIKAIJ(A,xx,PETSC_NULL,yy);
1129:   return 0;
1130: }

1132: PetscErrorCode MatInvertBlockDiagonal_MPIKAIJ(Mat A,const PetscScalar **values)
1133: {
1134:   Mat_MPIKAIJ     *b = (Mat_MPIKAIJ*)A->data;

1136:   MatKAIJ_build_AIJ_OAIJ(A); /* Ensure b->AIJ is up to date. */
1137:   (*b->AIJ->ops->invertblockdiagonal)(b->AIJ,values);
1138:   return 0;
1139: }

1141: /* ----------------------------------------------------------------*/

1143: PetscErrorCode MatGetRow_SeqKAIJ(Mat A,PetscInt row,PetscInt *ncols,PetscInt **cols,PetscScalar **values)
1144: {
1145:   Mat_SeqKAIJ     *b   = (Mat_SeqKAIJ*) A->data;
1146:   PetscErrorCode  diag = PETSC_FALSE;
1147:   PetscInt        nzaij,nz,*colsaij,*idx,i,j,p=b->p,q=b->q,r=row/p,s=row%p,c;
1148:   PetscScalar     *vaij,*v,*S=b->S,*T=b->T;

1151:   b->getrowactive = PETSC_TRUE;

1154:   if ((!S) && (!T) && (!b->isTI)) {
1155:     if (ncols)    *ncols  = 0;
1156:     if (cols)     *cols   = NULL;
1157:     if (values)   *values = NULL;
1158:     return 0;
1159:   }

1161:   if (T || b->isTI) {
1162:     MatGetRow_SeqAIJ(b->AIJ,r,&nzaij,&colsaij,&vaij);
1163:     c     = nzaij;
1164:     for (i=0; i<nzaij; i++) {
1165:       /* check if this row contains a diagonal entry */
1166:       if (colsaij[i] == r) {
1167:         diag = PETSC_TRUE;
1168:         c = i;
1169:       }
1170:     }
1171:   } else nzaij = c = 0;

1173:   /* calculate size of row */
1174:   nz = 0;
1175:   if (S)            nz += q;
1176:   if (T || b->isTI) nz += (diag && S ? (nzaij-1)*q : nzaij*q);

1178:   if (cols || values) {
1179:     PetscMalloc2(nz,&idx,nz,&v);
1180:     for (i=0; i<q; i++) {
1181:       /* We need to initialize the v[i] to zero to handle the case in which T is NULL (not the identity matrix). */
1182:       v[i] = 0.0;
1183:     }
1184:     if (b->isTI) {
1185:       for (i=0; i<nzaij; i++) {
1186:         for (j=0; j<q; j++) {
1187:           idx[i*q+j] = colsaij[i]*q+j;
1188:           v[i*q+j]   = (j==s ? vaij[i] : 0);
1189:         }
1190:       }
1191:     } else if (T) {
1192:       for (i=0; i<nzaij; i++) {
1193:         for (j=0; j<q; j++) {
1194:           idx[i*q+j] = colsaij[i]*q+j;
1195:           v[i*q+j]   = vaij[i]*T[s+j*p];
1196:         }
1197:       }
1198:     }
1199:     if (S) {
1200:       for (j=0; j<q; j++) {
1201:         idx[c*q+j] = r*q+j;
1202:         v[c*q+j]  += S[s+j*p];
1203:       }
1204:     }
1205:   }

1207:   if (ncols)    *ncols  = nz;
1208:   if (cols)     *cols   = idx;
1209:   if (values)   *values = v;
1210:   return 0;
1211: }

1213: PetscErrorCode MatRestoreRow_SeqKAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1214: {
1215:   if (nz) *nz = 0;
1216:   PetscFree2(*idx,*v);
1217:   ((Mat_SeqKAIJ*)A->data)->getrowactive = PETSC_FALSE;
1218:   return 0;
1219: }

1221: PetscErrorCode MatGetRow_MPIKAIJ(Mat A,PetscInt row,PetscInt *ncols,PetscInt **cols,PetscScalar **values)
1222: {
1223:   Mat_MPIKAIJ     *b      = (Mat_MPIKAIJ*) A->data;
1224:   Mat             AIJ     = b->A;
1225:   PetscBool       diag    = PETSC_FALSE;
1226:   Mat             MatAIJ,MatOAIJ;
1227:   const PetscInt  rstart=A->rmap->rstart,rend=A->rmap->rend,p=b->p,q=b->q,*garray;
1228:   PetscInt        nz,*idx,ncolsaij = 0,ncolsoaij = 0,*colsaij,*colsoaij,r,s,c,i,j,lrow;
1229:   PetscScalar     *v,*vals,*ovals,*S=b->S,*T=b->T;

1231:   MatKAIJ_build_AIJ_OAIJ(A); /* Ensure b->AIJ and b->OAIJ are up to date. */
1232:   MatAIJ  = ((Mat_SeqKAIJ*)b->AIJ->data)->AIJ;
1233:   MatOAIJ = ((Mat_SeqKAIJ*)b->OAIJ->data)->AIJ;
1235:   b->getrowactive = PETSC_TRUE;
1237:   lrow = row - rstart;

1239:   if ((!S) && (!T) && (!b->isTI)) {
1240:     if (ncols)    *ncols  = 0;
1241:     if (cols)     *cols   = NULL;
1242:     if (values)   *values = NULL;
1243:     return 0;
1244:   }

1246:   r = lrow/p;
1247:   s = lrow%p;

1249:   if (T || b->isTI) {
1250:     MatMPIAIJGetSeqAIJ(AIJ,NULL,NULL,&garray);
1251:     MatGetRow_SeqAIJ(MatAIJ,lrow/p,&ncolsaij,&colsaij,&vals);
1252:     MatGetRow_SeqAIJ(MatOAIJ,lrow/p,&ncolsoaij,&colsoaij,&ovals);

1254:     c     = ncolsaij + ncolsoaij;
1255:     for (i=0; i<ncolsaij; i++) {
1256:       /* check if this row contains a diagonal entry */
1257:       if (colsaij[i] == r) {
1258:         diag = PETSC_TRUE;
1259:         c = i;
1260:       }
1261:     }
1262:   } else c = 0;

1264:   /* calculate size of row */
1265:   nz = 0;
1266:   if (S)            nz += q;
1267:   if (T || b->isTI) nz += (diag && S ? (ncolsaij+ncolsoaij-1)*q : (ncolsaij+ncolsoaij)*q);

1269:   if (cols || values) {
1270:     PetscMalloc2(nz,&idx,nz,&v);
1271:     for (i=0; i<q; i++) {
1272:       /* We need to initialize the v[i] to zero to handle the case in which T is NULL (not the identity matrix). */
1273:       v[i] = 0.0;
1274:     }
1275:     if (b->isTI) {
1276:       for (i=0; i<ncolsaij; i++) {
1277:         for (j=0; j<q; j++) {
1278:           idx[i*q+j] = (colsaij[i]+rstart/p)*q+j;
1279:           v[i*q+j]   = (j==s ? vals[i] : 0.0);
1280:         }
1281:       }
1282:       for (i=0; i<ncolsoaij; i++) {
1283:         for (j=0; j<q; j++) {
1284:           idx[(i+ncolsaij)*q+j] = garray[colsoaij[i]]*q+j;
1285:           v[(i+ncolsaij)*q+j]   = (j==s ? ovals[i]: 0.0);
1286:         }
1287:       }
1288:     } else if (T) {
1289:       for (i=0; i<ncolsaij; i++) {
1290:         for (j=0; j<q; j++) {
1291:           idx[i*q+j] = (colsaij[i]+rstart/p)*q+j;
1292:           v[i*q+j]   = vals[i]*T[s+j*p];
1293:         }
1294:       }
1295:       for (i=0; i<ncolsoaij; i++) {
1296:         for (j=0; j<q; j++) {
1297:           idx[(i+ncolsaij)*q+j] = garray[colsoaij[i]]*q+j;
1298:           v[(i+ncolsaij)*q+j]   = ovals[i]*T[s+j*p];
1299:         }
1300:       }
1301:     }
1302:     if (S) {
1303:       for (j=0; j<q; j++) {
1304:         idx[c*q+j] = (r+rstart/p)*q+j;
1305:         v[c*q+j]  += S[s+j*p];
1306:       }
1307:     }
1308:   }

1310:   if (ncols)  *ncols  = nz;
1311:   if (cols)   *cols   = idx;
1312:   if (values) *values = v;
1313:   return 0;
1314: }

1316: PetscErrorCode MatRestoreRow_MPIKAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1317: {
1318:   PetscFree2(*idx,*v);
1319:   ((Mat_SeqKAIJ*)A->data)->getrowactive = PETSC_FALSE;
1320:   return 0;
1321: }

1323: PetscErrorCode  MatCreateSubMatrix_KAIJ(Mat mat,IS isrow,IS iscol,MatReuse cll,Mat *newmat)
1324: {
1325:   Mat            A;

1327:   MatConvert(mat,MATAIJ,MAT_INITIAL_MATRIX,&A);
1328:   MatCreateSubMatrix(A,isrow,iscol,cll,newmat);
1329:   MatDestroy(&A);
1330:   return 0;
1331: }

1333: /* ---------------------------------------------------------------------------------- */
1334: /*@C
1335:   MatCreateKAIJ - Creates a matrix type to be used for matrices of the following form:

1337:     [I \otimes S + A \otimes T]

1339:   where
1340:     S is a dense (p \times q) matrix
1341:     T is a dense (p \times q) matrix
1342:     A is an AIJ  (n \times n) matrix
1343:     I is the identity matrix
1344:   The resulting matrix is (np \times nq)

1346:   S and T are always stored independently on all processes as PetscScalar arrays in column-major format.

1348:   Collective

1350:   Input Parameters:
1351: + A - the AIJ matrix
1352: . p - number of rows in S and T
1353: . q - number of columns in S and T
1354: . S - the S matrix (can be PETSC_NULL), stored as a PetscScalar array (column-major)
1355: - T - the T matrix (can be PETSC_NULL), stored as a PetscScalar array (column-major)

1357:   Output Parameter:
1358: . kaij - the new KAIJ matrix

1360:   Notes:
1361:   This function increases the reference count on the AIJ matrix, so the user is free to destroy the matrix if it is not needed.
1362:   Changes to the entries of the AIJ matrix will immediately affect the KAIJ matrix.

1364:   Developer Notes:
1365:   In the MATMPIKAIJ case, the internal 'AIJ' and 'OAIJ' sequential KAIJ matrices are kept up to date by tracking the object state
1366:   of the AIJ matrix 'A' that describes the blockwise action of the MATMPIKAIJ matrix and, if the object state has changed, lazily
1367:   rebuilding 'AIJ' and 'OAIJ' just before executing operations with the MATMPIKAIJ matrix. If new types of operations are added,
1368:   routines implementing those must also ensure these are rebuilt when needed (by calling the internal MatKAIJ_build_AIJ_OAIJ() routine).

1370:   Level: advanced

1372: .seealso: MatKAIJSetAIJ(), MatKAIJSetS(), MatKAIJSetT(), MatKAIJGetAIJ(), MatKAIJGetS(), MatKAIJGetT(), MATKAIJ
1373: @*/
1374: PetscErrorCode  MatCreateKAIJ(Mat A,PetscInt p,PetscInt q,const PetscScalar S[],const PetscScalar T[],Mat *kaij)
1375: {
1376:   MatCreate(PetscObjectComm((PetscObject)A),kaij);
1377:   MatSetType(*kaij,MATKAIJ);
1378:   MatKAIJSetAIJ(*kaij,A);
1379:   MatKAIJSetS(*kaij,p,q,S);
1380:   MatKAIJSetT(*kaij,p,q,T);
1381:   MatSetUp(*kaij);
1382:   return 0;
1383: }

1385: /*MC
1386:   MATKAIJ - MATKAIJ = "kaij" - A matrix type to be used to evaluate matrices of form
1387:     [I \otimes S + A \otimes T],
1388:   where
1389:     S is a dense (p \times q) matrix,
1390:     T is a dense (p \times q) matrix,
1391:     A is an AIJ  (n \times n) matrix,
1392:     and I is the identity matrix.
1393:   The resulting matrix is (np \times nq).

1395:   S and T are always stored independently on all processes as PetscScalar arrays in column-major format.

1397:   Notes:
1398:   A linear system with multiple right-hand sides, AX = B, can be expressed in the KAIJ-friendly form of (A \otimes I) x = b,
1399:   where x and b are column vectors containing the row-major representations of X and B.

1401:   Level: advanced

1403: .seealso: MatKAIJSetAIJ(), MatKAIJSetS(), MatKAIJSetT(), MatKAIJGetAIJ(), MatKAIJGetS(), MatKAIJGetT(), MatCreateKAIJ()
1404: M*/

1406: PETSC_EXTERN PetscErrorCode MatCreate_KAIJ(Mat A)
1407: {
1408:   Mat_MPIKAIJ    *b;
1409:   PetscMPIInt    size;

1411:   PetscNewLog(A,&b);
1412:   A->data  = (void*)b;

1414:   PetscMemzero(A->ops,sizeof(struct _MatOps));

1416:   b->w    = NULL;
1417:   MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);
1418:   if (size == 1) {
1419:     PetscObjectChangeTypeName((PetscObject)A,MATSEQKAIJ);
1420:     A->ops->destroy             = MatDestroy_SeqKAIJ;
1421:     A->ops->mult                = MatMult_SeqKAIJ;
1422:     A->ops->multadd             = MatMultAdd_SeqKAIJ;
1423:     A->ops->invertblockdiagonal = MatInvertBlockDiagonal_SeqKAIJ;
1424:     A->ops->getrow              = MatGetRow_SeqKAIJ;
1425:     A->ops->restorerow          = MatRestoreRow_SeqKAIJ;
1426:     A->ops->sor                 = MatSOR_SeqKAIJ;
1427:     PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqkaij_seqaij_C",MatConvert_KAIJ_AIJ);
1428:   } else {
1429:     PetscObjectChangeTypeName((PetscObject)A,MATMPIKAIJ);
1430:     A->ops->destroy             = MatDestroy_MPIKAIJ;
1431:     A->ops->mult                = MatMult_MPIKAIJ;
1432:     A->ops->multadd             = MatMultAdd_MPIKAIJ;
1433:     A->ops->invertblockdiagonal = MatInvertBlockDiagonal_MPIKAIJ;
1434:     A->ops->getrow              = MatGetRow_MPIKAIJ;
1435:     A->ops->restorerow          = MatRestoreRow_MPIKAIJ;
1436:     PetscObjectComposeFunction((PetscObject)A,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPIKAIJ);
1437:     PetscObjectComposeFunction((PetscObject)A,"MatConvert_mpikaij_mpiaij_C",MatConvert_KAIJ_AIJ);
1438:   }
1439:   A->ops->setup           = MatSetUp_KAIJ;
1440:   A->ops->view            = MatView_KAIJ;
1441:   A->ops->createsubmatrix = MatCreateSubMatrix_KAIJ;
1442:   return 0;
1443: }