Actual source code: kaij.c


  2: /*
  3:   Defines the basic matrix operations for the KAIJ  matrix storage format.
  4:   This format is used to evaluate matrices of the form:

  6:     [I \otimes S + A \otimes T]

  8:   where
  9:     S is a dense (p \times q) matrix
 10:     T is a dense (p \times q) matrix
 11:     A is an AIJ  (n \times n) matrix
 12:     I is the identity matrix

 14:   The resulting matrix is (np \times nq)

 16:   We provide:
 17:      MatMult()
 18:      MatMultAdd()
 19:      MatInvertBlockDiagonal()
 20:   and
 21:      MatCreateKAIJ(Mat,PetscInt,PetscInt,const PetscScalar[],const PetscScalar[],Mat*)

 23:   This single directory handles both the sequential and parallel codes
 24: */

 26: #include <../src/mat/impls/kaij/kaij.h>
 27: #include <../src/mat/utils/freespace.h>
 28: #include <petsc/private/vecimpl.h>

 30: /*@C
 31:    MatKAIJGetAIJ - Get the AIJ matrix describing the blockwise action of the KAIJ matrix

 33:    Not Collective, but if the KAIJ matrix is parallel, the AIJ matrix is also parallel

 35:    Input Parameter:
 36: .  A - the KAIJ matrix

 38:    Output Parameter:
 39: .  B - the AIJ matrix

 41:    Level: advanced

 43:    Notes: The reference count on the AIJ matrix is not increased so you should not destroy it.

 45: .seealso: MatCreateKAIJ()
 46: @*/
 47: PetscErrorCode  MatKAIJGetAIJ(Mat A,Mat *B)
 48: {
 50:   PetscBool      ismpikaij,isseqkaij;

 53:   PetscObjectTypeCompare((PetscObject)A,MATMPIKAIJ,&ismpikaij);
 54:   PetscObjectTypeCompare((PetscObject)A,MATSEQKAIJ,&isseqkaij);
 55:   if (ismpikaij) {
 56:     Mat_MPIKAIJ *b = (Mat_MPIKAIJ*)A->data;

 58:     *B = b->A;
 59:   } else if (isseqkaij) {
 60:     Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data;

 62:     *B = b->AIJ;
 63:   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix passed in is not of type KAIJ");
 64:   return(0);
 65: }

 67: /*@C
 68:    MatKAIJGetS - Get the S matrix describing the shift action of the KAIJ matrix

 70:    Not Collective; the entire S is stored and returned independently on all processes.

 72:    Input Parameter:
 73: .  A - the KAIJ matrix

 75:    Output Parameters:
 76: +  m - the number of rows in S
 77: .  n - the number of columns in S
 78: -  S - the S matrix, in form of a scalar array in column-major format

 80:    Note: All output parameters are optional (pass NULL or PETSC_IGNORE if not desired)

 82:    Level: advanced

 84: .seealso: MatCreateKAIJ(), MatGetBlockSizes()
 85: @*/
 86: PetscErrorCode MatKAIJGetS(Mat A,PetscInt *m,PetscInt *n,PetscScalar **S)
 87: {
 88:   Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data;
 90:   if (m) *m = b->p;
 91:   if (n) *n = b->q;
 92:   if (S) *S = b->S;
 93:   return(0);
 94: }

 96: /*@C
 97:    MatKAIJGetSRead - Get a read-only pointer to the S matrix describing the shift action of the KAIJ matrix

 99:    Not Collective; the entire S is stored and returned independently on all processes.

101:    Input Parameter:
102: .  A - the KAIJ matrix

104:    Output Parameters:
105: +  m - the number of rows in S
106: .  n - the number of columns in S
107: -  S - the S matrix, in form of a scalar array in column-major format

109:    Note: All output parameters are optional (pass NULL or PETSC_IGNORE if not desired)

111:    Level: advanced

113: .seealso: MatCreateKAIJ(), MatGetBlockSizes()
114: @*/
115: PetscErrorCode MatKAIJGetSRead(Mat A,PetscInt *m,PetscInt *n,const PetscScalar **S)
116: {
117:   Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data;
119:   if (m) *m = b->p;
120:   if (n) *n = b->q;
121:   if (S) *S = b->S;
122:   return(0);
123: }

125: /*@C
126:   MatKAIJRestoreS - Restore array obtained with MatKAIJGetS()

128:   Not collective

130:   Input Parameter:
131: . A - the KAIJ matrix

133:   Output Parameter:
134: . S - location of pointer to array obtained with MatKAIJGetS()

136:   Note: This routine zeros the array pointer to prevent accidental reuse after it has been restored.
137:   If NULL is passed, it will not attempt to zero the array pointer.

139:   Level: advanced
140: .seealso: MatKAIJGetS(), MatKAIJGetSRead(), MatKAIJRestoreSRead()
141: @*/
142: PetscErrorCode MatKAIJRestoreS(Mat A,PetscScalar **S)
143: {

147:   if (S) *S = NULL;
148:   PetscObjectStateIncrease((PetscObject)A);
149:   return(0);
150: }

152: /*@C
153:   MatKAIJRestoreSRead - Restore array obtained with MatKAIJGetSRead()

155:   Not collective

157:   Input Parameter:
158: . A - the KAIJ matrix

160:   Output Parameter:
161: . S - location of pointer to array obtained with MatKAIJGetS()

163:   Note: This routine zeros the array pointer to prevent accidental reuse after it has been restored.
164:   If NULL is passed, it will not attempt to zero the array pointer.

166:   Level: advanced
167: .seealso: MatKAIJGetS(), MatKAIJGetSRead(), MatKAIJRestoreSRead()
168: @*/
169: PetscErrorCode MatKAIJRestoreSRead(Mat A,const PetscScalar **S)
170: {
172:   if (S) *S = NULL;
173:   return(0);
174: }

176: /*@C
177:    MatKAIJGetT - Get the transformation matrix T associated with the KAIJ matrix

179:    Not Collective; the entire T is stored and returned independently on all processes

181:    Input Parameter:
182: .  A - the KAIJ matrix

184:    Output Parameters:
185: +  m - the number of rows in T
186: .  n - the number of columns in T
187: -  T - the T matrix, in form of a scalar array in column-major format

189:    Note: All output parameters are optional (pass NULL or PETSC_IGNORE if not desired)

191:    Level: advanced

193: .seealso: MatCreateKAIJ(), MatGetBlockSizes()
194: @*/
195: PetscErrorCode MatKAIJGetT(Mat A,PetscInt *m,PetscInt *n,PetscScalar **T)
196: {
197:   Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data;
199:   if (m) *m = b->p;
200:   if (n) *n = b->q;
201:   if (T) *T = b->T;
202:   return(0);
203: }

205: /*@C
206:    MatKAIJGetTRead - Get a read-only pointer to the transformation matrix T associated with the KAIJ matrix

208:    Not Collective; the entire T is stored and returned independently on all processes

210:    Input Parameter:
211: .  A - the KAIJ matrix

213:    Output Parameters:
214: +  m - the number of rows in T
215: .  n - the number of columns in T
216: -  T - the T matrix, in form of a scalar array in column-major format

218:    Note: All output parameters are optional (pass NULL or PETSC_IGNORE if not desired)

220:    Level: advanced

222: .seealso: MatCreateKAIJ(), MatGetBlockSizes()
223: @*/
224: PetscErrorCode MatKAIJGetTRead(Mat A,PetscInt *m,PetscInt *n,const PetscScalar **T)
225: {
226:   Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data;
228:   if (m) *m = b->p;
229:   if (n) *n = b->q;
230:   if (T) *T = b->T;
231:   return(0);
232: }

234: /*@C
235:   MatKAIJRestoreT - Restore array obtained with MatKAIJGetT()

237:   Not collective

239:   Input Parameter:
240: . A - the KAIJ matrix

242:   Output Parameter:
243: . T - location of pointer to array obtained with MatKAIJGetS()

245:   Note: This routine zeros the array pointer to prevent accidental reuse after it has been restored.
246:   If NULL is passed, it will not attempt to zero the array pointer.

248:   Level: advanced
249: .seealso: MatKAIJGetT(), MatKAIJGetTRead(), MatKAIJRestoreTRead()
250: @*/
251: PetscErrorCode MatKAIJRestoreT(Mat A,PetscScalar **T)
252: {

256:   if (T) *T = NULL;
257:   PetscObjectStateIncrease((PetscObject)A);
258:   return(0);
259: }

261: /*@C
262:   MatKAIJRestoreTRead - Restore array obtained with MatKAIJGetTRead()

264:   Not collective

266:   Input Parameter:
267: . A - the KAIJ matrix

269:   Output Parameter:
270: . T - location of pointer to array obtained with MatKAIJGetS()

272:   Note: This routine zeros the array pointer to prevent accidental reuse after it has been restored.
273:   If NULL is passed, it will not attempt to zero the array pointer.

275:   Level: advanced
276: .seealso: MatKAIJGetT(), MatKAIJGetTRead(), MatKAIJRestoreTRead()
277: @*/
278: PetscErrorCode MatKAIJRestoreTRead(Mat A,const PetscScalar **T)
279: {
281:   if (T) *T = NULL;
282:   return(0);
283: }

285: /*@
286:    MatKAIJSetAIJ - Set the AIJ matrix describing the blockwise action of the KAIJ matrix

288:    Logically Collective; if the AIJ matrix is parallel, the KAIJ matrix is also parallel

290:    Input Parameters:
291: +  A - the KAIJ matrix
292: -  B - the AIJ matrix

294:    Notes:
295:    This function increases the reference count on the AIJ matrix, so the user is free to destroy the matrix if it is not needed.
296:    Changes to the entries of the AIJ matrix will immediately affect the KAIJ matrix.

298:    Level: advanced

300: .seealso: MatKAIJGetAIJ(), MatKAIJSetS(), MatKAIJSetT()
301: @*/
302: PetscErrorCode MatKAIJSetAIJ(Mat A,Mat B)
303: {
305:   PetscMPIInt    size;

308:   MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);
309:   if (size == 1) {
310:     Mat_SeqKAIJ *a = (Mat_SeqKAIJ*)A->data;
311:     a->AIJ = B;
312:   } else {
313:     Mat_MPIKAIJ *a = (Mat_MPIKAIJ*)A->data;
314:     a->A = B;
315:   }
316:   PetscObjectReference((PetscObject)B);
317:   return(0);
318: }

320: /*@C
321:    MatKAIJSetS - Set the S matrix describing the shift action of the KAIJ matrix

323:    Logically Collective; the entire S is stored independently on all processes.

325:    Input Parameters:
326: +  A - the KAIJ matrix
327: .  p - the number of rows in S
328: .  q - the number of columns in S
329: -  S - the S matrix, in form of a scalar array in column-major format

331:    Notes: The dimensions p and q must match those of the transformation matrix T associated with the KAIJ matrix.
332:    The S matrix is copied, so the user can destroy this array.

334:    Level: Advanced

336: .seealso: MatKAIJGetS(), MatKAIJSetT(), MatKAIJSetAIJ()
337: @*/
338: PetscErrorCode MatKAIJSetS(Mat A,PetscInt p,PetscInt q,const PetscScalar S[])
339: {
341:   Mat_SeqKAIJ    *a = (Mat_SeqKAIJ*)A->data;

344:   PetscFree(a->S);
345:   if (S) {
346:     PetscMalloc1(p*q*sizeof(PetscScalar),&a->S);
347:     PetscMemcpy(a->S,S,p*q*sizeof(PetscScalar));
348:   } else  a->S = NULL;

350:   a->p = p;
351:   a->q = q;
352:   return(0);
353: }

355: /*@C
356:    MatKAIJGetScaledIdentity - Check if both S and T are scaled identities.

358:    Logically Collective.

360:    Input Parameter:
361: .  A - the KAIJ matrix

363:   Output Parameter:
364: .  identity - the Boolean value

366:    Level: Advanced

368: .seealso: MatKAIJGetS(), MatKAIJGetT()
369: @*/
370: PetscErrorCode MatKAIJGetScaledIdentity(Mat A,PetscBool* identity)
371: {
372:   Mat_SeqKAIJ *a = (Mat_SeqKAIJ*)A->data;
373:   PetscInt    i,j;

376:   if (a->p != a->q) {
377:     *identity = PETSC_FALSE;
378:     return(0);
379:   } else *identity = PETSC_TRUE;
380:   if (!a->isTI || a->S) {
381:     for (i=0; i<a->p && *identity; i++) {
382:       for (j=0; j<a->p && *identity; j++) {
383:         if (i != j) {
384:           if (a->S && PetscAbsScalar(a->S[i+j*a->p]) > PETSC_SMALL) *identity = PETSC_FALSE;
385:           if (a->T && PetscAbsScalar(a->T[i+j*a->p]) > PETSC_SMALL) *identity = PETSC_FALSE;
386:         } else {
387:           if (a->S && PetscAbsScalar(a->S[i*(a->p+1)]-a->S[0]) > PETSC_SMALL) *identity = PETSC_FALSE;
388:           if (a->T && PetscAbsScalar(a->T[i*(a->p+1)]-a->T[0]) > PETSC_SMALL) *identity = PETSC_FALSE;
389:         }
390:       }
391:     }
392:   }
393:   return(0);
394: }

396: /*@C
397:    MatKAIJSetT - Set the transformation matrix T associated with the KAIJ matrix

399:    Logically Collective; the entire T is stored independently on all processes.

401:    Input Parameters:
402: +  A - the KAIJ matrix
403: .  p - the number of rows in S
404: .  q - the number of columns in S
405: -  T - the T matrix, in form of a scalar array in column-major format

407:    Notes: The dimensions p and q must match those of the shift matrix S associated with the KAIJ matrix.
408:    The T matrix is copied, so the user can destroy this array.

410:    Level: Advanced

412: .seealso: MatKAIJGetT(), MatKAIJSetS(), MatKAIJSetAIJ()
413: @*/
414: PetscErrorCode MatKAIJSetT(Mat A,PetscInt p,PetscInt q,const PetscScalar T[])
415: {
417:   PetscInt       i,j;
418:   Mat_SeqKAIJ    *a = (Mat_SeqKAIJ*)A->data;
419:   PetscBool      isTI = PETSC_FALSE;

422:   /* check if T is an identity matrix */
423:   if (T && (p == q)) {
424:     isTI = PETSC_TRUE;
425:     for (i=0; i<p; i++) {
426:       for (j=0; j<q; j++) {
427:         if (i == j) {
428:           /* diagonal term must be 1 */
429:           if (T[i+j*p] != 1.0) isTI = PETSC_FALSE;
430:         } else {
431:           /* off-diagonal term must be 0 */
432:           if (T[i+j*p] != 0.0) isTI = PETSC_FALSE;
433:         }
434:       }
435:     }
436:   }
437:   a->isTI = isTI;

439:   PetscFree(a->T);
440:   if (T && (!isTI)) {
441:     PetscMalloc1(p*q*sizeof(PetscScalar),&a->T);
442:     PetscMemcpy(a->T,T,p*q*sizeof(PetscScalar));
443:   } else a->T = NULL;

445:   a->p = p;
446:   a->q = q;
447:   return(0);
448: }

450: PetscErrorCode MatDestroy_SeqKAIJ(Mat A)
451: {
453:   Mat_SeqKAIJ    *b = (Mat_SeqKAIJ*)A->data;

456:   MatDestroy(&b->AIJ);
457:   PetscFree(b->S);
458:   PetscFree(b->T);
459:   PetscFree(b->ibdiag);
460:   PetscFree5(b->sor.w,b->sor.y,b->sor.work,b->sor.t,b->sor.arr);
461:   PetscFree(A->data);
462:   return(0);
463: }

465: PETSC_INTERN PetscErrorCode MatKAIJ_build_AIJ_OAIJ(Mat A)
466: {
467:   PetscErrorCode   ierr;
468:   Mat_MPIKAIJ      *a;
469:   Mat_MPIAIJ       *mpiaij;
470:   PetscScalar      *T;
471:   PetscInt         i,j;
472:   PetscObjectState state;

475:   a = (Mat_MPIKAIJ*)A->data;
476:   mpiaij = (Mat_MPIAIJ*)a->A->data;

478:   PetscObjectStateGet((PetscObject)a->A,&state);
479:   if (state == a->state) {
480:     /* The existing AIJ and KAIJ members are up-to-date, so simply exit. */
481:     return(0);
482:   } else {
483:     MatDestroy(&a->AIJ);
484:     MatDestroy(&a->OAIJ);
485:     if (a->isTI) {
486:       /* If the transformation matrix associated with the parallel matrix A is the identity matrix, then a->T will be NULL.
487:        * In this case, if we pass a->T directly to the MatCreateKAIJ() calls to create the sequential submatrices, the routine will
488:        * not be able to tell that transformation matrix should be set to the identity; thus we create a temporary identity matrix
489:        * to pass in. */
490:       PetscMalloc1(a->p*a->q*sizeof(PetscScalar),&T);
491:       for (i=0; i<a->p; i++) {
492:         for (j=0; j<a->q; j++) {
493:           if (i==j) T[i+j*a->p] = 1.0;
494:           else      T[i+j*a->p] = 0.0;
495:         }
496:       }
497:     } else T = a->T;
498:     MatCreateKAIJ(mpiaij->A,a->p,a->q,a->S,T,&a->AIJ);
499:     MatCreateKAIJ(mpiaij->B,a->p,a->q,NULL,T,&a->OAIJ);
500:     if (a->isTI) {
501:       PetscFree(T);
502:     }
503:     a->state = state;
504:   }

506:   return(0);
507: }

509: PetscErrorCode MatSetUp_KAIJ(Mat A)
510: {
512:   PetscInt       n;
513:   PetscMPIInt    size;
514:   Mat_SeqKAIJ    *seqkaij = (Mat_SeqKAIJ*)A->data;

517:   MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);
518:   if (size == 1) {
519:     MatSetSizes(A,seqkaij->p*seqkaij->AIJ->rmap->n,seqkaij->q*seqkaij->AIJ->cmap->n,seqkaij->p*seqkaij->AIJ->rmap->N,seqkaij->q*seqkaij->AIJ->cmap->N);
520:     PetscLayoutSetBlockSize(A->rmap,seqkaij->p);
521:     PetscLayoutSetBlockSize(A->cmap,seqkaij->q);
522:     PetscLayoutSetUp(A->rmap);
523:     PetscLayoutSetUp(A->cmap);
524:   } else {
525:     Mat_MPIKAIJ *a;
526:     Mat_MPIAIJ  *mpiaij;
527:     IS          from,to;
528:     Vec         gvec;

530:     a = (Mat_MPIKAIJ*)A->data;
531:     mpiaij = (Mat_MPIAIJ*)a->A->data;
532:     MatSetSizes(A,a->p*a->A->rmap->n,a->q*a->A->cmap->n,a->p*a->A->rmap->N,a->q*a->A->cmap->N);
533:     PetscLayoutSetBlockSize(A->rmap,seqkaij->p);
534:     PetscLayoutSetBlockSize(A->cmap,seqkaij->q);
535:     PetscLayoutSetUp(A->rmap);
536:     PetscLayoutSetUp(A->cmap);

538:     MatKAIJ_build_AIJ_OAIJ(A);

540:     VecGetSize(mpiaij->lvec,&n);
541:     VecCreate(PETSC_COMM_SELF,&a->w);
542:     VecSetSizes(a->w,n*a->q,n*a->q);
543:     VecSetBlockSize(a->w,a->q);
544:     VecSetType(a->w,VECSEQ);

546:     /* create two temporary Index sets for build scatter gather */
547:     ISCreateBlock(PetscObjectComm((PetscObject)a->A),a->q,n,mpiaij->garray,PETSC_COPY_VALUES,&from);
548:     ISCreateStride(PETSC_COMM_SELF,n*a->q,0,1,&to);

550:     /* create temporary global vector to generate scatter context */
551:     VecCreateMPIWithArray(PetscObjectComm((PetscObject)a->A),a->q,a->q*a->A->cmap->n,a->q*a->A->cmap->N,NULL,&gvec);

553:     /* generate the scatter context */
554:     VecScatterCreate(gvec,from,a->w,to,&a->ctx);

556:     ISDestroy(&from);
557:     ISDestroy(&to);
558:     VecDestroy(&gvec);
559:   }

561:   A->assembled = PETSC_TRUE;
562:   return(0);
563: }

565: PetscErrorCode MatView_KAIJ(Mat A,PetscViewer viewer)
566: {
567:   PetscViewerFormat format;
568:   Mat_SeqKAIJ       *a = (Mat_SeqKAIJ*)A->data;
569:   Mat               B;
570:   PetscInt          i;
571:   PetscErrorCode    ierr;
572:   PetscBool         ismpikaij;

575:   PetscObjectTypeCompare((PetscObject)A,MATMPIKAIJ,&ismpikaij);
576:   PetscViewerGetFormat(viewer,&format);
577:   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL || format == PETSC_VIEWER_ASCII_IMPL) {
578:     PetscViewerASCIIPrintf(viewer,"S and T have %D rows and %D columns\n",a->p,a->q);

580:     /* Print appropriate details for S. */
581:     if (!a->S) {
582:       PetscViewerASCIIPrintf(viewer,"S is NULL\n");
583:     } else if (format == PETSC_VIEWER_ASCII_IMPL) {
584:       PetscViewerASCIIPrintf(viewer,"Entries of S are ");
585:       for (i=0; i<(a->p * a->q); i++) {
586: #if defined(PETSC_USE_COMPLEX)
587:         PetscViewerASCIIPrintf(viewer,"%18.16e %18.16e ",(double)PetscRealPart(a->S[i]),(double)PetscImaginaryPart(a->S[i]));
588: #else
589:         PetscViewerASCIIPrintf(viewer,"%18.16e ",(double)PetscRealPart(a->S[i]));
590: #endif
591:       }
592:       PetscViewerASCIIPrintf(viewer,"\n");
593:     }

595:     /* Print appropriate details for T. */
596:     if (a->isTI) {
597:       PetscViewerASCIIPrintf(viewer,"T is the identity matrix\n");
598:     } else if (!a->T) {
599:       PetscViewerASCIIPrintf(viewer,"T is NULL\n");
600:     } else if (format == PETSC_VIEWER_ASCII_IMPL) {
601:       PetscViewerASCIIPrintf(viewer,"Entries of T are ");
602:       for (i=0; i<(a->p * a->q); i++) {
603: #if defined(PETSC_USE_COMPLEX)
604:         PetscViewerASCIIPrintf(viewer,"%18.16e %18.16e ",(double)PetscRealPart(a->T[i]),(double)PetscImaginaryPart(a->T[i]));
605: #else
606:         PetscViewerASCIIPrintf(viewer,"%18.16e ",(double)PetscRealPart(a->T[i]));
607: #endif
608:       }
609:       PetscViewerASCIIPrintf(viewer,"\n");
610:     }

612:     /* Now print details for the AIJ matrix, using the AIJ viewer. */
613:     PetscViewerASCIIPrintf(viewer,"Now viewing the associated AIJ matrix:\n");
614:     if (ismpikaij) {
615:       Mat_MPIKAIJ *b = (Mat_MPIKAIJ*)A->data;
616:       MatView(b->A,viewer);
617:     } else {
618:       MatView(a->AIJ,viewer);
619:     }

621:   } else {
622:     /* For all other matrix viewer output formats, simply convert to an AIJ matrix and call MatView() on that. */
623:     if (ismpikaij) {
624:       MatConvert(A,MATMPIAIJ,MAT_INITIAL_MATRIX,&B);
625:     } else {
626:       MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);
627:     }
628:     MatView(B,viewer);
629:     MatDestroy(&B);
630:   }
631:   return(0);
632: }

634: PetscErrorCode MatDestroy_MPIKAIJ(Mat A)
635: {
637:   Mat_MPIKAIJ    *b = (Mat_MPIKAIJ*)A->data;

640:   MatDestroy(&b->AIJ);
641:   MatDestroy(&b->OAIJ);
642:   MatDestroy(&b->A);
643:   VecScatterDestroy(&b->ctx);
644:   VecDestroy(&b->w);
645:   PetscFree(b->S);
646:   PetscFree(b->T);
647:   PetscFree(b->ibdiag);
648:   PetscFree(A->data);
649:   return(0);
650: }

652: /* --------------------------------------------------------------------------------------*/

654: /* zz = yy + Axx */
655: PetscErrorCode MatMultAdd_SeqKAIJ(Mat A,Vec xx,Vec yy,Vec zz)
656: {
657:   Mat_SeqKAIJ       *b = (Mat_SeqKAIJ*)A->data;
658:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
659:   const PetscScalar *s = b->S, *t = b->T;
660:   const PetscScalar *x,*v,*bx;
661:   PetscScalar       *y,*sums;
662:   PetscErrorCode    ierr;
663:   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
664:   PetscInt          n,i,jrow,j,l,p=b->p,q=b->q,k;

667:   if (!yy) {
668:     VecSet(zz,0.0);
669:   } else {
670:     VecCopy(yy,zz);
671:   }
672:   if ((!s) && (!t) && (!b->isTI)) return(0);

674:   VecGetArrayRead(xx,&x);
675:   VecGetArray(zz,&y);
676:   idx  = a->j;
677:   v    = a->a;
678:   ii   = a->i;

680:   if (b->isTI) {
681:     for (i=0; i<m; i++) {
682:       jrow = ii[i];
683:       n    = ii[i+1] - jrow;
684:       sums = y + p*i;
685:       for (j=0; j<n; j++) {
686:         for (k=0; k<p; k++) {
687:           sums[k] += v[jrow+j]*x[q*idx[jrow+j]+k];
688:         }
689:       }
690:     }
691:     PetscLogFlops(3.0*(a->nz)*p);
692:   } else if (t) {
693:     for (i=0; i<m; i++) {
694:       jrow = ii[i];
695:       n    = ii[i+1] - jrow;
696:       sums = y + p*i;
697:       for (j=0; j<n; j++) {
698:         for (k=0; k<p; k++) {
699:           for (l=0; l<q; l++) {
700:             sums[k] += v[jrow+j]*t[k+l*p]*x[q*idx[jrow+j]+l];
701:           }
702:         }
703:       }
704:     }
705:     /* The flop count below assumes that v[jrow+j] is hoisted out (which an optimizing compiler is likely to do),
706:      * and also that T part is hoisted outside this loop (in exchange for temporary storage) as (A \otimes I) (I \otimes T),
707:      * so that this multiply doesn't have to be redone for each matrix entry, but just once per column. The latter
708:      * transformation is much less likely to be applied, but we nonetheless count the minimum flops required. */
709:     PetscLogFlops((2.0*p*q-p)*m+2.0*p*a->nz);
710:   }
711:   if (s) {
712:     for (i=0; i<m; i++) {
713:       sums = y + p*i;
714:       bx   = x + q*i;
715:       if (i < b->AIJ->cmap->n) {
716:         for (j=0; j<q; j++) {
717:           for (k=0; k<p; k++) {
718:             sums[k] += s[k+j*p]*bx[j];
719:           }
720:         }
721:       }
722:     }
723:     PetscLogFlops(2.0*m*p*q);
724:   }

726:   VecRestoreArrayRead(xx,&x);
727:   VecRestoreArray(zz,&y);
728:   return(0);
729: }

731: PetscErrorCode MatMult_SeqKAIJ(Mat A,Vec xx,Vec yy)
732: {
735:   MatMultAdd_SeqKAIJ(A,xx,PETSC_NULL,yy);
736:   return(0);
737: }

739: #include <petsc/private/kernels/blockinvert.h>

741: PetscErrorCode MatInvertBlockDiagonal_SeqKAIJ(Mat A,const PetscScalar **values)
742: {
743:   Mat_SeqKAIJ       *b  = (Mat_SeqKAIJ*)A->data;
744:   Mat_SeqAIJ        *a  = (Mat_SeqAIJ*)b->AIJ->data;
745:   const PetscScalar *S  = b->S;
746:   const PetscScalar *T  = b->T;
747:   const PetscScalar *v  = a->a;
748:   const PetscInt     p  = b->p, q = b->q, m = b->AIJ->rmap->n, *idx = a->j, *ii = a->i;
749:   PetscErrorCode    ierr;
750:   PetscInt          i,j,*v_pivots,dof,dof2;
751:   PetscScalar       *diag,aval,*v_work;

754:   if (p != q) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MATKAIJ: Block size must be square to calculate inverse.");
755:   if ((!S) && (!T) && (!b->isTI)) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MATKAIJ: Cannot invert a zero matrix.");

757:   dof  = p;
758:   dof2 = dof*dof;

760:   if (b->ibdiagvalid) {
761:     if (values) *values = b->ibdiag;
762:     return(0);
763:   }
764:   if (!b->ibdiag) {
765:     PetscMalloc1(dof2*m*sizeof(PetscScalar),&b->ibdiag);
766:     PetscLogObjectMemory((PetscObject)A,dof2*m*sizeof(PetscScalar));
767:   }
768:   if (values) *values = b->ibdiag;
769:   diag = b->ibdiag;

771:   PetscMalloc2(dof,&v_work,dof,&v_pivots);
772:   for (i=0; i<m; i++) {
773:     if (S) {
774:       PetscMemcpy(diag,S,dof2*sizeof(PetscScalar));
775:     } else {
776:       PetscMemzero(diag,dof2*sizeof(PetscScalar));
777:     }
778:     if (b->isTI) {
779:       aval = 0;
780:       for (j=ii[i]; j<ii[i+1]; j++) if (idx[j] == i) aval = v[j];
781:       for (j=0; j<dof; j++) diag[j+dof*j] += aval;
782:     } else if (T) {
783:       aval = 0;
784:       for (j=ii[i]; j<ii[i+1]; j++) if (idx[j] == i) aval = v[j];
785:       for (j=0; j<dof2; j++) diag[j] += aval*T[j];
786:     }
787:     PetscKernel_A_gets_inverse_A(dof,diag,v_pivots,v_work,PETSC_FALSE,NULL);
788:     diag += dof2;
789:   }
790:   PetscFree2(v_work,v_pivots);

792:   b->ibdiagvalid = PETSC_TRUE;
793:   return(0);
794: }

796: static PetscErrorCode MatGetDiagonalBlock_MPIKAIJ(Mat A,Mat *B)
797: {
798:   Mat_MPIKAIJ *kaij = (Mat_MPIKAIJ*) A->data;

801:   *B = kaij->AIJ;
802:   return(0);
803: }

805: PetscErrorCode MatSOR_SeqKAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
806: {
807:   PetscErrorCode    ierr;
808:   Mat_SeqKAIJ       *kaij = (Mat_SeqKAIJ*) A->data;
809:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)kaij->AIJ->data;
810:   const PetscScalar *aa = a->a, *T = kaij->T, *v;
811:   const PetscInt    m  = kaij->AIJ->rmap->n, *ai=a->i, *aj=a->j, p = kaij->p, q = kaij->q, *diag, *vi;
812:   const PetscScalar *b, *xb, *idiag;
813:   PetscScalar       *x, *work, *workt, *w, *y, *arr, *t, *arrt;
814:   PetscInt          i, j, k, i2, bs, bs2, nz;

817:   its = its*lits;
818:   if (flag & SOR_EISENSTAT) SETERRQ (PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
819:   if (its <= 0)             SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
820:   if (fshift)               SETERRQ (PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for diagonal shift");
821:   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for applying upper or lower triangular parts");
822:   if (p != q) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSOR for KAIJ: No support for non-square dense blocks");
823:   else        {bs = p; bs2 = bs*bs; }

825:   if (!m) return(0);

827:   if (!kaij->ibdiagvalid) { MatInvertBlockDiagonal_SeqKAIJ(A,NULL); }
828:   idiag = kaij->ibdiag;
829:   diag  = a->diag;

831:   if (!kaij->sor.setup) {
832:     PetscMalloc5(bs,&kaij->sor.w,bs,&kaij->sor.y,m*bs,&kaij->sor.work,m*bs,&kaij->sor.t,m*bs2,&kaij->sor.arr);
833:     kaij->sor.setup = PETSC_TRUE;
834:   }
835:   y     = kaij->sor.y;
836:   w     = kaij->sor.w;
837:   work  = kaij->sor.work;
838:   t     = kaij->sor.t;
839:   arr   = kaij->sor.arr;

841:   VecGetArray(xx,&x);    
842:   VecGetArrayRead(bb,&b);

844:   if (flag & SOR_ZERO_INITIAL_GUESS) {
845:     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
846:       PetscKernel_w_gets_Ar_times_v(bs,bs,b,idiag,x);                            /* x[0:bs] <- D^{-1} b[0:bs] */
847:        PetscMemcpy(t,b,bs*sizeof(PetscScalar));
848:       i2     =  bs;
849:       idiag  += bs2;
850:       for (i=1; i<m; i++) {
851:         v  = aa + ai[i];
852:         vi = aj + ai[i];
853:         nz = diag[i] - ai[i];

855:         if (T) {                /* b - T (Arow * x) */
856:           PetscMemzero(w,bs*sizeof(PetscScalar));
857:           for (j=0; j<nz; j++) {
858:             for (k=0; k<bs; k++) w[k] -= v[j] * x[vi[j]*bs+k];
859:           }
860:           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs,w,T,&t[i2]);
861:           for (k=0; k<bs; k++) t[i2+k] += b[i2+k];
862:         } else if (kaij->isTI) {
863:           PetscMemcpy(t+i2,b+i2,bs*sizeof(PetscScalar));
864:           for (j=0; j<nz; j++) {
865:             for (k=0; k<bs; k++) t[i2+k] -= v[j] * x[vi[j]*bs+k];
866:           }
867:         } else {
868:           PetscMemcpy(t+i2,b+i2,bs*sizeof(PetscScalar));
869:         }

871:         PetscKernel_w_gets_Ar_times_v(bs,bs,t+i2,idiag,y);
872:         for (j=0; j<bs; j++) x[i2+j] = omega * y[j];

874:         idiag += bs2;
875:         i2    += bs;
876:       }
877:       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
878:       PetscLogFlops(1.0*bs2*a->nz);
879:       xb = t;
880:     } else xb = b;
881:     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
882:       idiag = kaij->ibdiag+bs2*(m-1);
883:       i2    = bs * (m-1);
884:       PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));
885:       PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
886:       i2    -= bs;
887:       idiag -= bs2;
888:       for (i=m-2; i>=0; i--) {
889:         v  = aa + diag[i] + 1 ;
890:         vi = aj + diag[i] + 1;
891:         nz = ai[i+1] - diag[i] - 1;

893:         if (T) {                /* FIXME: This branch untested */
894:           PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));
895:           /* copy all rows of x that are needed into contiguous space */
896:           workt = work;
897:           for (j=0; j<nz; j++) {
898:             PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));
899:             workt += bs;
900:           }
901:           arrt = arr;
902:           for (j=0; j<nz; j++) {
903:             PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));
904:             for (k=0; k<bs2; k++) arrt[k] *= v[j];
905:             arrt += bs2;
906:           }
907:           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
908:         } else if (kaij->isTI) {
909:           PetscMemcpy(w,t+i2,bs*sizeof(PetscScalar));
910:           for (j=0; j<nz; j++) {
911:             for (k=0; k<bs; k++) w[k] -= v[j] * x[vi[j]*bs+k];
912:           }
913:         }

915:         PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,y); /* RHS incorrect for omega != 1.0 */
916:         for (j=0; j<bs; j++) x[i2+j] = (1.0-omega) * x[i2+j] + omega * y[j];

918:         idiag -= bs2;
919:         i2    -= bs;
920:       }
921:       PetscLogFlops(1.0*bs2*(a->nz));
922:     }
923:     its--;
924:   }
925:   while (its--) {               /* FIXME: This branch not updated */
926:     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
927:       i2     =  0;
928:       idiag  = kaij->ibdiag;
929:       for (i=0; i<m; i++) {
930:         PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));

932:         v  = aa + ai[i];
933:         vi = aj + ai[i];
934:         nz = diag[i] - ai[i];
935:         workt = work;
936:         for (j=0; j<nz; j++) {
937:           PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));
938:           workt += bs;
939:         }
940:         arrt = arr;
941:         if (T) {
942:           for (j=0; j<nz; j++) {
943:             PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));
944:             for (k=0; k<bs2; k++) arrt[k] *= v[j];
945:             arrt += bs2;
946:           }
947:           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
948:         } else if (kaij->isTI) {
949:           for (j=0; j<nz; j++) {
950:             PetscMemzero(arrt,bs2*sizeof(PetscScalar));
951:             for (k=0; k<bs; k++) arrt[k+bs*k] = v[j];
952:             arrt += bs2;
953:           }
954:           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
955:         }
956:         PetscMemcpy(t+i2,w,bs*sizeof(PetscScalar));

958:         v  = aa + diag[i] + 1;
959:         vi = aj + diag[i] + 1;
960:         nz = ai[i+1] - diag[i] - 1;
961:         workt = work;
962:         for (j=0; j<nz; j++) {
963:           PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));
964:           workt += bs;
965:         }
966:         arrt = arr;
967:         if (T) {
968:           for (j=0; j<nz; j++) {
969:             PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));
970:             for (k=0; k<bs2; k++) arrt[k] *= v[j];
971:             arrt += bs2;
972:           }
973:           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
974:         } else if (kaij->isTI) {
975:           for (j=0; j<nz; j++) {
976:             PetscMemzero(arrt,bs2*sizeof(PetscScalar));
977:             for (k=0; k<bs; k++) arrt[k+bs*k] = v[j];
978:             arrt += bs2;
979:           }
980:           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
981:         }

983:         PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,y);
984:         for (j=0; j<bs; j++) *(x+i2+j) = (1.0-omega) * *(x+i2+j) + omega * *(y+j);

986:         idiag += bs2;
987:         i2    += bs;
988:       }
989:       xb = t;
990:     }
991:     else xb = b;
992:     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
993:       idiag = kaij->ibdiag+bs2*(m-1);
994:       i2    = bs * (m-1);
995:       if (xb == b) {
996:         for (i=m-1; i>=0; i--) {
997:           PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));

999:           v  = aa + ai[i];
1000:           vi = aj + ai[i];
1001:           nz = diag[i] - ai[i];
1002:           workt = work;
1003:           for (j=0; j<nz; j++) {
1004:             PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));
1005:             workt += bs;
1006:           }
1007:           arrt = arr;
1008:           if (T) {
1009:             for (j=0; j<nz; j++) {
1010:               PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));
1011:               for (k=0; k<bs2; k++) arrt[k] *= v[j];
1012:               arrt += bs2;
1013:             }
1014:             PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
1015:           } else if (kaij->isTI) {
1016:             for (j=0; j<nz; j++) {
1017:               PetscMemzero(arrt,bs2*sizeof(PetscScalar));
1018:               for (k=0; k<bs; k++) arrt[k+bs*k] = v[j];
1019:               arrt += bs2;
1020:             }
1021:             PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
1022:           }

1024:           v  = aa + diag[i] + 1;
1025:           vi = aj + diag[i] + 1;
1026:           nz = ai[i+1] - diag[i] - 1;
1027:           workt = work;
1028:           for (j=0; j<nz; j++) {
1029:             PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));
1030:             workt += bs;
1031:           }
1032:           arrt = arr;
1033:           if (T) {
1034:             for (j=0; j<nz; j++) {
1035:               PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));
1036:               for (k=0; k<bs2; k++) arrt[k] *= v[j];
1037:               arrt += bs2;
1038:             }
1039:             PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
1040:           } else if (kaij->isTI) {
1041:             for (j=0; j<nz; j++) {
1042:               PetscMemzero(arrt,bs2*sizeof(PetscScalar));
1043:               for (k=0; k<bs; k++) arrt[k+bs*k] = v[j];
1044:               arrt += bs2;
1045:             }
1046:             PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
1047:           }

1049:           PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,y);
1050:           for (j=0; j<bs; j++) *(x+i2+j) = (1.0-omega) * *(x+i2+j) + omega * *(y+j);
1051:         }
1052:       } else {
1053:         for (i=m-1; i>=0; i--) {
1054:           PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));
1055:           v  = aa + diag[i] + 1;
1056:           vi = aj + diag[i] + 1;
1057:           nz = ai[i+1] - diag[i] - 1;
1058:           workt = work;
1059:           for (j=0; j<nz; j++) {
1060:             PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));
1061:             workt += bs;
1062:           }
1063:           arrt = arr;
1064:           if (T) {
1065:             for (j=0; j<nz; j++) {
1066:               PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));
1067:               for (k=0; k<bs2; k++) arrt[k] *= v[j];
1068:               arrt += bs2;
1069:             }
1070:             PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
1071:           } else if (kaij->isTI) {
1072:             for (j=0; j<nz; j++) {
1073:               PetscMemzero(arrt,bs2*sizeof(PetscScalar));
1074:               for (k=0; k<bs; k++) arrt[k+bs*k] = v[j];
1075:               arrt += bs2;
1076:             }
1077:             PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
1078:           }
1079:           PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,y);
1080:           for (j=0; j<bs; j++) *(x+i2+j) = (1.0-omega) * *(x+i2+j) + omega * *(y+j);
1081:         }
1082:       }
1083:       PetscLogFlops(1.0*bs2*(a->nz));
1084:     }
1085:   }

1087:   VecRestoreArray(xx,&x);    
1088:   VecRestoreArrayRead(bb,&b);
1089:   return(0);
1090: }

1092: /*===================================================================================*/

1094: PetscErrorCode MatMultAdd_MPIKAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1095: {
1096:   Mat_MPIKAIJ    *b = (Mat_MPIKAIJ*)A->data;

1100:   if (!yy) {
1101:     VecSet(zz,0.0);
1102:   } else {
1103:     VecCopy(yy,zz);
1104:   }
1105:   MatKAIJ_build_AIJ_OAIJ(A); /* Ensure b->AIJ and b->OAIJ are up to date. */
1106:   /* start the scatter */
1107:   VecScatterBegin(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);
1108:   (*b->AIJ->ops->multadd)(b->AIJ,xx,zz,zz);
1109:   VecScatterEnd(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);
1110:   (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,zz,zz);
1111:   return(0);
1112: }

1114: PetscErrorCode MatMult_MPIKAIJ(Mat A,Vec xx,Vec yy)
1115: {
1118:   MatMultAdd_MPIKAIJ(A,xx,PETSC_NULL,yy);
1119:   return(0);
1120: }

1122: PetscErrorCode MatInvertBlockDiagonal_MPIKAIJ(Mat A,const PetscScalar **values)
1123: {
1124:   Mat_MPIKAIJ     *b = (Mat_MPIKAIJ*)A->data;
1125:   PetscErrorCode  ierr;

1128:   MatKAIJ_build_AIJ_OAIJ(A); /* Ensure b->AIJ is up to date. */
1129:   (*b->AIJ->ops->invertblockdiagonal)(b->AIJ,values);
1130:   return(0);
1131: }

1133: /* ----------------------------------------------------------------*/

1135: PetscErrorCode MatGetRow_SeqKAIJ(Mat A,PetscInt row,PetscInt *ncols,PetscInt **cols,PetscScalar **values)
1136: {
1137:   Mat_SeqKAIJ     *b   = (Mat_SeqKAIJ*) A->data;
1138:   PetscErrorCode  diag = PETSC_FALSE;
1139:   PetscErrorCode  ierr;
1140:   PetscInt        nzaij,nz,*colsaij,*idx,i,j,p=b->p,q=b->q,r=row/p,s=row%p,c;
1141:   PetscScalar     *vaij,*v,*S=b->S,*T=b->T;

1144:   if (b->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1145:   b->getrowactive = PETSC_TRUE;
1146:   if (row < 0 || row >= A->rmap->n) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range",row);

1148:   if ((!S) && (!T) && (!b->isTI)) {
1149:     if (ncols)    *ncols  = 0;
1150:     if (cols)     *cols   = NULL;
1151:     if (values)   *values = NULL;
1152:     return(0);
1153:   }

1155:   if (T || b->isTI) {
1156:     MatGetRow_SeqAIJ(b->AIJ,r,&nzaij,&colsaij,&vaij);
1157:     c     = nzaij;
1158:     for (i=0; i<nzaij; i++) {
1159:       /* check if this row contains a diagonal entry */
1160:       if (colsaij[i] == r) {
1161:         diag = PETSC_TRUE;
1162:         c = i;
1163:       }
1164:     }
1165:   } else nzaij = c = 0;

1167:   /* calculate size of row */
1168:   nz = 0;
1169:   if (S)            nz += q;
1170:   if (T || b->isTI) nz += (diag && S ? (nzaij-1)*q : nzaij*q);

1172:   if (cols || values) {
1173:     PetscMalloc2(nz,&idx,nz,&v);
1174:     for (i=0; i<q; i++) {
1175:       /* We need to initialize the v[i] to zero to handle the case in which T is NULL (not the identity matrix). */
1176:       v[i] = 0.0;
1177:     }
1178:     if (b->isTI) {
1179:       for (i=0; i<nzaij; i++) {
1180:         for (j=0; j<q; j++) {
1181:           idx[i*q+j] = colsaij[i]*q+j;
1182:           v[i*q+j]   = (j==s ? vaij[i] : 0);
1183:         }
1184:       }
1185:     } else if (T) {
1186:       for (i=0; i<nzaij; i++) {
1187:         for (j=0; j<q; j++) {
1188:           idx[i*q+j] = colsaij[i]*q+j;
1189:           v[i*q+j]   = vaij[i]*T[s+j*p];
1190:         }
1191:       }
1192:     }
1193:     if (S) {
1194:       for (j=0; j<q; j++) {
1195:         idx[c*q+j] = r*q+j;
1196:         v[c*q+j]  += S[s+j*p];
1197:       }
1198:     }
1199:   }

1201:   if (ncols)    *ncols  = nz;
1202:   if (cols)     *cols   = idx;
1203:   if (values)   *values = v;
1204:   return(0);
1205: }

1207: PetscErrorCode MatRestoreRow_SeqKAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1208: {

1212:   if (nz) *nz = 0;
1213:   PetscFree2(*idx,*v);
1214:   ((Mat_SeqKAIJ*)A->data)->getrowactive = PETSC_FALSE;
1215:   return(0);
1216: }

1218: PetscErrorCode MatGetRow_MPIKAIJ(Mat A,PetscInt row,PetscInt *ncols,PetscInt **cols,PetscScalar **values)
1219: {
1220:   Mat_MPIKAIJ     *b      = (Mat_MPIKAIJ*) A->data;
1221:   Mat             AIJ     = b->A;
1222:   PetscBool       diag    = PETSC_FALSE;
1223:   Mat             MatAIJ,MatOAIJ;
1224:   PetscErrorCode  ierr;
1225:   const PetscInt  rstart=A->rmap->rstart,rend=A->rmap->rend,p=b->p,q=b->q,*garray;
1226:   PetscInt        nz,*idx,ncolsaij = 0,ncolsoaij = 0,*colsaij,*colsoaij,r,s,c,i,j,lrow;
1227:   PetscScalar     *v,*vals,*ovals,*S=b->S,*T=b->T;

1230:   MatKAIJ_build_AIJ_OAIJ(A); /* Ensure b->AIJ and b->OAIJ are up to date. */
1231:   MatAIJ  = ((Mat_SeqKAIJ*)b->AIJ->data)->AIJ;
1232:   MatOAIJ = ((Mat_SeqKAIJ*)b->OAIJ->data)->AIJ;
1233:   if (b->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1234:   b->getrowactive = PETSC_TRUE;
1235:   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1236:   lrow = row - rstart;

1238:   if ((!S) && (!T) && (!b->isTI)) {
1239:     if (ncols)    *ncols  = 0;
1240:     if (cols)     *cols   = NULL;
1241:     if (values)   *values = NULL;
1242:     return(0);
1243:   }

1245:   r = lrow/p;
1246:   s = lrow%p;

1248:   if (T || b->isTI) {
1249:     MatMPIAIJGetSeqAIJ(AIJ,NULL,NULL,&garray);
1250:     MatGetRow_SeqAIJ(MatAIJ,lrow/p,&ncolsaij,&colsaij,&vals);
1251:     MatGetRow_SeqAIJ(MatOAIJ,lrow/p,&ncolsoaij,&colsoaij,&ovals);

1253:     c     = ncolsaij + ncolsoaij;
1254:     for (i=0; i<ncolsaij; i++) {
1255:       /* check if this row contains a diagonal entry */
1256:       if (colsaij[i] == r) {
1257:         diag = PETSC_TRUE;
1258:         c = i;
1259:       }
1260:     }
1261:   } else c = 0;

1263:   /* calculate size of row */
1264:   nz = 0;
1265:   if (S)            nz += q;
1266:   if (T || b->isTI) nz += (diag && S ? (ncolsaij+ncolsoaij-1)*q : (ncolsaij+ncolsoaij)*q);

1268:   if (cols || values) {
1269:     PetscMalloc2(nz,&idx,nz,&v);
1270:     for (i=0; i<q; i++) {
1271:       /* We need to initialize the v[i] to zero to handle the case in which T is NULL (not the identity matrix). */
1272:       v[i] = 0.0;
1273:     }
1274:     if (b->isTI) {
1275:       for (i=0; i<ncolsaij; i++) {
1276:         for (j=0; j<q; j++) {
1277:           idx[i*q+j] = (colsaij[i]+rstart/p)*q+j;
1278:           v[i*q+j]   = (j==s ? vals[i] : 0.0);
1279:         }
1280:       }
1281:       for (i=0; i<ncolsoaij; i++) {
1282:         for (j=0; j<q; j++) {
1283:           idx[(i+ncolsaij)*q+j] = garray[colsoaij[i]]*q+j;
1284:           v[(i+ncolsaij)*q+j]   = (j==s ? ovals[i]: 0.0);
1285:         }
1286:       }
1287:     } else if (T) {
1288:       for (i=0; i<ncolsaij; i++) {
1289:         for (j=0; j<q; j++) {
1290:           idx[i*q+j] = (colsaij[i]+rstart/p)*q+j;
1291:           v[i*q+j]   = vals[i]*T[s+j*p];
1292:         }
1293:       }
1294:       for (i=0; i<ncolsoaij; i++) {
1295:         for (j=0; j<q; j++) {
1296:           idx[(i+ncolsaij)*q+j] = garray[colsoaij[i]]*q+j;
1297:           v[(i+ncolsaij)*q+j]   = ovals[i]*T[s+j*p];
1298:         }
1299:       }
1300:     }
1301:     if (S) {
1302:       for (j=0; j<q; j++) {
1303:         idx[c*q+j] = (r+rstart/p)*q+j;
1304:         v[c*q+j]  += S[s+j*p];
1305:       }
1306:     }
1307:   }

1309:   if (ncols)  *ncols  = nz;
1310:   if (cols)   *cols   = idx;
1311:   if (values) *values = v;
1312:   return(0);
1313: }

1315: PetscErrorCode MatRestoreRow_MPIKAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1316: {
1319:   PetscFree2(*idx,*v);
1320:   ((Mat_SeqKAIJ*)A->data)->getrowactive = PETSC_FALSE;
1321:   return(0);
1322: }

1324: PetscErrorCode  MatCreateSubMatrix_KAIJ(Mat mat,IS isrow,IS iscol,MatReuse cll,Mat *newmat)
1325: {
1327:   Mat            A;

1330:   MatConvert(mat,MATAIJ,MAT_INITIAL_MATRIX,&A);
1331:   MatCreateSubMatrix(A,isrow,iscol,cll,newmat);
1332:   MatDestroy(&A);
1333:   return(0);
1334: }

1336: /* ---------------------------------------------------------------------------------- */
1337: /*@C
1338:   MatCreateKAIJ - Creates a matrix type to be used for matrices of the following form:

1340:     [I \otimes S + A \otimes T]

1342:   where
1343:     S is a dense (p \times q) matrix
1344:     T is a dense (p \times q) matrix
1345:     A is an AIJ  (n \times n) matrix
1346:     I is the identity matrix
1347:   The resulting matrix is (np \times nq)

1349:   S and T are always stored independently on all processes as PetscScalar arrays in column-major format.

1351:   Collective

1353:   Input Parameters:
1354: + A - the AIJ matrix
1355: . p - number of rows in S and T
1356: . q - number of columns in S and T
1357: . S - the S matrix (can be PETSC_NULL), stored as a PetscScalar array (column-major)
1358: - T - the T matrix (can be PETSC_NULL), stored as a PetscScalar array (column-major)

1360:   Output Parameter:
1361: . kaij - the new KAIJ matrix

1363:   Notes:
1364:   This function increases the reference count on the AIJ matrix, so the user is free to destroy the matrix if it is not needed.
1365:   Changes to the entries of the AIJ matrix will immediately affect the KAIJ matrix.

1367:   Developer Notes:
1368:   In the MATMPIKAIJ case, the internal 'AIJ' and 'OAIJ' sequential KAIJ matrices are kept up to date by tracking the object state
1369:   of the AIJ matrix 'A' that describes the blockwise action of the MATMPIKAIJ matrix and, if the object state has changed, lazily
1370:   rebuilding 'AIJ' and 'OAIJ' just before executing operations with the MATMPIKAIJ matrix. If new types of operations are added,
1371:   routines implementing those must also ensure these are rebuilt when needed (by calling the internal MatKAIJ_build_AIJ_OAIJ() routine).

1373:   Level: advanced

1375: .seealso: MatKAIJSetAIJ(), MatKAIJSetS(), MatKAIJSetT(), MatKAIJGetAIJ(), MatKAIJGetS(), MatKAIJGetT(), MATKAIJ
1376: @*/
1377: PetscErrorCode  MatCreateKAIJ(Mat A,PetscInt p,PetscInt q,const PetscScalar S[],const PetscScalar T[],Mat *kaij)
1378: {
1380:   PetscMPIInt    size;

1383:   MatCreate(PetscObjectComm((PetscObject)A),kaij);
1384:   MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);
1385:   if (size == 1) {
1386:     MatSetType(*kaij,MATSEQKAIJ);
1387:   } else {
1388:     MatSetType(*kaij,MATMPIKAIJ);
1389:   }
1390:   MatKAIJSetAIJ(*kaij,A);
1391:   MatKAIJSetS(*kaij,p,q,S);
1392:   MatKAIJSetT(*kaij,p,q,T);
1393:   MatSetUp(*kaij);
1394:   return(0);
1395: }

1397: /*MC
1398:   MATKAIJ - MATKAIJ = "kaij" - A matrix type to be used to evaluate matrices of form
1399:     [I \otimes S + A \otimes T],
1400:   where
1401:     S is a dense (p \times q) matrix,
1402:     T is a dense (p \times q) matrix,
1403:     A is an AIJ  (n \times n) matrix,
1404:     and I is the identity matrix.
1405:   The resulting matrix is (np \times nq).

1407:   S and T are always stored independently on all processes as PetscScalar arrays in column-major format.

1409:   Notes:
1410:   A linear system with multiple right-hand sides, AX = B, can be expressed in the KAIJ-friendly form of (A \otimes I) x = b,
1411:   where x and b are column vectors containing the row-major representations of X and B.

1413:   Level: advanced

1415: .seealso: MatKAIJSetAIJ(), MatKAIJSetS(), MatKAIJSetT(), MatKAIJGetAIJ(), MatKAIJGetS(), MatKAIJGetT(), MatCreateKAIJ()
1416: M*/

1418: PETSC_EXTERN PetscErrorCode MatCreate_KAIJ(Mat A)
1419: {
1421:   Mat_MPIKAIJ    *b;
1422:   PetscMPIInt    size;

1425:   PetscNewLog(A,&b);
1426:   A->data  = (void*)b;

1428:   PetscMemzero(A->ops,sizeof(struct _MatOps));

1430:   A->ops->setup = MatSetUp_KAIJ;

1432:   b->w    = NULL;
1433:   MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);
1434:   if (size == 1) {
1435:     PetscObjectChangeTypeName((PetscObject)A,MATSEQKAIJ);
1436:     A->ops->setup               = MatSetUp_KAIJ;
1437:     A->ops->destroy             = MatDestroy_SeqKAIJ;
1438:     A->ops->view                = MatView_KAIJ;
1439:     A->ops->mult                = MatMult_SeqKAIJ;
1440:     A->ops->multadd             = MatMultAdd_SeqKAIJ;
1441:     A->ops->invertblockdiagonal = MatInvertBlockDiagonal_SeqKAIJ;
1442:     A->ops->getrow              = MatGetRow_SeqKAIJ;
1443:     A->ops->restorerow          = MatRestoreRow_SeqKAIJ;
1444:     A->ops->sor                 = MatSOR_SeqKAIJ;
1445:   } else {
1446:     PetscObjectChangeTypeName((PetscObject)A,MATMPIKAIJ);
1447:     A->ops->setup               = MatSetUp_KAIJ;
1448:     A->ops->destroy             = MatDestroy_MPIKAIJ;
1449:     A->ops->view                = MatView_KAIJ;
1450:     A->ops->mult                = MatMult_MPIKAIJ;
1451:     A->ops->multadd             = MatMultAdd_MPIKAIJ;
1452:     A->ops->invertblockdiagonal = MatInvertBlockDiagonal_MPIKAIJ;
1453:     A->ops->getrow              = MatGetRow_MPIKAIJ;
1454:     A->ops->restorerow          = MatRestoreRow_MPIKAIJ;
1455:     PetscObjectComposeFunction((PetscObject)A,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPIKAIJ);
1456:   }
1457:   A->ops->createsubmatrix = MatCreateSubMatrix_KAIJ;
1458:   return(0);
1459: }