Actual source code: kaij.c


  2: /*
  3:   Defines the basic matrix operations for the KAIJ  matrix storage format.
  4:   This format is used to evaluate matrices of the form:

  6:     [I \otimes S + A \otimes T]

  8:   where
  9:     S is a dense (p \times q) matrix
 10:     T is a dense (p \times q) matrix
 11:     A is an AIJ  (n \times n) matrix
 12:     I is the identity matrix

 14:   The resulting matrix is (np \times nq)

 16:   We provide:
 17:      MatMult()
 18:      MatMultAdd()
 19:      MatInvertBlockDiagonal()
 20:   and
 21:      MatCreateKAIJ(Mat,PetscInt,PetscInt,const PetscScalar[],const PetscScalar[],Mat*)

 23:   This single directory handles both the sequential and parallel codes
 24: */

 26: #include <../src/mat/impls/kaij/kaij.h>
 27: #include <../src/mat/utils/freespace.h>
 28: #include <petsc/private/vecimpl.h>

 30: /*@C
 31:    MatKAIJGetAIJ - Get the AIJ matrix describing the blockwise action of the KAIJ matrix

 33:    Not Collective, but if the KAIJ matrix is parallel, the AIJ matrix is also parallel

 35:    Input Parameter:
 36: .  A - the KAIJ matrix

 38:    Output Parameter:
 39: .  B - the AIJ matrix

 41:    Level: advanced

 43:    Notes: The reference count on the AIJ matrix is not increased so you should not destroy it.

 45: .seealso: MatCreateKAIJ()
 46: @*/
 47: PetscErrorCode  MatKAIJGetAIJ(Mat A,Mat *B)
 48: {
 50:   PetscBool      ismpikaij,isseqkaij;

 53:   PetscObjectTypeCompare((PetscObject)A,MATMPIKAIJ,&ismpikaij);
 54:   PetscObjectTypeCompare((PetscObject)A,MATSEQKAIJ,&isseqkaij);
 55:   if (ismpikaij) {
 56:     Mat_MPIKAIJ *b = (Mat_MPIKAIJ*)A->data;

 58:     *B = b->A;
 59:   } else if (isseqkaij) {
 60:     Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data;

 62:     *B = b->AIJ;
 63:   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix passed in is not of type KAIJ");
 64:   return(0);
 65: }

 67: /*@C
 68:    MatKAIJGetS - Get the S matrix describing the shift action of the KAIJ matrix

 70:    Not Collective; the entire S is stored and returned independently on all processes.

 72:    Input Parameter:
 73: .  A - the KAIJ matrix

 75:    Output Parameters:
 76: +  m - the number of rows in S
 77: .  n - the number of columns in S
 78: -  S - the S matrix, in form of a scalar array in column-major format

 80:    Note: All output parameters are optional (pass NULL or PETSC_IGNORE if not desired)

 82:    Level: advanced

 84: .seealso: MatCreateKAIJ(), MatGetBlockSizes()
 85: @*/
 86: PetscErrorCode MatKAIJGetS(Mat A,PetscInt *m,PetscInt *n,PetscScalar **S)
 87: {
 88:   Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data;
 90:   if (m) *m = b->p;
 91:   if (n) *n = b->q;
 92:   if (S) *S = b->S;
 93:   return(0);
 94: }

 96: /*@C
 97:    MatKAIJGetSRead - Get a read-only pointer to the S matrix describing the shift action of the KAIJ matrix

 99:    Not Collective; the entire S is stored and returned independently on all processes.

101:    Input Parameter:
102: .  A - the KAIJ matrix

104:    Output Parameters:
105: +  m - the number of rows in S
106: .  n - the number of columns in S
107: -  S - the S matrix, in form of a scalar array in column-major format

109:    Note: All output parameters are optional (pass NULL or PETSC_IGNORE if not desired)

111:    Level: advanced

113: .seealso: MatCreateKAIJ(), MatGetBlockSizes()
114: @*/
115: PetscErrorCode MatKAIJGetSRead(Mat A,PetscInt *m,PetscInt *n,const PetscScalar **S)
116: {
117:   Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data;
119:   if (m) *m = b->p;
120:   if (n) *n = b->q;
121:   if (S) *S = b->S;
122:   return(0);
123: }

125: /*@C
126:   MatKAIJRestoreS - Restore array obtained with MatKAIJGetS()

128:   Not collective

130:   Input Parameter:
131: . A - the KAIJ matrix

133:   Output Parameter:
134: . S - location of pointer to array obtained with MatKAIJGetS()

136:   Note: This routine zeros the array pointer to prevent accidental reuse after it has been restored.
137:   If NULL is passed, it will not attempt to zero the array pointer.

139:   Level: advanced
140: .seealso: MatKAIJGetS(), MatKAIJGetSRead(), MatKAIJRestoreSRead()
141: @*/
142: PetscErrorCode MatKAIJRestoreS(Mat A,PetscScalar **S)
143: {

147:   if (S) *S = NULL;
148:   PetscObjectStateIncrease((PetscObject)A);
149:   return(0);
150: }

152: /*@C
153:   MatKAIJRestoreSRead - Restore array obtained with MatKAIJGetSRead()

155:   Not collective

157:   Input Parameter:
158: . A - the KAIJ matrix

160:   Output Parameter:
161: . S - location of pointer to array obtained with MatKAIJGetS()

163:   Note: This routine zeros the array pointer to prevent accidental reuse after it has been restored.
164:   If NULL is passed, it will not attempt to zero the array pointer.

166:   Level: advanced
167: .seealso: MatKAIJGetS(), MatKAIJGetSRead(), MatKAIJRestoreSRead()
168: @*/
169: PetscErrorCode MatKAIJRestoreSRead(Mat A,const PetscScalar **S)
170: {
172:   if (S) *S = NULL;
173:   return(0);
174: }

176: /*@C
177:    MatKAIJGetT - Get the transformation matrix T associated with the KAIJ matrix

179:    Not Collective; the entire T is stored and returned independently on all processes

181:    Input Parameter:
182: .  A - the KAIJ matrix

184:    Output Parameter:
185: +  m - the number of rows in T
186: .  n - the number of columns in T
187: -  T - the T matrix, in form of a scalar array in column-major format

189:    Note: All output parameters are optional (pass NULL or PETSC_IGNORE if not desired)

191:    Level: advanced

193: .seealso: MatCreateKAIJ(), MatGetBlockSizes()
194: @*/
195: PetscErrorCode MatKAIJGetT(Mat A,PetscInt *m,PetscInt *n,PetscScalar **T)
196: {
197:   Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data;
199:   if (m) *m = b->p;
200:   if (n) *n = b->q;
201:   if (T) *T = b->T;
202:   return(0);
203: }

205: /*@C
206:    MatKAIJGetTRead - Get a read-only pointer to the transformation matrix T associated with the KAIJ matrix

208:    Not Collective; the entire T is stored and returned independently on all processes

210:    Input Parameter:
211: .  A - the KAIJ matrix

213:    Output Parameter:
214: +  m - the number of rows in T
215: .  n - the number of columns in T
216: -  T - the T matrix, in form of a scalar array in column-major format

218:    Note: All output parameters are optional (pass NULL or PETSC_IGNORE if not desired)

220:    Level: advanced

222: .seealso: MatCreateKAIJ(), MatGetBlockSizes()
223: @*/
224: PetscErrorCode MatKAIJGetTRead(Mat A,PetscInt *m,PetscInt *n,const PetscScalar **T)
225: {
226:   Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data;
228:   if (m) *m = b->p;
229:   if (n) *n = b->q;
230:   if (T) *T = b->T;
231:   return(0);
232: }

234: /*@C
235:   MatKAIJRestoreT - Restore array obtained with MatKAIJGetT()

237:   Not collective

239:   Input Parameter:
240: . A - the KAIJ matrix

242:   Output Parameter:
243: . T - location of pointer to array obtained with MatKAIJGetS()

245:   Note: This routine zeros the array pointer to prevent accidental reuse after it has been restored.
246:   If NULL is passed, it will not attempt to zero the array pointer.

248:   Level: advanced
249: .seealso: MatKAIJGetT(), MatKAIJGetTRead(), MatKAIJRestoreTRead()
250: @*/
251: PetscErrorCode MatKAIJRestoreT(Mat A,PetscScalar **T)
252: {

256:   if (T) *T = NULL;
257:   PetscObjectStateIncrease((PetscObject)A);
258:   return(0);
259: }

261: /*@C
262:   MatKAIJRestoreTRead - Restore array obtained with MatKAIJGetTRead()

264:   Not collective

266:   Input Parameter:
267: . A - the KAIJ matrix

269:   Output Parameter:
270: . T - location of pointer to array obtained with MatKAIJGetS()

272:   Note: This routine zeros the array pointer to prevent accidental reuse after it has been restored.
273:   If NULL is passed, it will not attempt to zero the array pointer.

275:   Level: advanced
276: .seealso: MatKAIJGetT(), MatKAIJGetTRead(), MatKAIJRestoreTRead()
277: @*/
278: PetscErrorCode MatKAIJRestoreTRead(Mat A,const PetscScalar **T)
279: {
281:   if (T) *T = NULL;
282:   return(0);
283: }

285: /*@
286:    MatKAIJSetAIJ - Set the AIJ matrix describing the blockwise action of the KAIJ matrix

288:    Logically Collective; if the AIJ matrix is parallel, the KAIJ matrix is also parallel

290:    Input Parameters:
291: +  A - the KAIJ matrix
292: -  B - the AIJ matrix

294:    Notes:
295:    This function increases the reference count on the AIJ matrix, so the user is free to destroy the matrix if it is not needed.
296:    Changes to the entries of the AIJ matrix will immediately affect the KAIJ matrix.

298:    Level: advanced

300: .seealso: MatKAIJGetAIJ(), MatKAIJSetS(), MatKAIJSetT()
301: @*/
302: PetscErrorCode MatKAIJSetAIJ(Mat A,Mat B)
303: {
305:   PetscMPIInt    size;

308:   MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);
309:   if (size == 1) {
310:     Mat_SeqKAIJ *a = (Mat_SeqKAIJ*)A->data;
311:     a->AIJ = B;
312:   } else {
313:     Mat_MPIKAIJ *a = (Mat_MPIKAIJ*)A->data;
314:     a->A = B;
315:   }
316:   PetscObjectReference((PetscObject)B);
317:   return(0);
318: }

320: /*@C
321:    MatKAIJSetS - Set the S matrix describing the shift action of the KAIJ matrix

323:    Logically Collective; the entire S is stored independently on all processes.

325:    Input Parameters:
326: +  A - the KAIJ matrix
327: .  p - the number of rows in S
328: .  q - the number of columns in S
329: -  S - the S matrix, in form of a scalar array in column-major format

331:    Notes: The dimensions p and q must match those of the transformation matrix T associated with the KAIJ matrix.
332:    The S matrix is copied, so the user can destroy this array.

334:    Level: Advanced

336: .seealso: MatKAIJGetS(), MatKAIJSetT(), MatKAIJSetAIJ()
337: @*/
338: PetscErrorCode MatKAIJSetS(Mat A,PetscInt p,PetscInt q,const PetscScalar S[])
339: {
341:   Mat_SeqKAIJ    *a = (Mat_SeqKAIJ*)A->data;

344:   PetscFree(a->S);
345:   if (S) {
346:     PetscMalloc1(p*q*sizeof(PetscScalar),&a->S);
347:     PetscMemcpy(a->S,S,p*q*sizeof(PetscScalar));
348:   } else  a->S = NULL;

350:   a->p = p;
351:   a->q = q;
352:   return(0);
353: }

355: /*@C
356:    MatKAIJGetScaledIdentity - Check if both S and T are scaled identities.

358:    Logically Collective.

360:    Input Parameter:
361: .  A - the KAIJ matrix

363:   Output Parameter:
364: .  identity - the Boolean value

366:    Level: Advanced

368: .seealso: MatKAIJGetS(), MatKAIJGetT()
369: @*/
370: PetscErrorCode MatKAIJGetScaledIdentity(Mat A,PetscBool* identity)
371: {
372:   Mat_SeqKAIJ *a = (Mat_SeqKAIJ*)A->data;
373:   PetscInt    i,j;

376:   if (a->p != a->q) {
377:     *identity = PETSC_FALSE;
378:     return(0);
379:   } else *identity = PETSC_TRUE;
380:   if (!a->isTI || a->S) {
381:     for (i=0; i<a->p && *identity; i++) {
382:       for (j=0; j<a->p && *identity; j++) {
383:         if (i != j) {
384:           if (a->S && PetscAbsScalar(a->S[i+j*a->p]) > PETSC_SMALL) *identity = PETSC_FALSE;
385:           if (a->T && PetscAbsScalar(a->T[i+j*a->p]) > PETSC_SMALL) *identity = PETSC_FALSE;
386:         } else {
387:           if (a->S && PetscAbsScalar(a->S[i*(a->p+1)]-a->S[0]) > PETSC_SMALL) *identity = PETSC_FALSE;
388:           if (a->T && PetscAbsScalar(a->T[i*(a->p+1)]-a->T[0]) > PETSC_SMALL) *identity = PETSC_FALSE;
389:         }
390:       }
391:     }
392:   }
393:   return(0);
394: }

396: /*@C
397:    MatKAIJSetT - Set the transformation matrix T associated with the KAIJ matrix

399:    Logically Collective; the entire T is stored independently on all processes.

401:    Input Parameters:
402: +  A - the KAIJ matrix
403: .  p - the number of rows in S
404: .  q - the number of columns in S
405: -  T - the T matrix, in form of a scalar array in column-major format

407:    Notes: The dimensions p and q must match those of the shift matrix S associated with the KAIJ matrix.
408:    The T matrix is copied, so the user can destroy this array.

410:    Level: Advanced

412: .seealso: MatKAIJGetT(), MatKAIJSetS(), MatKAIJSetAIJ()
413: @*/
414: PetscErrorCode MatKAIJSetT(Mat A,PetscInt p,PetscInt q,const PetscScalar T[])
415: {
417:   PetscInt       i,j;
418:   Mat_SeqKAIJ    *a = (Mat_SeqKAIJ*)A->data;
419:   PetscBool      isTI = PETSC_FALSE;

422:   /* check if T is an identity matrix */
423:   if (T && (p == q)) {
424:     isTI = PETSC_TRUE;
425:     for (i=0; i<p; i++) {
426:       for (j=0; j<q; j++) {
427:         if (i == j) {
428:           /* diagonal term must be 1 */
429:           if (T[i+j*p] != 1.0) isTI = PETSC_FALSE;
430:         } else {
431:           /* off-diagonal term must be 0 */
432:           if (T[i+j*p] != 0.0) isTI = PETSC_FALSE;
433:         }
434:       }
435:     }
436:   }
437:   a->isTI = isTI;

439:   PetscFree(a->T);
440:   if (T && (!isTI)) {
441:     PetscMalloc1(p*q*sizeof(PetscScalar),&a->T);
442:     PetscMemcpy(a->T,T,p*q*sizeof(PetscScalar));
443:   } else a->T = NULL;

445:   a->p = p;
446:   a->q = q;
447:   return(0);
448: }

450: PetscErrorCode MatDestroy_SeqKAIJ(Mat A)
451: {
453:   Mat_SeqKAIJ    *b = (Mat_SeqKAIJ*)A->data;

456:   MatDestroy(&b->AIJ);
457:   PetscFree(b->S);
458:   PetscFree(b->T);
459:   PetscFree(b->ibdiag);
460:   PetscFree5(b->sor.w,b->sor.y,b->sor.work,b->sor.t,b->sor.arr);
461:   PetscFree(A->data);
462:   return(0);
463: }

465: PetscErrorCode MatSetUp_KAIJ(Mat A)
466: {
468:   PetscInt       n;
469:   PetscMPIInt    size;
470:   Mat_SeqKAIJ    *seqkaij = (Mat_SeqKAIJ*)A->data;

473:   MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);
474:   if (size == 1) {
475:     MatSetSizes(A,seqkaij->p*seqkaij->AIJ->rmap->n,seqkaij->q*seqkaij->AIJ->cmap->n,seqkaij->p*seqkaij->AIJ->rmap->N,seqkaij->q*seqkaij->AIJ->cmap->N);
476:     PetscLayoutSetBlockSize(A->rmap,seqkaij->p);
477:     PetscLayoutSetBlockSize(A->cmap,seqkaij->q);
478:     PetscLayoutSetUp(A->rmap);
479:     PetscLayoutSetUp(A->cmap);
480:   } else {
481:     Mat_MPIKAIJ *a;
482:     Mat_MPIAIJ  *mpiaij;
483:     IS          from,to;
484:     Vec         gvec;
485:     PetscScalar *T;
486:     PetscInt    i,j;

488:     a = (Mat_MPIKAIJ*)A->data;
489:     mpiaij = (Mat_MPIAIJ*)a->A->data;
490:     MatSetSizes(A,a->p*a->A->rmap->n,a->q*a->A->cmap->n,a->p*a->A->rmap->N,a->q*a->A->cmap->N);
491:     PetscLayoutSetBlockSize(A->rmap,seqkaij->p);
492:     PetscLayoutSetBlockSize(A->cmap,seqkaij->q);
493:     PetscLayoutSetUp(A->rmap);
494:     PetscLayoutSetUp(A->cmap);

496:     if (a->isTI) {
497:       /* If the transformation matrix associated with the parallel matrix A is the identity matrix, then a->T will be NULL.
498:        * In this case, if we pass a->T directly to the MatCreateKAIJ() calls to create the sequential submatrices, the routine will
499:        * not be able to tell that transformation matrix should be set to the identity; thus we create a temporary identity matrix
500:        * to pass in. */
501:       PetscMalloc1(a->p*a->q*sizeof(PetscScalar),&T);
502:       for (i=0; i<a->p; i++) {
503:         for (j=0; j<a->q; j++) {
504:           if (i==j) T[i+j*a->p] = 1.0;
505:           else      T[i+j*a->p] = 0.0;
506:         }
507:       }
508:     } else T = a->T;
509:     MatCreateKAIJ(mpiaij->A,a->p,a->q,a->S,T,&a->AIJ);
510:     MatCreateKAIJ(mpiaij->B,a->p,a->q,NULL,T,&a->OAIJ);
511:     if (a->isTI) {
512:       PetscFree(T);
513:     }

515:     VecGetSize(mpiaij->lvec,&n);
516:     VecCreate(PETSC_COMM_SELF,&a->w);
517:     VecSetSizes(a->w,n*a->q,n*a->q);
518:     VecSetBlockSize(a->w,a->q);
519:     VecSetType(a->w,VECSEQ);

521:     /* create two temporary Index sets for build scatter gather */
522:     ISCreateBlock(PetscObjectComm((PetscObject)a->A),a->q,n,mpiaij->garray,PETSC_COPY_VALUES,&from);
523:     ISCreateStride(PETSC_COMM_SELF,n*a->q,0,1,&to);

525:     /* create temporary global vector to generate scatter context */
526:     VecCreateMPIWithArray(PetscObjectComm((PetscObject)a->A),a->q,a->q*a->A->cmap->n,a->q*a->A->cmap->N,NULL,&gvec);

528:     /* generate the scatter context */
529:     VecScatterCreate(gvec,from,a->w,to,&a->ctx);

531:     ISDestroy(&from);
532:     ISDestroy(&to);
533:     VecDestroy(&gvec);
534:   }

536:   A->assembled = PETSC_TRUE;
537:   return(0);
538: }

540: PetscErrorCode MatView_KAIJ(Mat A,PetscViewer viewer)
541: {
542:   PetscViewerFormat format;
543:   Mat_SeqKAIJ       *a = (Mat_SeqKAIJ*)A->data;
544:   Mat               B;
545:   PetscInt          i;
546:   PetscErrorCode    ierr;
547:   PetscBool         ismpikaij;

550:   PetscObjectTypeCompare((PetscObject)A,MATMPIKAIJ,&ismpikaij);
551:   PetscViewerGetFormat(viewer,&format);
552:   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL || format == PETSC_VIEWER_ASCII_IMPL) {
553:     PetscViewerASCIIPrintf(viewer,"S and T have %D rows and %D columns\n",a->p,a->q);

555:     /* Print appropriate details for S. */
556:     if (!a->S) {
557:       PetscViewerASCIIPrintf(viewer,"S is NULL\n");
558:     } else if (format == PETSC_VIEWER_ASCII_IMPL) {
559:       PetscViewerASCIIPrintf(viewer,"Entries of S are ");
560:       for (i=0; i<(a->p * a->q); i++) {
561: #if defined(PETSC_USE_COMPLEX)
562:         PetscViewerASCIIPrintf(viewer,"%18.16e %18.16e ",(double)PetscRealPart(a->S[i]),(double)PetscImaginaryPart(a->S[i]));
563: #else
564:         PetscViewerASCIIPrintf(viewer,"%18.16e ",(double)PetscRealPart(a->S[i]));
565: #endif
566:       }
567:       PetscViewerASCIIPrintf(viewer,"\n");
568:     }

570:     /* Print appropriate details for T. */
571:     if (a->isTI) {
572:       PetscViewerASCIIPrintf(viewer,"T is the identity matrix\n");
573:     } else if (!a->T) {
574:       PetscViewerASCIIPrintf(viewer,"T is NULL\n");
575:     } else if (format == PETSC_VIEWER_ASCII_IMPL) {
576:       PetscViewerASCIIPrintf(viewer,"Entries of T are ");
577:       for (i=0; i<(a->p * a->q); i++) {
578: #if defined(PETSC_USE_COMPLEX)
579:         PetscViewerASCIIPrintf(viewer,"%18.16e %18.16e ",(double)PetscRealPart(a->T[i]),(double)PetscImaginaryPart(a->T[i]));
580: #else
581:         PetscViewerASCIIPrintf(viewer,"%18.16e ",(double)PetscRealPart(a->T[i]));
582: #endif
583:       }
584:       PetscViewerASCIIPrintf(viewer,"\n");
585:     }

587:     /* Now print details for the AIJ matrix, using the AIJ viewer. */
588:     PetscViewerASCIIPrintf(viewer,"Now viewing the associated AIJ matrix:\n");
589:     if (ismpikaij) {
590:       Mat_MPIKAIJ *b = (Mat_MPIKAIJ*)A->data;
591:       MatView(b->A,viewer);
592:     } else {
593:       MatView(a->AIJ,viewer);
594:     }

596:   } else {
597:     /* For all other matrix viewer output formats, simply convert to an AIJ matrix and call MatView() on that. */
598:     if (ismpikaij) {
599:       MatConvert(A,MATMPIAIJ,MAT_INITIAL_MATRIX,&B);
600:     } else {
601:       MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);
602:     }
603:     MatView(B,viewer);
604:     MatDestroy(&B);
605:   }
606:   return(0);
607: }

609: PetscErrorCode MatDestroy_MPIKAIJ(Mat A)
610: {
612:   Mat_MPIKAIJ    *b = (Mat_MPIKAIJ*)A->data;

615:   MatDestroy(&b->AIJ);
616:   MatDestroy(&b->OAIJ);
617:   MatDestroy(&b->A);
618:   VecScatterDestroy(&b->ctx);
619:   VecDestroy(&b->w);
620:   PetscFree(b->S);
621:   PetscFree(b->T);
622:   PetscFree(b->ibdiag);
623:   PetscFree(A->data);
624:   return(0);
625: }

627: /* --------------------------------------------------------------------------------------*/

629: /* zz = yy + Axx */
630: PetscErrorCode MatMultAdd_SeqKAIJ(Mat A,Vec xx,Vec yy,Vec zz)
631: {
632:   Mat_SeqKAIJ       *b = (Mat_SeqKAIJ*)A->data;
633:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
634:   const PetscScalar *s = b->S, *t = b->T;
635:   const PetscScalar *x,*v,*bx;
636:   PetscScalar       *y,*sums;
637:   PetscErrorCode    ierr;
638:   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
639:   PetscInt          n,i,jrow,j,l,p=b->p,q=b->q,k;

642:   if (!yy) {
643:     VecSet(zz,0.0);
644:   } else {
645:     VecCopy(yy,zz);
646:   }
647:   if ((!s) && (!t) && (!b->isTI)) return(0);

649:   VecGetArrayRead(xx,&x);
650:   VecGetArray(zz,&y);
651:   idx  = a->j;
652:   v    = a->a;
653:   ii   = a->i;

655:   if (b->isTI) {
656:     for (i=0; i<m; i++) {
657:       jrow = ii[i];
658:       n    = ii[i+1] - jrow;
659:       sums = y + p*i;
660:       for (j=0; j<n; j++) {
661:         for (k=0; k<p; k++) {
662:           sums[k] += v[jrow+j]*x[q*idx[jrow+j]+k];
663:         }
664:       }
665:     }
666:     PetscLogFlops(3.0*(a->nz)*p);
667:   } else if (t) {
668:     for (i=0; i<m; i++) {
669:       jrow = ii[i];
670:       n    = ii[i+1] - jrow;
671:       sums = y + p*i;
672:       for (j=0; j<n; j++) {
673:         for (k=0; k<p; k++) {
674:           for (l=0; l<q; l++) {
675:             sums[k] += v[jrow+j]*t[k+l*p]*x[q*idx[jrow+j]+l];
676:           }
677:         }
678:       }
679:     }
680:     /* The flop count below assumes that v[jrow+j] is hoisted out (which an optimizing compiler is likely to do),
681:      * and also that T part is hoisted outside this loop (in exchange for temporary storage) as (A \otimes I) (I \otimes T),
682:      * so that this multiply doesn't have to be redone for each matrix entry, but just once per column. The latter
683:      * transformation is much less likely to be applied, but we nonetheless count the minimum flops required. */
684:     PetscLogFlops((2.0*p*q-p)*m+2.0*p*a->nz);
685:   }
686:   if (s) {
687:     for (i=0; i<m; i++) {
688:       sums = y + p*i;
689:       bx   = x + q*i;
690:       if (i < b->AIJ->cmap->n) {
691:         for (j=0; j<q; j++) {
692:           for (k=0; k<p; k++) {
693:             sums[k] += s[k+j*p]*bx[j];
694:           }
695:         }
696:       }
697:     }
698:     PetscLogFlops(2.0*m*p*q);
699:   }

701:   VecRestoreArrayRead(xx,&x);
702:   VecRestoreArray(zz,&y);
703:   return(0);
704: }

706: PetscErrorCode MatMult_SeqKAIJ(Mat A,Vec xx,Vec yy)
707: {
710:   MatMultAdd_SeqKAIJ(A,xx,PETSC_NULL,yy);
711:   return(0);
712: }

714: #include <petsc/private/kernels/blockinvert.h>

716: PetscErrorCode MatInvertBlockDiagonal_SeqKAIJ(Mat A,const PetscScalar **values)
717: {
718:   Mat_SeqKAIJ       *b  = (Mat_SeqKAIJ*)A->data;
719:   Mat_SeqAIJ        *a  = (Mat_SeqAIJ*)b->AIJ->data;
720:   const PetscScalar *S  = b->S;
721:   const PetscScalar *T  = b->T;
722:   const PetscScalar *v  = a->a;
723:   const PetscInt     p  = b->p, q = b->q, m = b->AIJ->rmap->n, *idx = a->j, *ii = a->i;
724:   PetscErrorCode    ierr;
725:   PetscInt          i,j,*v_pivots,dof,dof2;
726:   PetscScalar       *diag,aval,*v_work;

729:   if (p != q) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MATKAIJ: Block size must be square to calculate inverse.");
730:   if ((!S) && (!T) && (!b->isTI)) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MATKAIJ: Cannot invert a zero matrix.");

732:   dof  = p;
733:   dof2 = dof*dof;

735:   if (b->ibdiagvalid) {
736:     if (values) *values = b->ibdiag;
737:     return(0);
738:   }
739:   if (!b->ibdiag) {
740:     PetscMalloc1(dof2*m*sizeof(PetscScalar),&b->ibdiag);
741:     PetscLogObjectMemory((PetscObject)A,dof2*m*sizeof(PetscScalar));
742:   }
743:   if (values) *values = b->ibdiag;
744:   diag = b->ibdiag;

746:   PetscMalloc2(dof,&v_work,dof,&v_pivots);
747:   for (i=0; i<m; i++) {
748:     if (S) {
749:       PetscMemcpy(diag,S,dof2*sizeof(PetscScalar));
750:     } else {
751:       PetscMemzero(diag,dof2*sizeof(PetscScalar));
752:     }
753:     if (b->isTI) {
754:       aval = 0;
755:       for (j=ii[i]; j<ii[i+1]; j++) if (idx[j] == i) aval = v[j];
756:       for (j=0; j<dof; j++) diag[j+dof*j] += aval;
757:     } else if (T) {
758:       aval = 0;
759:       for (j=ii[i]; j<ii[i+1]; j++) if (idx[j] == i) aval = v[j];
760:       for (j=0; j<dof2; j++) diag[j] += aval*T[j];
761:     }
762:     PetscKernel_A_gets_inverse_A(dof,diag,v_pivots,v_work,PETSC_FALSE,NULL);
763:     diag += dof2;
764:   }
765:   PetscFree2(v_work,v_pivots);

767:   b->ibdiagvalid = PETSC_TRUE;
768:   return(0);
769: }

771: static PetscErrorCode MatGetDiagonalBlock_MPIKAIJ(Mat A,Mat *B)
772: {
773:   Mat_MPIKAIJ *kaij = (Mat_MPIKAIJ*) A->data;

776:   *B = kaij->AIJ;
777:   return(0);
778: }

780: PetscErrorCode MatSOR_SeqKAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
781: {
782:   PetscErrorCode    ierr;
783:   Mat_SeqKAIJ       *kaij = (Mat_SeqKAIJ*) A->data;
784:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)kaij->AIJ->data;
785:   const PetscScalar *aa = a->a, *T = kaij->T, *v;
786:   const PetscInt    m  = kaij->AIJ->rmap->n, *ai=a->i, *aj=a->j, p = kaij->p, q = kaij->q, *diag, *vi;
787:   const PetscScalar *b, *xb, *idiag;
788:   PetscScalar       *x, *work, *workt, *w, *y, *arr, *t, *arrt;
789:   PetscInt          i, j, k, i2, bs, bs2, nz;

792:   its = its*lits;
793:   if (flag & SOR_EISENSTAT) SETERRQ (PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
794:   if (its <= 0)             SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
795:   if (fshift)               SETERRQ (PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for diagonal shift");
796:   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for applying upper or lower triangular parts");
797:   if (p != q) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSOR for KAIJ: No support for non-square dense blocks");
798:   else        {bs = p; bs2 = bs*bs; }

800:   if (!m) return(0);

802:   if (!kaij->ibdiagvalid) { MatInvertBlockDiagonal_SeqKAIJ(A,NULL); }
803:   idiag = kaij->ibdiag;
804:   diag  = a->diag;

806:   if (!kaij->sor.setup) {
807:     PetscMalloc5(bs,&kaij->sor.w,bs,&kaij->sor.y,m*bs,&kaij->sor.work,m*bs,&kaij->sor.t,m*bs2,&kaij->sor.arr);
808:     kaij->sor.setup = PETSC_TRUE;
809:   }
810:   y     = kaij->sor.y;
811:   w     = kaij->sor.w;
812:   work  = kaij->sor.work;
813:   t     = kaij->sor.t;
814:   arr   = kaij->sor.arr;

816:   VecGetArray(xx,&x);    
817:   VecGetArrayRead(bb,&b);

819:   if (flag & SOR_ZERO_INITIAL_GUESS) {
820:     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
821:       PetscKernel_w_gets_Ar_times_v(bs,bs,b,idiag,x);                            /* x[0:bs] <- D^{-1} b[0:bs] */
822:        PetscMemcpy(t,b,bs*sizeof(PetscScalar));
823:       i2     =  bs;
824:       idiag  += bs2;
825:       for (i=1; i<m; i++) {
826:         v  = aa + ai[i];
827:         vi = aj + ai[i];
828:         nz = diag[i] - ai[i];

830:         if (T) {                /* b - T (Arow * x) */
831:           PetscMemzero(w,bs*sizeof(PetscScalar));
832:           for (j=0; j<nz; j++) {
833:             for (k=0; k<bs; k++) w[k] -= v[j] * x[vi[j]*bs+k];
834:           }
835:           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs,w,T,&t[i2]);
836:           for (k=0; k<bs; k++) t[i2+k] += b[i2+k];
837:         } else if (kaij->isTI) {
838:           PetscMemcpy(t+i2,b+i2,bs*sizeof(PetscScalar));
839:           for (j=0; j<nz; j++) {
840:             for (k=0; k<bs; k++) t[i2+k] -= v[j] * x[vi[j]*bs+k];
841:           }
842:         } else {
843:           PetscMemcpy(t+i2,b+i2,bs*sizeof(PetscScalar));
844:         }

846:         PetscKernel_w_gets_Ar_times_v(bs,bs,t+i2,idiag,y);
847:         for (j=0; j<bs; j++) x[i2+j] = omega * y[j];

849:         idiag += bs2;
850:         i2    += bs;
851:       }
852:       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
853:       PetscLogFlops(1.0*bs2*a->nz);
854:       xb = t;
855:     } else xb = b;
856:     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
857:       idiag = kaij->ibdiag+bs2*(m-1);
858:       i2    = bs * (m-1);
859:       PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));
860:       PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
861:       i2    -= bs;
862:       idiag -= bs2;
863:       for (i=m-2; i>=0; i--) {
864:         v  = aa + diag[i] + 1 ;
865:         vi = aj + diag[i] + 1;
866:         nz = ai[i+1] - diag[i] - 1;

868:         if (T) {                /* FIXME: This branch untested */
869:           PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));
870:           /* copy all rows of x that are needed into contiguous space */
871:           workt = work;
872:           for (j=0; j<nz; j++) {
873:             PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));
874:             workt += bs;
875:           }
876:           arrt = arr;
877:           for (j=0; j<nz; j++) {
878:             PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));
879:             for (k=0; k<bs2; k++) arrt[k] *= v[j];
880:             arrt += bs2;
881:           }
882:           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
883:         } else if (kaij->isTI) {
884:           PetscMemcpy(w,t+i2,bs*sizeof(PetscScalar));
885:           for (j=0; j<nz; j++) {
886:             for (k=0; k<bs; k++) w[k] -= v[j] * x[vi[j]*bs+k];
887:           }
888:         }

890:         PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,y); /* RHS incorrect for omega != 1.0 */
891:         for (j=0; j<bs; j++) x[i2+j] = (1.0-omega) * x[i2+j] + omega * y[j];

893:         idiag -= bs2;
894:         i2    -= bs;
895:       }
896:       PetscLogFlops(1.0*bs2*(a->nz));
897:     }
898:     its--;
899:   }
900:   while (its--) {               /* FIXME: This branch not updated */
901:     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
902:       i2     =  0;
903:       idiag  = kaij->ibdiag;
904:       for (i=0; i<m; i++) {
905:         PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));

907:         v  = aa + ai[i];
908:         vi = aj + ai[i];
909:         nz = diag[i] - ai[i];
910:         workt = work;
911:         for (j=0; j<nz; j++) {
912:           PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));
913:           workt += bs;
914:         }
915:         arrt = arr;
916:         if (T) {
917:           for (j=0; j<nz; j++) {
918:             PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));
919:             for (k=0; k<bs2; k++) arrt[k] *= v[j];
920:             arrt += bs2;
921:           }
922:           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
923:         } else if (kaij->isTI) {
924:           for (j=0; j<nz; j++) {
925:             PetscMemzero(arrt,bs2*sizeof(PetscScalar));
926:             for (k=0; k<bs; k++) arrt[k+bs*k] = v[j];
927:             arrt += bs2;
928:           }
929:           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
930:         }
931:         PetscMemcpy(t+i2,w,bs*sizeof(PetscScalar));

933:         v  = aa + diag[i] + 1;
934:         vi = aj + diag[i] + 1;
935:         nz = ai[i+1] - diag[i] - 1;
936:         workt = work;
937:         for (j=0; j<nz; j++) {
938:           PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));
939:           workt += bs;
940:         }
941:         arrt = arr;
942:         if (T) {
943:           for (j=0; j<nz; j++) {
944:             PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));
945:             for (k=0; k<bs2; k++) arrt[k] *= v[j];
946:             arrt += bs2;
947:           }
948:           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
949:         } else if (kaij->isTI) {
950:           for (j=0; j<nz; j++) {
951:             PetscMemzero(arrt,bs2*sizeof(PetscScalar));
952:             for (k=0; k<bs; k++) arrt[k+bs*k] = v[j];
953:             arrt += bs2;
954:           }
955:           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
956:         }

958:         PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,y);
959:         for (j=0; j<bs; j++) *(x+i2+j) = (1.0-omega) * *(x+i2+j) + omega * *(y+j);

961:         idiag += bs2;
962:         i2    += bs;
963:       }
964:       xb = t;
965:     }
966:     else xb = b;
967:     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
968:       idiag = kaij->ibdiag+bs2*(m-1);
969:       i2    = bs * (m-1);
970:       if (xb == b) {
971:         for (i=m-1; i>=0; i--) {
972:           PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));

974:           v  = aa + ai[i];
975:           vi = aj + ai[i];
976:           nz = diag[i] - ai[i];
977:           workt = work;
978:           for (j=0; j<nz; j++) {
979:             PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));
980:             workt += bs;
981:           }
982:           arrt = arr;
983:           if (T) {
984:             for (j=0; j<nz; j++) {
985:               PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));
986:               for (k=0; k<bs2; k++) arrt[k] *= v[j];
987:               arrt += bs2;
988:             }
989:             PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
990:           } else if (kaij->isTI) {
991:             for (j=0; j<nz; j++) {
992:               PetscMemzero(arrt,bs2*sizeof(PetscScalar));
993:               for (k=0; k<bs; k++) arrt[k+bs*k] = v[j];
994:               arrt += bs2;
995:             }
996:             PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
997:           }

999:           v  = aa + diag[i] + 1;
1000:           vi = aj + diag[i] + 1;
1001:           nz = ai[i+1] - diag[i] - 1;
1002:           workt = work;
1003:           for (j=0; j<nz; j++) {
1004:             PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));
1005:             workt += bs;
1006:           }
1007:           arrt = arr;
1008:           if (T) {
1009:             for (j=0; j<nz; j++) {
1010:               PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));
1011:               for (k=0; k<bs2; k++) arrt[k] *= v[j];
1012:               arrt += bs2;
1013:             }
1014:             PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
1015:           } else if (kaij->isTI) {
1016:             for (j=0; j<nz; j++) {
1017:               PetscMemzero(arrt,bs2*sizeof(PetscScalar));
1018:               for (k=0; k<bs; k++) arrt[k+bs*k] = v[j];
1019:               arrt += bs2;
1020:             }
1021:             PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
1022:           }

1024:           PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,y);
1025:           for (j=0; j<bs; j++) *(x+i2+j) = (1.0-omega) * *(x+i2+j) + omega * *(y+j);
1026:         }
1027:       } else {
1028:         for (i=m-1; i>=0; i--) {
1029:           PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));
1030:           v  = aa + diag[i] + 1;
1031:           vi = aj + diag[i] + 1;
1032:           nz = ai[i+1] - diag[i] - 1;
1033:           workt = work;
1034:           for (j=0; j<nz; j++) {
1035:             PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));
1036:             workt += bs;
1037:           }
1038:           arrt = arr;
1039:           if (T) {
1040:             for (j=0; j<nz; j++) {
1041:               PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));
1042:               for (k=0; k<bs2; k++) arrt[k] *= v[j];
1043:               arrt += bs2;
1044:             }
1045:             PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
1046:           } else if (kaij->isTI) {
1047:             for (j=0; j<nz; j++) {
1048:               PetscMemzero(arrt,bs2*sizeof(PetscScalar));
1049:               for (k=0; k<bs; k++) arrt[k+bs*k] = v[j];
1050:               arrt += bs2;
1051:             }
1052:             PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
1053:           }
1054:           PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,y);
1055:           for (j=0; j<bs; j++) *(x+i2+j) = (1.0-omega) * *(x+i2+j) + omega * *(y+j);
1056:         }
1057:       }
1058:       PetscLogFlops(1.0*bs2*(a->nz));
1059:     }
1060:   }

1062:   VecRestoreArray(xx,&x);    
1063:   VecRestoreArrayRead(bb,&b);
1064:   return(0);
1065: }

1067: /*===================================================================================*/

1069: PetscErrorCode MatMultAdd_MPIKAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1070: {
1071:   Mat_MPIKAIJ    *b = (Mat_MPIKAIJ*)A->data;

1075:   if (!yy) {
1076:     VecSet(zz,0.0);
1077:   } else {
1078:     VecCopy(yy,zz);
1079:   }
1080:   /* start the scatter */
1081:   VecScatterBegin(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);
1082:   (*b->AIJ->ops->multadd)(b->AIJ,xx,zz,zz);
1083:   VecScatterEnd(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);
1084:   (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,zz,zz);
1085:   return(0);
1086: }

1088: PetscErrorCode MatMult_MPIKAIJ(Mat A,Vec xx,Vec yy)
1089: {
1092:   MatMultAdd_MPIKAIJ(A,xx,PETSC_NULL,yy);
1093:   return(0);
1094: }

1096: PetscErrorCode MatInvertBlockDiagonal_MPIKAIJ(Mat A,const PetscScalar **values)
1097: {
1098:   Mat_MPIKAIJ     *b = (Mat_MPIKAIJ*)A->data;
1099:   PetscErrorCode  ierr;

1102:   (*b->AIJ->ops->invertblockdiagonal)(b->AIJ,values);
1103:   return(0);
1104: }

1106: /* ----------------------------------------------------------------*/

1108: PetscErrorCode MatGetRow_SeqKAIJ(Mat A,PetscInt row,PetscInt *ncols,PetscInt **cols,PetscScalar **values)
1109: {
1110:   Mat_SeqKAIJ     *b   = (Mat_SeqKAIJ*) A->data;
1111:   PetscErrorCode  diag = PETSC_FALSE;
1112:   PetscErrorCode  ierr;
1113:   PetscInt        nzaij,nz,*colsaij,*idx,i,j,p=b->p,q=b->q,r=row/p,s=row%p,c;
1114:   PetscScalar     *vaij,*v,*S=b->S,*T=b->T;

1117:   if (b->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1118:   b->getrowactive = PETSC_TRUE;
1119:   if (row < 0 || row >= A->rmap->n) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range",row);

1121:   if ((!S) && (!T) && (!b->isTI)) {
1122:     if (ncols)    *ncols  = 0;
1123:     if (cols)     *cols   = NULL;
1124:     if (values)   *values = NULL;
1125:     return(0);
1126:   }

1128:   if (T || b->isTI) {
1129:     MatGetRow_SeqAIJ(b->AIJ,r,&nzaij,&colsaij,&vaij);
1130:     c     = nzaij;
1131:     for (i=0; i<nzaij; i++) {
1132:       /* check if this row contains a diagonal entry */
1133:       if (colsaij[i] == r) {
1134:         diag = PETSC_TRUE;
1135:         c = i;
1136:       }
1137:     }
1138:   } else nzaij = c = 0;

1140:   /* calculate size of row */
1141:   nz = 0;
1142:   if (S)            nz += q;
1143:   if (T || b->isTI) nz += (diag && S ? (nzaij-1)*q : nzaij*q);

1145:   if (cols || values) {
1146:     PetscMalloc2(nz,&idx,nz,&v);
1147:     for (i=0; i<q; i++) {
1148:       /* We need to initialize the v[i] to zero to handle the case in which T is NULL (not the identity matrix). */
1149:       v[i] = 0.0;
1150:     }
1151:     if (b->isTI) {
1152:       for (i=0; i<nzaij; i++) {
1153:         for (j=0; j<q; j++) {
1154:           idx[i*q+j] = colsaij[i]*q+j;
1155:           v[i*q+j]   = (j==s ? vaij[i] : 0);
1156:         }
1157:       }
1158:     } else if (T) {
1159:       for (i=0; i<nzaij; i++) {
1160:         for (j=0; j<q; j++) {
1161:           idx[i*q+j] = colsaij[i]*q+j;
1162:           v[i*q+j]   = vaij[i]*T[s+j*p];
1163:         }
1164:       }
1165:     }
1166:     if (S) {
1167:       for (j=0; j<q; j++) {
1168:         idx[c*q+j] = r*q+j;
1169:         v[c*q+j]  += S[s+j*p];
1170:       }
1171:     }
1172:   }

1174:   if (ncols)    *ncols  = nz;
1175:   if (cols)     *cols   = idx;
1176:   if (values)   *values = v;
1177:   return(0);
1178: }

1180: PetscErrorCode MatRestoreRow_SeqKAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1181: {

1185:   if (nz) *nz = 0;
1186:   PetscFree2(*idx,*v);
1187:   ((Mat_SeqKAIJ*)A->data)->getrowactive = PETSC_FALSE;
1188:   return(0);
1189: }

1191: PetscErrorCode MatGetRow_MPIKAIJ(Mat A,PetscInt row,PetscInt *ncols,PetscInt **cols,PetscScalar **values)
1192: {
1193:   Mat_MPIKAIJ     *b      = (Mat_MPIKAIJ*) A->data;
1194:   Mat             MatAIJ  = ((Mat_SeqKAIJ*)b->AIJ->data)->AIJ;
1195:   Mat             MatOAIJ = ((Mat_SeqKAIJ*)b->OAIJ->data)->AIJ;
1196:   Mat             AIJ     = b->A;
1197:   PetscBool       diag    = PETSC_FALSE;
1198:   PetscErrorCode  ierr;
1199:   const PetscInt  rstart=A->rmap->rstart,rend=A->rmap->rend,p=b->p,q=b->q,*garray;
1200:   PetscInt        nz,*idx,ncolsaij = 0,ncolsoaij = 0,*colsaij,*colsoaij,r,s,c,i,j,lrow;
1201:   PetscScalar     *v,*vals,*ovals,*S=b->S,*T=b->T;

1204:   if (b->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1205:   b->getrowactive = PETSC_TRUE;
1206:   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1207:   lrow = row - rstart;

1209:   if ((!S) && (!T) && (!b->isTI)) {
1210:     if (ncols)    *ncols  = 0;
1211:     if (cols)     *cols   = NULL;
1212:     if (values)   *values = NULL;
1213:     return(0);
1214:   }

1216:   r = lrow/p;
1217:   s = lrow%p;

1219:   if (T || b->isTI) {
1220:     MatMPIAIJGetSeqAIJ(AIJ,NULL,NULL,&garray);
1221:     MatGetRow_SeqAIJ(MatAIJ,lrow/p,&ncolsaij,&colsaij,&vals);
1222:     MatGetRow_SeqAIJ(MatOAIJ,lrow/p,&ncolsoaij,&colsoaij,&ovals);

1224:     c     = ncolsaij + ncolsoaij;
1225:     for (i=0; i<ncolsaij; i++) {
1226:       /* check if this row contains a diagonal entry */
1227:       if (colsaij[i] == r) {
1228:         diag = PETSC_TRUE;
1229:         c = i;
1230:       }
1231:     }
1232:   } else c = 0;

1234:   /* calculate size of row */
1235:   nz = 0;
1236:   if (S)            nz += q;
1237:   if (T || b->isTI) nz += (diag && S ? (ncolsaij+ncolsoaij-1)*q : (ncolsaij+ncolsoaij)*q);

1239:   if (cols || values) {
1240:     PetscMalloc2(nz,&idx,nz,&v);
1241:     for (i=0; i<q; i++) {
1242:       /* We need to initialize the v[i] to zero to handle the case in which T is NULL (not the identity matrix). */
1243:       v[i] = 0.0;
1244:     }
1245:     if (b->isTI) {
1246:       for (i=0; i<ncolsaij; i++) {
1247:         for (j=0; j<q; j++) {
1248:           idx[i*q+j] = (colsaij[i]+rstart/p)*q+j;
1249:           v[i*q+j]   = (j==s ? vals[i] : 0.0);
1250:         }
1251:       }
1252:       for (i=0; i<ncolsoaij; i++) {
1253:         for (j=0; j<q; j++) {
1254:           idx[(i+ncolsaij)*q+j] = garray[colsoaij[i]]*q+j;
1255:           v[(i+ncolsaij)*q+j]   = (j==s ? ovals[i]: 0.0);
1256:         }
1257:       }
1258:     } else if (T) {
1259:       for (i=0; i<ncolsaij; i++) {
1260:         for (j=0; j<q; j++) {
1261:           idx[i*q+j] = (colsaij[i]+rstart/p)*q+j;
1262:           v[i*q+j]   = vals[i]*T[s+j*p];
1263:         }
1264:       }
1265:       for (i=0; i<ncolsoaij; i++) {
1266:         for (j=0; j<q; j++) {
1267:           idx[(i+ncolsaij)*q+j] = garray[colsoaij[i]]*q+j;
1268:           v[(i+ncolsaij)*q+j]   = ovals[i]*T[s+j*p];
1269:         }
1270:       }
1271:     }
1272:     if (S) {
1273:       for (j=0; j<q; j++) {
1274:         idx[c*q+j] = (r+rstart/p)*q+j;
1275:         v[c*q+j]  += S[s+j*p];
1276:       }
1277:     }
1278:   }

1280:   if (ncols)  *ncols  = nz;
1281:   if (cols)   *cols   = idx;
1282:   if (values) *values = v;
1283:   return(0);
1284: }

1286: PetscErrorCode MatRestoreRow_MPIKAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1287: {
1290:   PetscFree2(*idx,*v);
1291:   ((Mat_SeqKAIJ*)A->data)->getrowactive = PETSC_FALSE;
1292:   return(0);
1293: }

1295: PetscErrorCode  MatCreateSubMatrix_KAIJ(Mat mat,IS isrow,IS iscol,MatReuse cll,Mat *newmat)
1296: {
1298:   Mat            A;

1301:   MatConvert(mat,MATAIJ,MAT_INITIAL_MATRIX,&A);
1302:   MatCreateSubMatrix(A,isrow,iscol,cll,newmat);
1303:   MatDestroy(&A);
1304:   return(0);
1305: }

1307: /* ---------------------------------------------------------------------------------- */
1308: /*@C
1309:   MatCreateKAIJ - Creates a matrix type to be used for matrices of the following form:

1311:     [I \otimes S + A \otimes T]

1313:   where
1314:     S is a dense (p \times q) matrix
1315:     T is a dense (p \times q) matrix
1316:     A is an AIJ  (n \times n) matrix
1317:     I is the identity matrix
1318:   The resulting matrix is (np \times nq)

1320:   S and T are always stored independently on all processes as PetscScalar arrays in column-major format.

1322:   Collective

1324:   Input Parameters:
1325: + A - the AIJ matrix
1326: . p - number of rows in S and T
1327: . q - number of columns in S and T
1328: . S - the S matrix (can be PETSC_NULL), stored as a PetscScalar array (column-major)
1329: - T - the T matrix (can be PETSC_NULL), stored as a PetscScalar array (column-major)

1331:   Output Parameter:
1332: . kaij - the new KAIJ matrix

1334:   Notes:
1335:   This function increases the reference count on the AIJ matrix, so the user is free to destroy the matrix if it is not needed.
1336:   Changes to the entries of the AIJ matrix will immediately affect the KAIJ matrix.

1338:   Level: advanced

1340: .seealso: MatKAIJSetAIJ(), MatKAIJSetS(), MatKAIJSetT(), MatKAIJGetAIJ(), MatKAIJGetS(), MatKAIJGetT(), MATKAIJ
1341: @*/
1342: PetscErrorCode  MatCreateKAIJ(Mat A,PetscInt p,PetscInt q,const PetscScalar S[],const PetscScalar T[],Mat *kaij)
1343: {
1345:   PetscMPIInt    size;

1348:   MatCreate(PetscObjectComm((PetscObject)A),kaij);
1349:   MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);
1350:   if (size == 1) {
1351:     MatSetType(*kaij,MATSEQKAIJ);
1352:   } else {
1353:     MatSetType(*kaij,MATMPIKAIJ);
1354:   }
1355:   MatKAIJSetAIJ(*kaij,A);
1356:   MatKAIJSetS(*kaij,p,q,S);
1357:   MatKAIJSetT(*kaij,p,q,T);
1358:   MatSetUp(*kaij);
1359:   return(0);
1360: }

1362: /*MC
1363:   MATKAIJ - MATKAIJ = "kaij" - A matrix type to be used to evaluate matrices of form
1364:     [I \otimes S + A \otimes T],
1365:   where
1366:     S is a dense (p \times q) matrix,
1367:     T is a dense (p \times q) matrix,
1368:     A is an AIJ  (n \times n) matrix,
1369:     and I is the identity matrix.
1370:   The resulting matrix is (np \times nq).

1372:   S and T are always stored independently on all processes as PetscScalar arrays in column-major format.

1374:   Notes:
1375:   A linear system with multiple right-hand sides, AX = B, can be expressed in the KAIJ-friendly form of (A \otimes I) x = b,
1376:   where x and b are column vectors containing the row-major representations of X and B.

1378:   Level: advanced

1380: .seealso: MatKAIJSetAIJ(), MatKAIJSetS(), MatKAIJSetT(), MatKAIJGetAIJ(), MatKAIJGetS(), MatKAIJGetT(), MatCreateKAIJ()
1381: M*/

1383: PETSC_EXTERN PetscErrorCode MatCreate_KAIJ(Mat A)
1384: {
1386:   Mat_MPIKAIJ    *b;
1387:   PetscMPIInt    size;

1390:   PetscNewLog(A,&b);
1391:   A->data  = (void*)b;

1393:   PetscMemzero(A->ops,sizeof(struct _MatOps));

1395:   A->ops->setup = MatSetUp_KAIJ;

1397:   b->w    = NULL;
1398:   MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);
1399:   if (size == 1) {
1400:     PetscObjectChangeTypeName((PetscObject)A,MATSEQKAIJ);
1401:     A->ops->setup               = MatSetUp_KAIJ;
1402:     A->ops->destroy             = MatDestroy_SeqKAIJ;
1403:     A->ops->view                = MatView_KAIJ;
1404:     A->ops->mult                = MatMult_SeqKAIJ;
1405:     A->ops->multadd             = MatMultAdd_SeqKAIJ;
1406:     A->ops->invertblockdiagonal = MatInvertBlockDiagonal_SeqKAIJ;
1407:     A->ops->getrow              = MatGetRow_SeqKAIJ;
1408:     A->ops->restorerow          = MatRestoreRow_SeqKAIJ;
1409:     A->ops->sor                 = MatSOR_SeqKAIJ;
1410:   } else {
1411:     PetscObjectChangeTypeName((PetscObject)A,MATMPIKAIJ);
1412:     A->ops->setup               = MatSetUp_KAIJ;
1413:     A->ops->destroy             = MatDestroy_MPIKAIJ;
1414:     A->ops->view                = MatView_KAIJ;
1415:     A->ops->mult                = MatMult_MPIKAIJ;
1416:     A->ops->multadd             = MatMultAdd_MPIKAIJ;
1417:     A->ops->invertblockdiagonal = MatInvertBlockDiagonal_MPIKAIJ;
1418:     A->ops->getrow              = MatGetRow_MPIKAIJ;
1419:     A->ops->restorerow          = MatRestoreRow_MPIKAIJ;
1420:     PetscObjectComposeFunction((PetscObject)A,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPIKAIJ);
1421:   }
1422:   A->ops->createsubmatrix = MatCreateSubMatrix_KAIJ;
1423:   return(0);
1424: }