Actual source code: blockmat.c


  2: /*
  3:    This provides a matrix that consists of Mats
  4: */

  6: #include <petsc/private/matimpl.h>
  7: #include <../src/mat/impls/baij/seq/baij.h>

  9: typedef struct {
 10:   SEQAIJHEADER(Mat);
 11:   SEQBAIJHEADER;
 12:   Mat *diags;

 14:   Vec left,right,middle,workb;                 /* dummy vectors to perform local parts of product */
 15: } Mat_BlockMat;

 17: static PetscErrorCode MatSOR_BlockMat_Symmetric(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
 18: {
 19:   Mat_BlockMat      *a = (Mat_BlockMat*)A->data;
 20:   PetscScalar       *x;
 21:   const Mat         *v;
 22:   const PetscScalar *b;
 23:   PetscInt          n = A->cmap->n,i,mbs = n/A->rmap->bs,j,bs = A->rmap->bs;
 24:   const PetscInt    *idx;
 25:   IS                row,col;
 26:   MatFactorInfo     info;
 27:   Vec               left = a->left,right = a->right, middle = a->middle;
 28:   Mat               *diag;

 30:   its = its*lits;
 35:   if ((flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) && !(flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP)) {
 36:     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot do backward sweep without forward sweep");
 37:   }

 39:   if (!a->diags) {
 40:     PetscMalloc1(mbs,&a->diags);
 41:     MatFactorInfoInitialize(&info);
 42:     for (i=0; i<mbs; i++) {
 43:       MatGetOrdering(a->a[a->diag[i]], MATORDERINGND,&row,&col);
 44:       MatCholeskyFactorSymbolic(a->diags[i],a->a[a->diag[i]],row,&info);
 45:       MatCholeskyFactorNumeric(a->diags[i],a->a[a->diag[i]],&info);
 46:       ISDestroy(&row);
 47:       ISDestroy(&col);
 48:     }
 49:     VecDuplicate(bb,&a->workb);
 50:   }
 51:   diag = a->diags;

 53:   VecSet(xx,0.0);
 54:   VecGetArray(xx,&x);
 55:   /* copy right hand side because it must be modified during iteration */
 56:   VecCopy(bb,a->workb);
 57:   VecGetArrayRead(a->workb,&b);

 59:   /* need to add code for when initial guess is zero, see MatSOR_SeqAIJ */
 60:   while (its--) {
 61:     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {

 63:       for (i=0; i<mbs; i++) {
 64:         n   = a->i[i+1] - a->i[i] - 1;
 65:         idx = a->j + a->i[i] + 1;
 66:         v   = a->a + a->i[i] + 1;

 68:         VecSet(left,0.0);
 69:         for (j=0; j<n; j++) {
 70:           VecPlaceArray(right,x + idx[j]*bs);
 71:           MatMultAdd(v[j],right,left,left);
 72:           VecResetArray(right);
 73:         }
 74:         VecPlaceArray(right,b + i*bs);
 75:         VecAYPX(left,-1.0,right);
 76:         VecResetArray(right);

 78:         VecPlaceArray(right,x + i*bs);
 79:         MatSolve(diag[i],left,right);

 81:         /* now adjust right hand side, see MatSOR_SeqSBAIJ */
 82:         for (j=0; j<n; j++) {
 83:           MatMultTranspose(v[j],right,left);
 84:           VecPlaceArray(middle,b + idx[j]*bs);
 85:           VecAXPY(middle,-1.0,left);
 86:           VecResetArray(middle);
 87:         }
 88:         VecResetArray(right);

 90:       }
 91:     }
 92:     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {

 94:       for (i=mbs-1; i>=0; i--) {
 95:         n   = a->i[i+1] - a->i[i] - 1;
 96:         idx = a->j + a->i[i] + 1;
 97:         v   = a->a + a->i[i] + 1;

 99:         VecSet(left,0.0);
100:         for (j=0; j<n; j++) {
101:           VecPlaceArray(right,x + idx[j]*bs);
102:           MatMultAdd(v[j],right,left,left);
103:           VecResetArray(right);
104:         }
105:         VecPlaceArray(right,b + i*bs);
106:         VecAYPX(left,-1.0,right);
107:         VecResetArray(right);

109:         VecPlaceArray(right,x + i*bs);
110:         MatSolve(diag[i],left,right);
111:         VecResetArray(right);

113:       }
114:     }
115:   }
116:   VecRestoreArray(xx,&x);
117:   VecRestoreArrayRead(a->workb,&b);
118:   return 0;
119: }

121: static PetscErrorCode MatSOR_BlockMat(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
122: {
123:   Mat_BlockMat      *a = (Mat_BlockMat*)A->data;
124:   PetscScalar       *x;
125:   const Mat         *v;
126:   const PetscScalar *b;
127:   PetscInt          n = A->cmap->n,i,mbs = n/A->rmap->bs,j,bs = A->rmap->bs;
128:   const PetscInt    *idx;
129:   IS                row,col;
130:   MatFactorInfo     info;
131:   Vec               left = a->left,right = a->right;
132:   Mat               *diag;

134:   its = its*lits;

140:   if (!a->diags) {
141:     PetscMalloc1(mbs,&a->diags);
142:     MatFactorInfoInitialize(&info);
143:     for (i=0; i<mbs; i++) {
144:       MatGetOrdering(a->a[a->diag[i]], MATORDERINGND,&row,&col);
145:       MatLUFactorSymbolic(a->diags[i],a->a[a->diag[i]],row,col,&info);
146:       MatLUFactorNumeric(a->diags[i],a->a[a->diag[i]],&info);
147:       ISDestroy(&row);
148:       ISDestroy(&col);
149:     }
150:   }
151:   diag = a->diags;

153:   VecSet(xx,0.0);
154:   VecGetArray(xx,&x);
155:   VecGetArrayRead(bb,&b);

157:   /* need to add code for when initial guess is zero, see MatSOR_SeqAIJ */
158:   while (its--) {
159:     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {

161:       for (i=0; i<mbs; i++) {
162:         n   = a->i[i+1] - a->i[i];
163:         idx = a->j + a->i[i];
164:         v   = a->a + a->i[i];

166:         VecSet(left,0.0);
167:         for (j=0; j<n; j++) {
168:           if (idx[j] != i) {
169:             VecPlaceArray(right,x + idx[j]*bs);
170:             MatMultAdd(v[j],right,left,left);
171:             VecResetArray(right);
172:           }
173:         }
174:         VecPlaceArray(right,b + i*bs);
175:         VecAYPX(left,-1.0,right);
176:         VecResetArray(right);

178:         VecPlaceArray(right,x + i*bs);
179:         MatSolve(diag[i],left,right);
180:         VecResetArray(right);
181:       }
182:     }
183:     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {

185:       for (i=mbs-1; i>=0; i--) {
186:         n   = a->i[i+1] - a->i[i];
187:         idx = a->j + a->i[i];
188:         v   = a->a + a->i[i];

190:         VecSet(left,0.0);
191:         for (j=0; j<n; j++) {
192:           if (idx[j] != i) {
193:             VecPlaceArray(right,x + idx[j]*bs);
194:             MatMultAdd(v[j],right,left,left);
195:             VecResetArray(right);
196:           }
197:         }
198:         VecPlaceArray(right,b + i*bs);
199:         VecAYPX(left,-1.0,right);
200:         VecResetArray(right);

202:         VecPlaceArray(right,x + i*bs);
203:         MatSolve(diag[i],left,right);
204:         VecResetArray(right);

206:       }
207:     }
208:   }
209:   VecRestoreArray(xx,&x);
210:   VecRestoreArrayRead(bb,&b);
211:   return 0;
212: }

214: static PetscErrorCode MatSetValues_BlockMat(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
215: {
216:   Mat_BlockMat   *a = (Mat_BlockMat*)A->data;
217:   PetscInt       *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1;
218:   PetscInt       *imax=a->imax,*ai=a->i,*ailen=a->ilen;
219:   PetscInt       *aj  =a->j,nonew=a->nonew,bs=A->rmap->bs,brow,bcol;
220:   PetscInt       ridx,cidx;
221:   PetscBool      roworiented=a->roworiented;
222:   MatScalar      value;
223:   Mat            *ap,*aa = a->a;

225:   for (k=0; k<m; k++) { /* loop over added rows */
226:     row  = im[k];
227:     brow = row/bs;
228:     if (row < 0) continue;
230:     rp   = aj + ai[brow];
231:     ap   = aa + ai[brow];
232:     rmax = imax[brow];
233:     nrow = ailen[brow];
234:     low  = 0;
235:     high = nrow;
236:     for (l=0; l<n; l++) { /* loop over added columns */
237:       if (in[l] < 0) continue;
239:       col = in[l]; bcol = col/bs;
240:       if (A->symmetric && brow > bcol) continue;
241:       ridx = row % bs; cidx = col % bs;
242:       if (roworiented) value = v[l + k*n];
243:       else value = v[k + l*m];

245:       if (col <= lastcol) low = 0;
246:       else high = nrow;
247:       lastcol = col;
248:       while (high-low > 7) {
249:         t = (low+high)/2;
250:         if (rp[t] > bcol) high = t;
251:         else              low  = t;
252:       }
253:       for (i=low; i<high; i++) {
254:         if (rp[i] > bcol) break;
255:         if (rp[i] == bcol) goto noinsert1;
256:       }
257:       if (nonew == 1) goto noinsert1;
259:       MatSeqXAIJReallocateAIJ(A,a->mbs,1,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,Mat);
260:       N = nrow++ - 1; high++;
261:       /* shift up all the later entries in this row */
262:       for (ii=N; ii>=i; ii--) {
263:         rp[ii+1] = rp[ii];
264:         ap[ii+1] = ap[ii];
265:       }
266:       if (N>=i) ap[i] = NULL;
267:       rp[i] = bcol;
268:       a->nz++;
269:       A->nonzerostate++;
270: noinsert1:;
271:       if (!*(ap+i)) {
272:         MatCreateSeqAIJ(PETSC_COMM_SELF,bs,bs,0,NULL,ap+i);
273:       }
274:       MatSetValues(ap[i],1,&ridx,1,&cidx,&value,is);
275:       low  = i;
276:     }
277:     ailen[brow] = nrow;
278:   }
279:   return 0;
280: }

282: static PetscErrorCode MatLoad_BlockMat(Mat newmat, PetscViewer viewer)
283: {
284:   PetscErrorCode    ierr;
285:   Mat               tmpA;
286:   PetscInt          i,j,m,n,bs = 1,ncols,*lens,currentcol,mbs,**ii,*ilens,nextcol,*llens,cnt = 0;
287:   const PetscInt    *cols;
288:   const PetscScalar *values;
289:   PetscBool         flg = PETSC_FALSE,notdone;
290:   Mat_SeqAIJ        *a;
291:   Mat_BlockMat      *amat;

293:   /* force binary viewer to load .info file if it has not yet done so */
294:   PetscViewerSetUp(viewer);
295:   MatCreate(PETSC_COMM_SELF,&tmpA);
296:   MatSetType(tmpA,MATSEQAIJ);
297:   MatLoad_SeqAIJ(tmpA,viewer);

299:   MatGetLocalSize(tmpA,&m,&n);
300:   PetscOptionsBegin(PETSC_COMM_SELF,NULL,"Options for loading BlockMat matrix 1","Mat");
301:   PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);
302:   PetscOptionsBool("-matload_symmetric","Store the matrix as symmetric","MatLoad",flg,&flg,NULL);
303:   PetscOptionsEnd();

305:   /* Determine number of nonzero blocks for each block row */
306:   a    = (Mat_SeqAIJ*) tmpA->data;
307:   mbs  = m/bs;
308:   PetscMalloc3(mbs,&lens,bs,&ii,bs,&ilens);
309:   PetscArrayzero(lens,mbs);

311:   for (i=0; i<mbs; i++) {
312:     for (j=0; j<bs; j++) {
313:       ii[j]    = a->j + a->i[i*bs + j];
314:       ilens[j] = a->i[i*bs + j + 1] - a->i[i*bs + j];
315:     }

317:     currentcol = -1;
318:     while (PETSC_TRUE) {
319:       notdone = PETSC_FALSE;
320:       nextcol = 1000000000;
321:       for (j=0; j<bs; j++) {
322:         while ((ilens[j] > 0 && ii[j][0]/bs <= currentcol)) {
323:           ii[j]++;
324:           ilens[j]--;
325:         }
326:         if (ilens[j] > 0) {
327:           notdone = PETSC_TRUE;
328:           nextcol = PetscMin(nextcol,ii[j][0]/bs);
329:         }
330:       }
331:       if (!notdone) break;
332:       if (!flg || (nextcol >= i)) lens[i]++;
333:       currentcol = nextcol;
334:     }
335:   }

337:   if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) {
338:     MatSetSizes(newmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);
339:   }
340:   MatBlockMatSetPreallocation(newmat,bs,0,lens);
341:   if (flg) {
342:     MatSetOption(newmat,MAT_SYMMETRIC,PETSC_TRUE);
343:   }
344:   amat = (Mat_BlockMat*)(newmat)->data;

346:   /* preallocate the submatrices */
347:   PetscMalloc1(bs,&llens);
348:   for (i=0; i<mbs; i++) { /* loops for block rows */
349:     for (j=0; j<bs; j++) {
350:       ii[j]    = a->j + a->i[i*bs + j];
351:       ilens[j] = a->i[i*bs + j + 1] - a->i[i*bs + j];
352:     }

354:     currentcol = 1000000000;
355:     for (j=0; j<bs; j++) { /* loop over rows in block finding first nonzero block */
356:       if (ilens[j] > 0) {
357:         currentcol = PetscMin(currentcol,ii[j][0]/bs);
358:       }
359:     }

361:     while (PETSC_TRUE) {  /* loops over blocks in block row */
362:       notdone = PETSC_FALSE;
363:       nextcol = 1000000000;
364:       PetscArrayzero(llens,bs);
365:       for (j=0; j<bs; j++) { /* loop over rows in block */
366:         while ((ilens[j] > 0 && ii[j][0]/bs <= currentcol)) { /* loop over columns in row */
367:           ii[j]++;
368:           ilens[j]--;
369:           llens[j]++;
370:         }
371:         if (ilens[j] > 0) {
372:           notdone = PETSC_TRUE;
373:           nextcol = PetscMin(nextcol,ii[j][0]/bs);
374:         }
375:       }
377:       if (!flg || currentcol >= i) {
378:         amat->j[cnt] = currentcol;
379:         MatCreateSeqAIJ(PETSC_COMM_SELF,bs,bs,0,llens,amat->a+cnt++);
380:       }

382:       if (!notdone) break;
383:       currentcol = nextcol;
384:     }
385:     amat->ilen[i] = lens[i];
386:   }

388:   PetscFree3(lens,ii,ilens);
389:   PetscFree(llens);

391:   /* copy over the matrix, one row at a time */
392:   for (i=0; i<m; i++) {
393:     MatGetRow(tmpA,i,&ncols,&cols,&values);
394:     MatSetValues(newmat,1,&i,ncols,cols,values,INSERT_VALUES);
395:     MatRestoreRow(tmpA,i,&ncols,&cols,&values);
396:   }
397:   MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);
398:   MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);
399:   return 0;
400: }

402: static PetscErrorCode MatView_BlockMat(Mat A,PetscViewer viewer)
403: {
404:   Mat_BlockMat      *a = (Mat_BlockMat*)A->data;
405:   const char        *name;
406:   PetscViewerFormat format;

408:   PetscObjectGetName((PetscObject)A,&name);
409:   PetscViewerGetFormat(viewer,&format);
410:   if (format == PETSC_VIEWER_ASCII_FACTOR_INFO || format == PETSC_VIEWER_ASCII_INFO) {
411:     PetscViewerASCIIPrintf(viewer,"Nonzero block matrices = %" PetscInt_FMT " \n",a->nz);
412:     if (A->symmetric) {
413:       PetscViewerASCIIPrintf(viewer,"Only upper triangular part of symmetric matrix is stored\n");
414:     }
415:   }
416:   return 0;
417: }

419: static PetscErrorCode MatDestroy_BlockMat(Mat mat)
420: {
421:   Mat_BlockMat   *bmat = (Mat_BlockMat*)mat->data;
422:   PetscInt       i;

424:   VecDestroy(&bmat->right);
425:   VecDestroy(&bmat->left);
426:   VecDestroy(&bmat->middle);
427:   VecDestroy(&bmat->workb);
428:   if (bmat->diags) {
429:     for (i=0; i<mat->rmap->n/mat->rmap->bs; i++) {
430:       MatDestroy(&bmat->diags[i]);
431:     }
432:   }
433:   if (bmat->a) {
434:     for (i=0; i<bmat->nz; i++) {
435:       MatDestroy(&bmat->a[i]);
436:     }
437:   }
438:   MatSeqXAIJFreeAIJ(mat,(PetscScalar**)&bmat->a,&bmat->j,&bmat->i);
439:   PetscFree(mat->data);
440:   return 0;
441: }

443: static PetscErrorCode MatMult_BlockMat(Mat A,Vec x,Vec y)
444: {
445:   Mat_BlockMat   *bmat = (Mat_BlockMat*)A->data;
446:   PetscScalar    *xx,*yy;
447:   PetscInt       *aj,i,*ii,jrow,m = A->rmap->n/A->rmap->bs,bs = A->rmap->bs,n,j;
448:   Mat            *aa;

450:   /*
451:      Standard CSR multiply except each entry is a Mat
452:   */
453:   VecGetArray(x,&xx);

455:   VecSet(y,0.0);
456:   VecGetArray(y,&yy);
457:   aj   = bmat->j;
458:   aa   = bmat->a;
459:   ii   = bmat->i;
460:   for (i=0; i<m; i++) {
461:     jrow = ii[i];
462:     VecPlaceArray(bmat->left,yy + bs*i);
463:     n    = ii[i+1] - jrow;
464:     for (j=0; j<n; j++) {
465:       VecPlaceArray(bmat->right,xx + bs*aj[jrow]);
466:       MatMultAdd(aa[jrow],bmat->right,bmat->left,bmat->left);
467:       VecResetArray(bmat->right);
468:       jrow++;
469:     }
470:     VecResetArray(bmat->left);
471:   }
472:   VecRestoreArray(x,&xx);
473:   VecRestoreArray(y,&yy);
474:   return 0;
475: }

477: PetscErrorCode MatMult_BlockMat_Symmetric(Mat A,Vec x,Vec y)
478: {
479:   Mat_BlockMat   *bmat = (Mat_BlockMat*)A->data;
480:   PetscScalar    *xx,*yy;
481:   PetscInt       *aj,i,*ii,jrow,m = A->rmap->n/A->rmap->bs,bs = A->rmap->bs,n,j;
482:   Mat            *aa;

484:   /*
485:      Standard CSR multiply except each entry is a Mat
486:   */
487:   VecGetArray(x,&xx);

489:   VecSet(y,0.0);
490:   VecGetArray(y,&yy);
491:   aj   = bmat->j;
492:   aa   = bmat->a;
493:   ii   = bmat->i;
494:   for (i=0; i<m; i++) {
495:     jrow = ii[i];
496:     n    = ii[i+1] - jrow;
497:     VecPlaceArray(bmat->left,yy + bs*i);
498:     VecPlaceArray(bmat->middle,xx + bs*i);
499:     /* if we ALWAYS required a diagonal entry then could remove this if test */
500:     if (aj[jrow] == i) {
501:       VecPlaceArray(bmat->right,xx + bs*aj[jrow]);
502:       MatMultAdd(aa[jrow],bmat->right,bmat->left,bmat->left);
503:       VecResetArray(bmat->right);
504:       jrow++;
505:       n--;
506:     }
507:     for (j=0; j<n; j++) {
508:       VecPlaceArray(bmat->right,xx + bs*aj[jrow]);            /* upper triangular part */
509:       MatMultAdd(aa[jrow],bmat->right,bmat->left,bmat->left);
510:       VecResetArray(bmat->right);

512:       VecPlaceArray(bmat->right,yy + bs*aj[jrow]);            /* lower triangular part */
513:       MatMultTransposeAdd(aa[jrow],bmat->middle,bmat->right,bmat->right);
514:       VecResetArray(bmat->right);
515:       jrow++;
516:     }
517:     VecResetArray(bmat->left);
518:     VecResetArray(bmat->middle);
519:   }
520:   VecRestoreArray(x,&xx);
521:   VecRestoreArray(y,&yy);
522:   return 0;
523: }

525: static PetscErrorCode MatMultAdd_BlockMat(Mat A,Vec x,Vec y,Vec z)
526: {
527:   return 0;
528: }

530: static PetscErrorCode MatMultTranspose_BlockMat(Mat A,Vec x,Vec y)
531: {
532:   return 0;
533: }

535: static PetscErrorCode MatMultTransposeAdd_BlockMat(Mat A,Vec x,Vec y,Vec z)
536: {
537:   return 0;
538: }

540: /*
541:      Adds diagonal pointers to sparse matrix structure.
542: */
543: static PetscErrorCode MatMarkDiagonal_BlockMat(Mat A)
544: {
545:   Mat_BlockMat   *a = (Mat_BlockMat*)A->data;
546:   PetscInt       i,j,mbs = A->rmap->n/A->rmap->bs;

548:   if (!a->diag) {
549:     PetscMalloc1(mbs,&a->diag);
550:   }
551:   for (i=0; i<mbs; i++) {
552:     a->diag[i] = a->i[i+1];
553:     for (j=a->i[i]; j<a->i[i+1]; j++) {
554:       if (a->j[j] == i) {
555:         a->diag[i] = j;
556:         break;
557:       }
558:     }
559:   }
560:   return 0;
561: }

563: static PetscErrorCode MatCreateSubMatrix_BlockMat(Mat A,IS isrow,IS iscol,MatReuse scall,Mat *B)
564: {
565:   Mat_BlockMat   *a = (Mat_BlockMat*)A->data;
566:   Mat_SeqAIJ     *c;
567:   PetscInt       i,k,first,step,lensi,nrows,ncols;
568:   PetscInt       *j_new,*i_new,*aj = a->j,*ailen = a->ilen;
569:   PetscScalar    *a_new;
570:   Mat            C,*aa = a->a;
571:   PetscBool      stride,equal;

573:   ISEqual(isrow,iscol,&equal);
575:   PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&stride);
577:   ISStrideGetInfo(iscol,&first,&step);

580:   ISGetLocalSize(isrow,&nrows);
581:   ncols = nrows;

583:   /* create submatrix */
584:   if (scall == MAT_REUSE_MATRIX) {
585:     PetscInt n_cols,n_rows;
586:     C    = *B;
587:     MatGetSize(C,&n_rows,&n_cols);
589:     MatZeroEntries(C);
590:   } else {
591:     MatCreate(PetscObjectComm((PetscObject)A),&C);
592:     MatSetSizes(C,nrows,ncols,PETSC_DETERMINE,PETSC_DETERMINE);
593:     if (A->symmetric) {
594:       MatSetType(C,MATSEQSBAIJ);
595:     } else {
596:       MatSetType(C,MATSEQAIJ);
597:     }
598:     MatSeqAIJSetPreallocation(C,0,ailen);
599:     MatSeqSBAIJSetPreallocation(C,1,0,ailen);
600:   }
601:   c = (Mat_SeqAIJ*)C->data;

603:   /* loop over rows inserting into submatrix */
604:   a_new = c->a;
605:   j_new = c->j;
606:   i_new = c->i;

608:   for (i=0; i<nrows; i++) {
609:     lensi = ailen[i];
610:     for (k=0; k<lensi; k++) {
611:       *j_new++ = *aj++;
612:       MatGetValue(*aa++,first,first,a_new++);
613:     }
614:     i_new[i+1] = i_new[i] + lensi;
615:     c->ilen[i] = lensi;
616:   }

618:   MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);
619:   MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);
620:   *B   = C;
621:   return 0;
622: }

624: static PetscErrorCode MatAssemblyEnd_BlockMat(Mat A,MatAssemblyType mode)
625: {
626:   Mat_BlockMat   *a = (Mat_BlockMat*)A->data;
627:   PetscInt       fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax;
628:   PetscInt       m      = a->mbs,*ip,N,*ailen = a->ilen,rmax = 0;
629:   Mat            *aa    = a->a,*ap;

631:   if (mode == MAT_FLUSH_ASSEMBLY) return 0;

633:   if (m) rmax = ailen[0]; /* determine row with most nonzeros */
634:   for (i=1; i<m; i++) {
635:     /* move each row back by the amount of empty slots (fshift) before it*/
636:     fshift += imax[i-1] - ailen[i-1];
637:     rmax    = PetscMax(rmax,ailen[i]);
638:     if (fshift) {
639:       ip = aj + ai[i];
640:       ap = aa + ai[i];
641:       N  = ailen[i];
642:       for (j=0; j<N; j++) {
643:         ip[j-fshift] = ip[j];
644:         ap[j-fshift] = ap[j];
645:       }
646:     }
647:     ai[i] = ai[i-1] + ailen[i-1];
648:   }
649:   if (m) {
650:     fshift += imax[m-1] - ailen[m-1];
651:     ai[m]   = ai[m-1] + ailen[m-1];
652:   }
653:   /* reset ilen and imax for each row */
654:   for (i=0; i<m; i++) {
655:     ailen[i] = imax[i] = ai[i+1] - ai[i];
656:   }
657:   a->nz = ai[m];
658:   for (i=0; i<a->nz; i++) {
659:     PetscAssert(aa[i],PETSC_COMM_SELF,PETSC_ERR_PLIB,"Null matrix at location %" PetscInt_FMT " column %" PetscInt_FMT " nz %" PetscInt_FMT,i,aj[i],a->nz);
660:     MatAssemblyBegin(aa[i],MAT_FINAL_ASSEMBLY);
661:     MatAssemblyEnd(aa[i],MAT_FINAL_ASSEMBLY);
662:   }
663:   PetscInfo(A,"Matrix size: %" PetscInt_FMT " X %" PetscInt_FMT "; storage space: %" PetscInt_FMT " unneeded,%" PetscInt_FMT " used\n",m,A->cmap->n/A->cmap->bs,fshift,a->nz);
664:   PetscInfo(A,"Number of mallocs during MatSetValues() is %" PetscInt_FMT "\n",a->reallocs);
665:   PetscInfo(A,"Maximum nonzeros in any row is %" PetscInt_FMT "\n",rmax);

667:   A->info.mallocs    += a->reallocs;
668:   a->reallocs         = 0;
669:   A->info.nz_unneeded = (double)fshift;
670:   a->rmax             = rmax;
671:   MatMarkDiagonal_BlockMat(A);
672:   return 0;
673: }

675: static PetscErrorCode MatSetOption_BlockMat(Mat A,MatOption opt,PetscBool flg)
676: {
677:   if (opt == MAT_SYMMETRIC && flg) {
678:     A->ops->sor  = MatSOR_BlockMat_Symmetric;
679:     A->ops->mult = MatMult_BlockMat_Symmetric;
680:   } else {
681:     PetscInfo(A,"Unused matrix option %s\n",MatOptions[opt]);
682:   }
683:   return 0;
684: }

686: static struct _MatOps MatOps_Values = {MatSetValues_BlockMat,
687:                                        NULL,
688:                                        NULL,
689:                                        MatMult_BlockMat,
690:                                /*  4*/ MatMultAdd_BlockMat,
691:                                        MatMultTranspose_BlockMat,
692:                                        MatMultTransposeAdd_BlockMat,
693:                                        NULL,
694:                                        NULL,
695:                                        NULL,
696:                                /* 10*/ NULL,
697:                                        NULL,
698:                                        NULL,
699:                                        MatSOR_BlockMat,
700:                                        NULL,
701:                                /* 15*/ NULL,
702:                                        NULL,
703:                                        NULL,
704:                                        NULL,
705:                                        NULL,
706:                                /* 20*/ NULL,
707:                                        MatAssemblyEnd_BlockMat,
708:                                        MatSetOption_BlockMat,
709:                                        NULL,
710:                                /* 24*/ NULL,
711:                                        NULL,
712:                                        NULL,
713:                                        NULL,
714:                                        NULL,
715:                                /* 29*/ NULL,
716:                                        NULL,
717:                                        NULL,
718:                                        NULL,
719:                                        NULL,
720:                                /* 34*/ NULL,
721:                                        NULL,
722:                                        NULL,
723:                                        NULL,
724:                                        NULL,
725:                                /* 39*/ NULL,
726:                                        NULL,
727:                                        NULL,
728:                                        NULL,
729:                                        NULL,
730:                                /* 44*/ NULL,
731:                                        NULL,
732:                                        MatShift_Basic,
733:                                        NULL,
734:                                        NULL,
735:                                /* 49*/ NULL,
736:                                        NULL,
737:                                        NULL,
738:                                        NULL,
739:                                        NULL,
740:                                /* 54*/ NULL,
741:                                        NULL,
742:                                        NULL,
743:                                        NULL,
744:                                        NULL,
745:                                /* 59*/ MatCreateSubMatrix_BlockMat,
746:                                        MatDestroy_BlockMat,
747:                                        MatView_BlockMat,
748:                                        NULL,
749:                                        NULL,
750:                                /* 64*/ NULL,
751:                                        NULL,
752:                                        NULL,
753:                                        NULL,
754:                                        NULL,
755:                                /* 69*/ NULL,
756:                                        NULL,
757:                                        NULL,
758:                                        NULL,
759:                                        NULL,
760:                                /* 74*/ NULL,
761:                                        NULL,
762:                                        NULL,
763:                                        NULL,
764:                                        NULL,
765:                                /* 79*/ NULL,
766:                                        NULL,
767:                                        NULL,
768:                                        NULL,
769:                                        MatLoad_BlockMat,
770:                                /* 84*/ NULL,
771:                                        NULL,
772:                                        NULL,
773:                                        NULL,
774:                                        NULL,
775:                                /* 89*/ NULL,
776:                                        NULL,
777:                                        NULL,
778:                                        NULL,
779:                                        NULL,
780:                                /* 94*/ NULL,
781:                                        NULL,
782:                                        NULL,
783:                                        NULL,
784:                                        NULL,
785:                                /* 99*/ NULL,
786:                                        NULL,
787:                                        NULL,
788:                                        NULL,
789:                                        NULL,
790:                                /*104*/ NULL,
791:                                        NULL,
792:                                        NULL,
793:                                        NULL,
794:                                        NULL,
795:                                /*109*/ NULL,
796:                                        NULL,
797:                                        NULL,
798:                                        NULL,
799:                                        NULL,
800:                                /*114*/ NULL,
801:                                        NULL,
802:                                        NULL,
803:                                        NULL,
804:                                        NULL,
805:                                /*119*/ NULL,
806:                                        NULL,
807:                                        NULL,
808:                                        NULL,
809:                                        NULL,
810:                                /*124*/ NULL,
811:                                        NULL,
812:                                        NULL,
813:                                        NULL,
814:                                        NULL,
815:                                /*129*/ NULL,
816:                                        NULL,
817:                                        NULL,
818:                                        NULL,
819:                                        NULL,
820:                                /*134*/ NULL,
821:                                        NULL,
822:                                        NULL,
823:                                        NULL,
824:                                        NULL,
825:                                /*139*/ NULL,
826:                                        NULL,
827:                                        NULL,
828:                                        NULL,
829:                                        NULL,
830:                                /*144*/ NULL,
831:                                        NULL,
832:                                        NULL,
833:                                        NULL
834: };

836: /*@C
837:    MatBlockMatSetPreallocation - For good matrix assembly performance
838:    the user should preallocate the matrix storage by setting the parameter nz
839:    (or the array nnz).  By setting these parameters accurately, performance
840:    during matrix assembly can be increased by more than a factor of 50.

842:    Collective

844:    Input Parameters:
845: +  B - The matrix
846: .  bs - size of each block in matrix
847: .  nz - number of nonzeros per block row (same for all rows)
848: -  nnz - array containing the number of nonzeros in the various block rows
849:          (possibly different for each row) or NULL

851:    Notes:
852:      If nnz is given then nz is ignored

854:    Specify the preallocated storage with either nz or nnz (not both).
855:    Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory
856:    allocation.  For large problems you MUST preallocate memory or you
857:    will get TERRIBLE performance, see the users' manual chapter on matrices.

859:    Level: intermediate

861: .seealso: MatCreate(), MatCreateBlockMat(), MatSetValues()

863: @*/
864: PetscErrorCode  MatBlockMatSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[])
865: {
866:   PetscTryMethod(B,"MatBlockMatSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[]),(B,bs,nz,nnz));
867:   return 0;
868: }

870: static PetscErrorCode  MatBlockMatSetPreallocation_BlockMat(Mat A,PetscInt bs,PetscInt nz,PetscInt *nnz)
871: {
872:   Mat_BlockMat   *bmat = (Mat_BlockMat*)A->data;
873:   PetscInt       i;

875:   PetscLayoutSetBlockSize(A->rmap,bs);
876:   PetscLayoutSetBlockSize(A->cmap,bs);
877:   PetscLayoutSetUp(A->rmap);
878:   PetscLayoutSetUp(A->cmap);
879:   PetscLayoutGetBlockSize(A->rmap,&bs);

881:   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
883:   if (nnz) {
884:     for (i=0; i<A->rmap->n/bs; i++) {
887:     }
888:   }
889:   bmat->mbs = A->rmap->n/bs;

891:   VecCreateSeqWithArray(PETSC_COMM_SELF,1,bs,NULL,&bmat->right);
892:   VecCreateSeqWithArray(PETSC_COMM_SELF,1,bs,NULL,&bmat->middle);
893:   VecCreateSeq(PETSC_COMM_SELF,bs,&bmat->left);

895:   if (!bmat->imax) {
896:     PetscMalloc2(A->rmap->n,&bmat->imax,A->rmap->n,&bmat->ilen);
897:     PetscLogObjectMemory((PetscObject)A,2*A->rmap->n*sizeof(PetscInt));
898:   }
899:   if (PetscLikely(nnz)) {
900:     nz = 0;
901:     for (i=0; i<A->rmap->n/A->rmap->bs; i++) {
902:       bmat->imax[i] = nnz[i];
903:       nz           += nnz[i];
904:     }
905:   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Currently requires block row by row preallocation");

907:   /* bmat->ilen will count nonzeros in each row so far. */
908:   PetscArrayzero(bmat->ilen,bmat->mbs);

910:   /* allocate the matrix space */
911:   MatSeqXAIJFreeAIJ(A,(PetscScalar**)&bmat->a,&bmat->j,&bmat->i);
912:   PetscMalloc3(nz,&bmat->a,nz,&bmat->j,A->rmap->n+1,&bmat->i);
913:   PetscLogObjectMemory((PetscObject)A,(A->rmap->n+1)*sizeof(PetscInt)+nz*(sizeof(PetscScalar)+sizeof(PetscInt)));
914:   bmat->i[0] = 0;
915:   for (i=1; i<bmat->mbs+1; i++) {
916:     bmat->i[i] = bmat->i[i-1] + bmat->imax[i-1];
917:   }
918:   bmat->singlemalloc = PETSC_TRUE;
919:   bmat->free_a       = PETSC_TRUE;
920:   bmat->free_ij      = PETSC_TRUE;

922:   bmat->nz            = 0;
923:   bmat->maxnz         = nz;
924:   A->info.nz_unneeded = (double)bmat->maxnz;
925:   MatSetOption(A,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);
926:   return 0;
927: }

929: /*MC
930:    MATBLOCKMAT - A matrix that is defined by a set of Mat's that represents a sparse block matrix
931:                  consisting of (usually) sparse blocks.

933:   Level: advanced

935: .seealso: MatCreateBlockMat()

937: M*/

939: PETSC_EXTERN PetscErrorCode MatCreate_BlockMat(Mat A)
940: {
941:   Mat_BlockMat   *b;

943:   PetscNewLog(A,&b);
944:   A->data = (void*)b;
945:   PetscMemcpy(A->ops,&MatOps_Values,sizeof(struct _MatOps));

947:   A->assembled    = PETSC_TRUE;
948:   A->preallocated = PETSC_FALSE;
949:   PetscObjectChangeTypeName((PetscObject)A,MATBLOCKMAT);

951:   PetscObjectComposeFunction((PetscObject)A,"MatBlockMatSetPreallocation_C",MatBlockMatSetPreallocation_BlockMat);
952:   return 0;
953: }

955: /*@C
956:    MatCreateBlockMat - Creates a new matrix in which each block contains a uniform-size sequential Mat object

958:   Collective

960:    Input Parameters:
961: +  comm - MPI communicator
962: .  m - number of rows
963: .  n  - number of columns
964: .  bs - size of each submatrix
965: .  nz  - expected maximum number of nonzero blocks in row (use PETSC_DEFAULT if not known)
966: -  nnz - expected number of nonzers per block row if known (use NULL otherwise)

968:    Output Parameter:
969: .  A - the matrix

971:    Level: intermediate

973:    Notes:
974:     Matrices of this type are nominally-sparse matrices in which each "entry" is a Mat object.  Each Mat must
975:    have the same size and be sequential.  The local and global sizes must be compatible with this decomposition.

977:    For matrices containing parallel submatrices and variable block sizes, see MATNEST.

979: .seealso: MATBLOCKMAT, MatCreateNest()
980: @*/
981: PetscErrorCode  MatCreateBlockMat(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt bs,PetscInt nz,PetscInt *nnz, Mat *A)
982: {
983:   MatCreate(comm,A);
984:   MatSetSizes(*A,m,n,PETSC_DETERMINE,PETSC_DETERMINE);
985:   MatSetType(*A,MATBLOCKMAT);
986:   MatBlockMatSetPreallocation(*A,bs,nz,nnz);
987:   return 0;
988: }