Actual source code: blockmat.c


  2: /*
  3:    This provides a matrix that consists of Mats
  4: */

  6: #include <petsc/private/matimpl.h>
  7: #include <../src/mat/impls/baij/seq/baij.h>

  9: typedef struct {
 10:   SEQAIJHEADER(Mat);
 11:   SEQBAIJHEADER;
 12:   Mat *diags;

 14:   Vec left,right,middle,workb;                 /* dummy vectors to perform local parts of product */
 15: } Mat_BlockMat;

 17: static PetscErrorCode MatSOR_BlockMat_Symmetric(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
 18: {
 19:   Mat_BlockMat      *a = (Mat_BlockMat*)A->data;
 20:   PetscScalar       *x;
 21:   const Mat         *v;
 22:   const PetscScalar *b;
 23:   PetscErrorCode    ierr;
 24:   PetscInt          n = A->cmap->n,i,mbs = n/A->rmap->bs,j,bs = A->rmap->bs;
 25:   const PetscInt    *idx;
 26:   IS                row,col;
 27:   MatFactorInfo     info;
 28:   Vec               left = a->left,right = a->right, middle = a->middle;
 29:   Mat               *diag;

 32:   its = its*lits;
 33:   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
 34:   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
 35:   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for omega not equal to 1.0");
 36:   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for fshift");
 37:   if ((flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) && !(flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP)) {
 38:     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot do backward sweep without forward sweep");
 39:   }

 41:   if (!a->diags) {
 42:     PetscMalloc1(mbs,&a->diags);
 43:     MatFactorInfoInitialize(&info);
 44:     for (i=0; i<mbs; i++) {
 45:       MatGetOrdering(a->a[a->diag[i]], MATORDERINGND,&row,&col);
 46:       MatCholeskyFactorSymbolic(a->diags[i],a->a[a->diag[i]],row,&info);
 47:       MatCholeskyFactorNumeric(a->diags[i],a->a[a->diag[i]],&info);
 48:       ISDestroy(&row);
 49:       ISDestroy(&col);
 50:     }
 51:     VecDuplicate(bb,&a->workb);
 52:   }
 53:   diag = a->diags;

 55:   VecSet(xx,0.0);
 56:   VecGetArray(xx,&x);
 57:   /* copy right hand side because it must be modified during iteration */
 58:   VecCopy(bb,a->workb);
 59:   VecGetArrayRead(a->workb,&b);

 61:   /* need to add code for when initial guess is zero, see MatSOR_SeqAIJ */
 62:   while (its--) {
 63:     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {

 65:       for (i=0; i<mbs; i++) {
 66:         n   = a->i[i+1] - a->i[i] - 1;
 67:         idx = a->j + a->i[i] + 1;
 68:         v   = a->a + a->i[i] + 1;

 70:         VecSet(left,0.0);
 71:         for (j=0; j<n; j++) {
 72:           VecPlaceArray(right,x + idx[j]*bs);
 73:           MatMultAdd(v[j],right,left,left);
 74:           VecResetArray(right);
 75:         }
 76:         VecPlaceArray(right,b + i*bs);
 77:         VecAYPX(left,-1.0,right);
 78:         VecResetArray(right);

 80:         VecPlaceArray(right,x + i*bs);
 81:         MatSolve(diag[i],left,right);

 83:         /* now adjust right hand side, see MatSOR_SeqSBAIJ */
 84:         for (j=0; j<n; j++) {
 85:           MatMultTranspose(v[j],right,left);
 86:           VecPlaceArray(middle,b + idx[j]*bs);
 87:           VecAXPY(middle,-1.0,left);
 88:           VecResetArray(middle);
 89:         }
 90:         VecResetArray(right);

 92:       }
 93:     }
 94:     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {

 96:       for (i=mbs-1; i>=0; i--) {
 97:         n   = a->i[i+1] - a->i[i] - 1;
 98:         idx = a->j + a->i[i] + 1;
 99:         v   = a->a + a->i[i] + 1;

101:         VecSet(left,0.0);
102:         for (j=0; j<n; j++) {
103:           VecPlaceArray(right,x + idx[j]*bs);
104:           MatMultAdd(v[j],right,left,left);
105:           VecResetArray(right);
106:         }
107:         VecPlaceArray(right,b + i*bs);
108:         VecAYPX(left,-1.0,right);
109:         VecResetArray(right);

111:         VecPlaceArray(right,x + i*bs);
112:         MatSolve(diag[i],left,right);
113:         VecResetArray(right);

115:       }
116:     }
117:   }
118:   VecRestoreArray(xx,&x);
119:   VecRestoreArrayRead(a->workb,&b);
120:   return(0);
121: }

123: static PetscErrorCode MatSOR_BlockMat(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
124: {
125:   Mat_BlockMat      *a = (Mat_BlockMat*)A->data;
126:   PetscScalar       *x;
127:   const Mat         *v;
128:   const PetscScalar *b;
129:   PetscErrorCode    ierr;
130:   PetscInt          n = A->cmap->n,i,mbs = n/A->rmap->bs,j,bs = A->rmap->bs;
131:   const PetscInt    *idx;
132:   IS                row,col;
133:   MatFactorInfo     info;
134:   Vec               left = a->left,right = a->right;
135:   Mat               *diag;

138:   its = its*lits;
139:   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
140:   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
141:   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for omega not equal to 1.0");
142:   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for fshift");

144:   if (!a->diags) {
145:     PetscMalloc1(mbs,&a->diags);
146:     MatFactorInfoInitialize(&info);
147:     for (i=0; i<mbs; i++) {
148:       MatGetOrdering(a->a[a->diag[i]], MATORDERINGND,&row,&col);
149:       MatLUFactorSymbolic(a->diags[i],a->a[a->diag[i]],row,col,&info);
150:       MatLUFactorNumeric(a->diags[i],a->a[a->diag[i]],&info);
151:       ISDestroy(&row);
152:       ISDestroy(&col);
153:     }
154:   }
155:   diag = a->diags;

157:   VecSet(xx,0.0);
158:   VecGetArray(xx,&x);
159:   VecGetArrayRead(bb,&b);

161:   /* need to add code for when initial guess is zero, see MatSOR_SeqAIJ */
162:   while (its--) {
163:     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {

165:       for (i=0; i<mbs; i++) {
166:         n   = a->i[i+1] - a->i[i];
167:         idx = a->j + a->i[i];
168:         v   = a->a + a->i[i];

170:         VecSet(left,0.0);
171:         for (j=0; j<n; j++) {
172:           if (idx[j] != i) {
173:             VecPlaceArray(right,x + idx[j]*bs);
174:             MatMultAdd(v[j],right,left,left);
175:             VecResetArray(right);
176:           }
177:         }
178:         VecPlaceArray(right,b + i*bs);
179:         VecAYPX(left,-1.0,right);
180:         VecResetArray(right);

182:         VecPlaceArray(right,x + i*bs);
183:         MatSolve(diag[i],left,right);
184:         VecResetArray(right);
185:       }
186:     }
187:     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {

189:       for (i=mbs-1; i>=0; i--) {
190:         n   = a->i[i+1] - a->i[i];
191:         idx = a->j + a->i[i];
192:         v   = a->a + a->i[i];

194:         VecSet(left,0.0);
195:         for (j=0; j<n; j++) {
196:           if (idx[j] != i) {
197:             VecPlaceArray(right,x + idx[j]*bs);
198:             MatMultAdd(v[j],right,left,left);
199:             VecResetArray(right);
200:           }
201:         }
202:         VecPlaceArray(right,b + i*bs);
203:         VecAYPX(left,-1.0,right);
204:         VecResetArray(right);

206:         VecPlaceArray(right,x + i*bs);
207:         MatSolve(diag[i],left,right);
208:         VecResetArray(right);

210:       }
211:     }
212:   }
213:   VecRestoreArray(xx,&x);
214:   VecRestoreArrayRead(bb,&b);
215:   return(0);
216: }

218: static PetscErrorCode MatSetValues_BlockMat(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
219: {
220:   Mat_BlockMat   *a = (Mat_BlockMat*)A->data;
221:   PetscInt       *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1;
222:   PetscInt       *imax=a->imax,*ai=a->i,*ailen=a->ilen;
223:   PetscInt       *aj  =a->j,nonew=a->nonew,bs=A->rmap->bs,brow,bcol;
225:   PetscInt       ridx,cidx;
226:   PetscBool      roworiented=a->roworiented;
227:   MatScalar      value;
228:   Mat            *ap,*aa = a->a;

231:   for (k=0; k<m; k++) { /* loop over added rows */
232:     row  = im[k];
233:     brow = row/bs;
234:     if (row < 0) continue;
235:     if (PetscUnlikelyDebug(row >= A->rmap->N)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->N-1);
236:     rp   = aj + ai[brow];
237:     ap   = aa + ai[brow];
238:     rmax = imax[brow];
239:     nrow = ailen[brow];
240:     low  = 0;
241:     high = nrow;
242:     for (l=0; l<n; l++) { /* loop over added columns */
243:       if (in[l] < 0) continue;
244:       if (PetscUnlikelyDebug(in[l] >= A->cmap->n)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap->n-1);
245:       col = in[l]; bcol = col/bs;
246:       if (A->symmetric && brow > bcol) continue;
247:       ridx = row % bs; cidx = col % bs;
248:       if (roworiented) value = v[l + k*n];
249:       else value = v[k + l*m];

251:       if (col <= lastcol) low = 0;
252:       else high = nrow;
253:       lastcol = col;
254:       while (high-low > 7) {
255:         t = (low+high)/2;
256:         if (rp[t] > bcol) high = t;
257:         else              low  = t;
258:       }
259:       for (i=low; i<high; i++) {
260:         if (rp[i] > bcol) break;
261:         if (rp[i] == bcol) goto noinsert1;
262:       }
263:       if (nonew == 1) goto noinsert1;
264:       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
265:       MatSeqXAIJReallocateAIJ(A,a->mbs,1,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,Mat);
266:       N = nrow++ - 1; high++;
267:       /* shift up all the later entries in this row */
268:       for (ii=N; ii>=i; ii--) {
269:         rp[ii+1] = rp[ii];
270:         ap[ii+1] = ap[ii];
271:       }
272:       if (N>=i) ap[i] = NULL;
273:       rp[i] = bcol;
274:       a->nz++;
275:       A->nonzerostate++;
276: noinsert1:;
277:       if (!*(ap+i)) {
278:         MatCreateSeqAIJ(PETSC_COMM_SELF,bs,bs,0,NULL,ap+i);
279:       }
280:       MatSetValues(ap[i],1,&ridx,1,&cidx,&value,is);
281:       low  = i;
282:     }
283:     ailen[brow] = nrow;
284:   }
285:   return(0);
286: }

288: static PetscErrorCode MatLoad_BlockMat(Mat newmat, PetscViewer viewer)
289: {
290:   PetscErrorCode    ierr;
291:   Mat               tmpA;
292:   PetscInt          i,j,m,n,bs = 1,ncols,*lens,currentcol,mbs,**ii,*ilens,nextcol,*llens,cnt = 0;
293:   const PetscInt    *cols;
294:   const PetscScalar *values;
295:   PetscBool         flg = PETSC_FALSE,notdone;
296:   Mat_SeqAIJ        *a;
297:   Mat_BlockMat      *amat;

300:   /* force binary viewer to load .info file if it has not yet done so */
301:   PetscViewerSetUp(viewer);
302:   MatCreate(PETSC_COMM_SELF,&tmpA);
303:   MatSetType(tmpA,MATSEQAIJ);
304:   MatLoad_SeqAIJ(tmpA,viewer);

306:   MatGetLocalSize(tmpA,&m,&n);
307:   PetscOptionsBegin(PETSC_COMM_SELF,NULL,"Options for loading BlockMat matrix 1","Mat");
308:   PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);
309:   PetscOptionsBool("-matload_symmetric","Store the matrix as symmetric","MatLoad",flg,&flg,NULL);
310:   PetscOptionsEnd();

312:   /* Determine number of nonzero blocks for each block row */
313:   a    = (Mat_SeqAIJ*) tmpA->data;
314:   mbs  = m/bs;
315:   PetscMalloc3(mbs,&lens,bs,&ii,bs,&ilens);
316:   PetscArrayzero(lens,mbs);

318:   for (i=0; i<mbs; i++) {
319:     for (j=0; j<bs; j++) {
320:       ii[j]    = a->j + a->i[i*bs + j];
321:       ilens[j] = a->i[i*bs + j + 1] - a->i[i*bs + j];
322:     }

324:     currentcol = -1;
325:     while (PETSC_TRUE) {
326:       notdone = PETSC_FALSE;
327:       nextcol = 1000000000;
328:       for (j=0; j<bs; j++) {
329:         while ((ilens[j] > 0 && ii[j][0]/bs <= currentcol)) {
330:           ii[j]++;
331:           ilens[j]--;
332:         }
333:         if (ilens[j] > 0) {
334:           notdone = PETSC_TRUE;
335:           nextcol = PetscMin(nextcol,ii[j][0]/bs);
336:         }
337:       }
338:       if (!notdone) break;
339:       if (!flg || (nextcol >= i)) lens[i]++;
340:       currentcol = nextcol;
341:     }
342:   }

344:   if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) {
345:     MatSetSizes(newmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);
346:   }
347:   MatBlockMatSetPreallocation(newmat,bs,0,lens);
348:   if (flg) {
349:     MatSetOption(newmat,MAT_SYMMETRIC,PETSC_TRUE);
350:   }
351:   amat = (Mat_BlockMat*)(newmat)->data;

353:   /* preallocate the submatrices */
354:   PetscMalloc1(bs,&llens);
355:   for (i=0; i<mbs; i++) { /* loops for block rows */
356:     for (j=0; j<bs; j++) {
357:       ii[j]    = a->j + a->i[i*bs + j];
358:       ilens[j] = a->i[i*bs + j + 1] - a->i[i*bs + j];
359:     }

361:     currentcol = 1000000000;
362:     for (j=0; j<bs; j++) { /* loop over rows in block finding first nonzero block */
363:       if (ilens[j] > 0) {
364:         currentcol = PetscMin(currentcol,ii[j][0]/bs);
365:       }
366:     }

368:     while (PETSC_TRUE) {  /* loops over blocks in block row */
369:       notdone = PETSC_FALSE;
370:       nextcol = 1000000000;
371:       PetscArrayzero(llens,bs);
372:       for (j=0; j<bs; j++) { /* loop over rows in block */
373:         while ((ilens[j] > 0 && ii[j][0]/bs <= currentcol)) { /* loop over columns in row */
374:           ii[j]++;
375:           ilens[j]--;
376:           llens[j]++;
377:         }
378:         if (ilens[j] > 0) {
379:           notdone = PETSC_TRUE;
380:           nextcol = PetscMin(nextcol,ii[j][0]/bs);
381:         }
382:       }
383:       if (cnt >= amat->maxnz) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Number of blocks found greater than expected %D",cnt);
384:       if (!flg || currentcol >= i) {
385:         amat->j[cnt] = currentcol;
386:         MatCreateSeqAIJ(PETSC_COMM_SELF,bs,bs,0,llens,amat->a+cnt++);
387:       }

389:       if (!notdone) break;
390:       currentcol = nextcol;
391:     }
392:     amat->ilen[i] = lens[i];
393:   }

395:   PetscFree3(lens,ii,ilens);
396:   PetscFree(llens);

398:   /* copy over the matrix, one row at a time */
399:   for (i=0; i<m; i++) {
400:     MatGetRow(tmpA,i,&ncols,&cols,&values);
401:     MatSetValues(newmat,1,&i,ncols,cols,values,INSERT_VALUES);
402:     MatRestoreRow(tmpA,i,&ncols,&cols,&values);
403:   }
404:   MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);
405:   MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);
406:   return(0);
407: }

409: static PetscErrorCode MatView_BlockMat(Mat A,PetscViewer viewer)
410: {
411:   Mat_BlockMat      *a = (Mat_BlockMat*)A->data;
412:   PetscErrorCode    ierr;
413:   const char        *name;
414:   PetscViewerFormat format;

417:   PetscObjectGetName((PetscObject)A,&name);
418:   PetscViewerGetFormat(viewer,&format);
419:   if (format == PETSC_VIEWER_ASCII_FACTOR_INFO || format == PETSC_VIEWER_ASCII_INFO) {
420:     PetscViewerASCIIPrintf(viewer,"Nonzero block matrices = %D \n",a->nz);
421:     if (A->symmetric) {
422:       PetscViewerASCIIPrintf(viewer,"Only upper triangular part of symmetric matrix is stored\n");
423:     }
424:   }
425:   return(0);
426: }

428: static PetscErrorCode MatDestroy_BlockMat(Mat mat)
429: {
431:   Mat_BlockMat   *bmat = (Mat_BlockMat*)mat->data;
432:   PetscInt       i;

435:   VecDestroy(&bmat->right);
436:   VecDestroy(&bmat->left);
437:   VecDestroy(&bmat->middle);
438:   VecDestroy(&bmat->workb);
439:   if (bmat->diags) {
440:     for (i=0; i<mat->rmap->n/mat->rmap->bs; i++) {
441:       MatDestroy(&bmat->diags[i]);
442:     }
443:   }
444:   if (bmat->a) {
445:     for (i=0; i<bmat->nz; i++) {
446:       MatDestroy(&bmat->a[i]);
447:     }
448:   }
449:   MatSeqXAIJFreeAIJ(mat,(PetscScalar**)&bmat->a,&bmat->j,&bmat->i);
450:   PetscFree(mat->data);
451:   return(0);
452: }

454: static PetscErrorCode MatMult_BlockMat(Mat A,Vec x,Vec y)
455: {
456:   Mat_BlockMat   *bmat = (Mat_BlockMat*)A->data;
458:   PetscScalar    *xx,*yy;
459:   PetscInt       *aj,i,*ii,jrow,m = A->rmap->n/A->rmap->bs,bs = A->rmap->bs,n,j;
460:   Mat            *aa;

463:   /*
464:      Standard CSR multiply except each entry is a Mat
465:   */
466:   VecGetArray(x,&xx);

468:   VecSet(y,0.0);
469:   VecGetArray(y,&yy);
470:   aj   = bmat->j;
471:   aa   = bmat->a;
472:   ii   = bmat->i;
473:   for (i=0; i<m; i++) {
474:     jrow = ii[i];
475:     VecPlaceArray(bmat->left,yy + bs*i);
476:     n    = ii[i+1] - jrow;
477:     for (j=0; j<n; j++) {
478:       VecPlaceArray(bmat->right,xx + bs*aj[jrow]);
479:       MatMultAdd(aa[jrow],bmat->right,bmat->left,bmat->left);
480:       VecResetArray(bmat->right);
481:       jrow++;
482:     }
483:     VecResetArray(bmat->left);
484:   }
485:   VecRestoreArray(x,&xx);
486:   VecRestoreArray(y,&yy);
487:   return(0);
488: }

490: PetscErrorCode MatMult_BlockMat_Symmetric(Mat A,Vec x,Vec y)
491: {
492:   Mat_BlockMat   *bmat = (Mat_BlockMat*)A->data;
494:   PetscScalar    *xx,*yy;
495:   PetscInt       *aj,i,*ii,jrow,m = A->rmap->n/A->rmap->bs,bs = A->rmap->bs,n,j;
496:   Mat            *aa;

499:   /*
500:      Standard CSR multiply except each entry is a Mat
501:   */
502:   VecGetArray(x,&xx);

504:   VecSet(y,0.0);
505:   VecGetArray(y,&yy);
506:   aj   = bmat->j;
507:   aa   = bmat->a;
508:   ii   = bmat->i;
509:   for (i=0; i<m; i++) {
510:     jrow = ii[i];
511:     n    = ii[i+1] - jrow;
512:     VecPlaceArray(bmat->left,yy + bs*i);
513:     VecPlaceArray(bmat->middle,xx + bs*i);
514:     /* if we ALWAYS required a diagonal entry then could remove this if test */
515:     if (aj[jrow] == i) {
516:       VecPlaceArray(bmat->right,xx + bs*aj[jrow]);
517:       MatMultAdd(aa[jrow],bmat->right,bmat->left,bmat->left);
518:       VecResetArray(bmat->right);
519:       jrow++;
520:       n--;
521:     }
522:     for (j=0; j<n; j++) {
523:       VecPlaceArray(bmat->right,xx + bs*aj[jrow]);            /* upper triangular part */
524:       MatMultAdd(aa[jrow],bmat->right,bmat->left,bmat->left);
525:       VecResetArray(bmat->right);

527:       VecPlaceArray(bmat->right,yy + bs*aj[jrow]);            /* lower triangular part */
528:       MatMultTransposeAdd(aa[jrow],bmat->middle,bmat->right,bmat->right);
529:       VecResetArray(bmat->right);
530:       jrow++;
531:     }
532:     VecResetArray(bmat->left);
533:     VecResetArray(bmat->middle);
534:   }
535:   VecRestoreArray(x,&xx);
536:   VecRestoreArray(y,&yy);
537:   return(0);
538: }

540: static PetscErrorCode MatMultAdd_BlockMat(Mat A,Vec x,Vec y,Vec z)
541: {
543:   return(0);
544: }

546: static PetscErrorCode MatMultTranspose_BlockMat(Mat A,Vec x,Vec y)
547: {
549:   return(0);
550: }

552: static PetscErrorCode MatMultTransposeAdd_BlockMat(Mat A,Vec x,Vec y,Vec z)
553: {
555:   return(0);
556: }

558: /*
559:      Adds diagonal pointers to sparse matrix structure.
560: */
561: static PetscErrorCode MatMarkDiagonal_BlockMat(Mat A)
562: {
563:   Mat_BlockMat   *a = (Mat_BlockMat*)A->data;
565:   PetscInt       i,j,mbs = A->rmap->n/A->rmap->bs;

568:   if (!a->diag) {
569:     PetscMalloc1(mbs,&a->diag);
570:   }
571:   for (i=0; i<mbs; i++) {
572:     a->diag[i] = a->i[i+1];
573:     for (j=a->i[i]; j<a->i[i+1]; j++) {
574:       if (a->j[j] == i) {
575:         a->diag[i] = j;
576:         break;
577:       }
578:     }
579:   }
580:   return(0);
581: }

583: static PetscErrorCode MatCreateSubMatrix_BlockMat(Mat A,IS isrow,IS iscol,MatReuse scall,Mat *B)
584: {
585:   Mat_BlockMat   *a = (Mat_BlockMat*)A->data;
586:   Mat_SeqAIJ     *c;
588:   PetscInt       i,k,first,step,lensi,nrows,ncols;
589:   PetscInt       *j_new,*i_new,*aj = a->j,*ailen = a->ilen;
590:   PetscScalar    *a_new;
591:   Mat            C,*aa = a->a;
592:   PetscBool      stride,equal;

595:   ISEqual(isrow,iscol,&equal);
596:   if (!equal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only for identical column and row indices");
597:   PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&stride);
598:   if (!stride) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only for stride indices");
599:   ISStrideGetInfo(iscol,&first,&step);
600:   if (step != A->rmap->bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only select one entry from each block");

602:   ISGetLocalSize(isrow,&nrows);
603:   ncols = nrows;

605:   /* create submatrix */
606:   if (scall == MAT_REUSE_MATRIX) {
607:     PetscInt n_cols,n_rows;
608:     C    = *B;
609:     MatGetSize(C,&n_rows,&n_cols);
610:     if (n_rows != nrows || n_cols != ncols) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size");
611:     MatZeroEntries(C);
612:   } else {
613:     MatCreate(PetscObjectComm((PetscObject)A),&C);
614:     MatSetSizes(C,nrows,ncols,PETSC_DETERMINE,PETSC_DETERMINE);
615:     if (A->symmetric) {
616:       MatSetType(C,MATSEQSBAIJ);
617:     } else {
618:       MatSetType(C,MATSEQAIJ);
619:     }
620:     MatSeqAIJSetPreallocation(C,0,ailen);
621:     MatSeqSBAIJSetPreallocation(C,1,0,ailen);
622:   }
623:   c = (Mat_SeqAIJ*)C->data;

625:   /* loop over rows inserting into submatrix */
626:   a_new = c->a;
627:   j_new = c->j;
628:   i_new = c->i;

630:   for (i=0; i<nrows; i++) {
631:     lensi = ailen[i];
632:     for (k=0; k<lensi; k++) {
633:       *j_new++ = *aj++;
634:       MatGetValue(*aa++,first,first,a_new++);
635:     }
636:     i_new[i+1] = i_new[i] + lensi;
637:     c->ilen[i] = lensi;
638:   }

640:   MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);
641:   MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);
642:   *B   = C;
643:   return(0);
644: }

646: static PetscErrorCode MatAssemblyEnd_BlockMat(Mat A,MatAssemblyType mode)
647: {
648:   Mat_BlockMat   *a = (Mat_BlockMat*)A->data;
650:   PetscInt       fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax;
651:   PetscInt       m      = a->mbs,*ip,N,*ailen = a->ilen,rmax = 0;
652:   Mat            *aa    = a->a,*ap;

655:   if (mode == MAT_FLUSH_ASSEMBLY) return(0);

657:   if (m) rmax = ailen[0]; /* determine row with most nonzeros */
658:   for (i=1; i<m; i++) {
659:     /* move each row back by the amount of empty slots (fshift) before it*/
660:     fshift += imax[i-1] - ailen[i-1];
661:     rmax    = PetscMax(rmax,ailen[i]);
662:     if (fshift) {
663:       ip = aj + ai[i];
664:       ap = aa + ai[i];
665:       N  = ailen[i];
666:       for (j=0; j<N; j++) {
667:         ip[j-fshift] = ip[j];
668:         ap[j-fshift] = ap[j];
669:       }
670:     }
671:     ai[i] = ai[i-1] + ailen[i-1];
672:   }
673:   if (m) {
674:     fshift += imax[m-1] - ailen[m-1];
675:     ai[m]   = ai[m-1] + ailen[m-1];
676:   }
677:   /* reset ilen and imax for each row */
678:   for (i=0; i<m; i++) {
679:     ailen[i] = imax[i] = ai[i+1] - ai[i];
680:   }
681:   a->nz = ai[m];
682:   for (i=0; i<a->nz; i++) {
683:     if (PetscUnlikelyDebug(!aa[i])) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Null matrix at location %D column %D nz %D",i,aj[i],a->nz);
684:     MatAssemblyBegin(aa[i],MAT_FINAL_ASSEMBLY);
685:     MatAssemblyEnd(aa[i],MAT_FINAL_ASSEMBLY);
686:   }
687:   PetscInfo4(A,"Matrix size: %D X %D; storage space: %D unneeded,%D used\n",m,A->cmap->n/A->cmap->bs,fshift,a->nz);
688:   PetscInfo1(A,"Number of mallocs during MatSetValues() is %D\n",a->reallocs);
689:   PetscInfo1(A,"Maximum nonzeros in any row is %D\n",rmax);

691:   A->info.mallocs    += a->reallocs;
692:   a->reallocs         = 0;
693:   A->info.nz_unneeded = (double)fshift;
694:   a->rmax             = rmax;
695:   MatMarkDiagonal_BlockMat(A);
696:   return(0);
697: }

699: static PetscErrorCode MatSetOption_BlockMat(Mat A,MatOption opt,PetscBool flg)
700: {
703:   if (opt == MAT_SYMMETRIC && flg) {
704:     A->ops->sor  = MatSOR_BlockMat_Symmetric;
705:     A->ops->mult = MatMult_BlockMat_Symmetric;
706:   } else {
707:     PetscInfo1(A,"Unused matrix option %s\n",MatOptions[opt]);
708:   }
709:   return(0);
710: }

712: static struct _MatOps MatOps_Values = {MatSetValues_BlockMat,
713:                                        NULL,
714:                                        NULL,
715:                                        MatMult_BlockMat,
716:                                /*  4*/ MatMultAdd_BlockMat,
717:                                        MatMultTranspose_BlockMat,
718:                                        MatMultTransposeAdd_BlockMat,
719:                                        NULL,
720:                                        NULL,
721:                                        NULL,
722:                                /* 10*/ NULL,
723:                                        NULL,
724:                                        NULL,
725:                                        MatSOR_BlockMat,
726:                                        NULL,
727:                                /* 15*/ NULL,
728:                                        NULL,
729:                                        NULL,
730:                                        NULL,
731:                                        NULL,
732:                                /* 20*/ NULL,
733:                                        MatAssemblyEnd_BlockMat,
734:                                        MatSetOption_BlockMat,
735:                                        NULL,
736:                                /* 24*/ NULL,
737:                                        NULL,
738:                                        NULL,
739:                                        NULL,
740:                                        NULL,
741:                                /* 29*/ NULL,
742:                                        NULL,
743:                                        NULL,
744:                                        NULL,
745:                                        NULL,
746:                                /* 34*/ NULL,
747:                                        NULL,
748:                                        NULL,
749:                                        NULL,
750:                                        NULL,
751:                                /* 39*/ NULL,
752:                                        NULL,
753:                                        NULL,
754:                                        NULL,
755:                                        NULL,
756:                                /* 44*/ NULL,
757:                                        NULL,
758:                                        MatShift_Basic,
759:                                        NULL,
760:                                        NULL,
761:                                /* 49*/ NULL,
762:                                        NULL,
763:                                        NULL,
764:                                        NULL,
765:                                        NULL,
766:                                /* 54*/ NULL,
767:                                        NULL,
768:                                        NULL,
769:                                        NULL,
770:                                        NULL,
771:                                /* 59*/ MatCreateSubMatrix_BlockMat,
772:                                        MatDestroy_BlockMat,
773:                                        MatView_BlockMat,
774:                                        NULL,
775:                                        NULL,
776:                                /* 64*/ NULL,
777:                                        NULL,
778:                                        NULL,
779:                                        NULL,
780:                                        NULL,
781:                                /* 69*/ NULL,
782:                                        NULL,
783:                                        NULL,
784:                                        NULL,
785:                                        NULL,
786:                                /* 74*/ NULL,
787:                                        NULL,
788:                                        NULL,
789:                                        NULL,
790:                                        NULL,
791:                                /* 79*/ NULL,
792:                                        NULL,
793:                                        NULL,
794:                                        NULL,
795:                                        MatLoad_BlockMat,
796:                                /* 84*/ NULL,
797:                                        NULL,
798:                                        NULL,
799:                                        NULL,
800:                                        NULL,
801:                                /* 89*/ NULL,
802:                                        NULL,
803:                                        NULL,
804:                                        NULL,
805:                                        NULL,
806:                                /* 94*/ NULL,
807:                                        NULL,
808:                                        NULL,
809:                                        NULL,
810:                                        NULL,
811:                                /* 99*/ NULL,
812:                                        NULL,
813:                                        NULL,
814:                                        NULL,
815:                                        NULL,
816:                                /*104*/ NULL,
817:                                        NULL,
818:                                        NULL,
819:                                        NULL,
820:                                        NULL,
821:                                /*109*/ NULL,
822:                                        NULL,
823:                                        NULL,
824:                                        NULL,
825:                                        NULL,
826:                                /*114*/ NULL,
827:                                        NULL,
828:                                        NULL,
829:                                        NULL,
830:                                        NULL,
831:                                /*119*/ NULL,
832:                                        NULL,
833:                                        NULL,
834:                                        NULL,
835:                                        NULL,
836:                                /*124*/ NULL,
837:                                        NULL,
838:                                        NULL,
839:                                        NULL,
840:                                        NULL,
841:                                /*129*/ NULL,
842:                                        NULL,
843:                                        NULL,
844:                                        NULL,
845:                                        NULL,
846:                                /*134*/ NULL,
847:                                        NULL,
848:                                        NULL,
849:                                        NULL,
850:                                        NULL,
851:                                /*139*/ NULL,
852:                                        NULL,
853:                                        NULL
854: };

856: /*@C
857:    MatBlockMatSetPreallocation - For good matrix assembly performance
858:    the user should preallocate the matrix storage by setting the parameter nz
859:    (or the array nnz).  By setting these parameters accurately, performance
860:    during matrix assembly can be increased by more than a factor of 50.

862:    Collective

864:    Input Parameters:
865: +  B - The matrix
866: .  bs - size of each block in matrix
867: .  nz - number of nonzeros per block row (same for all rows)
868: -  nnz - array containing the number of nonzeros in the various block rows
869:          (possibly different for each row) or NULL

871:    Notes:
872:      If nnz is given then nz is ignored

874:    Specify the preallocated storage with either nz or nnz (not both).
875:    Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory
876:    allocation.  For large problems you MUST preallocate memory or you
877:    will get TERRIBLE performance, see the users' manual chapter on matrices.

879:    Level: intermediate

881: .seealso: MatCreate(), MatCreateBlockMat(), MatSetValues()

883: @*/
884: PetscErrorCode  MatBlockMatSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[])
885: {

889:   PetscTryMethod(B,"MatBlockMatSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[]),(B,bs,nz,nnz));
890:   return(0);
891: }

893: static PetscErrorCode  MatBlockMatSetPreallocation_BlockMat(Mat A,PetscInt bs,PetscInt nz,PetscInt *nnz)
894: {
895:   Mat_BlockMat   *bmat = (Mat_BlockMat*)A->data;
897:   PetscInt       i;

900:   PetscLayoutSetBlockSize(A->rmap,bs);
901:   PetscLayoutSetBlockSize(A->cmap,bs);
902:   PetscLayoutSetUp(A->rmap);
903:   PetscLayoutSetUp(A->cmap);
904:   PetscLayoutGetBlockSize(A->rmap,&bs);

906:   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
907:   if (nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %d",nz);
908:   if (nnz) {
909:     for (i=0; i<A->rmap->n/bs; i++) {
910:       if (nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %d value %d",i,nnz[i]);
911:       if (nnz[i] > A->cmap->n/bs) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than row length: local row %d value %d rowlength %d",i,nnz[i],A->cmap->n/bs);
912:     }
913:   }
914:   bmat->mbs = A->rmap->n/bs;

916:   VecCreateSeqWithArray(PETSC_COMM_SELF,1,bs,NULL,&bmat->right);
917:   VecCreateSeqWithArray(PETSC_COMM_SELF,1,bs,NULL,&bmat->middle);
918:   VecCreateSeq(PETSC_COMM_SELF,bs,&bmat->left);

920:   if (!bmat->imax) {
921:     PetscMalloc2(A->rmap->n,&bmat->imax,A->rmap->n,&bmat->ilen);
922:     PetscLogObjectMemory((PetscObject)A,2*A->rmap->n*sizeof(PetscInt));
923:   }
924:   if (nnz) {
925:     nz = 0;
926:     for (i=0; i<A->rmap->n/A->rmap->bs; i++) {
927:       bmat->imax[i] = nnz[i];
928:       nz           += nnz[i];
929:     }
930:   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Currently requires block row by row preallocation");

932:   /* bmat->ilen will count nonzeros in each row so far. */
933:   for (i=0; i<bmat->mbs; i++) bmat->ilen[i] = 0;

935:   /* allocate the matrix space */
936:   MatSeqXAIJFreeAIJ(A,(PetscScalar**)&bmat->a,&bmat->j,&bmat->i);
937:   PetscMalloc3(nz,&bmat->a,nz,&bmat->j,A->rmap->n+1,&bmat->i);
938:   PetscLogObjectMemory((PetscObject)A,(A->rmap->n+1)*sizeof(PetscInt)+nz*(sizeof(PetscScalar)+sizeof(PetscInt)));
939:   bmat->i[0] = 0;
940:   for (i=1; i<bmat->mbs+1; i++) {
941:     bmat->i[i] = bmat->i[i-1] + bmat->imax[i-1];
942:   }
943:   bmat->singlemalloc = PETSC_TRUE;
944:   bmat->free_a       = PETSC_TRUE;
945:   bmat->free_ij      = PETSC_TRUE;

947:   bmat->nz            = 0;
948:   bmat->maxnz         = nz;
949:   A->info.nz_unneeded = (double)bmat->maxnz;
950:   MatSetOption(A,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);
951:   return(0);
952: }

954: /*MC
955:    MATBLOCKMAT - A matrix that is defined by a set of Mat's that represents a sparse block matrix
956:                  consisting of (usually) sparse blocks.

958:   Level: advanced

960: .seealso: MatCreateBlockMat()

962: M*/

964: PETSC_EXTERN PetscErrorCode MatCreate_BlockMat(Mat A)
965: {
966:   Mat_BlockMat   *b;

970:   PetscNewLog(A,&b);
971:   A->data = (void*)b;
972:   PetscMemcpy(A->ops,&MatOps_Values,sizeof(struct _MatOps));

974:   A->assembled    = PETSC_TRUE;
975:   A->preallocated = PETSC_FALSE;
976:   PetscObjectChangeTypeName((PetscObject)A,MATBLOCKMAT);

978:   PetscObjectComposeFunction((PetscObject)A,"MatBlockMatSetPreallocation_C",MatBlockMatSetPreallocation_BlockMat);
979:   return(0);
980: }

982: /*@C
983:    MatCreateBlockMat - Creates a new matrix in which each block contains a uniform-size sequential Mat object

985:   Collective

987:    Input Parameters:
988: +  comm - MPI communicator
989: .  m - number of rows
990: .  n  - number of columns
991: .  bs - size of each submatrix
992: .  nz  - expected maximum number of nonzero blocks in row (use PETSC_DEFAULT if not known)
993: -  nnz - expected number of nonzers per block row if known (use NULL otherwise)

995:    Output Parameter:
996: .  A - the matrix

998:    Level: intermediate

1000:    Notes:
1001:     Matrices of this type are nominally-sparse matrices in which each "entry" is a Mat object.  Each Mat must
1002:    have the same size and be sequential.  The local and global sizes must be compatible with this decomposition.

1004:    For matrices containing parallel submatrices and variable block sizes, see MATNEST.

1006: .seealso: MATBLOCKMAT, MatCreateNest()
1007: @*/
1008: PetscErrorCode  MatCreateBlockMat(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt bs,PetscInt nz,PetscInt *nnz, Mat *A)
1009: {

1013:   MatCreate(comm,A);
1014:   MatSetSizes(*A,m,n,PETSC_DETERMINE,PETSC_DETERMINE);
1015:   MatSetType(*A,MATBLOCKMAT);
1016:   MatBlockMatSetPreallocation(*A,bs,nz,nnz);
1017:   return(0);
1018: }