Actual source code: blockmat.c

petsc-3.8.4 2018-03-24
Report Typos and Errors

  2: /*
  3:    This provides a matrix that consists of Mats
  4: */

  6:  #include <petsc/private/matimpl.h>
  7: #include <../src/mat/impls/baij/seq/baij.h>    /* use the common AIJ data-structure */

  9: typedef struct {
 10:   SEQAIJHEADER(Mat);
 11:   SEQBAIJHEADER;
 12:   Mat *diags;

 14:   Vec left,right,middle,workb;                 /* dummy vectors to perform local parts of product */
 15: } Mat_BlockMat;

 17: extern PetscErrorCode  MatBlockMatSetPreallocation(Mat,PetscInt,PetscInt,const PetscInt*);

 19: static PetscErrorCode MatSOR_BlockMat_Symmetric(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
 20: {
 21:   Mat_BlockMat      *a = (Mat_BlockMat*)A->data;
 22:   PetscScalar       *x;
 23:   const Mat         *v;
 24:   const PetscScalar *b;
 25:   PetscErrorCode    ierr;
 26:   PetscInt          n = A->cmap->n,i,mbs = n/A->rmap->bs,j,bs = A->rmap->bs;
 27:   const PetscInt    *idx;
 28:   IS                row,col;
 29:   MatFactorInfo     info;
 30:   Vec               left = a->left,right = a->right, middle = a->middle;
 31:   Mat               *diag;

 34:   its = its*lits;
 35:   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
 36:   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
 37:   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for omega not equal to 1.0");
 38:   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for fshift");
 39:   if ((flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) && !(flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP)) {
 40:     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot do backward sweep without forward sweep");
 41:   }

 43:   if (!a->diags) {
 44:     PetscMalloc1(mbs,&a->diags);
 45:     MatFactorInfoInitialize(&info);
 46:     for (i=0; i<mbs; i++) {
 47:       MatGetOrdering(a->a[a->diag[i]], MATORDERINGND,&row,&col);
 48:       MatCholeskyFactorSymbolic(a->diags[i],a->a[a->diag[i]],row,&info);
 49:       MatCholeskyFactorNumeric(a->diags[i],a->a[a->diag[i]],&info);
 50:       ISDestroy(&row);
 51:       ISDestroy(&col);
 52:     }
 53:     VecDuplicate(bb,&a->workb);
 54:   }
 55:   diag = a->diags;

 57:   VecSet(xx,0.0);
 58:   VecGetArray(xx,&x);
 59:   /* copy right hand side because it must be modified during iteration */
 60:   VecCopy(bb,a->workb);
 61:   VecGetArrayRead(a->workb,&b);

 63:   /* need to add code for when initial guess is zero, see MatSOR_SeqAIJ */
 64:   while (its--) {
 65:     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {

 67:       for (i=0; i<mbs; i++) {
 68:         n   = a->i[i+1] - a->i[i] - 1;
 69:         idx = a->j + a->i[i] + 1;
 70:         v   = a->a + a->i[i] + 1;

 72:         VecSet(left,0.0);
 73:         for (j=0; j<n; j++) {
 74:           VecPlaceArray(right,x + idx[j]*bs);
 75:           MatMultAdd(v[j],right,left,left);
 76:           VecResetArray(right);
 77:         }
 78:         VecPlaceArray(right,b + i*bs);
 79:         VecAYPX(left,-1.0,right);
 80:         VecResetArray(right);

 82:         VecPlaceArray(right,x + i*bs);
 83:         MatSolve(diag[i],left,right);

 85:         /* now adjust right hand side, see MatSOR_SeqSBAIJ */
 86:         for (j=0; j<n; j++) {
 87:           MatMultTranspose(v[j],right,left);
 88:           VecPlaceArray(middle,b + idx[j]*bs);
 89:           VecAXPY(middle,-1.0,left);
 90:           VecResetArray(middle);
 91:         }
 92:         VecResetArray(right);

 94:       }
 95:     }
 96:     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {

 98:       for (i=mbs-1; i>=0; i--) {
 99:         n   = a->i[i+1] - a->i[i] - 1;
100:         idx = a->j + a->i[i] + 1;
101:         v   = a->a + a->i[i] + 1;

103:         VecSet(left,0.0);
104:         for (j=0; j<n; j++) {
105:           VecPlaceArray(right,x + idx[j]*bs);
106:           MatMultAdd(v[j],right,left,left);
107:           VecResetArray(right);
108:         }
109:         VecPlaceArray(right,b + i*bs);
110:         VecAYPX(left,-1.0,right);
111:         VecResetArray(right);

113:         VecPlaceArray(right,x + i*bs);
114:         MatSolve(diag[i],left,right);
115:         VecResetArray(right);

117:       }
118:     }
119:   }
120:   VecRestoreArray(xx,&x);
121:   VecRestoreArrayRead(a->workb,&b);
122:   return(0);
123: }

125: static PetscErrorCode MatSOR_BlockMat(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
126: {
127:   Mat_BlockMat      *a = (Mat_BlockMat*)A->data;
128:   PetscScalar       *x;
129:   const Mat         *v;
130:   const PetscScalar *b;
131:   PetscErrorCode    ierr;
132:   PetscInt          n = A->cmap->n,i,mbs = n/A->rmap->bs,j,bs = A->rmap->bs;
133:   const PetscInt    *idx;
134:   IS                row,col;
135:   MatFactorInfo     info;
136:   Vec               left = a->left,right = a->right;
137:   Mat               *diag;

140:   its = its*lits;
141:   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
142:   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
143:   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for omega not equal to 1.0");
144:   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for fshift");

146:   if (!a->diags) {
147:     PetscMalloc1(mbs,&a->diags);
148:     MatFactorInfoInitialize(&info);
149:     for (i=0; i<mbs; i++) {
150:       MatGetOrdering(a->a[a->diag[i]], MATORDERINGND,&row,&col);
151:       MatLUFactorSymbolic(a->diags[i],a->a[a->diag[i]],row,col,&info);
152:       MatLUFactorNumeric(a->diags[i],a->a[a->diag[i]],&info);
153:       ISDestroy(&row);
154:       ISDestroy(&col);
155:     }
156:   }
157:   diag = a->diags;

159:   VecSet(xx,0.0);
160:   VecGetArray(xx,&x);
161:   VecGetArrayRead(bb,&b);

163:   /* need to add code for when initial guess is zero, see MatSOR_SeqAIJ */
164:   while (its--) {
165:     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {

167:       for (i=0; i<mbs; i++) {
168:         n   = a->i[i+1] - a->i[i];
169:         idx = a->j + a->i[i];
170:         v   = a->a + a->i[i];

172:         VecSet(left,0.0);
173:         for (j=0; j<n; j++) {
174:           if (idx[j] != i) {
175:             VecPlaceArray(right,x + idx[j]*bs);
176:             MatMultAdd(v[j],right,left,left);
177:             VecResetArray(right);
178:           }
179:         }
180:         VecPlaceArray(right,b + i*bs);
181:         VecAYPX(left,-1.0,right);
182:         VecResetArray(right);

184:         VecPlaceArray(right,x + i*bs);
185:         MatSolve(diag[i],left,right);
186:         VecResetArray(right);
187:       }
188:     }
189:     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {

191:       for (i=mbs-1; i>=0; i--) {
192:         n   = a->i[i+1] - a->i[i];
193:         idx = a->j + a->i[i];
194:         v   = a->a + a->i[i];

196:         VecSet(left,0.0);
197:         for (j=0; j<n; j++) {
198:           if (idx[j] != i) {
199:             VecPlaceArray(right,x + idx[j]*bs);
200:             MatMultAdd(v[j],right,left,left);
201:             VecResetArray(right);
202:           }
203:         }
204:         VecPlaceArray(right,b + i*bs);
205:         VecAYPX(left,-1.0,right);
206:         VecResetArray(right);

208:         VecPlaceArray(right,x + i*bs);
209:         MatSolve(diag[i],left,right);
210:         VecResetArray(right);

212:       }
213:     }
214:   }
215:   VecRestoreArray(xx,&x);
216:   VecRestoreArrayRead(bb,&b);
217:   return(0);
218: }

220: static PetscErrorCode MatSetValues_BlockMat(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
221: {
222:   Mat_BlockMat   *a = (Mat_BlockMat*)A->data;
223:   PetscInt       *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1;
224:   PetscInt       *imax=a->imax,*ai=a->i,*ailen=a->ilen;
225:   PetscInt       *aj  =a->j,nonew=a->nonew,bs=A->rmap->bs,brow,bcol;
227:   PetscInt       ridx,cidx;
228:   PetscBool      roworiented=a->roworiented;
229:   MatScalar      value;
230:   Mat            *ap,*aa = a->a;

233:   for (k=0; k<m; k++) { /* loop over added rows */
234:     row  = im[k];
235:     brow = row/bs;
236:     if (row < 0) continue;
237: #if defined(PETSC_USE_DEBUG)
238:     if (row >= A->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->N-1);
239: #endif
240:     rp   = aj + ai[brow];
241:     ap   = aa + ai[brow];
242:     rmax = imax[brow];
243:     nrow = ailen[brow];
244:     low  = 0;
245:     high = nrow;
246:     for (l=0; l<n; l++) { /* loop over added columns */
247:       if (in[l] < 0) continue;
248: #if defined(PETSC_USE_DEBUG)
249:       if (in[l] >= A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap->n-1);
250: #endif
251:       col = in[l]; bcol = col/bs;
252:       if (A->symmetric && brow > bcol) continue;
253:       ridx = row % bs; cidx = col % bs;
254:       if (roworiented) value = v[l + k*n];
255:       else value = v[k + l*m];

257:       if (col <= lastcol) low = 0;
258:       else high = nrow;
259:       lastcol = col;
260:       while (high-low > 7) {
261:         t = (low+high)/2;
262:         if (rp[t] > bcol) high = t;
263:         else              low  = t;
264:       }
265:       for (i=low; i<high; i++) {
266:         if (rp[i] > bcol) break;
267:         if (rp[i] == bcol) goto noinsert1;
268:       }
269:       if (nonew == 1) goto noinsert1;
270:       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
271:       MatSeqXAIJReallocateAIJ(A,a->mbs,1,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,Mat);
272:       N = nrow++ - 1; high++;
273:       /* shift up all the later entries in this row */
274:       for (ii=N; ii>=i; ii--) {
275:         rp[ii+1] = rp[ii];
276:         ap[ii+1] = ap[ii];
277:       }
278:       if (N>=i) ap[i] = 0;
279:       rp[i] = bcol;
280:       a->nz++;
281:       A->nonzerostate++;
282: noinsert1:;
283:       if (!*(ap+i)) {
284:         MatCreateSeqAIJ(PETSC_COMM_SELF,bs,bs,0,0,ap+i);
285:       }
286:       MatSetValues(ap[i],1,&ridx,1,&cidx,&value,is);
287:       low  = i;
288:     }
289:     ailen[brow] = nrow;
290:   }
291:   return(0);
292: }

294: static PetscErrorCode MatLoad_BlockMat(Mat newmat, PetscViewer viewer)
295: {
296:   PetscErrorCode    ierr;
297:   Mat               tmpA;
298:   PetscInt          i,j,m,n,bs = 1,ncols,*lens,currentcol,mbs,**ii,*ilens,nextcol,*llens,cnt = 0;
299:   const PetscInt    *cols;
300:   const PetscScalar *values;
301:   PetscBool         flg = PETSC_FALSE,notdone;
302:   Mat_SeqAIJ        *a;
303:   Mat_BlockMat      *amat;

306:   /* force binary viewer to load .info file if it has not yet done so */
307:   PetscViewerSetUp(viewer);
308:   MatCreate(PETSC_COMM_SELF,&tmpA);
309:   MatSetType(tmpA,MATSEQAIJ);
310:   MatLoad_SeqAIJ(tmpA,viewer);

312:   MatGetLocalSize(tmpA,&m,&n);
313:   PetscOptionsBegin(PETSC_COMM_SELF,NULL,"Options for loading BlockMat matrix 1","Mat");
314:   PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);
315:   PetscOptionsBool("-matload_symmetric","Store the matrix as symmetric","MatLoad",flg,&flg,NULL);
316:   PetscOptionsEnd();

318:   /* Determine number of nonzero blocks for each block row */
319:   a    = (Mat_SeqAIJ*) tmpA->data;
320:   mbs  = m/bs;
321:   PetscMalloc3(mbs,&lens,bs,&ii,bs,&ilens);
322:   PetscMemzero(lens,mbs*sizeof(PetscInt));

324:   for (i=0; i<mbs; i++) {
325:     for (j=0; j<bs; j++) {
326:       ii[j]    = a->j + a->i[i*bs + j];
327:       ilens[j] = a->i[i*bs + j + 1] - a->i[i*bs + j];
328:     }

330:     currentcol = -1;
331:     while (PETSC_TRUE) {
332:       notdone = PETSC_FALSE;
333:       nextcol = 1000000000;
334:       for (j=0; j<bs; j++) {
335:         while ((ilens[j] > 0 && ii[j][0]/bs <= currentcol)) {
336:           ii[j]++;
337:           ilens[j]--;
338:         }
339:         if (ilens[j] > 0) {
340:           notdone = PETSC_TRUE;
341:           nextcol = PetscMin(nextcol,ii[j][0]/bs);
342:         }
343:       }
344:       if (!notdone) break;
345:       if (!flg || (nextcol >= i)) lens[i]++;
346:       currentcol = nextcol;
347:     }
348:   }

350:   if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) {
351:     MatSetSizes(newmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);
352:   }
353:   MatBlockMatSetPreallocation(newmat,bs,0,lens);
354:   if (flg) {
355:     MatSetOption(newmat,MAT_SYMMETRIC,PETSC_TRUE);
356:   }
357:   amat = (Mat_BlockMat*)(newmat)->data;

359:   /* preallocate the submatrices */
360:   PetscMalloc1(bs,&llens);
361:   for (i=0; i<mbs; i++) { /* loops for block rows */
362:     for (j=0; j<bs; j++) {
363:       ii[j]    = a->j + a->i[i*bs + j];
364:       ilens[j] = a->i[i*bs + j + 1] - a->i[i*bs + j];
365:     }

367:     currentcol = 1000000000;
368:     for (j=0; j<bs; j++) { /* loop over rows in block finding first nonzero block */
369:       if (ilens[j] > 0) {
370:         currentcol = PetscMin(currentcol,ii[j][0]/bs);
371:       }
372:     }

374:     while (PETSC_TRUE) {  /* loops over blocks in block row */
375:       notdone = PETSC_FALSE;
376:       nextcol = 1000000000;
377:       PetscMemzero(llens,bs*sizeof(PetscInt));
378:       for (j=0; j<bs; j++) { /* loop over rows in block */
379:         while ((ilens[j] > 0 && ii[j][0]/bs <= currentcol)) { /* loop over columns in row */
380:           ii[j]++;
381:           ilens[j]--;
382:           llens[j]++;
383:         }
384:         if (ilens[j] > 0) {
385:           notdone = PETSC_TRUE;
386:           nextcol = PetscMin(nextcol,ii[j][0]/bs);
387:         }
388:       }
389:       if (cnt >= amat->maxnz) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Number of blocks found greater than expected %D",cnt);
390:       if (!flg || currentcol >= i) {
391:         amat->j[cnt] = currentcol;
392:         MatCreateSeqAIJ(PETSC_COMM_SELF,bs,bs,0,llens,amat->a+cnt++);
393:       }

395:       if (!notdone) break;
396:       currentcol = nextcol;
397:     }
398:     amat->ilen[i] = lens[i];
399:   }

401:   PetscFree3(lens,ii,ilens);
402:   PetscFree(llens);

404:   /* copy over the matrix, one row at a time */
405:   for (i=0; i<m; i++) {
406:     MatGetRow(tmpA,i,&ncols,&cols,&values);
407:     MatSetValues(newmat,1,&i,ncols,cols,values,INSERT_VALUES);
408:     MatRestoreRow(tmpA,i,&ncols,&cols,&values);
409:   }
410:   MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);
411:   MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);
412:   return(0);
413: }

415: static PetscErrorCode MatView_BlockMat(Mat A,PetscViewer viewer)
416: {
417:   Mat_BlockMat      *a = (Mat_BlockMat*)A->data;
418:   PetscErrorCode    ierr;
419:   const char        *name;
420:   PetscViewerFormat format;

423:   PetscObjectGetName((PetscObject)A,&name);
424:   PetscViewerGetFormat(viewer,&format);
425:   if (format == PETSC_VIEWER_ASCII_FACTOR_INFO || format == PETSC_VIEWER_ASCII_INFO) {
426:     PetscViewerASCIIPrintf(viewer,"Nonzero block matrices = %D \n",a->nz);
427:     if (A->symmetric) {
428:       PetscViewerASCIIPrintf(viewer,"Only upper triangular part of symmetric matrix is stored\n");
429:     }
430:   }
431:   return(0);
432: }

434: static PetscErrorCode MatDestroy_BlockMat(Mat mat)
435: {
437:   Mat_BlockMat   *bmat = (Mat_BlockMat*)mat->data;
438:   PetscInt       i;

441:   VecDestroy(&bmat->right);
442:   VecDestroy(&bmat->left);
443:   VecDestroy(&bmat->middle);
444:   VecDestroy(&bmat->workb);
445:   if (bmat->diags) {
446:     for (i=0; i<mat->rmap->n/mat->rmap->bs; i++) {
447:       MatDestroy(&bmat->diags[i]);
448:     }
449:   }
450:   if (bmat->a) {
451:     for (i=0; i<bmat->nz; i++) {
452:       MatDestroy(&bmat->a[i]);
453:     }
454:   }
455:   MatSeqXAIJFreeAIJ(mat,(PetscScalar**)&bmat->a,&bmat->j,&bmat->i);
456:   PetscFree(mat->data);
457:   return(0);
458: }

460: static PetscErrorCode MatMult_BlockMat(Mat A,Vec x,Vec y)
461: {
462:   Mat_BlockMat   *bmat = (Mat_BlockMat*)A->data;
464:   PetscScalar    *xx,*yy;
465:   PetscInt       *aj,i,*ii,jrow,m = A->rmap->n/A->rmap->bs,bs = A->rmap->bs,n,j;
466:   Mat            *aa;

469:   /*
470:      Standard CSR multiply except each entry is a Mat
471:   */
472:   VecGetArray(x,&xx);

474:   VecSet(y,0.0);
475:   VecGetArray(y,&yy);
476:   aj   = bmat->j;
477:   aa   = bmat->a;
478:   ii   = bmat->i;
479:   for (i=0; i<m; i++) {
480:     jrow = ii[i];
481:     VecPlaceArray(bmat->left,yy + bs*i);
482:     n    = ii[i+1] - jrow;
483:     for (j=0; j<n; j++) {
484:       VecPlaceArray(bmat->right,xx + bs*aj[jrow]);
485:       MatMultAdd(aa[jrow],bmat->right,bmat->left,bmat->left);
486:       VecResetArray(bmat->right);
487:       jrow++;
488:     }
489:     VecResetArray(bmat->left);
490:   }
491:   VecRestoreArray(x,&xx);
492:   VecRestoreArray(y,&yy);
493:   return(0);
494: }

496: PetscErrorCode MatMult_BlockMat_Symmetric(Mat A,Vec x,Vec y)
497: {
498:   Mat_BlockMat   *bmat = (Mat_BlockMat*)A->data;
500:   PetscScalar    *xx,*yy;
501:   PetscInt       *aj,i,*ii,jrow,m = A->rmap->n/A->rmap->bs,bs = A->rmap->bs,n,j;
502:   Mat            *aa;

505:   /*
506:      Standard CSR multiply except each entry is a Mat
507:   */
508:   VecGetArray(x,&xx);

510:   VecSet(y,0.0);
511:   VecGetArray(y,&yy);
512:   aj   = bmat->j;
513:   aa   = bmat->a;
514:   ii   = bmat->i;
515:   for (i=0; i<m; i++) {
516:     jrow = ii[i];
517:     n    = ii[i+1] - jrow;
518:     VecPlaceArray(bmat->left,yy + bs*i);
519:     VecPlaceArray(bmat->middle,xx + bs*i);
520:     /* if we ALWAYS required a diagonal entry then could remove this if test */
521:     if (aj[jrow] == i) {
522:       VecPlaceArray(bmat->right,xx + bs*aj[jrow]);
523:       MatMultAdd(aa[jrow],bmat->right,bmat->left,bmat->left);
524:       VecResetArray(bmat->right);
525:       jrow++;
526:       n--;
527:     }
528:     for (j=0; j<n; j++) {
529:       VecPlaceArray(bmat->right,xx + bs*aj[jrow]);            /* upper triangular part */
530:       MatMultAdd(aa[jrow],bmat->right,bmat->left,bmat->left);
531:       VecResetArray(bmat->right);

533:       VecPlaceArray(bmat->right,yy + bs*aj[jrow]);            /* lower triangular part */
534:       MatMultTransposeAdd(aa[jrow],bmat->middle,bmat->right,bmat->right);
535:       VecResetArray(bmat->right);
536:       jrow++;
537:     }
538:     VecResetArray(bmat->left);
539:     VecResetArray(bmat->middle);
540:   }
541:   VecRestoreArray(x,&xx);
542:   VecRestoreArray(y,&yy);
543:   return(0);
544: }

546: static PetscErrorCode MatMultAdd_BlockMat(Mat A,Vec x,Vec y,Vec z)
547: {
549:   return(0);
550: }

552: static PetscErrorCode MatMultTranspose_BlockMat(Mat A,Vec x,Vec y)
553: {
555:   return(0);
556: }

558: static PetscErrorCode MatMultTransposeAdd_BlockMat(Mat A,Vec x,Vec y,Vec z)
559: {
561:   return(0);
562: }

564: /*
565:      Adds diagonal pointers to sparse matrix structure.
566: */
567: static PetscErrorCode MatMarkDiagonal_BlockMat(Mat A)
568: {
569:   Mat_BlockMat   *a = (Mat_BlockMat*)A->data;
571:   PetscInt       i,j,mbs = A->rmap->n/A->rmap->bs;

574:   if (!a->diag) {
575:     PetscMalloc1(mbs,&a->diag);
576:   }
577:   for (i=0; i<mbs; i++) {
578:     a->diag[i] = a->i[i+1];
579:     for (j=a->i[i]; j<a->i[i+1]; j++) {
580:       if (a->j[j] == i) {
581:         a->diag[i] = j;
582:         break;
583:       }
584:     }
585:   }
586:   return(0);
587: }

589: static PetscErrorCode MatCreateSubMatrix_BlockMat(Mat A,IS isrow,IS iscol,MatReuse scall,Mat *B)
590: {
591:   Mat_BlockMat   *a = (Mat_BlockMat*)A->data;
592:   Mat_SeqAIJ     *c;
594:   PetscInt       i,k,first,step,lensi,nrows,ncols;
595:   PetscInt       *j_new,*i_new,*aj = a->j,*ailen = a->ilen;
596:   PetscScalar    *a_new;
597:   Mat            C,*aa = a->a;
598:   PetscBool      stride,equal;

601:   ISEqual(isrow,iscol,&equal);
602:   if (!equal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only for idential column and row indices");
603:   PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&stride);
604:   if (!stride) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only for stride indices");
605:   ISStrideGetInfo(iscol,&first,&step);
606:   if (step != A->rmap->bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only select one entry from each block");

608:   ISGetLocalSize(isrow,&nrows);
609:   ncols = nrows;

611:   /* create submatrix */
612:   if (scall == MAT_REUSE_MATRIX) {
613:     PetscInt n_cols,n_rows;
614:     C    = *B;
615:     MatGetSize(C,&n_rows,&n_cols);
616:     if (n_rows != nrows || n_cols != ncols) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size");
617:     MatZeroEntries(C);
618:   } else {
619:     MatCreate(PetscObjectComm((PetscObject)A),&C);
620:     MatSetSizes(C,nrows,ncols,PETSC_DETERMINE,PETSC_DETERMINE);
621:     if (A->symmetric) {
622:       MatSetType(C,MATSEQSBAIJ);
623:     } else {
624:       MatSetType(C,MATSEQAIJ);
625:     }
626:     MatSeqAIJSetPreallocation(C,0,ailen);
627:     MatSeqSBAIJSetPreallocation(C,1,0,ailen);
628:   }
629:   c = (Mat_SeqAIJ*)C->data;

631:   /* loop over rows inserting into submatrix */
632:   a_new = c->a;
633:   j_new = c->j;
634:   i_new = c->i;

636:   for (i=0; i<nrows; i++) {
637:     lensi = ailen[i];
638:     for (k=0; k<lensi; k++) {
639:       *j_new++ = *aj++;
640:       MatGetValue(*aa++,first,first,a_new++);
641:     }
642:     i_new[i+1] = i_new[i] + lensi;
643:     c->ilen[i] = lensi;
644:   }

646:   MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);
647:   MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);
648:   *B   = C;
649:   return(0);
650: }

652: static PetscErrorCode MatAssemblyEnd_BlockMat(Mat A,MatAssemblyType mode)
653: {
654:   Mat_BlockMat   *a = (Mat_BlockMat*)A->data;
656:   PetscInt       fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax;
657:   PetscInt       m      = a->mbs,*ip,N,*ailen = a->ilen,rmax = 0;
658:   Mat            *aa    = a->a,*ap;

661:   if (mode == MAT_FLUSH_ASSEMBLY) return(0);

663:   if (m) rmax = ailen[0]; /* determine row with most nonzeros */
664:   for (i=1; i<m; i++) {
665:     /* move each row back by the amount of empty slots (fshift) before it*/
666:     fshift += imax[i-1] - ailen[i-1];
667:     rmax    = PetscMax(rmax,ailen[i]);
668:     if (fshift) {
669:       ip = aj + ai[i];
670:       ap = aa + ai[i];
671:       N  = ailen[i];
672:       for (j=0; j<N; j++) {
673:         ip[j-fshift] = ip[j];
674:         ap[j-fshift] = ap[j];
675:       }
676:     }
677:     ai[i] = ai[i-1] + ailen[i-1];
678:   }
679:   if (m) {
680:     fshift += imax[m-1] - ailen[m-1];
681:     ai[m]   = ai[m-1] + ailen[m-1];
682:   }
683:   /* reset ilen and imax for each row */
684:   for (i=0; i<m; i++) {
685:     ailen[i] = imax[i] = ai[i+1] - ai[i];
686:   }
687:   a->nz = ai[m];
688:   for (i=0; i<a->nz; i++) {
689: #if defined(PETSC_USE_DEBUG)
690:     if (!aa[i]) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Null matrix at location %D column %D nz %D",i,aj[i],a->nz);
691: #endif
692:     MatAssemblyBegin(aa[i],MAT_FINAL_ASSEMBLY);
693:     MatAssemblyEnd(aa[i],MAT_FINAL_ASSEMBLY);
694:   }
695:   PetscInfo4(A,"Matrix size: %D X %D; storage space: %D unneeded,%D used\n",m,A->cmap->n/A->cmap->bs,fshift,a->nz);
696:   PetscInfo1(A,"Number of mallocs during MatSetValues() is %D\n",a->reallocs);
697:   PetscInfo1(A,"Maximum nonzeros in any row is %D\n",rmax);

699:   A->info.mallocs    += a->reallocs;
700:   a->reallocs         = 0;
701:   A->info.nz_unneeded = (double)fshift;
702:   a->rmax             = rmax;
703:   MatMarkDiagonal_BlockMat(A);
704:   return(0);
705: }

707: static PetscErrorCode MatSetOption_BlockMat(Mat A,MatOption opt,PetscBool flg)
708: {
710:   if (opt == MAT_SYMMETRIC && flg) {
711:     A->ops->sor  = MatSOR_BlockMat_Symmetric;
712:     A->ops->mult = MatMult_BlockMat_Symmetric;
713:   } else {
714:     PetscInfo1(A,"Unused matrix option %s\n",MatOptions[opt]);
715:   }
716:   return(0);
717: }


720: static struct _MatOps MatOps_Values = {MatSetValues_BlockMat,
721:                                        0,
722:                                        0,
723:                                        MatMult_BlockMat,
724:                                /*  4*/ MatMultAdd_BlockMat,
725:                                        MatMultTranspose_BlockMat,
726:                                        MatMultTransposeAdd_BlockMat,
727:                                        0,
728:                                        0,
729:                                        0,
730:                                /* 10*/ 0,
731:                                        0,
732:                                        0,
733:                                        MatSOR_BlockMat,
734:                                        0,
735:                                /* 15*/ 0,
736:                                        0,
737:                                        0,
738:                                        0,
739:                                        0,
740:                                /* 20*/ 0,
741:                                        MatAssemblyEnd_BlockMat,
742:                                        MatSetOption_BlockMat,
743:                                        0,
744:                                /* 24*/ 0,
745:                                        0,
746:                                        0,
747:                                        0,
748:                                        0,
749:                                /* 29*/ 0,
750:                                        0,
751:                                        0,
752:                                        0,
753:                                        0,
754:                                /* 34*/ 0,
755:                                        0,
756:                                        0,
757:                                        0,
758:                                        0,
759:                                /* 39*/ 0,
760:                                        0,
761:                                        0,
762:                                        0,
763:                                        0,
764:                                /* 44*/ 0,
765:                                        0,
766:                                        MatShift_Basic,
767:                                        0,
768:                                        0,
769:                                /* 49*/ 0,
770:                                        0,
771:                                        0,
772:                                        0,
773:                                        0,
774:                                /* 54*/ 0,
775:                                        0,
776:                                        0,
777:                                        0,
778:                                        0,
779:                                /* 59*/ MatCreateSubMatrix_BlockMat,
780:                                        MatDestroy_BlockMat,
781:                                        MatView_BlockMat,
782:                                        0,
783:                                        0,
784:                                /* 64*/ 0,
785:                                        0,
786:                                        0,
787:                                        0,
788:                                        0,
789:                                /* 69*/ 0,
790:                                        0,
791:                                        0,
792:                                        0,
793:                                        0,
794:                                /* 74*/ 0,
795:                                        0,
796:                                        0,
797:                                        0,
798:                                        0,
799:                                /* 79*/ 0,
800:                                        0,
801:                                        0,
802:                                        0,
803:                                        MatLoad_BlockMat,
804:                                /* 84*/ 0,
805:                                        0,
806:                                        0,
807:                                        0,
808:                                        0,
809:                                /* 89*/ 0,
810:                                        0,
811:                                        0,
812:                                        0,
813:                                        0,
814:                                /* 94*/ 0,
815:                                        0,
816:                                        0,
817:                                        0,
818:                                        0,
819:                                /* 99*/ 0,
820:                                        0,
821:                                        0,
822:                                        0,
823:                                        0,
824:                                /*104*/ 0,
825:                                        0,
826:                                        0,
827:                                        0,
828:                                        0,
829:                                /*109*/ 0,
830:                                        0,
831:                                        0,
832:                                        0,
833:                                        0,
834:                                /*114*/ 0,
835:                                        0,
836:                                        0,
837:                                        0,
838:                                        0,
839:                                /*119*/ 0,
840:                                        0,
841:                                        0,
842:                                        0,
843:                                        0,
844:                                /*124*/ 0,
845:                                        0,
846:                                        0,
847:                                        0,
848:                                        0,
849:                                /*129*/ 0,
850:                                        0,
851:                                        0,
852:                                        0,
853:                                        0,
854:                                /*134*/ 0,
855:                                        0,
856:                                        0,
857:                                        0,
858:                                        0,
859:                                /*139*/ 0,
860:                                        0,
861:                                        0
862: };

864: /*@C
865:    MatBlockMatSetPreallocation - For good matrix assembly performance
866:    the user should preallocate the matrix storage by setting the parameter nz
867:    (or the array nnz).  By setting these parameters accurately, performance
868:    during matrix assembly can be increased by more than a factor of 50.

870:    Collective on MPI_Comm

872:    Input Parameters:
873: +  B - The matrix
874: .  bs - size of each block in matrix
875: .  nz - number of nonzeros per block row (same for all rows)
876: -  nnz - array containing the number of nonzeros in the various block rows
877:          (possibly different for each row) or NULL

879:    Notes:
880:      If nnz is given then nz is ignored

882:    Specify the preallocated storage with either nz or nnz (not both).
883:    Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory
884:    allocation.  For large problems you MUST preallocate memory or you
885:    will get TERRIBLE performance, see the users' manual chapter on matrices.

887:    Level: intermediate

889: .seealso: MatCreate(), MatCreateBlockMat(), MatSetValues()

891: @*/
892: PetscErrorCode  MatBlockMatSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[])
893: {

897:   PetscTryMethod(B,"MatBlockMatSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[]),(B,bs,nz,nnz));
898:   return(0);
899: }

901: static PetscErrorCode  MatBlockMatSetPreallocation_BlockMat(Mat A,PetscInt bs,PetscInt nz,PetscInt *nnz)
902: {
903:   Mat_BlockMat   *bmat = (Mat_BlockMat*)A->data;
905:   PetscInt       i;

908:   PetscLayoutSetBlockSize(A->rmap,bs);
909:   PetscLayoutSetBlockSize(A->cmap,bs);
910:   PetscLayoutSetUp(A->rmap);
911:   PetscLayoutSetUp(A->cmap);
912:   PetscLayoutGetBlockSize(A->rmap,&bs);

914:   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
915:   if (nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %d",nz);
916:   if (nnz) {
917:     for (i=0; i<A->rmap->n/bs; i++) {
918:       if (nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %d value %d",i,nnz[i]);
919:       if (nnz[i] > A->cmap->n/bs) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than row length: local row %d value %d rowlength %d",i,nnz[i],A->cmap->n/bs);
920:     }
921:   }
922:   bmat->mbs = A->rmap->n/bs;

924:   VecCreateSeqWithArray(PETSC_COMM_SELF,1,bs,NULL,&bmat->right);
925:   VecCreateSeqWithArray(PETSC_COMM_SELF,1,bs,NULL,&bmat->middle);
926:   VecCreateSeq(PETSC_COMM_SELF,bs,&bmat->left);

928:   if (!bmat->imax) {
929:     PetscMalloc2(A->rmap->n,&bmat->imax,A->rmap->n,&bmat->ilen);
930:     PetscLogObjectMemory((PetscObject)A,2*A->rmap->n*sizeof(PetscInt));
931:   }
932:   if (nnz) {
933:     nz = 0;
934:     for (i=0; i<A->rmap->n/A->rmap->bs; i++) {
935:       bmat->imax[i] = nnz[i];
936:       nz           += nnz[i];
937:     }
938:   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Currently requires block row by row preallocation");

940:   /* bmat->ilen will count nonzeros in each row so far. */
941:   for (i=0; i<bmat->mbs; i++) bmat->ilen[i] = 0;

943:   /* allocate the matrix space */
944:   MatSeqXAIJFreeAIJ(A,(PetscScalar**)&bmat->a,&bmat->j,&bmat->i);
945:   PetscMalloc3(nz,&bmat->a,nz,&bmat->j,A->rmap->n+1,&bmat->i);
946:   PetscLogObjectMemory((PetscObject)A,(A->rmap->n+1)*sizeof(PetscInt)+nz*(sizeof(PetscScalar)+sizeof(PetscInt)));
947:   bmat->i[0] = 0;
948:   for (i=1; i<bmat->mbs+1; i++) {
949:     bmat->i[i] = bmat->i[i-1] + bmat->imax[i-1];
950:   }
951:   bmat->singlemalloc = PETSC_TRUE;
952:   bmat->free_a       = PETSC_TRUE;
953:   bmat->free_ij      = PETSC_TRUE;

955:   bmat->nz            = 0;
956:   bmat->maxnz         = nz;
957:   A->info.nz_unneeded = (double)bmat->maxnz;
958:   MatSetOption(A,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);
959:   return(0);
960: }

962: /*MC
963:    MATBLOCKMAT - A matrix that is defined by a set of Mat's that represents a sparse block matrix
964:                  consisting of (usually) sparse blocks.

966:   Level: advanced

968: .seealso: MatCreateBlockMat()

970: M*/

972: PETSC_EXTERN PetscErrorCode MatCreate_BlockMat(Mat A)
973: {
974:   Mat_BlockMat   *b;

978:   PetscNewLog(A,&b);
979:   A->data = (void*)b;
980:   PetscMemcpy(A->ops,&MatOps_Values,sizeof(struct _MatOps));

982:   A->assembled    = PETSC_TRUE;
983:   A->preallocated = PETSC_FALSE;
984:   PetscObjectChangeTypeName((PetscObject)A,MATBLOCKMAT);

986:   PetscObjectComposeFunction((PetscObject)A,"MatBlockMatSetPreallocation_C",MatBlockMatSetPreallocation_BlockMat);
987:   return(0);
988: }

990: /*@C
991:    MatCreateBlockMat - Creates a new matrix in which each block contains a uniform-size sequential Mat object

993:   Collective on MPI_Comm

995:    Input Parameters:
996: +  comm - MPI communicator
997: .  m - number of rows
998: .  n  - number of columns
999: .  bs - size of each submatrix
1000: .  nz  - expected maximum number of nonzero blocks in row (use PETSC_DEFAULT if not known)
1001: -  nnz - expected number of nonzers per block row if known (use NULL otherwise)


1004:    Output Parameter:
1005: .  A - the matrix

1007:    Level: intermediate

1009:    Notes: Matrices of this type are nominally-sparse matrices in which each "entry" is a Mat object.  Each Mat must
1010:    have the same size and be sequential.  The local and global sizes must be compatible with this decomposition.

1012:    For matrices containing parallel submatrices and variable block sizes, see MATNEST.

1014: .keywords: matrix, bmat, create

1016: .seealso: MATBLOCKMAT, MatCreateNest()
1017: @*/
1018: PetscErrorCode  MatCreateBlockMat(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt bs,PetscInt nz,PetscInt *nnz, Mat *A)
1019: {

1023:   MatCreate(comm,A);
1024:   MatSetSizes(*A,m,n,PETSC_DETERMINE,PETSC_DETERMINE);
1025:   MatSetType(*A,MATBLOCKMAT);
1026:   MatBlockMatSetPreallocation(*A,bs,nz,nnz);
1027:   return(0);
1028: }