Actual source code: fdda.c


  2: #include <petsc/private/dmdaimpl.h>
  3: #include <petscmat.h>

  5: extern PetscErrorCode DMCreateColoring_DA_1d_MPIAIJ(DM,ISColoringType,ISColoring*);
  6: extern PetscErrorCode DMCreateColoring_DA_2d_MPIAIJ(DM,ISColoringType,ISColoring*);
  7: extern PetscErrorCode DMCreateColoring_DA_2d_5pt_MPIAIJ(DM,ISColoringType,ISColoring*);
  8: extern PetscErrorCode DMCreateColoring_DA_3d_MPIAIJ(DM,ISColoringType,ISColoring*);

 10: /*
 11:    For ghost i that may be negative or greater than the upper bound this
 12:   maps it into the 0:m-1 range using periodicity
 13: */
 14: #define SetInRange(i,m) ((i < 0) ? m+i : ((i >= m) ? i-m : i))

 16: static PetscErrorCode DMDASetBlockFills_Private(const PetscInt *dfill,PetscInt w,PetscInt **rfill)
 17: {
 19:   PetscInt       i,j,nz,*fill;

 22:   if (!dfill) return(0);

 24:   /* count number nonzeros */
 25:   nz = 0;
 26:   for (i=0; i<w; i++) {
 27:     for (j=0; j<w; j++) {
 28:       if (dfill[w*i+j]) nz++;
 29:     }
 30:   }
 31:   PetscMalloc1(nz + w + 1,&fill);
 32:   /* construct modified CSR storage of nonzero structure */
 33:   /*  fill[0 -- w] marks starts of each row of column indices (and end of last row)
 34:    so fill[1] - fill[0] gives number of nonzeros in first row etc */
 35:   nz = w + 1;
 36:   for (i=0; i<w; i++) {
 37:     fill[i] = nz;
 38:     for (j=0; j<w; j++) {
 39:       if (dfill[w*i+j]) {
 40:         fill[nz] = j;
 41:         nz++;
 42:       }
 43:     }
 44:   }
 45:   fill[w] = nz;

 47:   *rfill = fill;
 48:   return(0);
 49: }

 51: static PetscErrorCode DMDASetBlockFillsSparse_Private(const PetscInt *dfillsparse,PetscInt w,PetscInt **rfill)
 52: {
 54:   PetscInt       nz;

 57:   if (!dfillsparse) return(0);

 59:   /* Determine number of non-zeros */
 60:   nz = (dfillsparse[w] - w - 1);

 62:   /* Allocate space for our copy of the given sparse matrix representation. */
 63:   PetscMalloc1(nz + w + 1,rfill);
 64:   PetscArraycpy(*rfill,dfillsparse,nz+w+1);
 65:   return(0);
 66: }

 68: static PetscErrorCode DMDASetBlockFills_Private2(DM_DA *dd)
 69: {
 71:   PetscInt       i,k,cnt = 1;


 75:   /* ofillcount tracks the columns of ofill that have any nonzero in thems; the value in each location is the number of
 76:    columns to the left with any nonzeros in them plus 1 */
 77:   PetscCalloc1(dd->w,&dd->ofillcols);
 78:   for (i=0; i<dd->w; i++) {
 79:     for (k=dd->ofill[i]; k<dd->ofill[i+1]; k++) dd->ofillcols[dd->ofill[k]] = 1;
 80:   }
 81:   for (i=0; i<dd->w; i++) {
 82:     if (dd->ofillcols[i]) {
 83:       dd->ofillcols[i] = cnt++;
 84:     }
 85:   }
 86:   return(0);
 87: }

 89: /*@
 90:     DMDASetBlockFills - Sets the fill pattern in each block for a multi-component problem
 91:     of the matrix returned by DMCreateMatrix().

 93:     Logically Collective on da

 95:     Input Parameters:
 96: +   da - the distributed array
 97: .   dfill - the fill pattern in the diagonal block (may be NULL, means use dense block)
 98: -   ofill - the fill pattern in the off-diagonal blocks

100:     Level: developer

102:     Notes:
103:     This only makes sense when you are doing multicomponent problems but using the
104:        MPIAIJ matrix format

106:            The format for dfill and ofill is a 2 dimensional dof by dof matrix with 1 entries
107:        representing coupling and 0 entries for missing coupling. For example
108: $             dfill[9] = {1, 0, 0,
109: $                         1, 1, 0,
110: $                         0, 1, 1}
111:        means that row 0 is coupled with only itself in the diagonal block, row 1 is coupled with
112:        itself and row 0 (in the diagonal block) and row 2 is coupled with itself and row 1 (in the
113:        diagonal block).

115:      DMDASetGetMatrix() allows you to provide general code for those more complicated nonzero patterns then
116:      can be represented in the dfill, ofill format

118:    Contributed by Glenn Hammond

120: .seealso DMCreateMatrix(), DMDASetGetMatrix(), DMSetMatrixPreallocateOnly()

122: @*/
123: PetscErrorCode  DMDASetBlockFills(DM da,const PetscInt *dfill,const PetscInt *ofill)
124: {
125:   DM_DA          *dd = (DM_DA*)da->data;

129:   /* save the given dfill and ofill information */
130:   DMDASetBlockFills_Private(dfill,dd->w,&dd->dfill);
131:   DMDASetBlockFills_Private(ofill,dd->w,&dd->ofill);

133:   /* count nonzeros in ofill columns */
134:   DMDASetBlockFills_Private2(dd);

136:   return(0);
137: }

139: /*@
140:     DMDASetBlockFillsSparse - Sets the fill pattern in each block for a multi-component problem
141:     of the matrix returned by DMCreateMatrix(), using sparse representations
142:     of fill patterns.

144:     Logically Collective on da

146:     Input Parameters:
147: +   da - the distributed array
148: .   dfill - the sparse fill pattern in the diagonal block (may be NULL, means use dense block)
149: -   ofill - the sparse fill pattern in the off-diagonal blocks

151:     Level: developer

153:     Notes: This only makes sense when you are doing multicomponent problems but using the
154:        MPIAIJ matrix format

156:            The format for dfill and ofill is a sparse representation of a
157:            dof-by-dof matrix with 1 entries representing coupling and 0 entries
158:            for missing coupling.  The sparse representation is a 1 dimensional
159:            array of length nz + dof + 1, where nz is the number of non-zeros in
160:            the matrix.  The first dof entries in the array give the
161:            starting array indices of each row's items in the rest of the array,
162:            the dof+1st item contains the value nz + dof + 1 (i.e. the entire length of the array)
163:            and the remaining nz items give the column indices of each of
164:            the 1s within the logical 2D matrix.  Each row's items within
165:            the array are the column indices of the 1s within that row
166:            of the 2D matrix.  PETSc developers may recognize that this is the
167:            same format as that computed by the DMDASetBlockFills_Private()
168:            function from a dense 2D matrix representation.

170:      DMDASetGetMatrix() allows you to provide general code for those more complicated nonzero patterns then
171:      can be represented in the dfill, ofill format

173:    Contributed by Philip C. Roth

175: .seealso DMDASetBlockFills(), DMCreateMatrix(), DMDASetGetMatrix(), DMSetMatrixPreallocateOnly()

177: @*/
178: PetscErrorCode  DMDASetBlockFillsSparse(DM da,const PetscInt *dfillsparse,const PetscInt *ofillsparse)
179: {
180:   DM_DA          *dd = (DM_DA*)da->data;

184:   /* save the given dfill and ofill information */
185:   DMDASetBlockFillsSparse_Private(dfillsparse,dd->w,&dd->dfill);
186:   DMDASetBlockFillsSparse_Private(ofillsparse,dd->w,&dd->ofill);

188:   /* count nonzeros in ofill columns */
189:   DMDASetBlockFills_Private2(dd);

191:   return(0);
192: }

194: PetscErrorCode  DMCreateColoring_DA(DM da,ISColoringType ctype,ISColoring *coloring)
195: {
196:   PetscErrorCode   ierr;
197:   PetscInt         dim,m,n,p,nc;
198:   DMBoundaryType   bx,by,bz;
199:   MPI_Comm         comm;
200:   PetscMPIInt      size;
201:   PetscBool        isBAIJ;
202:   DM_DA            *dd = (DM_DA*)da->data;

205:   /*
206:                                   m
207:           ------------------------------------------------------
208:          |                                                     |
209:          |                                                     |
210:          |               ----------------------                |
211:          |               |                    |                |
212:       n  |           yn  |                    |                |
213:          |               |                    |                |
214:          |               .---------------------                |
215:          |             (xs,ys)     xn                          |
216:          |            .                                        |
217:          |         (gxs,gys)                                   |
218:          |                                                     |
219:           -----------------------------------------------------
220:   */

222:   /*
223:          nc - number of components per grid point
224:          col - number of colors needed in one direction for single component problem

226:   */
227:   DMDAGetInfo(da,&dim,NULL,NULL,NULL,&m,&n,&p,&nc,NULL,&bx,&by,&bz,NULL);

229:   PetscObjectGetComm((PetscObject)da,&comm);
230:   MPI_Comm_size(comm,&size);
231:   if (ctype == IS_COLORING_LOCAL) {
232:     if (size == 1) {
233:       ctype = IS_COLORING_GLOBAL;
234:     } else if (dim > 1) {
235:       if ((m==1 && bx == DM_BOUNDARY_PERIODIC) || (n==1 && by == DM_BOUNDARY_PERIODIC) || (p==1 && bz == DM_BOUNDARY_PERIODIC)) {
236:         SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"IS_COLORING_LOCAL cannot be used for periodic boundary condition having both ends of the domain  on the same process");
237:       }
238:     }
239:   }

241:   /* Tell the DMDA it has 1 degree of freedom per grid point so that the coloring for BAIJ
242:      matrices is for the blocks, not the individual matrix elements  */
243:   PetscStrbeginswith(da->mattype,MATBAIJ,&isBAIJ);
244:   if (!isBAIJ) {PetscStrbeginswith(da->mattype,MATMPIBAIJ,&isBAIJ);}
245:   if (!isBAIJ) {PetscStrbeginswith(da->mattype,MATSEQBAIJ,&isBAIJ);}
246:   if (isBAIJ) {
247:     dd->w  = 1;
248:     dd->xs = dd->xs/nc;
249:     dd->xe = dd->xe/nc;
250:     dd->Xs = dd->Xs/nc;
251:     dd->Xe = dd->Xe/nc;
252:   }

254:   /*
255:      We do not provide a getcoloring function in the DMDA operations because
256:    the basic DMDA does not know about matrices. We think of DMDA as being
257:    more low-level then matrices.
258:   */
259:   if (dim == 1) {
260:     DMCreateColoring_DA_1d_MPIAIJ(da,ctype,coloring);
261:   } else if (dim == 2) {
262:     DMCreateColoring_DA_2d_MPIAIJ(da,ctype,coloring);
263:   } else if (dim == 3) {
264:     DMCreateColoring_DA_3d_MPIAIJ(da,ctype,coloring);
265:   } else SETERRQ1(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"Not done for %D dimension, send us mail petsc-maint@mcs.anl.gov for code",dim);
266:   if (isBAIJ) {
267:     dd->w  = nc;
268:     dd->xs = dd->xs*nc;
269:     dd->xe = dd->xe*nc;
270:     dd->Xs = dd->Xs*nc;
271:     dd->Xe = dd->Xe*nc;
272:   }
273:   return(0);
274: }

276: /* ---------------------------------------------------------------------------------*/

278: PetscErrorCode DMCreateColoring_DA_2d_MPIAIJ(DM da,ISColoringType ctype,ISColoring *coloring)
279: {
280:   PetscErrorCode  ierr;
281:   PetscInt        xs,ys,nx,ny,i,j,ii,gxs,gys,gnx,gny,m,n,M,N,dim,s,k,nc,col;
282:   PetscInt        ncolors;
283:   MPI_Comm        comm;
284:   DMBoundaryType  bx,by;
285:   DMDAStencilType st;
286:   ISColoringValue *colors;
287:   DM_DA           *dd = (DM_DA*)da->data;

290:   /*
291:          nc - number of components per grid point
292:          col - number of colors needed in one direction for single component problem

294:   */
295:   DMDAGetInfo(da,&dim,&m,&n,NULL,&M,&N,NULL,&nc,&s,&bx,&by,NULL,&st);
296:   col  = 2*s + 1;
297:   DMDAGetCorners(da,&xs,&ys,NULL,&nx,&ny,NULL);
298:   DMDAGetGhostCorners(da,&gxs,&gys,NULL,&gnx,&gny,NULL);
299:   PetscObjectGetComm((PetscObject)da,&comm);

301:   /* special case as taught to us by Paul Hovland */
302:   if (st == DMDA_STENCIL_STAR && s == 1) {
303:     DMCreateColoring_DA_2d_5pt_MPIAIJ(da,ctype,coloring);
304:   } else {
305:     if (ctype == IS_COLORING_GLOBAL) {
306:       if (!dd->localcoloring) {
307:         PetscMalloc1(nc*nx*ny,&colors);
308:         ii   = 0;
309:         for (j=ys; j<ys+ny; j++) {
310:           for (i=xs; i<xs+nx; i++) {
311:             for (k=0; k<nc; k++) {
312:               colors[ii++] = k + nc*((i % col) + col*(j % col));
313:             }
314:           }
315:         }
316:         ncolors = nc + nc*(col-1 + col*(col-1));
317:         ISColoringCreate(comm,ncolors,nc*nx*ny,colors,PETSC_OWN_POINTER,&dd->localcoloring);
318:       }
319:       *coloring = dd->localcoloring;
320:     } else if (ctype == IS_COLORING_LOCAL) {
321:       if (!dd->ghostedcoloring) {
322:         PetscMalloc1(nc*gnx*gny,&colors);
323:         ii   = 0;
324:         for (j=gys; j<gys+gny; j++) {
325:           for (i=gxs; i<gxs+gnx; i++) {
326:             for (k=0; k<nc; k++) {
327:               /* the complicated stuff is to handle periodic boundaries */
328:               colors[ii++] = k + nc*((SetInRange(i,m) % col) + col*(SetInRange(j,n) % col));
329:             }
330:           }
331:         }
332:         ncolors = nc + nc*(col - 1 + col*(col-1));
333:         ISColoringCreate(comm,ncolors,nc*gnx*gny,colors,PETSC_OWN_POINTER,&dd->ghostedcoloring);
334:         /* PetscIntView(ncolors,(PetscInt*)colors,0); */

336:         ISColoringSetType(dd->ghostedcoloring,IS_COLORING_LOCAL);
337:       }
338:       *coloring = dd->ghostedcoloring;
339:     } else SETERRQ1(PetscObjectComm((PetscObject)da),PETSC_ERR_ARG_WRONG,"Unknown ISColoringType %d",(int)ctype);
340:   }
341:   ISColoringReference(*coloring);
342:   return(0);
343: }

345: /* ---------------------------------------------------------------------------------*/

347: PetscErrorCode DMCreateColoring_DA_3d_MPIAIJ(DM da,ISColoringType ctype,ISColoring *coloring)
348: {
349:   PetscErrorCode  ierr;
350:   PetscInt        xs,ys,nx,ny,i,j,gxs,gys,gnx,gny,m,n,p,dim,s,k,nc,col,zs,gzs,ii,l,nz,gnz,M,N,P;
351:   PetscInt        ncolors;
352:   MPI_Comm        comm;
353:   DMBoundaryType  bx,by,bz;
354:   DMDAStencilType st;
355:   ISColoringValue *colors;
356:   DM_DA           *dd = (DM_DA*)da->data;

359:   /*
360:          nc - number of components per grid point
361:          col - number of colors needed in one direction for single component problem

363:   */
364:   DMDAGetInfo(da,&dim,&m,&n,&p,&M,&N,&P,&nc,&s,&bx,&by,&bz,&st);
365:   col  = 2*s + 1;
366:   DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);
367:   DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);
368:   PetscObjectGetComm((PetscObject)da,&comm);

370:   /* create the coloring */
371:   if (ctype == IS_COLORING_GLOBAL) {
372:     if (!dd->localcoloring) {
373:       PetscMalloc1(nc*nx*ny*nz,&colors);
374:       ii   = 0;
375:       for (k=zs; k<zs+nz; k++) {
376:         for (j=ys; j<ys+ny; j++) {
377:           for (i=xs; i<xs+nx; i++) {
378:             for (l=0; l<nc; l++) {
379:               colors[ii++] = l + nc*((i % col) + col*(j % col) + col*col*(k % col));
380:             }
381:           }
382:         }
383:       }
384:       ncolors = nc + nc*(col-1 + col*(col-1)+ col*col*(col-1));
385:       ISColoringCreate(comm,ncolors,nc*nx*ny*nz,colors,PETSC_OWN_POINTER,&dd->localcoloring);
386:     }
387:     *coloring = dd->localcoloring;
388:   } else if (ctype == IS_COLORING_LOCAL) {
389:     if (!dd->ghostedcoloring) {
390:       PetscMalloc1(nc*gnx*gny*gnz,&colors);
391:       ii   = 0;
392:       for (k=gzs; k<gzs+gnz; k++) {
393:         for (j=gys; j<gys+gny; j++) {
394:           for (i=gxs; i<gxs+gnx; i++) {
395:             for (l=0; l<nc; l++) {
396:               /* the complicated stuff is to handle periodic boundaries */
397:               colors[ii++] = l + nc*((SetInRange(i,m) % col) + col*(SetInRange(j,n) % col) + col*col*(SetInRange(k,p) % col));
398:             }
399:           }
400:         }
401:       }
402:       ncolors = nc + nc*(col-1 + col*(col-1)+ col*col*(col-1));
403:       ISColoringCreate(comm,ncolors,nc*gnx*gny*gnz,colors,PETSC_OWN_POINTER,&dd->ghostedcoloring);
404:       ISColoringSetType(dd->ghostedcoloring,IS_COLORING_LOCAL);
405:     }
406:     *coloring = dd->ghostedcoloring;
407:   } else SETERRQ1(PetscObjectComm((PetscObject)da),PETSC_ERR_ARG_WRONG,"Unknown ISColoringType %d",(int)ctype);
408:   ISColoringReference(*coloring);
409:   return(0);
410: }

412: /* ---------------------------------------------------------------------------------*/

414: PetscErrorCode DMCreateColoring_DA_1d_MPIAIJ(DM da,ISColoringType ctype,ISColoring *coloring)
415: {
416:   PetscErrorCode  ierr;
417:   PetscInt        xs,nx,i,i1,gxs,gnx,l,m,M,dim,s,nc,col;
418:   PetscInt        ncolors;
419:   MPI_Comm        comm;
420:   DMBoundaryType  bx;
421:   ISColoringValue *colors;
422:   DM_DA           *dd = (DM_DA*)da->data;

425:   /*
426:          nc - number of components per grid point
427:          col - number of colors needed in one direction for single component problem

429:   */
430:   DMDAGetInfo(da,&dim,&m,NULL,NULL,&M,NULL,NULL,&nc,&s,&bx,NULL,NULL,NULL);
431:   col  = 2*s + 1;
432:   DMDAGetCorners(da,&xs,NULL,NULL,&nx,NULL,NULL);
433:   DMDAGetGhostCorners(da,&gxs,NULL,NULL,&gnx,NULL,NULL);
434:   PetscObjectGetComm((PetscObject)da,&comm);

436:   /* create the coloring */
437:   if (ctype == IS_COLORING_GLOBAL) {
438:     if (!dd->localcoloring) {
439:       PetscMalloc1(nc*nx,&colors);
440:       if (dd->ofillcols) {
441:         PetscInt tc = 0;
442:         for (i=0; i<nc; i++) tc += (PetscInt) (dd->ofillcols[i] > 0);
443:         i1 = 0;
444:         for (i=xs; i<xs+nx; i++) {
445:           for (l=0; l<nc; l++) {
446:             if (dd->ofillcols[l] && (i % col)) {
447:               colors[i1++] =  nc - 1 + tc*((i % col) - 1) + dd->ofillcols[l];
448:             } else {
449:               colors[i1++] = l;
450:             }
451:           }
452:         }
453:         ncolors = nc + 2*s*tc;
454:       } else {
455:         i1 = 0;
456:         for (i=xs; i<xs+nx; i++) {
457:           for (l=0; l<nc; l++) {
458:             colors[i1++] = l + nc*(i % col);
459:           }
460:         }
461:         ncolors = nc + nc*(col-1);
462:       }
463:       ISColoringCreate(comm,ncolors,nc*nx,colors,PETSC_OWN_POINTER,&dd->localcoloring);
464:     }
465:     *coloring = dd->localcoloring;
466:   } else if (ctype == IS_COLORING_LOCAL) {
467:     if (!dd->ghostedcoloring) {
468:       PetscMalloc1(nc*gnx,&colors);
469:       i1   = 0;
470:       for (i=gxs; i<gxs+gnx; i++) {
471:         for (l=0; l<nc; l++) {
472:           /* the complicated stuff is to handle periodic boundaries */
473:           colors[i1++] = l + nc*(SetInRange(i,m) % col);
474:         }
475:       }
476:       ncolors = nc + nc*(col-1);
477:       ISColoringCreate(comm,ncolors,nc*gnx,colors,PETSC_OWN_POINTER,&dd->ghostedcoloring);
478:       ISColoringSetType(dd->ghostedcoloring,IS_COLORING_LOCAL);
479:     }
480:     *coloring = dd->ghostedcoloring;
481:   } else SETERRQ1(PetscObjectComm((PetscObject)da),PETSC_ERR_ARG_WRONG,"Unknown ISColoringType %d",(int)ctype);
482:   ISColoringReference(*coloring);
483:   return(0);
484: }

486: PetscErrorCode DMCreateColoring_DA_2d_5pt_MPIAIJ(DM da,ISColoringType ctype,ISColoring *coloring)
487: {
488:   PetscErrorCode  ierr;
489:   PetscInt        xs,ys,nx,ny,i,j,ii,gxs,gys,gnx,gny,m,n,dim,s,k,nc;
490:   PetscInt        ncolors;
491:   MPI_Comm        comm;
492:   DMBoundaryType  bx,by;
493:   ISColoringValue *colors;
494:   DM_DA           *dd = (DM_DA*)da->data;

497:   /*
498:          nc - number of components per grid point
499:          col - number of colors needed in one direction for single component problem

501:   */
502:   DMDAGetInfo(da,&dim,&m,&n,NULL,NULL,NULL,NULL,&nc,&s,&bx,&by,NULL,NULL);
503:   DMDAGetCorners(da,&xs,&ys,NULL,&nx,&ny,NULL);
504:   DMDAGetGhostCorners(da,&gxs,&gys,NULL,&gnx,&gny,NULL);
505:   PetscObjectGetComm((PetscObject)da,&comm);
506:   /* create the coloring */
507:   if (ctype == IS_COLORING_GLOBAL) {
508:     if (!dd->localcoloring) {
509:       PetscMalloc1(nc*nx*ny,&colors);
510:       ii   = 0;
511:       for (j=ys; j<ys+ny; j++) {
512:         for (i=xs; i<xs+nx; i++) {
513:           for (k=0; k<nc; k++) {
514:             colors[ii++] = k + nc*((3*j+i) % 5);
515:           }
516:         }
517:       }
518:       ncolors = 5*nc;
519:       ISColoringCreate(comm,ncolors,nc*nx*ny,colors,PETSC_OWN_POINTER,&dd->localcoloring);
520:     }
521:     *coloring = dd->localcoloring;
522:   } else if (ctype == IS_COLORING_LOCAL) {
523:     if (!dd->ghostedcoloring) {
524:       PetscMalloc1(nc*gnx*gny,&colors);
525:       ii = 0;
526:       for (j=gys; j<gys+gny; j++) {
527:         for (i=gxs; i<gxs+gnx; i++) {
528:           for (k=0; k<nc; k++) {
529:             colors[ii++] = k + nc*((3*SetInRange(j,n) + SetInRange(i,m)) % 5);
530:           }
531:         }
532:       }
533:       ncolors = 5*nc;
534:       ISColoringCreate(comm,ncolors,nc*gnx*gny,colors,PETSC_OWN_POINTER,&dd->ghostedcoloring);
535:       ISColoringSetType(dd->ghostedcoloring,IS_COLORING_LOCAL);
536:     }
537:     *coloring = dd->ghostedcoloring;
538:   } else SETERRQ1(PetscObjectComm((PetscObject)da),PETSC_ERR_ARG_WRONG,"Unknown ISColoringType %d",(int)ctype);
539:   return(0);
540: }

542: /* =========================================================================== */
543: extern PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ(DM,Mat,PetscBool);
544: extern PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ_Fill(DM,Mat);
545: extern PetscErrorCode DMCreateMatrix_DA_1d_SeqAIJ_NoPreallocation(DM,Mat,PetscBool);
546: extern PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ(DM,Mat,PetscBool);
547: extern PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ_Fill(DM,Mat);
548: extern PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ(DM,Mat,PetscBool);
549: extern PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ_Fill(DM,Mat);
550: extern PetscErrorCode DMCreateMatrix_DA_2d_MPIBAIJ(DM,Mat);
551: extern PetscErrorCode DMCreateMatrix_DA_3d_MPIBAIJ(DM,Mat);
552: extern PetscErrorCode DMCreateMatrix_DA_2d_MPISBAIJ(DM,Mat);
553: extern PetscErrorCode DMCreateMatrix_DA_3d_MPISBAIJ(DM,Mat);
554: extern PetscErrorCode DMCreateMatrix_DA_2d_MPISELL(DM,Mat);
555: extern PetscErrorCode DMCreateMatrix_DA_3d_MPISELL(DM,Mat);
556: extern PetscErrorCode DMCreateMatrix_DA_IS(DM,Mat);

558: /*@C
559:    MatSetupDM - Sets the DMDA that is to be used by the HYPRE_StructMatrix PETSc matrix

561:    Logically Collective on mat

563:    Input Parameters:
564: +  mat - the matrix
565: -  da - the da

567:    Level: intermediate

569: @*/
570: PetscErrorCode MatSetupDM(Mat mat,DM da)
571: {

577:   PetscTryMethod(mat,"MatSetupDM_C",(Mat,DM),(mat,da));
578:   return(0);
579: }

581: PetscErrorCode  MatView_MPI_DA(Mat A,PetscViewer viewer)
582: {
583:   DM                da;
584:   PetscErrorCode    ierr;
585:   const char        *prefix;
586:   Mat               Anatural;
587:   AO                ao;
588:   PetscInt          rstart,rend,*petsc,i;
589:   IS                is;
590:   MPI_Comm          comm;
591:   PetscViewerFormat format;

594:   /* Check whether we are just printing info, in which case MatView() already viewed everything we wanted to view */
595:   PetscViewerGetFormat(viewer,&format);
596:   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) return(0);

598:   PetscObjectGetComm((PetscObject)A,&comm);
599:   MatGetDM(A, &da);
600:   if (!da) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix not generated from a DMDA");

602:   DMDAGetAO(da,&ao);
603:   MatGetOwnershipRange(A,&rstart,&rend);
604:   PetscMalloc1(rend-rstart,&petsc);
605:   for (i=rstart; i<rend; i++) petsc[i-rstart] = i;
606:   AOApplicationToPetsc(ao,rend-rstart,petsc);
607:   ISCreateGeneral(comm,rend-rstart,petsc,PETSC_OWN_POINTER,&is);

609:   /* call viewer on natural ordering */
610:   MatCreateSubMatrix(A,is,is,MAT_INITIAL_MATRIX,&Anatural);
611:   ISDestroy(&is);
612:   PetscObjectGetOptionsPrefix((PetscObject)A,&prefix);
613:   PetscObjectSetOptionsPrefix((PetscObject)Anatural,prefix);
614:   PetscObjectSetName((PetscObject)Anatural,((PetscObject)A)->name);
615:   ((PetscObject)Anatural)->donotPetscObjectPrintClassNamePrefixType = PETSC_TRUE;
616:   MatView(Anatural,viewer);
617:   ((PetscObject)Anatural)->donotPetscObjectPrintClassNamePrefixType = PETSC_FALSE;
618:   MatDestroy(&Anatural);
619:   return(0);
620: }

622: PetscErrorCode  MatLoad_MPI_DA(Mat A,PetscViewer viewer)
623: {
624:   DM             da;
626:   Mat            Anatural,Aapp;
627:   AO             ao;
628:   PetscInt       rstart,rend,*app,i,m,n,M,N;
629:   IS             is;
630:   MPI_Comm       comm;

633:   PetscObjectGetComm((PetscObject)A,&comm);
634:   MatGetDM(A, &da);
635:   if (!da) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix not generated from a DMDA");

637:   /* Load the matrix in natural ordering */
638:   MatCreate(PetscObjectComm((PetscObject)A),&Anatural);
639:   MatSetType(Anatural,((PetscObject)A)->type_name);
640:   MatGetSize(A,&M,&N);
641:   MatGetLocalSize(A,&m,&n);
642:   MatSetSizes(Anatural,m,n,M,N);
643:   MatLoad(Anatural,viewer);

645:   /* Map natural ordering to application ordering and create IS */
646:   DMDAGetAO(da,&ao);
647:   MatGetOwnershipRange(Anatural,&rstart,&rend);
648:   PetscMalloc1(rend-rstart,&app);
649:   for (i=rstart; i<rend; i++) app[i-rstart] = i;
650:   AOPetscToApplication(ao,rend-rstart,app);
651:   ISCreateGeneral(comm,rend-rstart,app,PETSC_OWN_POINTER,&is);

653:   /* Do permutation and replace header */
654:   MatCreateSubMatrix(Anatural,is,is,MAT_INITIAL_MATRIX,&Aapp);
655:   MatHeaderReplace(A,&Aapp);
656:   ISDestroy(&is);
657:   MatDestroy(&Anatural);
658:   return(0);
659: }

661: PetscErrorCode DMCreateMatrix_DA(DM da, Mat *J)
662: {
664:   PetscInt       dim,dof,nx,ny,nz,dims[3],starts[3],M,N,P;
665:   Mat            A;
666:   MPI_Comm       comm;
667:   MatType        Atype;
668:   void           (*aij)(void)=NULL,(*baij)(void)=NULL,(*sbaij)(void)=NULL,(*sell)(void)=NULL,(*is)(void)=NULL;
669:   MatType        mtype;
670:   PetscMPIInt    size;
671:   DM_DA          *dd = (DM_DA*)da->data;

674:   MatInitializePackage();
675:   mtype = da->mattype;

677:   /*
678:                                   m
679:           ------------------------------------------------------
680:          |                                                     |
681:          |                                                     |
682:          |               ----------------------                |
683:          |               |                    |                |
684:       n  |           ny  |                    |                |
685:          |               |                    |                |
686:          |               .---------------------                |
687:          |             (xs,ys)     nx                          |
688:          |            .                                        |
689:          |         (gxs,gys)                                   |
690:          |                                                     |
691:           -----------------------------------------------------
692:   */

694:   /*
695:          nc - number of components per grid point
696:          col - number of colors needed in one direction for single component problem

698:   */
699:   M   = dd->M;
700:   N   = dd->N;
701:   P   = dd->P;
702:   dim = da->dim;
703:   dof = dd->w;
704:   /* DMDAGetInfo(da,&dim,&M,&N,&P,NULL,NULL,NULL,&dof,NULL,NULL,NULL,NULL,NULL); */
705:   DMDAGetCorners(da,NULL,NULL,NULL,&nx,&ny,&nz);
706:   PetscObjectGetComm((PetscObject)da,&comm);
707:   MatCreate(comm,&A);
708:   MatSetSizes(A,dof*nx*ny*nz,dof*nx*ny*nz,dof*M*N*P,dof*M*N*P);
709:   MatSetType(A,mtype);
710:   MatSetFromOptions(A);
711:   MatSetDM(A,da);
712:   if (da->structure_only) {
713:     MatSetOption(A,MAT_STRUCTURE_ONLY,PETSC_TRUE);
714:   }
715:   MatGetType(A,&Atype);
716:   /*
717:      We do not provide a getmatrix function in the DMDA operations because
718:    the basic DMDA does not know about matrices. We think of DMDA as being more
719:    more low-level than matrices. This is kind of cheating but, cause sometimes
720:    we think of DMDA has higher level than matrices.

722:      We could switch based on Atype (or mtype), but we do not since the
723:    specialized setting routines depend only on the particular preallocation
724:    details of the matrix, not the type itself.
725:   */
726:   PetscObjectQueryFunction((PetscObject)A,"MatMPIAIJSetPreallocation_C",&aij);
727:   if (!aij) {
728:     PetscObjectQueryFunction((PetscObject)A,"MatSeqAIJSetPreallocation_C",&aij);
729:   }
730:   if (!aij) {
731:     PetscObjectQueryFunction((PetscObject)A,"MatMPIBAIJSetPreallocation_C",&baij);
732:     if (!baij) {
733:       PetscObjectQueryFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C",&baij);
734:     }
735:     if (!baij) {
736:       PetscObjectQueryFunction((PetscObject)A,"MatMPISBAIJSetPreallocation_C",&sbaij);
737:       if (!sbaij) {
738:         PetscObjectQueryFunction((PetscObject)A,"MatSeqSBAIJSetPreallocation_C",&sbaij);
739:       }
740:       if (!sbaij) {
741:         PetscObjectQueryFunction((PetscObject)A,"MatMPISELLSetPreallocation_C",&sell);
742:         if (!sell) {
743:           PetscObjectQueryFunction((PetscObject)A,"MatSeqSELLSetPreallocation_C",&sell);
744:         }
745:       }
746:       if (!sell) {
747:         PetscObjectQueryFunction((PetscObject)A,"MatISSetPreallocation_C",&is);
748:       }
749:     }
750:   }
751:   if (aij) {
752:     if (dim == 1) {
753:       if (dd->ofill) {
754:         DMCreateMatrix_DA_1d_MPIAIJ_Fill(da,A);
755:       } else {
756:         DMBoundaryType bx;
757:         PetscMPIInt  size;
758:         DMDAGetInfo(da,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,&bx,NULL,NULL,NULL);
759:         MPI_Comm_size(PetscObjectComm((PetscObject)da),&size);
760:         if (size == 1 && bx == DM_BOUNDARY_NONE) {
761:           DMCreateMatrix_DA_1d_SeqAIJ_NoPreallocation(da,A,PETSC_FALSE);
762:         } else {
763:           DMCreateMatrix_DA_1d_MPIAIJ(da,A,PETSC_FALSE);
764:         }
765:       }
766:     } else if (dim == 2) {
767:       if (dd->ofill) {
768:         DMCreateMatrix_DA_2d_MPIAIJ_Fill(da,A);
769:       } else {
770:         DMCreateMatrix_DA_2d_MPIAIJ(da,A,PETSC_FALSE);
771:       }
772:     } else if (dim == 3) {
773:       if (dd->ofill) {
774:         DMCreateMatrix_DA_3d_MPIAIJ_Fill(da,A);
775:       } else {
776:         DMCreateMatrix_DA_3d_MPIAIJ(da,A,PETSC_FALSE);
777:       }
778:     }
779:   } else if (baij) {
780:     if (dim == 2) {
781:       DMCreateMatrix_DA_2d_MPIBAIJ(da,A);
782:     } else if (dim == 3) {
783:       DMCreateMatrix_DA_3d_MPIBAIJ(da,A);
784:     } else SETERRQ3(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"Not implemented for %D dimension and Matrix Type: %s in %D dimension! Send mail to petsc-maint@mcs.anl.gov for code",dim,Atype,dim);
785:   } else if (sbaij) {
786:     if (dim == 2) {
787:       DMCreateMatrix_DA_2d_MPISBAIJ(da,A);
788:     } else if (dim == 3) {
789:       DMCreateMatrix_DA_3d_MPISBAIJ(da,A);
790:     } else SETERRQ3(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"Not implemented for %D dimension and Matrix Type: %s in %D dimension! Send mail to petsc-maint@mcs.anl.gov for code",dim,Atype,dim);
791:   } else if (sell) {
792:      if (dim == 2) {
793:        DMCreateMatrix_DA_2d_MPISELL(da,A);
794:      } else if (dim == 3) {
795:        DMCreateMatrix_DA_3d_MPISELL(da,A);
796:      } else SETERRQ3(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"Not implemented for %D dimension and Matrix Type: %s in %D dimension! Send mail to petsc-maint@mcs.anl.gov for code",dim,Atype,dim);
797:   } else if (is) {
798:     DMCreateMatrix_DA_IS(da,A);
799:   } else {
800:     ISLocalToGlobalMapping ltog;

802:     MatSetBlockSize(A,dof);
803:     MatSetUp(A);
804:     DMGetLocalToGlobalMapping(da,&ltog);
805:     MatSetLocalToGlobalMapping(A,ltog,ltog);
806:   }
807:   DMDAGetGhostCorners(da,&starts[0],&starts[1],&starts[2],&dims[0],&dims[1],&dims[2]);
808:   MatSetStencil(A,dim,dims,starts,dof);
809:   MatSetDM(A,da);
810:   MPI_Comm_size(comm,&size);
811:   if (size > 1) {
812:     /* change viewer to display matrix in natural ordering */
813:     MatSetOperation(A, MATOP_VIEW, (void (*)(void))MatView_MPI_DA);
814:     MatSetOperation(A, MATOP_LOAD, (void (*)(void))MatLoad_MPI_DA);
815:   }
816:   *J = A;
817:   return(0);
818: }

820: /* ---------------------------------------------------------------------------------*/
821: PETSC_EXTERN PetscErrorCode MatISSetPreallocation_IS(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);

823: PetscErrorCode DMCreateMatrix_DA_IS(DM dm,Mat J)
824: {
825:   DM_DA                  *da = (DM_DA*)dm->data;
826:   Mat                    lJ;
827:   ISLocalToGlobalMapping ltog;
828:   IS                     is_loc_filt, is_glob;
829:   const PetscInt         *e_loc,*idx;
830:   PetscInt               nel,nen,nv,dof,dim,*gidx,nb;
831:   PetscBool              flg;
832:   PetscErrorCode         ierr;

834:   /* The l2g map of DMDA has all ghosted nodes, and e_loc is a subset of all the local nodes (including the ghosted)
835:      We need to filter the local indices that are represented through the DMDAGetElements decomposition
836:      This is because the size of the local matrices in MATIS is the local size of the l2g map */
838:   dof  = da->w;
839:   dim  = dm->dim;

841:   MatSetBlockSize(J,dof);

843:   /* get local elements indices in local DMDA numbering */
844:   DMDAGetElements(dm,&nel,&nen,&e_loc); /* this will throw an error if the stencil type is not DMDA_STENCIL_BOX */
845:   ISCreateBlock(PetscObjectComm((PetscObject)dm),dof,nel*nen,e_loc,PETSC_COPY_VALUES,&is_loc_filt);
846:   DMDARestoreElements(dm,&nel,&nen,&e_loc);

848:   /* obtain a consistent local ordering for MATIS */
849:   ISSortRemoveDups(is_loc_filt);
850:   ISBlockGetLocalSize(is_loc_filt,&nb);
851:   DMGetLocalToGlobalMapping(dm,&ltog);
852:   ISLocalToGlobalMappingGetSize(ltog,&nv);
853:   PetscMalloc1(PetscMax(nb,nv/dof),&gidx);
854:   ISBlockGetIndices(is_loc_filt,&idx);
855:   ISLocalToGlobalMappingApplyBlock(ltog,nb,idx,gidx);
856:   ISBlockRestoreIndices(is_loc_filt,&idx);
857:   ISCreateBlock(PetscObjectComm((PetscObject)dm),dof,nb,gidx,PETSC_USE_POINTER,&is_glob);
858:   ISLocalToGlobalMappingCreateIS(is_glob,&ltog);
859:   ISDestroy(&is_glob);
860:   MatSetLocalToGlobalMapping(J,ltog,ltog);
861:   ISLocalToGlobalMappingDestroy(&ltog);

863:   /* We also attach a l2g map to the local matrices to have MatSetValueLocal to work */
864:   MatISGetLocalMat(J,&lJ);
865:   ISLocalToGlobalMappingCreateIS(is_loc_filt,&ltog);
866:   ISDestroy(&is_loc_filt);
867:   ISCreateStride(PetscObjectComm((PetscObject)lJ),nv/dof,0,1,&is_glob);
868:   ISGetIndices(is_glob,&idx);
869:   ISGlobalToLocalMappingApplyBlock(ltog,IS_GTOLM_MASK,nv/dof,idx,&nb,gidx);
870:   ISRestoreIndices(is_glob,&idx);
871:   ISDestroy(&is_glob);
872:   ISLocalToGlobalMappingDestroy(&ltog);
873:   ISCreateBlock(PETSC_COMM_SELF,dof,nb,gidx,PETSC_USE_POINTER,&is_loc_filt);
874:   ISLocalToGlobalMappingCreateIS(is_loc_filt,&ltog);
875:   ISDestroy(&is_loc_filt);
876:   MatSetLocalToGlobalMapping(lJ,ltog,ltog);
877:   ISLocalToGlobalMappingDestroy(&ltog);
878:   PetscFree(gidx);

880:   /* Preallocation (not exact): we reuse the preallocation routines of the assembled version  */
881:   flg = dm->prealloc_only;
882:   dm->prealloc_only = PETSC_TRUE;
883:   switch (dim) {
884:   case 1:
885:     PetscObjectComposeFunction((PetscObject)J,"MatMPIAIJSetPreallocation_C",MatISSetPreallocation_IS);
886:     DMCreateMatrix_DA_1d_MPIAIJ(dm,J,PETSC_TRUE);
887:     PetscObjectComposeFunction((PetscObject)J,"MatMPIAIJSetPreallocation_C",NULL);
888:     break;
889:   case 2:
890:     PetscObjectComposeFunction((PetscObject)J,"MatMPIAIJSetPreallocation_C",MatISSetPreallocation_IS);
891:     DMCreateMatrix_DA_2d_MPIAIJ(dm,J,PETSC_TRUE);
892:     PetscObjectComposeFunction((PetscObject)J,"MatMPIAIJSetPreallocation_C",NULL);
893:     break;
894:   case 3:
895:     PetscObjectComposeFunction((PetscObject)J,"MatMPIAIJSetPreallocation_C",MatISSetPreallocation_IS);
896:     DMCreateMatrix_DA_3d_MPIAIJ(dm,J,PETSC_TRUE);
897:     PetscObjectComposeFunction((PetscObject)J,"MatMPIAIJSetPreallocation_C",NULL);
898:     break;
899:   default:
900:     SETERRQ1(PetscObjectComm((PetscObject)dm),PETSC_ERR_SUP,"Unhandled dimension %d",dim);
901:   }
902:   dm->prealloc_only = flg;
903:   return(0);
904: }

906: PetscErrorCode DMCreateMatrix_DA_2d_MPISELL(DM da,Mat J)
907: {
908:   PetscErrorCode         ierr;
909:   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny,m,n,dim,s,*cols = NULL,k,nc,*rows = NULL,col,cnt,l,p;
910:   PetscInt               lstart,lend,pstart,pend,*dnz,*onz;
911:   MPI_Comm               comm;
912:   PetscScalar            *values;
913:   DMBoundaryType         bx,by;
914:   ISLocalToGlobalMapping ltog;
915:   DMDAStencilType        st;

918:   /*
919:          nc - number of components per grid point
920:          col - number of colors needed in one direction for single component problem

922:   */
923:   DMDAGetInfo(da,&dim,&m,&n,NULL,NULL,NULL,NULL,&nc,&s,&bx,&by,NULL,&st);
924:   col  = 2*s + 1;
925:   DMDAGetCorners(da,&xs,&ys,NULL,&nx,&ny,NULL);
926:   DMDAGetGhostCorners(da,&gxs,&gys,NULL,&gnx,&gny,NULL);
927:   PetscObjectGetComm((PetscObject)da,&comm);

929:   PetscMalloc2(nc,&rows,col*col*nc*nc,&cols);
930:   DMGetLocalToGlobalMapping(da,&ltog);

932:   MatSetBlockSize(J,nc);
933:   /* determine the matrix preallocation information */
934:   MatPreallocateInitialize(comm,nc*nx*ny,nc*nx*ny,dnz,onz);
935:   for (i=xs; i<xs+nx; i++) {

937:     pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
938:     pend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));

940:     for (j=ys; j<ys+ny; j++) {
941:       slot = i - gxs + gnx*(j - gys);

943:       lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
944:       lend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));

946:       cnt = 0;
947:       for (k=0; k<nc; k++) {
948:         for (l=lstart; l<lend+1; l++) {
949:           for (p=pstart; p<pend+1; p++) {
950:             if ((st == DMDA_STENCIL_BOX) || (!l || !p)) {  /* entries on star have either l = 0 or p = 0 */
951:               cols[cnt++] = k + nc*(slot + gnx*l + p);
952:             }
953:           }
954:         }
955:         rows[k] = k + nc*(slot);
956:       }
957:       MatPreallocateSetLocal(ltog,nc,rows,ltog,cnt,cols,dnz,onz);
958:     }
959:   }
960:   MatSetBlockSize(J,nc);
961:   MatSeqSELLSetPreallocation(J,0,dnz);
962:   MatMPISELLSetPreallocation(J,0,dnz,0,onz);
963:   MatPreallocateFinalize(dnz,onz);

965:   MatSetLocalToGlobalMapping(J,ltog,ltog);

967:   /*
968:     For each node in the grid: we get the neighbors in the local (on processor ordering
969:     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
970:     PETSc ordering.
971:   */
972:   if (!da->prealloc_only) {
973:     PetscCalloc1(col*col*nc*nc,&values);
974:     for (i=xs; i<xs+nx; i++) {

976:       pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
977:       pend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));

979:       for (j=ys; j<ys+ny; j++) {
980:         slot = i - gxs + gnx*(j - gys);

982:         lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
983:         lend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));

985:         cnt = 0;
986:         for (k=0; k<nc; k++) {
987:           for (l=lstart; l<lend+1; l++) {
988:             for (p=pstart; p<pend+1; p++) {
989:               if ((st == DMDA_STENCIL_BOX) || (!l || !p)) {  /* entries on star have either l = 0 or p = 0 */
990:                 cols[cnt++] = k + nc*(slot + gnx*l + p);
991:               }
992:             }
993:           }
994:           rows[k] = k + nc*(slot);
995:         }
996:         MatSetValuesLocal(J,nc,rows,cnt,cols,values,INSERT_VALUES);
997:       }
998:     }
999:     PetscFree(values);
1000:     /* do not copy values to GPU since they are all zero and not yet needed there */
1001:     MatBindToCPU(J,PETSC_TRUE);
1002:     MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);
1003:     MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);
1004:     MatBindToCPU(J,PETSC_FALSE);
1005:     MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);
1006:   }
1007:   PetscFree2(rows,cols);
1008:   return(0);
1009: }

1011: PetscErrorCode DMCreateMatrix_DA_3d_MPISELL(DM da,Mat J)
1012: {
1013:   PetscErrorCode         ierr;
1014:   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
1015:   PetscInt               m,n,dim,s,*cols = NULL,k,nc,*rows = NULL,col,cnt,l,p,*dnz = NULL,*onz = NULL;
1016:   PetscInt               istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk,M,N,P;
1017:   MPI_Comm               comm;
1018:   PetscScalar            *values;
1019:   DMBoundaryType         bx,by,bz;
1020:   ISLocalToGlobalMapping ltog;
1021:   DMDAStencilType        st;

1024:   /*
1025:          nc - number of components per grid point
1026:          col - number of colors needed in one direction for single component problem

1028:   */
1029:   DMDAGetInfo(da,&dim,&m,&n,&p,&M,&N,&P,&nc,&s,&bx,&by,&bz,&st);
1030:   col  = 2*s + 1;
1031:   DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);
1032:   DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);
1033:   PetscObjectGetComm((PetscObject)da,&comm);

1035:   PetscMalloc2(nc,&rows,col*col*col*nc*nc,&cols);
1036:   DMGetLocalToGlobalMapping(da,&ltog);

1038:   MatSetBlockSize(J,nc);
1039:   /* determine the matrix preallocation information */
1040:   MatPreallocateInitialize(comm,nc*nx*ny*nz,nc*nx*ny*nz,dnz,onz);
1041:   for (i=xs; i<xs+nx; i++) {
1042:     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1043:     iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1044:     for (j=ys; j<ys+ny; j++) {
1045:       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1046:       jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1047:       for (k=zs; k<zs+nz; k++) {
1048:         kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1049:         kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));

1051:         slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);

1053:         cnt = 0;
1054:         for (l=0; l<nc; l++) {
1055:           for (ii=istart; ii<iend+1; ii++) {
1056:             for (jj=jstart; jj<jend+1; jj++) {
1057:               for (kk=kstart; kk<kend+1; kk++) {
1058:                 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
1059:                   cols[cnt++] = l + nc*(slot + ii + gnx*jj + gnx*gny*kk);
1060:                 }
1061:               }
1062:             }
1063:           }
1064:           rows[l] = l + nc*(slot);
1065:         }
1066:         MatPreallocateSetLocal(ltog,nc,rows,ltog,cnt,cols,dnz,onz);
1067:       }
1068:     }
1069:   }
1070:   MatSetBlockSize(J,nc);
1071:   MatSeqSELLSetPreallocation(J,0,dnz);
1072:   MatMPISELLSetPreallocation(J,0,dnz,0,onz);
1073:   MatPreallocateFinalize(dnz,onz);
1074:   MatSetLocalToGlobalMapping(J,ltog,ltog);

1076:   /*
1077:     For each node in the grid: we get the neighbors in the local (on processor ordering
1078:     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1079:     PETSc ordering.
1080:   */
1081:   if (!da->prealloc_only) {
1082:     PetscCalloc1(col*col*col*nc*nc*nc,&values);
1083:     for (i=xs; i<xs+nx; i++) {
1084:       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1085:       iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1086:       for (j=ys; j<ys+ny; j++) {
1087:         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1088:         jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1089:         for (k=zs; k<zs+nz; k++) {
1090:           kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1091:           kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));

1093:           slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);

1095:           cnt = 0;
1096:           for (l=0; l<nc; l++) {
1097:             for (ii=istart; ii<iend+1; ii++) {
1098:               for (jj=jstart; jj<jend+1; jj++) {
1099:                 for (kk=kstart; kk<kend+1; kk++) {
1100:                   if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
1101:                     cols[cnt++] = l + nc*(slot + ii + gnx*jj + gnx*gny*kk);
1102:                   }
1103:                 }
1104:               }
1105:             }
1106:             rows[l] = l + nc*(slot);
1107:           }
1108:           MatSetValuesLocal(J,nc,rows,cnt,cols,values,INSERT_VALUES);
1109:         }
1110:       }
1111:     }
1112:     PetscFree(values);
1113:     /* do not copy values to GPU since they are all zero and not yet needed there */
1114:     MatBindToCPU(J,PETSC_TRUE);
1115:     MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);
1116:     MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);
1117:     MatBindToCPU(J,PETSC_FALSE);
1118:     MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);
1119:   }
1120:   PetscFree2(rows,cols);
1121:   return(0);
1122: }

1124: PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ(DM da,Mat J,PetscBool isIS)
1125: {
1126:   PetscErrorCode         ierr;
1127:   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny,m,n,dim,s,*cols = NULL,k,nc,*rows = NULL,col,cnt,l,p,M,N;
1128:   PetscInt               lstart,lend,pstart,pend,*dnz,*onz;
1129:   MPI_Comm               comm;
1130:   DMBoundaryType         bx,by;
1131:   ISLocalToGlobalMapping ltog,mltog;
1132:   DMDAStencilType        st;
1133:   PetscBool              removedups = PETSC_FALSE;

1136:   /*
1137:          nc - number of components per grid point
1138:          col - number of colors needed in one direction for single component problem

1140:   */
1141:   DMDAGetInfo(da,&dim,&m,&n,&M,&N,NULL,NULL,&nc,&s,&bx,&by,NULL,&st);
1142:   if (!isIS && bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE) {
1143:     MatSetOption(J,MAT_SORTED_FULL,PETSC_TRUE);
1144:   }
1145:   col  = 2*s + 1;
1146:   /*
1147:        With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times
1148:        because of "wrapping" around the end of the domain hitting an entry already counted in the other direction.
1149:   */
1150:   if (M == 1 && 2*s >= m) removedups = PETSC_TRUE;
1151:   if (N == 1 && 2*s >= n) removedups = PETSC_TRUE;
1152:   DMDAGetCorners(da,&xs,&ys,NULL,&nx,&ny,NULL);
1153:   DMDAGetGhostCorners(da,&gxs,&gys,NULL,&gnx,&gny,NULL);
1154:   PetscObjectGetComm((PetscObject)da,&comm);

1156:   PetscMalloc2(nc,&rows,col*col*nc*nc,&cols);
1157:   DMGetLocalToGlobalMapping(da,&ltog);

1159:   MatSetBlockSize(J,nc);
1160:   /* determine the matrix preallocation information */
1161:   MatPreallocateInitialize(comm,nc*nx*ny,nc*nx*ny,dnz,onz);
1162:   for (i=xs; i<xs+nx; i++) {

1164:     pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1165:     pend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));

1167:     for (j=ys; j<ys+ny; j++) {
1168:       slot = i - gxs + gnx*(j - gys);

1170:       lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1171:       lend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));

1173:       cnt = 0;
1174:       for (k=0; k<nc; k++) {
1175:         for (l=lstart; l<lend+1; l++) {
1176:           for (p=pstart; p<pend+1; p++) {
1177:             if ((st == DMDA_STENCIL_BOX) || (!l || !p)) {  /* entries on star have either l = 0 or p = 0 */
1178:               cols[cnt++] = k + nc*(slot + gnx*l + p);
1179:             }
1180:           }
1181:         }
1182:         rows[k] = k + nc*(slot);
1183:       }
1184:       if (removedups) {
1185:         MatPreallocateSetLocalRemoveDups(ltog,nc,rows,ltog,cnt,cols,dnz,onz);
1186:       } else {
1187:         MatPreallocateSetLocal(ltog,nc,rows,ltog,cnt,cols,dnz,onz);
1188:       }
1189:     }
1190:   }
1191:   MatSetBlockSize(J,nc);
1192:   MatSeqAIJSetPreallocation(J,0,dnz);
1193:   MatMPIAIJSetPreallocation(J,0,dnz,0,onz);
1194:   MatPreallocateFinalize(dnz,onz);
1195:   MatGetLocalToGlobalMapping(J,&mltog,NULL);
1196:   if (!mltog) {
1197:     MatSetLocalToGlobalMapping(J,ltog,ltog);
1198:   }

1200:   /*
1201:     For each node in the grid: we get the neighbors in the local (on processor ordering
1202:     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1203:     PETSc ordering.
1204:   */
1205:   if (!da->prealloc_only) {
1206:     for (i=xs; i<xs+nx; i++) {

1208:       pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1209:       pend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));

1211:       for (j=ys; j<ys+ny; j++) {
1212:         slot = i - gxs + gnx*(j - gys);

1214:         lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1215:         lend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));

1217:         cnt = 0;
1218:         for (l=lstart; l<lend+1; l++) {
1219:           for (p=pstart; p<pend+1; p++) {
1220:             if ((st == DMDA_STENCIL_BOX) || (!l || !p)) {  /* entries on star have either l = 0 or p = 0 */
1221:               cols[cnt++] = nc*(slot + gnx*l + p);
1222:               for (k=1; k<nc; k++) {
1223:                 cols[cnt] = 1 + cols[cnt-1];cnt++;
1224:               }
1225:             }
1226:           }
1227:         }
1228:         for (k=0; k<nc; k++) rows[k] = k + nc*(slot);
1229:         MatSetValuesLocal(J,nc,rows,cnt,cols,NULL,INSERT_VALUES);
1230:       }
1231:     }
1232:     /* do not copy values to GPU since they are all zero and not yet needed there */
1233:     MatBindToCPU(J,PETSC_TRUE);
1234:     MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);
1235:     MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);
1236:     MatBindToCPU(J,PETSC_FALSE);
1237:     MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);
1238:     if (bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE) {
1239:       MatSetOption(J,MAT_SORTED_FULL,PETSC_FALSE);
1240:     }
1241:   }
1242:   PetscFree2(rows,cols);
1243:   return(0);
1244: }

1246: PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ_Fill(DM da,Mat J)
1247: {
1248:   PetscErrorCode         ierr;
1249:   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
1250:   PetscInt               m,n,dim,s,*cols,k,nc,row,col,cnt,maxcnt = 0,l,p,M,N;
1251:   PetscInt               lstart,lend,pstart,pend,*dnz,*onz;
1252:   DM_DA                  *dd = (DM_DA*)da->data;
1253:   PetscInt               ifill_col,*ofill = dd->ofill, *dfill = dd->dfill;
1254:   MPI_Comm               comm;
1255:   DMBoundaryType         bx,by;
1256:   ISLocalToGlobalMapping ltog;
1257:   DMDAStencilType        st;
1258:   PetscBool              removedups = PETSC_FALSE;

1261:   /*
1262:          nc - number of components per grid point
1263:          col - number of colors needed in one direction for single component problem

1265:   */
1266:   DMDAGetInfo(da,&dim,&m,&n,&M,&N,NULL,NULL,&nc,&s,&bx,&by,NULL,&st);
1267:   col  = 2*s + 1;
1268:   /*
1269:        With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times
1270:        because of "wrapping" around the end of the domain hitting an entry already counted in the other direction.
1271:   */
1272:   if (M == 1 && 2*s >= m) removedups = PETSC_TRUE;
1273:   if (N == 1 && 2*s >= n) removedups = PETSC_TRUE;
1274:   DMDAGetCorners(da,&xs,&ys,NULL,&nx,&ny,NULL);
1275:   DMDAGetGhostCorners(da,&gxs,&gys,NULL,&gnx,&gny,NULL);
1276:   PetscObjectGetComm((PetscObject)da,&comm);

1278:   PetscMalloc1(col*col*nc,&cols);
1279:   DMGetLocalToGlobalMapping(da,&ltog);

1281:   MatSetBlockSize(J,nc);
1282:   /* determine the matrix preallocation information */
1283:   MatPreallocateInitialize(comm,nc*nx*ny,nc*nx*ny,dnz,onz);
1284:   for (i=xs; i<xs+nx; i++) {

1286:     pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1287:     pend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));

1289:     for (j=ys; j<ys+ny; j++) {
1290:       slot = i - gxs + gnx*(j - gys);

1292:       lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1293:       lend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));

1295:       for (k=0; k<nc; k++) {
1296:         cnt = 0;
1297:         for (l=lstart; l<lend+1; l++) {
1298:           for (p=pstart; p<pend+1; p++) {
1299:             if (l || p) {
1300:               if ((st == DMDA_STENCIL_BOX) || (!l || !p)) {  /* entries on star */
1301:                 for (ifill_col=ofill[k]; ifill_col<ofill[k+1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc*(slot + gnx*l + p);
1302:               }
1303:             } else {
1304:               if (dfill) {
1305:                 for (ifill_col=dfill[k]; ifill_col<dfill[k+1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc*(slot + gnx*l + p);
1306:               } else {
1307:                 for (ifill_col=0; ifill_col<nc; ifill_col++) cols[cnt++] = ifill_col + nc*(slot + gnx*l + p);
1308:               }
1309:             }
1310:           }
1311:         }
1312:         row    = k + nc*(slot);
1313:         maxcnt = PetscMax(maxcnt,cnt);
1314:         if (removedups) {
1315:           MatPreallocateSetLocalRemoveDups(ltog,1,&row,ltog,cnt,cols,dnz,onz);
1316:         } else {
1317:           MatPreallocateSetLocal(ltog,1,&row,ltog,cnt,cols,dnz,onz);
1318:         }
1319:       }
1320:     }
1321:   }
1322:   MatSeqAIJSetPreallocation(J,0,dnz);
1323:   MatMPIAIJSetPreallocation(J,0,dnz,0,onz);
1324:   MatPreallocateFinalize(dnz,onz);
1325:   MatSetLocalToGlobalMapping(J,ltog,ltog);

1327:   /*
1328:     For each node in the grid: we get the neighbors in the local (on processor ordering
1329:     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1330:     PETSc ordering.
1331:   */
1332:   if (!da->prealloc_only) {
1333:     for (i=xs; i<xs+nx; i++) {

1335:       pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1336:       pend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));

1338:       for (j=ys; j<ys+ny; j++) {
1339:         slot = i - gxs + gnx*(j - gys);

1341:         lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1342:         lend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));

1344:         for (k=0; k<nc; k++) {
1345:           cnt = 0;
1346:           for (l=lstart; l<lend+1; l++) {
1347:             for (p=pstart; p<pend+1; p++) {
1348:               if (l || p) {
1349:                 if ((st == DMDA_STENCIL_BOX) || (!l || !p)) {  /* entries on star */
1350:                   for (ifill_col=ofill[k]; ifill_col<ofill[k+1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc*(slot + gnx*l + p);
1351:                 }
1352:               } else {
1353:                 if (dfill) {
1354:                   for (ifill_col=dfill[k]; ifill_col<dfill[k+1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc*(slot + gnx*l + p);
1355:                 } else {
1356:                   for (ifill_col=0; ifill_col<nc; ifill_col++) cols[cnt++] = ifill_col + nc*(slot + gnx*l + p);
1357:                 }
1358:               }
1359:             }
1360:           }
1361:           row  = k + nc*(slot);
1362:           MatSetValuesLocal(J,1,&row,cnt,cols,NULL,INSERT_VALUES);
1363:         }
1364:       }
1365:     }
1366:     /* do not copy values to GPU since they are all zero and not yet needed there */
1367:     MatBindToCPU(J,PETSC_TRUE);
1368:     MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);
1369:     MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);
1370:     MatBindToCPU(J,PETSC_FALSE);
1371:     MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);
1372:   }
1373:   PetscFree(cols);
1374:   return(0);
1375: }

1377: /* ---------------------------------------------------------------------------------*/

1379: PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ(DM da,Mat J,PetscBool isIS)
1380: {
1381:   PetscErrorCode         ierr;
1382:   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
1383:   PetscInt               m,n,dim,s,*cols = NULL,k,nc,*rows = NULL,col,cnt,l,p,*dnz = NULL,*onz = NULL;
1384:   PetscInt               istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk,M,N,P;
1385:   MPI_Comm               comm;
1386:   DMBoundaryType         bx,by,bz;
1387:   ISLocalToGlobalMapping ltog,mltog;
1388:   DMDAStencilType        st;
1389:   PetscBool              removedups = PETSC_FALSE;

1392:   /*
1393:          nc - number of components per grid point
1394:          col - number of colors needed in one direction for single component problem

1396:   */
1397:   DMDAGetInfo(da,&dim,&m,&n,&p,&M,&N,&P,&nc,&s,&bx,&by,&bz,&st);
1398:   if (!isIS && bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE && bz == DM_BOUNDARY_NONE) {
1399:     MatSetOption(J,MAT_SORTED_FULL,PETSC_TRUE);
1400:   }
1401:   col  = 2*s + 1;

1403:   /*
1404:        With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times
1405:        because of "wrapping" around the end of the domain hitting an entry already counted in the other direction.
1406:   */
1407:   if (M == 1 && 2*s >= m) removedups = PETSC_TRUE;
1408:   if (N == 1 && 2*s >= n) removedups = PETSC_TRUE;
1409:   if (P == 1 && 2*s >= p) removedups = PETSC_TRUE;

1411:   DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);
1412:   DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);
1413:   PetscObjectGetComm((PetscObject)da,&comm);

1415:   PetscMalloc2(nc,&rows,col*col*col*nc*nc,&cols);
1416:   DMGetLocalToGlobalMapping(da,&ltog);

1418:   MatSetBlockSize(J,nc);
1419:   /* determine the matrix preallocation information */
1420:   MatPreallocateInitialize(comm,nc*nx*ny*nz,nc*nx*ny*nz,dnz,onz);
1421:   for (i=xs; i<xs+nx; i++) {
1422:     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1423:     iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1424:     for (j=ys; j<ys+ny; j++) {
1425:       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1426:       jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1427:       for (k=zs; k<zs+nz; k++) {
1428:         kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1429:         kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));

1431:         slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);

1433:         cnt = 0;
1434:         for (l=0; l<nc; l++) {
1435:           for (ii=istart; ii<iend+1; ii++) {
1436:             for (jj=jstart; jj<jend+1; jj++) {
1437:               for (kk=kstart; kk<kend+1; kk++) {
1438:                 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
1439:                   cols[cnt++] = l + nc*(slot + ii + gnx*jj + gnx*gny*kk);
1440:                 }
1441:               }
1442:             }
1443:           }
1444:           rows[l] = l + nc*(slot);
1445:         }
1446:         if (removedups) {
1447:           MatPreallocateSetLocalRemoveDups(ltog,nc,rows,ltog,cnt,cols,dnz,onz);
1448:         } else {
1449:           MatPreallocateSetLocal(ltog,nc,rows,ltog,cnt,cols,dnz,onz);
1450:         }
1451:       }
1452:     }
1453:   }
1454:   MatSetBlockSize(J,nc);
1455:   MatSeqAIJSetPreallocation(J,0,dnz);
1456:   MatMPIAIJSetPreallocation(J,0,dnz,0,onz);
1457:   MatPreallocateFinalize(dnz,onz);
1458:   MatGetLocalToGlobalMapping(J,&mltog,NULL);
1459:   if (!mltog) {
1460:     MatSetLocalToGlobalMapping(J,ltog,ltog);
1461:   }

1463:   /*
1464:     For each node in the grid: we get the neighbors in the local (on processor ordering
1465:     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1466:     PETSc ordering.
1467:   */
1468:   if (!da->prealloc_only) {
1469:     for (i=xs; i<xs+nx; i++) {
1470:       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1471:       iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1472:       for (j=ys; j<ys+ny; j++) {
1473:         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1474:         jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1475:         for (k=zs; k<zs+nz; k++) {
1476:           kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1477:           kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));

1479:           slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);

1481:           cnt = 0;
1482:           for (kk=kstart; kk<kend+1; kk++) {
1483:             for (jj=jstart; jj<jend+1; jj++) {
1484:               for (ii=istart; ii<iend+1; ii++) {
1485:                 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
1486:                   cols[cnt++] = nc*(slot + ii + gnx*jj + gnx*gny*kk);
1487:                     for (l=1; l<nc; l++) {
1488:                       cols[cnt] = 1 + cols[cnt-1];cnt++;
1489:                   }
1490:                 }
1491:               }
1492:             }
1493:           }
1494:           rows[0] = nc*(slot); for (l=1; l<nc; l++) rows[l] = 1 + rows[l-1];
1495:           MatSetValuesLocal(J,nc,rows,cnt,cols,NULL,INSERT_VALUES);
1496:         }
1497:       }
1498:     }
1499:     /* do not copy values to GPU since they are all zero and not yet needed there */
1500:     MatBindToCPU(J,PETSC_TRUE);
1501:     MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);
1502:     MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);
1503:     if (!isIS && bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE && bz == DM_BOUNDARY_NONE) {
1504:       MatSetOption(J,MAT_SORTED_FULL,PETSC_FALSE);
1505:     }
1506:     MatBindToCPU(J,PETSC_FALSE);
1507:     MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);
1508:   }
1509:   PetscFree2(rows,cols);
1510:   return(0);
1511: }

1513: /* ---------------------------------------------------------------------------------*/

1515: PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ_Fill(DM da,Mat J)
1516: {
1517:   PetscErrorCode         ierr;
1518:   DM_DA                  *dd = (DM_DA*)da->data;
1519:   PetscInt               xs,nx,i,j,gxs,gnx,row,k,l;
1520:   PetscInt               m,dim,s,*cols = NULL,nc,cnt,maxcnt = 0,*ocols;
1521:   PetscInt               *ofill = dd->ofill,*dfill = dd->dfill;
1522:   DMBoundaryType         bx;
1523:   ISLocalToGlobalMapping ltog;
1524:   PetscMPIInt            rank,size;

1527:   MPI_Comm_rank(PetscObjectComm((PetscObject)da),&rank);
1528:   MPI_Comm_size(PetscObjectComm((PetscObject)da),&size);

1530:   /*
1531:          nc - number of components per grid point

1533:   */
1534:   DMDAGetInfo(da,&dim,&m,NULL,NULL,NULL,NULL,NULL,&nc,&s,&bx,NULL,NULL,NULL);
1535:   if (s > 1) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"Matrix creation for 1d not implemented correctly for stencil width larger than 1");
1536:   DMDAGetCorners(da,&xs,NULL,NULL,&nx,NULL,NULL);
1537:   DMDAGetGhostCorners(da,&gxs,NULL,NULL,&gnx,NULL,NULL);

1539:   MatSetBlockSize(J,nc);
1540:   PetscCalloc2(nx*nc,&cols,nx*nc,&ocols);

1542:   /*
1543:         note should be smaller for first and last process with no periodic
1544:         does not handle dfill
1545:   */
1546:   cnt = 0;
1547:   /* coupling with process to the left */
1548:   for (i=0; i<s; i++) {
1549:     for (j=0; j<nc; j++) {
1550:       ocols[cnt] = ((rank == 0) ? 0 : (s - i)*(ofill[j+1] - ofill[j]));
1551:       cols[cnt]  = dfill[j+1] - dfill[j] + (s + i)*(ofill[j+1] - ofill[j]);
1552:       if (rank == 0 && (dd->bx == DM_BOUNDARY_PERIODIC)) {
1553:         if (size > 1) ocols[cnt] += (s - i)*(ofill[j+1] - ofill[j]);
1554:         else cols[cnt] += (s - i)*(ofill[j+1] - ofill[j]);
1555:       }
1556:       maxcnt = PetscMax(maxcnt,ocols[cnt]+cols[cnt]);
1557:       cnt++;
1558:     }
1559:   }
1560:   for (i=s; i<nx-s; i++) {
1561:     for (j=0; j<nc; j++) {
1562:       cols[cnt] = dfill[j+1] - dfill[j] + 2*s*(ofill[j+1] - ofill[j]);
1563:       maxcnt = PetscMax(maxcnt,ocols[cnt]+cols[cnt]);
1564:       cnt++;
1565:     }
1566:   }
1567:   /* coupling with process to the right */
1568:   for (i=nx-s; i<nx; i++) {
1569:     for (j=0; j<nc; j++) {
1570:       ocols[cnt] = ((rank == (size-1)) ? 0 : (i - nx + s + 1)*(ofill[j+1] - ofill[j]));
1571:       cols[cnt]  = dfill[j+1] - dfill[j] + (s + nx - i - 1)*(ofill[j+1] - ofill[j]);
1572:       if ((rank == size-1) && (dd->bx == DM_BOUNDARY_PERIODIC)) {
1573:         if (size > 1) ocols[cnt] += (i - nx + s + 1)*(ofill[j+1] - ofill[j]);
1574:         else cols[cnt] += (i - nx + s + 1)*(ofill[j+1] - ofill[j]);
1575:       }
1576:       maxcnt = PetscMax(maxcnt,ocols[cnt]+cols[cnt]);
1577:       cnt++;
1578:     }
1579:   }

1581:   MatSeqAIJSetPreallocation(J,0,cols);
1582:   MatMPIAIJSetPreallocation(J,0,cols,0,ocols);
1583:   PetscFree2(cols,ocols);

1585:   DMGetLocalToGlobalMapping(da,&ltog);
1586:   MatSetLocalToGlobalMapping(J,ltog,ltog);

1588:   /*
1589:     For each node in the grid: we get the neighbors in the local (on processor ordering
1590:     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1591:     PETSc ordering.
1592:   */
1593:   if (!da->prealloc_only) {
1594:     PetscMalloc1(maxcnt,&cols);
1595:     row = xs*nc;
1596:     /* coupling with process to the left */
1597:     for (i=xs; i<xs+s; i++) {
1598:       for (j=0; j<nc; j++) {
1599:         cnt = 0;
1600:         if (rank) {
1601:           for (l=0; l<s; l++) {
1602:             for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i - s + l)*nc + ofill[k];
1603:           }
1604:         }
1605:         if (rank == 0 && (dd->bx == DM_BOUNDARY_PERIODIC)) {
1606:           for (l=0; l<s; l++) {
1607:             for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (m + i - s - l)*nc + ofill[k];
1608:           }
1609:         }
1610:         if (dfill) {
1611:           for (k=dfill[j]; k<dfill[j+1]; k++) {
1612:             cols[cnt++] = i*nc + dfill[k];
1613:           }
1614:         } else {
1615:           for (k=0; k<nc; k++) {
1616:             cols[cnt++] = i*nc + k;
1617:           }
1618:         }
1619:         for (l=0; l<s; l++) {
1620:           for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i + s - l)*nc + ofill[k];
1621:         }
1622:         MatSetValues(J,1,&row,cnt,cols,NULL,INSERT_VALUES);
1623:         row++;
1624:       }
1625:     }
1626:     for (i=xs+s; i<xs+nx-s; i++) {
1627:       for (j=0; j<nc; j++) {
1628:         cnt = 0;
1629:         for (l=0; l<s; l++) {
1630:           for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i - s + l)*nc + ofill[k];
1631:         }
1632:         if (dfill) {
1633:           for (k=dfill[j]; k<dfill[j+1]; k++) {
1634:             cols[cnt++] = i*nc + dfill[k];
1635:           }
1636:         } else {
1637:           for (k=0; k<nc; k++) {
1638:             cols[cnt++] = i*nc + k;
1639:           }
1640:         }
1641:         for (l=0; l<s; l++) {
1642:           for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i + s - l)*nc + ofill[k];
1643:         }
1644:         MatSetValues(J,1,&row,cnt,cols,NULL,INSERT_VALUES);
1645:         row++;
1646:       }
1647:     }
1648:     /* coupling with process to the right */
1649:     for (i=xs+nx-s; i<xs+nx; i++) {
1650:       for (j=0; j<nc; j++) {
1651:         cnt = 0;
1652:         for (l=0; l<s; l++) {
1653:           for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i - s + l)*nc + ofill[k];
1654:         }
1655:         if (dfill) {
1656:           for (k=dfill[j]; k<dfill[j+1]; k++) {
1657:             cols[cnt++] = i*nc + dfill[k];
1658:           }
1659:         } else {
1660:           for (k=0; k<nc; k++) {
1661:             cols[cnt++] = i*nc + k;
1662:           }
1663:         }
1664:         if (rank < size-1) {
1665:           for (l=0; l<s; l++) {
1666:             for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i + s - l)*nc + ofill[k];
1667:           }
1668:         }
1669:         if ((rank == size-1) && (dd->bx == DM_BOUNDARY_PERIODIC)) {
1670:           for (l=0; l<s; l++) {
1671:             for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i - s - l - m + 2)*nc + ofill[k];
1672:           }
1673:         }
1674:         MatSetValues(J,1,&row,cnt,cols,NULL,INSERT_VALUES);
1675:         row++;
1676:       }
1677:     }
1678:     PetscFree(cols);
1679:     /* do not copy values to GPU since they are all zero and not yet needed there */
1680:     MatBindToCPU(J,PETSC_TRUE);
1681:     MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);
1682:     MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);
1683:     MatBindToCPU(J,PETSC_FALSE);
1684:     MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);
1685:   }
1686:   return(0);
1687: }

1689: /* ---------------------------------------------------------------------------------*/

1691: PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ(DM da,Mat J,PetscBool isIS)
1692: {
1693:   PetscErrorCode         ierr;
1694:   PetscInt               xs,nx,i,i1,slot,gxs,gnx;
1695:   PetscInt               m,dim,s,*cols = NULL,nc,*rows = NULL,col,cnt,l;
1696:   PetscInt               istart,iend;
1697:   DMBoundaryType         bx;
1698:   ISLocalToGlobalMapping ltog,mltog;

1701:   /*
1702:          nc - number of components per grid point
1703:          col - number of colors needed in one direction for single component problem

1705:   */
1706:   DMDAGetInfo(da,&dim,&m,NULL,NULL,NULL,NULL,NULL,&nc,&s,&bx,NULL,NULL,NULL);
1707:   if (!isIS && bx == DM_BOUNDARY_NONE) {
1708:     MatSetOption(J,MAT_SORTED_FULL,PETSC_TRUE);
1709:   }
1710:   col  = 2*s + 1;

1712:   DMDAGetCorners(da,&xs,NULL,NULL,&nx,NULL,NULL);
1713:   DMDAGetGhostCorners(da,&gxs,NULL,NULL,&gnx,NULL,NULL);

1715:   MatSetBlockSize(J,nc);
1716:   MatSeqAIJSetPreallocation(J,col*nc,NULL);
1717:   MatMPIAIJSetPreallocation(J,col*nc,NULL,col*nc,NULL);

1719:   DMGetLocalToGlobalMapping(da,&ltog);
1720:   MatGetLocalToGlobalMapping(J,&mltog,NULL);
1721:   if (!mltog) {
1722:     MatSetLocalToGlobalMapping(J,ltog,ltog);
1723:   }

1725:   /*
1726:     For each node in the grid: we get the neighbors in the local (on processor ordering
1727:     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1728:     PETSc ordering.
1729:   */
1730:   if (!da->prealloc_only) {
1731:     PetscMalloc2(nc,&rows,col*nc*nc,&cols);
1732:     for (i=xs; i<xs+nx; i++) {
1733:       istart = PetscMax(-s,gxs - i);
1734:       iend   = PetscMin(s,gxs + gnx - i - 1);
1735:       slot   = i - gxs;

1737:       cnt = 0;
1738:       for (i1=istart; i1<iend+1; i1++) {
1739:         cols[cnt++] = nc*(slot + i1);
1740:         for (l=1; l<nc; l++) {
1741:           cols[cnt] = 1 + cols[cnt-1];cnt++;
1742:         }
1743:       }
1744:       rows[0] = nc*(slot); for (l=1; l<nc; l++) rows[l] = 1 + rows[l-1];
1745:       MatSetValuesLocal(J,nc,rows,cnt,cols,NULL,INSERT_VALUES);
1746:     }
1747:     /* do not copy values to GPU since they are all zero and not yet needed there */
1748:     MatBindToCPU(J,PETSC_TRUE);
1749:     MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);
1750:     MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);
1751:     if (!isIS && bx == DM_BOUNDARY_NONE) {
1752:       MatSetOption(J,MAT_SORTED_FULL,PETSC_FALSE);
1753:     }
1754:     MatBindToCPU(J,PETSC_FALSE);
1755:     MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);
1756:     PetscFree2(rows,cols);
1757:   }
1758:   return(0);
1759: }

1761: /* ---------------------------------------------------------------------------------*/

1763: PetscErrorCode DMCreateMatrix_DA_1d_SeqAIJ_NoPreallocation(DM da,Mat J,PetscBool isIS)
1764: {
1765:   PetscErrorCode         ierr;
1766:   PetscInt               xs,nx,i,i1,slot,gxs,gnx;
1767:   PetscInt               m,dim,s,*cols = NULL,nc,*rows = NULL,col,cnt,l;
1768:   PetscInt               istart,iend;
1769:   DMBoundaryType         bx;
1770:   ISLocalToGlobalMapping ltog,mltog;

1773:   /*
1774:          nc - number of components per grid point
1775:          col - number of colors needed in one direction for single component problem
1776:   */
1777:   DMDAGetInfo(da,&dim,&m,NULL,NULL,NULL,NULL,NULL,&nc,&s,&bx,NULL,NULL,NULL);
1778:   col  = 2*s + 1;

1780:   DMDAGetCorners(da,&xs,NULL,NULL,&nx,NULL,NULL);
1781:   DMDAGetGhostCorners(da,&gxs,NULL,NULL,&gnx,NULL,NULL);

1783:   MatSetBlockSize(J,nc);
1784:   MatSeqAIJSetTotalPreallocation(J,nx*nc*col*nc);

1786:   DMGetLocalToGlobalMapping(da,&ltog);
1787:   MatGetLocalToGlobalMapping(J,&mltog,NULL);
1788:   if (!mltog) {
1789:     MatSetLocalToGlobalMapping(J,ltog,ltog);
1790:   }

1792:   /*
1793:     For each node in the grid: we get the neighbors in the local (on processor ordering
1794:     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1795:     PETSc ordering.
1796:   */
1797:   if (!da->prealloc_only) {
1798:     PetscMalloc2(nc,&rows,col*nc*nc,&cols);
1799:     for (i=xs; i<xs+nx; i++) {
1800:       istart = PetscMax(-s,gxs - i);
1801:       iend   = PetscMin(s,gxs + gnx - i - 1);
1802:       slot   = i - gxs;

1804:       cnt = 0;
1805:       for (i1=istart; i1<iend+1; i1++) {
1806:         cols[cnt++] = nc*(slot + i1);
1807:         for (l=1; l<nc; l++) {
1808:           cols[cnt] = 1 + cols[cnt-1];cnt++;
1809:         }
1810:       }
1811:       rows[0] = nc*(slot); for (l=1; l<nc; l++) rows[l] = 1 + rows[l-1];
1812:       MatSetValuesLocal(J,nc,rows,cnt,cols,NULL,INSERT_VALUES);
1813:     }
1814:     /* do not copy values to GPU since they are all zero and not yet needed there */
1815:     MatBindToCPU(J,PETSC_TRUE);
1816:     MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);
1817:     MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);
1818:     if (!isIS && bx == DM_BOUNDARY_NONE) {
1819:       MatSetOption(J,MAT_SORTED_FULL,PETSC_FALSE);
1820:     }
1821:     MatBindToCPU(J,PETSC_FALSE);
1822:     MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);
1823:     PetscFree2(rows,cols);
1824:   }
1825:   MatSetOption(J,MAT_SORTED_FULL,PETSC_FALSE);
1826:   return(0);
1827: }

1829: PetscErrorCode DMCreateMatrix_DA_2d_MPIBAIJ(DM da,Mat J)
1830: {
1831:   PetscErrorCode         ierr;
1832:   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
1833:   PetscInt               m,n,dim,s,*cols,nc,col,cnt,*dnz,*onz;
1834:   PetscInt               istart,iend,jstart,jend,ii,jj;
1835:   MPI_Comm               comm;
1836:   PetscScalar            *values;
1837:   DMBoundaryType         bx,by;
1838:   DMDAStencilType        st;
1839:   ISLocalToGlobalMapping ltog;

1842:   /*
1843:      nc - number of components per grid point
1844:      col - number of colors needed in one direction for single component problem
1845:   */
1846:   DMDAGetInfo(da,&dim,&m,&n,NULL,NULL,NULL,NULL,&nc,&s,&bx,&by,NULL,&st);
1847:   col  = 2*s + 1;

1849:   DMDAGetCorners(da,&xs,&ys,NULL,&nx,&ny,NULL);
1850:   DMDAGetGhostCorners(da,&gxs,&gys,NULL,&gnx,&gny,NULL);
1851:   PetscObjectGetComm((PetscObject)da,&comm);

1853:   PetscMalloc1(col*col*nc*nc,&cols);

1855:   DMGetLocalToGlobalMapping(da,&ltog);

1857:   /* determine the matrix preallocation information */
1858:   MatPreallocateInitialize(comm,nx*ny,nx*ny,dnz,onz);
1859:   for (i=xs; i<xs+nx; i++) {
1860:     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1861:     iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1862:     for (j=ys; j<ys+ny; j++) {
1863:       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1864:       jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1865:       slot   = i - gxs + gnx*(j - gys);

1867:       /* Find block columns in block row */
1868:       cnt = 0;
1869:       for (ii=istart; ii<iend+1; ii++) {
1870:         for (jj=jstart; jj<jend+1; jj++) {
1871:           if (st == DMDA_STENCIL_BOX || !ii || !jj) { /* BOX or on the STAR */
1872:             cols[cnt++] = slot + ii + gnx*jj;
1873:           }
1874:         }
1875:       }
1876:       MatPreallocateSetLocalBlock(ltog,1,&slot,ltog,cnt,cols,dnz,onz);
1877:     }
1878:   }
1879:   MatSeqBAIJSetPreallocation(J,nc,0,dnz);
1880:   MatMPIBAIJSetPreallocation(J,nc,0,dnz,0,onz);
1881:   MatPreallocateFinalize(dnz,onz);

1883:   MatSetLocalToGlobalMapping(J,ltog,ltog);

1885:   /*
1886:     For each node in the grid: we get the neighbors in the local (on processor ordering
1887:     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1888:     PETSc ordering.
1889:   */
1890:   if (!da->prealloc_only) {
1891:     PetscCalloc1(col*col*nc*nc,&values);
1892:     for (i=xs; i<xs+nx; i++) {
1893:       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1894:       iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1895:       for (j=ys; j<ys+ny; j++) {
1896:         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1897:         jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1898:         slot = i - gxs + gnx*(j - gys);
1899:         cnt  = 0;
1900:         for (ii=istart; ii<iend+1; ii++) {
1901:           for (jj=jstart; jj<jend+1; jj++) {
1902:             if (st == DMDA_STENCIL_BOX || !ii || !jj) { /* BOX or on the STAR */
1903:               cols[cnt++] = slot + ii + gnx*jj;
1904:             }
1905:           }
1906:         }
1907:         MatSetValuesBlockedLocal(J,1,&slot,cnt,cols,values,INSERT_VALUES);
1908:       }
1909:     }
1910:     PetscFree(values);
1911:     /* do not copy values to GPU since they are all zero and not yet needed there */
1912:     MatBindToCPU(J,PETSC_TRUE);
1913:     MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);
1914:     MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);
1915:     MatBindToCPU(J,PETSC_FALSE);
1916:     MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);
1917:   }
1918:   PetscFree(cols);
1919:   return(0);
1920: }

1922: PetscErrorCode DMCreateMatrix_DA_3d_MPIBAIJ(DM da,Mat J)
1923: {
1924:   PetscErrorCode         ierr;
1925:   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
1926:   PetscInt               m,n,dim,s,*cols,k,nc,col,cnt,p,*dnz,*onz;
1927:   PetscInt               istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk;
1928:   MPI_Comm               comm;
1929:   PetscScalar            *values;
1930:   DMBoundaryType         bx,by,bz;
1931:   DMDAStencilType        st;
1932:   ISLocalToGlobalMapping ltog;

1935:   /*
1936:          nc - number of components per grid point
1937:          col - number of colors needed in one direction for single component problem

1939:   */
1940:   DMDAGetInfo(da,&dim,&m,&n,&p,NULL,NULL,NULL,&nc,&s,&bx,&by,&bz,&st);
1941:   col  = 2*s + 1;

1943:   DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);
1944:   DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);
1945:   PetscObjectGetComm((PetscObject)da,&comm);

1947:   PetscMalloc1(col*col*col,&cols);

1949:   DMGetLocalToGlobalMapping(da,&ltog);

1951:   /* determine the matrix preallocation information */
1952:   MatPreallocateInitialize(comm,nx*ny*nz,nx*ny*nz,dnz,onz);
1953:   for (i=xs; i<xs+nx; i++) {
1954:     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1955:     iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1956:     for (j=ys; j<ys+ny; j++) {
1957:       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1958:       jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1959:       for (k=zs; k<zs+nz; k++) {
1960:         kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1961:         kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));

1963:         slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);

1965:         /* Find block columns in block row */
1966:         cnt = 0;
1967:         for (ii=istart; ii<iend+1; ii++) {
1968:           for (jj=jstart; jj<jend+1; jj++) {
1969:             for (kk=kstart; kk<kend+1; kk++) {
1970:               if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
1971:                 cols[cnt++] = slot + ii + gnx*jj + gnx*gny*kk;
1972:               }
1973:             }
1974:           }
1975:         }
1976:         MatPreallocateSetLocalBlock(ltog,1,&slot,ltog,cnt,cols,dnz,onz);
1977:       }
1978:     }
1979:   }
1980:   MatSeqBAIJSetPreallocation(J,nc,0,dnz);
1981:   MatMPIBAIJSetPreallocation(J,nc,0,dnz,0,onz);
1982:   MatPreallocateFinalize(dnz,onz);

1984:   MatSetLocalToGlobalMapping(J,ltog,ltog);

1986:   /*
1987:     For each node in the grid: we get the neighbors in the local (on processor ordering
1988:     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1989:     PETSc ordering.
1990:   */
1991:   if (!da->prealloc_only) {
1992:     PetscCalloc1(col*col*col*nc*nc,&values);
1993:     for (i=xs; i<xs+nx; i++) {
1994:       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1995:       iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1996:       for (j=ys; j<ys+ny; j++) {
1997:         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1998:         jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1999:         for (k=zs; k<zs+nz; k++) {
2000:           kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
2001:           kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));

2003:           slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);

2005:           cnt = 0;
2006:           for (ii=istart; ii<iend+1; ii++) {
2007:             for (jj=jstart; jj<jend+1; jj++) {
2008:               for (kk=kstart; kk<kend+1; kk++) {
2009:                 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
2010:                   cols[cnt++] = slot + ii + gnx*jj + gnx*gny*kk;
2011:                 }
2012:               }
2013:             }
2014:           }
2015:           MatSetValuesBlockedLocal(J,1,&slot,cnt,cols,values,INSERT_VALUES);
2016:         }
2017:       }
2018:     }
2019:     PetscFree(values);
2020:     /* do not copy values to GPU since they are all zero and not yet needed there */
2021:     MatBindToCPU(J,PETSC_TRUE);
2022:     MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);
2023:     MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);
2024:     MatBindToCPU(J,PETSC_FALSE);
2025:     MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);
2026:   }
2027:   PetscFree(cols);
2028:   return(0);
2029: }

2031: /*
2032:   This helper is for of SBAIJ preallocation, to discard the lower-triangular values which are difficult to
2033:   identify in the local ordering with periodic domain.
2034: */
2035: static PetscErrorCode L2GFilterUpperTriangular(ISLocalToGlobalMapping ltog,PetscInt *row,PetscInt *cnt,PetscInt col[])
2036: {
2038:   PetscInt       i,n;

2041:   ISLocalToGlobalMappingApplyBlock(ltog,1,row,row);
2042:   ISLocalToGlobalMappingApplyBlock(ltog,*cnt,col,col);
2043:   for (i=0,n=0; i<*cnt; i++) {
2044:     if (col[i] >= *row) col[n++] = col[i];
2045:   }
2046:   *cnt = n;
2047:   return(0);
2048: }

2050: PetscErrorCode DMCreateMatrix_DA_2d_MPISBAIJ(DM da,Mat J)
2051: {
2052:   PetscErrorCode         ierr;
2053:   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
2054:   PetscInt               m,n,dim,s,*cols,nc,col,cnt,*dnz,*onz;
2055:   PetscInt               istart,iend,jstart,jend,ii,jj;
2056:   MPI_Comm               comm;
2057:   PetscScalar            *values;
2058:   DMBoundaryType         bx,by;
2059:   DMDAStencilType        st;
2060:   ISLocalToGlobalMapping ltog;

2063:   /*
2064:      nc - number of components per grid point
2065:      col - number of colors needed in one direction for single component problem
2066:   */
2067:   DMDAGetInfo(da,&dim,&m,&n,NULL,NULL,NULL,NULL,&nc,&s,&bx,&by,NULL,&st);
2068:   col  = 2*s + 1;

2070:   DMDAGetCorners(da,&xs,&ys,NULL,&nx,&ny,NULL);
2071:   DMDAGetGhostCorners(da,&gxs,&gys,NULL,&gnx,&gny,NULL);
2072:   PetscObjectGetComm((PetscObject)da,&comm);

2074:   PetscMalloc1(col*col*nc*nc,&cols);

2076:   DMGetLocalToGlobalMapping(da,&ltog);

2078:   /* determine the matrix preallocation information */
2079:   MatPreallocateInitialize(comm,nx*ny,nx*ny,dnz,onz);
2080:   for (i=xs; i<xs+nx; i++) {
2081:     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
2082:     iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
2083:     for (j=ys; j<ys+ny; j++) {
2084:       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
2085:       jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
2086:       slot   = i - gxs + gnx*(j - gys);

2088:       /* Find block columns in block row */
2089:       cnt = 0;
2090:       for (ii=istart; ii<iend+1; ii++) {
2091:         for (jj=jstart; jj<jend+1; jj++) {
2092:           if (st == DMDA_STENCIL_BOX || !ii || !jj) {
2093:             cols[cnt++] = slot + ii + gnx*jj;
2094:           }
2095:         }
2096:       }
2097:       L2GFilterUpperTriangular(ltog,&slot,&cnt,cols);
2098:       MatPreallocateSymmetricSetBlock(slot,cnt,cols,dnz,onz);
2099:     }
2100:   }
2101:   MatSeqSBAIJSetPreallocation(J,nc,0,dnz);
2102:   MatMPISBAIJSetPreallocation(J,nc,0,dnz,0,onz);
2103:   MatPreallocateFinalize(dnz,onz);

2105:   MatSetLocalToGlobalMapping(J,ltog,ltog);

2107:   /*
2108:     For each node in the grid: we get the neighbors in the local (on processor ordering
2109:     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
2110:     PETSc ordering.
2111:   */
2112:   if (!da->prealloc_only) {
2113:     PetscCalloc1(col*col*nc*nc,&values);
2114:     for (i=xs; i<xs+nx; i++) {
2115:       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
2116:       iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
2117:       for (j=ys; j<ys+ny; j++) {
2118:         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
2119:         jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
2120:         slot   = i - gxs + gnx*(j - gys);

2122:         /* Find block columns in block row */
2123:         cnt = 0;
2124:         for (ii=istart; ii<iend+1; ii++) {
2125:           for (jj=jstart; jj<jend+1; jj++) {
2126:             if (st == DMDA_STENCIL_BOX || !ii || !jj) {
2127:               cols[cnt++] = slot + ii + gnx*jj;
2128:             }
2129:           }
2130:         }
2131:         L2GFilterUpperTriangular(ltog,&slot,&cnt,cols);
2132:         MatSetValuesBlocked(J,1,&slot,cnt,cols,values,INSERT_VALUES);
2133:       }
2134:     }
2135:     PetscFree(values);
2136:     /* do not copy values to GPU since they are all zero and not yet needed there */
2137:     MatBindToCPU(J,PETSC_TRUE);
2138:     MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);
2139:     MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);
2140:     MatBindToCPU(J,PETSC_FALSE);
2141:     MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);
2142:   }
2143:   PetscFree(cols);
2144:   return(0);
2145: }

2147: PetscErrorCode DMCreateMatrix_DA_3d_MPISBAIJ(DM da,Mat J)
2148: {
2149:   PetscErrorCode         ierr;
2150:   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
2151:   PetscInt               m,n,dim,s,*cols,k,nc,col,cnt,p,*dnz,*onz;
2152:   PetscInt               istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk;
2153:   MPI_Comm               comm;
2154:   PetscScalar            *values;
2155:   DMBoundaryType         bx,by,bz;
2156:   DMDAStencilType        st;
2157:   ISLocalToGlobalMapping ltog;

2160:   /*
2161:      nc - number of components per grid point
2162:      col - number of colors needed in one direction for single component problem
2163:   */
2164:   DMDAGetInfo(da,&dim,&m,&n,&p,NULL,NULL,NULL,&nc,&s,&bx,&by,&bz,&st);
2165:   col  = 2*s + 1;

2167:   DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);
2168:   DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);
2169:   PetscObjectGetComm((PetscObject)da,&comm);

2171:   /* create the matrix */
2172:   PetscMalloc1(col*col*col,&cols);

2174:   DMGetLocalToGlobalMapping(da,&ltog);

2176:   /* determine the matrix preallocation information */
2177:   MatPreallocateInitialize(comm,nx*ny*nz,nx*ny*nz,dnz,onz);
2178:   for (i=xs; i<xs+nx; i++) {
2179:     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
2180:     iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
2181:     for (j=ys; j<ys+ny; j++) {
2182:       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
2183:       jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
2184:       for (k=zs; k<zs+nz; k++) {
2185:         kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
2186:         kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));

2188:         slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);

2190:         /* Find block columns in block row */
2191:         cnt = 0;
2192:         for (ii=istart; ii<iend+1; ii++) {
2193:           for (jj=jstart; jj<jend+1; jj++) {
2194:             for (kk=kstart; kk<kend+1; kk++) {
2195:               if ((st == DMDA_STENCIL_BOX) || (!ii && !jj) || (!jj && !kk) || (!ii && !kk)) {
2196:                 cols[cnt++] = slot + ii + gnx*jj + gnx*gny*kk;
2197:               }
2198:             }
2199:           }
2200:         }
2201:         L2GFilterUpperTriangular(ltog,&slot,&cnt,cols);
2202:         MatPreallocateSymmetricSetBlock(slot,cnt,cols,dnz,onz);
2203:       }
2204:     }
2205:   }
2206:   MatSeqSBAIJSetPreallocation(J,nc,0,dnz);
2207:   MatMPISBAIJSetPreallocation(J,nc,0,dnz,0,onz);
2208:   MatPreallocateFinalize(dnz,onz);

2210:   MatSetLocalToGlobalMapping(J,ltog,ltog);

2212:   /*
2213:     For each node in the grid: we get the neighbors in the local (on processor ordering
2214:     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
2215:     PETSc ordering.
2216:   */
2217:   if (!da->prealloc_only) {
2218:     PetscCalloc1(col*col*col*nc*nc,&values);
2219:     for (i=xs; i<xs+nx; i++) {
2220:       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
2221:       iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
2222:       for (j=ys; j<ys+ny; j++) {
2223:         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
2224:         jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
2225:         for (k=zs; k<zs+nz; k++) {
2226:           kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
2227:           kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));

2229:           slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);

2231:           cnt = 0;
2232:           for (ii=istart; ii<iend+1; ii++) {
2233:             for (jj=jstart; jj<jend+1; jj++) {
2234:               for (kk=kstart; kk<kend+1; kk++) {
2235:                 if ((st == DMDA_STENCIL_BOX) || (!ii && !jj) || (!jj && !kk) || (!ii && !kk)) {
2236:                   cols[cnt++] = slot + ii + gnx*jj + gnx*gny*kk;
2237:                 }
2238:               }
2239:             }
2240:           }
2241:           L2GFilterUpperTriangular(ltog,&slot,&cnt,cols);
2242:           MatSetValuesBlocked(J,1,&slot,cnt,cols,values,INSERT_VALUES);
2243:         }
2244:       }
2245:     }
2246:     PetscFree(values);
2247:     /* do not copy values to GPU since they are all zero and not yet needed there */
2248:     MatBindToCPU(J,PETSC_TRUE);
2249:     MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);
2250:     MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);
2251:     MatBindToCPU(J,PETSC_FALSE);
2252:     MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);
2253:   }
2254:   PetscFree(cols);
2255:   return(0);
2256: }

2258: /* ---------------------------------------------------------------------------------*/

2260: PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ_Fill(DM da,Mat J)
2261: {
2262:   PetscErrorCode         ierr;
2263:   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
2264:   PetscInt               m,n,dim,s,*cols,k,nc,row,col,cnt, maxcnt = 0,l,p,*dnz,*onz;
2265:   PetscInt               istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk,M,N,P;
2266:   DM_DA                  *dd = (DM_DA*)da->data;
2267:   PetscInt               ifill_col,*dfill = dd->dfill,*ofill = dd->ofill;
2268:   MPI_Comm               comm;
2269:   PetscScalar            *values;
2270:   DMBoundaryType         bx,by,bz;
2271:   ISLocalToGlobalMapping ltog;
2272:   DMDAStencilType        st;
2273:   PetscBool              removedups = PETSC_FALSE;

2276:   /*
2277:          nc - number of components per grid point
2278:          col - number of colors needed in one direction for single component problem

2280:   */
2281:   DMDAGetInfo(da,&dim,&m,&n,&p,&M,&N,&P,&nc,&s,&bx,&by,&bz,&st);
2282:   col  = 2*s + 1;
2283:   if (bx == DM_BOUNDARY_PERIODIC && (m % col)) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in X is divisible\n\
2284:                  by 2*stencil_width + 1\n");
2285:   if (by == DM_BOUNDARY_PERIODIC && (n % col)) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Y is divisible\n\
2286:                  by 2*stencil_width + 1\n");
2287:   if (bz == DM_BOUNDARY_PERIODIC && (p % col)) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Z is divisible\n\
2288:                  by 2*stencil_width + 1\n");

2290:   /*
2291:        With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times
2292:        because of "wrapping" around the end of the domain hitting an entry already counted in the other direction.
2293:   */
2294:   if (M == 1 && 2*s >= m) removedups = PETSC_TRUE;
2295:   if (N == 1 && 2*s >= n) removedups = PETSC_TRUE;
2296:   if (P == 1 && 2*s >= p) removedups = PETSC_TRUE;

2298:   DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);
2299:   DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);
2300:   PetscObjectGetComm((PetscObject)da,&comm);

2302:   PetscMalloc1(col*col*col*nc,&cols);
2303:   DMGetLocalToGlobalMapping(da,&ltog);

2305:   /* determine the matrix preallocation information */
2306:   MatPreallocateInitialize(comm,nc*nx*ny*nz,nc*nx*ny*nz,dnz,onz);

2308:   MatSetBlockSize(J,nc);
2309:   for (i=xs; i<xs+nx; i++) {
2310:     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
2311:     iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
2312:     for (j=ys; j<ys+ny; j++) {
2313:       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
2314:       jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
2315:       for (k=zs; k<zs+nz; k++) {
2316:         kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
2317:         kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));

2319:         slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);

2321:         for (l=0; l<nc; l++) {
2322:           cnt = 0;
2323:           for (ii=istart; ii<iend+1; ii++) {
2324:             for (jj=jstart; jj<jend+1; jj++) {
2325:               for (kk=kstart; kk<kend+1; kk++) {
2326:                 if (ii || jj || kk) {
2327:                   if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
2328:                     for (ifill_col=ofill[l]; ifill_col<ofill[l+1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc*(slot + ii + gnx*jj + gnx*gny*kk);
2329:                   }
2330:                 } else {
2331:                   if (dfill) {
2332:                     for (ifill_col=dfill[l]; ifill_col<dfill[l+1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc*(slot + ii + gnx*jj + gnx*gny*kk);
2333:                   } else {
2334:                     for (ifill_col=0; ifill_col<nc; ifill_col++) cols[cnt++] = ifill_col + nc*(slot + ii + gnx*jj + gnx*gny*kk);
2335:                   }
2336:                 }
2337:               }
2338:             }
2339:           }
2340:           row  = l + nc*(slot);
2341:           maxcnt = PetscMax(maxcnt,cnt);
2342:           if (removedups) {
2343:             MatPreallocateSetLocalRemoveDups(ltog,1,&row,ltog,cnt,cols,dnz,onz);
2344:           } else {
2345:             MatPreallocateSetLocal(ltog,1,&row,ltog,cnt,cols,dnz,onz);
2346:           }
2347:         }
2348:       }
2349:     }
2350:   }
2351:   MatSeqAIJSetPreallocation(J,0,dnz);
2352:   MatMPIAIJSetPreallocation(J,0,dnz,0,onz);
2353:   MatPreallocateFinalize(dnz,onz);
2354:   MatSetLocalToGlobalMapping(J,ltog,ltog);

2356:   /*
2357:     For each node in the grid: we get the neighbors in the local (on processor ordering
2358:     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
2359:     PETSc ordering.
2360:   */
2361:   if (!da->prealloc_only) {
2362:     PetscCalloc1(maxcnt,&values);
2363:     for (i=xs; i<xs+nx; i++) {
2364:       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
2365:       iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
2366:       for (j=ys; j<ys+ny; j++) {
2367:         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
2368:         jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
2369:         for (k=zs; k<zs+nz; k++) {
2370:           kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
2371:           kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));

2373:           slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);

2375:           for (l=0; l<nc; l++) {
2376:             cnt = 0;
2377:             for (ii=istart; ii<iend+1; ii++) {
2378:               for (jj=jstart; jj<jend+1; jj++) {
2379:                 for (kk=kstart; kk<kend+1; kk++) {
2380:                   if (ii || jj || kk) {
2381:                     if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
2382:                       for (ifill_col=ofill[l]; ifill_col<ofill[l+1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc*(slot + ii + gnx*jj + gnx*gny*kk);
2383:                     }
2384:                   } else {
2385:                     if (dfill) {
2386:                       for (ifill_col=dfill[l]; ifill_col<dfill[l+1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc*(slot + ii + gnx*jj + gnx*gny*kk);
2387:                     } else {
2388:                       for (ifill_col=0; ifill_col<nc; ifill_col++) cols[cnt++] = ifill_col + nc*(slot + ii + gnx*jj + gnx*gny*kk);
2389:                     }
2390:                   }
2391:                 }
2392:               }
2393:             }
2394:             row  = l + nc*(slot);
2395:             MatSetValuesLocal(J,1,&row,cnt,cols,values,INSERT_VALUES);
2396:           }
2397:         }
2398:       }
2399:     }
2400:     PetscFree(values);
2401:     /* do not copy values to GPU since they are all zero and not yet needed there */
2402:     MatBindToCPU(J,PETSC_TRUE);
2403:     MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);
2404:     MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);
2405:     MatBindToCPU(J,PETSC_FALSE);
2406:     MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);
2407:   }
2408:   PetscFree(cols);
2409:   return(0);
2410: }