Actual source code: fdda.c

  1: #include <petsc/private/dmdaimpl.h>
  2: #include <petscmat.h>
  3: #include <petscbt.h>

  5: extern PetscErrorCode DMCreateColoring_DA_1d_MPIAIJ(DM, ISColoringType, ISColoring *);
  6: extern PetscErrorCode DMCreateColoring_DA_2d_MPIAIJ(DM, ISColoringType, ISColoring *);
  7: extern PetscErrorCode DMCreateColoring_DA_2d_5pt_MPIAIJ(DM, ISColoringType, ISColoring *);
  8: extern PetscErrorCode DMCreateColoring_DA_3d_MPIAIJ(DM, ISColoringType, ISColoring *);

 10: /*
 11:    For a ghost index i that may be negative or greater than or equal to the upper bound m,
 12:   this maps it into the range 0:m-1 using periodicity
 13: */
 14: #define SetInRange(i, m) ((i < 0) ? m + i : ((i >= m) ? i - m : i))
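
/*
   A worked check of the macro (illustrative, not from the original source): with m = 10,
   SetInRange(-1, 10) gives 9 and SetInRange(10, 10) gives 0, while any i already in 0..9
   is returned unchanged. Note the macro assumes the ghost index is at most one period
   outside the domain, i.e. -m <= i < 2*m.
*/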

 16: static PetscErrorCode DMDASetBlockFills_Private(const PetscInt *dfill, PetscInt w, PetscInt **rfill)
 17: {
 18:   PetscInt i, j, nz, *fill;

 20:   PetscFunctionBegin;
 21:   if (!dfill) PetscFunctionReturn(PETSC_SUCCESS);

 23:   /* count the number of nonzeros */
 24:   nz = 0;
 25:   for (i = 0; i < w; i++) {
 26:     for (j = 0; j < w; j++) {
 27:       if (dfill[w * i + j]) nz++;
 28:     }
 29:   }
 30:   PetscCall(PetscMalloc1(nz + w + 1, &fill));
 31:   /* construct modified CSR storage of nonzero structure */
 32:   /*  fill[0 .. w] marks the start of each row of column indices (and the end of the last row),
 33:    so fill[1] - fill[0] gives the number of nonzeros in the first row, etc. */
 34:   nz = w + 1;
 35:   for (i = 0; i < w; i++) {
 36:     fill[i] = nz;
 37:     for (j = 0; j < w; j++) {
 38:       if (dfill[w * i + j]) {
 39:         fill[nz] = j;
 40:         nz++;
 41:       }
 42:     }
 43:   }
 44:   fill[w] = nz;

 46:   *rfill = fill;
 47:   PetscFunctionReturn(PETSC_SUCCESS);
 48: }
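
/*
   A worked example of the modified CSR layout built above (illustrative): for w = 3 and

     dfill = {1, 0, 0,
              1, 1, 0,
              0, 1, 1}

   there are nz = 5 nonzeros, so the routine returns the array of length nz + w + 1 = 9

     fill = {4, 5, 7, 9,  0,  0, 1,  1, 2}

   where fill[0..2] are the starting offsets of each row's column indices, fill[3] = 9 is
   the total length, and the tail lists the column indices row by row.
*/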

 50: static PetscErrorCode DMDASetBlockFillsSparse_Private(const PetscInt *dfillsparse, PetscInt w, PetscInt **rfill)
 51: {
 52:   PetscInt nz;

 54:   PetscFunctionBegin;
 55:   if (!dfillsparse) PetscFunctionReturn(PETSC_SUCCESS);

 57:   /* Determine the number of nonzeros */
 58:   nz = (dfillsparse[w] - w - 1);

 60:   /* Allocate space for our copy of the given sparse matrix representation. */
 61:   PetscCall(PetscMalloc1(nz + w + 1, rfill));
 62:   PetscCall(PetscArraycpy(*rfill, dfillsparse, nz + w + 1));
 63:   PetscFunctionReturn(PETSC_SUCCESS);
 64: }

 66: static PetscErrorCode DMDASetBlockFills_Private2(DM_DA *dd)
 67: {
 68:   PetscInt i, k, cnt = 1;

 70:   PetscFunctionBegin;

 72:   /* ofillcols tracks the columns of ofill that have any nonzero in them; the value in each location is the number of
 73:    columns to the left with any nonzeros in them plus 1 */
 74:   PetscCall(PetscCalloc1(dd->w, &dd->ofillcols));
 75:   for (i = 0; i < dd->w; i++) {
 76:     for (k = dd->ofill[i]; k < dd->ofill[i + 1]; k++) dd->ofillcols[dd->ofill[k]] = 1;
 77:   }
 78:   for (i = 0; i < dd->w; i++) {
 79:     if (dd->ofillcols[i]) dd->ofillcols[i] = cnt++;
 80:   }
 81:   PetscFunctionReturn(PETSC_SUCCESS);
 82: }
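
/*
   A small example of the numbering (illustrative): with w = 3 and off-diagonal nonzeros
   only in columns 0 and 2, the loops above produce ofillcols = {1, 0, 2}; every column of
   ofill that contains a nonzero is numbered consecutively starting from 1, and empty
   columns remain 0.
*/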

 84: /*@
 85:   DMDASetBlockFills - Sets the fill pattern in each block for a multi-component problem
 86:   of the matrix returned by `DMCreateMatrix()`.

 88:   Logically Collective

 90:   Input Parameters:
 91: + da    - the distributed array
 92: . dfill - the fill pattern in the diagonal block (may be `NULL`, means use dense block)
 93: - ofill - the fill pattern in the off-diagonal blocks

 95:   Level: developer

 97:   Notes:
 98:   This only makes sense when you are solving a multicomponent problem while using the
 99:   `MATMPIAIJ` matrix format

101:   The format for `dfill` and `ofill` is a two-dimensional dof-by-dof matrix with 1 entries
102:   representing coupling and 0 entries for missing coupling. For example
103: .vb
104:             dfill[9] = {1, 0, 0,
105:                         1, 1, 0,
106:                         0, 1, 1}
107: .ve
108:   means that row 0 is coupled only with itself in the diagonal block, row 1 is coupled with
109:   itself and row 0 (in the diagonal block), and row 2 is coupled with itself and row 1 (in the
110:   diagonal block).

112:   `DMDASetGetMatrix()` allows you to provide general code for those more complicated nonzero patterns than
113:   can be represented in the `dfill`, `ofill` format

115:   Contributed by\:
116:   Glenn Hammond

118: .seealso: [](sec_struct), `DM`, `DMDA`, `DMCreateMatrix()`, `DMDASetGetMatrix()`, `DMSetMatrixPreallocateOnly()`, `DMDASetBlockFillsSparse()`
119: @*/
120: PetscErrorCode DMDASetBlockFills(DM da, const PetscInt *dfill, const PetscInt *ofill)
121: {
122:   DM_DA *dd = (DM_DA *)da->data;

124:   PetscFunctionBegin;
125:   /* save the given dfill and ofill information */
126:   PetscCall(DMDASetBlockFills_Private(dfill, dd->w, &dd->dfill));
127:   PetscCall(DMDASetBlockFills_Private(ofill, dd->w, &dd->ofill));

129:   /* count nonzeros in ofill columns */
130:   PetscCall(DMDASetBlockFills_Private2(dd));
131:   PetscFunctionReturn(PETSC_SUCCESS);
132: }
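
/*
   A minimal usage sketch (assumes a DMDA da created with dof = 3; error handling and the
   surrounding setup are elided): install the lower-bidiagonal diagonal-block pattern from
   the manual page above, couple each component only to itself in the off-diagonal
   (neighboring-point) blocks, and let DMCreateMatrix() use the reduced pattern.

     Mat      J;
     PetscInt dfill[9] = {1, 0, 0,
                          1, 1, 0,
                          0, 1, 1};
     PetscInt ofill[9] = {1, 0, 0,
                          0, 1, 0,
                          0, 0, 1};
     PetscCall(DMDASetBlockFills(da, dfill, ofill));
     PetscCall(DMCreateMatrix(da, &J));
*/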

134: /*@
135:   DMDASetBlockFillsSparse - Sets the fill pattern in each block for a multi-component problem
136:   of the matrix returned by `DMCreateMatrix()`, using sparse representations
137:   of fill patterns.

139:   Logically Collective

141:   Input Parameters:
142: + da          - the distributed array
143: . dfillsparse - the sparse fill pattern in the diagonal block (may be `NULL`, means use dense block)
144: - ofillsparse - the sparse fill pattern in the off-diagonal blocks

146:   Level: developer

148:   Notes:
149:   This only makes sense when you are solving a multicomponent problem while using the
150:   `MATMPIAIJ` matrix format

152:   The format for `dfillsparse` and `ofillsparse` is a sparse representation of a
153:   dof-by-dof matrix with 1 entries representing coupling and 0 entries
154:   for missing coupling. The sparse representation is a one-dimensional
155:   array of length nz + dof + 1, where nz is the number of nonzeros in
156:   the matrix. The first dof entries in the array give the
157:   starting array indices of each row's items in the rest of the array,
158:   the (dof+1)st entry contains the value nz + dof + 1 (i.e. the total length of the array),
159:   and the remaining nz entries give the column indices of each of
160:   the 1s within the logical 2D matrix. Each row's items within
161:   the array are the column indices of the 1s within that row
162:   of the 2D matrix. PETSc developers may recognize that this is the
163:   same format as that computed by the `DMDASetBlockFills_Private()`
164:   function from a dense 2D matrix representation.

166:   `DMDASetGetMatrix()` allows you to provide general code for those more complicated nonzero patterns than
167:   can be represented in the `dfill`, `ofill` format

169:   Contributed by\:
170:   Philip C. Roth

172: .seealso: [](sec_struct), `DM`, `DMDA`, `DMDASetBlockFills()`, `DMCreateMatrix()`, `DMDASetGetMatrix()`, `DMSetMatrixPreallocateOnly()`
173: @*/
174: PetscErrorCode DMDASetBlockFillsSparse(DM da, const PetscInt *dfillsparse, const PetscInt *ofillsparse)
175: {
176:   DM_DA *dd = (DM_DA *)da->data;

178:   PetscFunctionBegin;
179:   /* save the given dfill and ofill information */
180:   PetscCall(DMDASetBlockFillsSparse_Private(dfillsparse, dd->w, &dd->dfill));
181:   PetscCall(DMDASetBlockFillsSparse_Private(ofillsparse, dd->w, &dd->ofill));

183:   /* count nonzeros in ofill columns */
184:   PetscCall(DMDASetBlockFills_Private2(dd));
185:   PetscFunctionReturn(PETSC_SUCCESS);
186: }
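
/*
   A minimal usage sketch (assumes a DMDA da created with dof = 3): the sparse form of the
   dense dfill example from DMDASetBlockFills() has nz = 5, hence length nz + dof + 1 = 9;
   entries 0..2 are the row starts, entry 3 the total length, and the remainder the column
   indices row by row (row 0: {0}; row 1: {0, 1}; row 2: {1, 2}). The ofillsparse array
   below encodes self-only coupling with nz = 3.

     PetscInt dfillsparse[9] = {4, 5, 7, 9, 0, 0, 1, 1, 2};
     PetscInt ofillsparse[7] = {4, 5, 6, 7, 0, 1, 2};
     PetscCall(DMDASetBlockFillsSparse(da, dfillsparse, ofillsparse));
*/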

188: PetscErrorCode DMCreateColoring_DA(DM da, ISColoringType ctype, ISColoring *coloring)
189: {
190:   PetscInt       dim, m, n, p, nc;
191:   DMBoundaryType bx, by, bz;
192:   MPI_Comm       comm;
193:   PetscMPIInt    size;
194:   PetscBool      isBAIJ;
195:   DM_DA         *dd = (DM_DA *)da->data;

197:   PetscFunctionBegin;
198:   /*
199:                                   m
200:           ------------------------------------------------------
201:          |                                                     |
202:          |                                                     |
203:          |               ----------------------                |
204:          |               |                    |                |
 205:       n  |           ny  |                    |                |
206:          |               |                    |                |
207:          |               .---------------------                |
 208:          |             (xs,ys)     nx                          |
209:          |            .                                        |
210:          |         (gxs,gys)                                   |
211:          |                                                     |
212:           -----------------------------------------------------
213:   */

215:   /*
216:          nc - number of components per grid point
217:          col - number of colors needed in one direction for single component problem

219:   */
220:   PetscCall(DMDAGetInfo(da, &dim, NULL, NULL, NULL, &m, &n, &p, &nc, NULL, &bx, &by, &bz, NULL));

222:   PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
223:   PetscCallMPI(MPI_Comm_size(comm, &size));
224:   if (ctype == IS_COLORING_LOCAL) {
225:     if (size == 1) {
226:       ctype = IS_COLORING_GLOBAL;
227:     } else {
 228:       PetscCheck((dim == 1) || !((m == 1 && bx == DM_BOUNDARY_PERIODIC) || (n == 1 && by == DM_BOUNDARY_PERIODIC) || (p == 1 && bz == DM_BOUNDARY_PERIODIC)), PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "IS_COLORING_LOCAL cannot be used with a periodic boundary condition when both ends of the domain are on the same process");
229:     }
230:   }

232:   /* Tell the DMDA it has 1 degree of freedom per grid point so that the coloring for BAIJ
233:      matrices is for the blocks, not the individual matrix elements  */
234:   PetscCall(PetscStrbeginswith(da->mattype, MATBAIJ, &isBAIJ));
235:   if (!isBAIJ) PetscCall(PetscStrbeginswith(da->mattype, MATMPIBAIJ, &isBAIJ));
236:   if (!isBAIJ) PetscCall(PetscStrbeginswith(da->mattype, MATSEQBAIJ, &isBAIJ));
237:   if (isBAIJ) {
238:     dd->w  = 1;
239:     dd->xs = dd->xs / nc;
240:     dd->xe = dd->xe / nc;
241:     dd->Xs = dd->Xs / nc;
242:     dd->Xe = dd->Xe / nc;
243:   }

245:   /*
246:      We do not provide a getcoloring function in the DMDA operations because
247:    the basic DMDA does not know about matrices. We think of DMDA as being
 248:    more low-level than matrices.
249:   */
250:   if (dim == 1) PetscCall(DMCreateColoring_DA_1d_MPIAIJ(da, ctype, coloring));
251:   else if (dim == 2) PetscCall(DMCreateColoring_DA_2d_MPIAIJ(da, ctype, coloring));
252:   else if (dim == 3) PetscCall(DMCreateColoring_DA_3d_MPIAIJ(da, ctype, coloring));
 253:   else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "Not done for %" PetscInt_FMT " dimension; send mail to petsc-maint@mcs.anl.gov for code", dim);
254:   if (isBAIJ) {
255:     dd->w  = nc;
256:     dd->xs = dd->xs * nc;
257:     dd->xe = dd->xe * nc;
258:     dd->Xs = dd->Xs * nc;
259:     dd->Xe = dd->Xe * nc;
260:   }
261:   PetscFunctionReturn(PETSC_SUCCESS);
262: }
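
/*
   A usage sketch (assumes a DMDA da; the function to difference must still be supplied
   with MatFDColoringSetFunction(), and error handling is elided): the colorings produced
   here are typically consumed by MatFDColoring to assemble a Jacobian by finite
   differences.

     Mat           J;
     ISColoring    coloring;
     MatFDColoring fdcoloring;
     PetscCall(DMCreateMatrix(da, &J));
     PetscCall(DMCreateColoring(da, IS_COLORING_GLOBAL, &coloring));
     PetscCall(MatFDColoringCreate(J, coloring, &fdcoloring));
     PetscCall(MatFDColoringSetFromOptions(fdcoloring));
     PetscCall(MatFDColoringSetUp(J, coloring, fdcoloring));
     PetscCall(ISColoringDestroy(&coloring));
*/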

264: PetscErrorCode DMCreateColoring_DA_2d_MPIAIJ(DM da, ISColoringType ctype, ISColoring *coloring)
265: {
266:   PetscInt         xs, ys, nx, ny, i, j, ii, gxs, gys, gnx, gny, m, n, M, N, dim, s, k, nc, col;
267:   PetscInt         ncolors = 0;
268:   MPI_Comm         comm;
269:   DMBoundaryType   bx, by;
270:   DMDAStencilType  st;
271:   ISColoringValue *colors;
272:   DM_DA           *dd = (DM_DA *)da->data;

274:   PetscFunctionBegin;
275:   /*
276:          nc - number of components per grid point
277:          col - number of colors needed in one direction for single component problem

279:   */
280:   PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, &M, &N, NULL, &nc, &s, &bx, &by, NULL, &st));
281:   col = 2 * s + 1;
282:   PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL));
283:   PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL));
284:   PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

286:   /* special case as taught to us by Paul Hovland */
287:   if (st == DMDA_STENCIL_STAR && s == 1) {
288:     PetscCall(DMCreateColoring_DA_2d_5pt_MPIAIJ(da, ctype, coloring));
289:   } else {
290:     if (ctype == IS_COLORING_GLOBAL) {
291:       if (!dd->localcoloring) {
292:         PetscCall(PetscMalloc1(nc * nx * ny, &colors));
293:         ii = 0;
294:         for (j = ys; j < ys + ny; j++) {
295:           for (i = xs; i < xs + nx; i++) {
296:             for (k = 0; k < nc; k++) colors[ii++] = k + nc * ((i % col) + col * (j % col));
297:           }
298:         }
299:         ncolors = nc + nc * (col - 1 + col * (col - 1));
300:         PetscCall(ISColoringCreate(comm, ncolors, nc * nx * ny, colors, PETSC_OWN_POINTER, &dd->localcoloring));
301:       }
302:       *coloring = dd->localcoloring;
303:     } else if (ctype == IS_COLORING_LOCAL) {
304:       if (!dd->ghostedcoloring) {
305:         PetscCall(PetscMalloc1(nc * gnx * gny, &colors));
306:         ii = 0;
307:         for (j = gys; j < gys + gny; j++) {
308:           for (i = gxs; i < gxs + gnx; i++) {
309:             for (k = 0; k < nc; k++) {
310:               /* the complicated stuff is to handle periodic boundaries */
311:               colors[ii++] = k + nc * ((SetInRange(i, m) % col) + col * (SetInRange(j, n) % col));
312:             }
313:           }
314:         }
315:         ncolors = nc + nc * (col - 1 + col * (col - 1));
316:         PetscCall(ISColoringCreate(comm, ncolors, nc * gnx * gny, colors, PETSC_OWN_POINTER, &dd->ghostedcoloring));
317:         /* PetscIntView(ncolors,(PetscInt*)colors,0); */

319:         PetscCall(ISColoringSetType(dd->ghostedcoloring, IS_COLORING_LOCAL));
320:       }
321:       *coloring = dd->ghostedcoloring;
322:     } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_ARG_WRONG, "Unknown ISColoringType %d", (int)ctype);
323:   }
324:   PetscCall(ISColoringReference(*coloring));
325:   PetscFunctionReturn(PETSC_SUCCESS);
326: }
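
/*
   A worked instance of the color formula above (illustrative): with nc = 1 and s = 1 (so
   col = 3), node (i,j) receives color (i % 3) + 3*(j % 3). Two distinct nodes whose
   stencils can overlap differ by at most 2*s = 2 in each direction, so they differ in
   (i % 3, j % 3) and hence in color; the nc*col*col = 9 colors therefore suffice for the
   box stencil.
*/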

328: PetscErrorCode DMCreateColoring_DA_3d_MPIAIJ(DM da, ISColoringType ctype, ISColoring *coloring)
329: {
330:   PetscInt         xs, ys, nx, ny, i, j, gxs, gys, gnx, gny, m, n, p, dim, s, k, nc, col, zs, gzs, ii, l, nz, gnz, M, N, P;
331:   PetscInt         ncolors;
332:   MPI_Comm         comm;
333:   DMBoundaryType   bx, by, bz;
334:   DMDAStencilType  st;
335:   ISColoringValue *colors;
336:   DM_DA           *dd = (DM_DA *)da->data;

338:   PetscFunctionBegin;
339:   /*
340:          nc - number of components per grid point
341:          col - number of colors needed in one direction for single component problem
342:   */
343:   PetscCall(DMDAGetInfo(da, &dim, &m, &n, &p, &M, &N, &P, &nc, &s, &bx, &by, &bz, &st));
344:   col = 2 * s + 1;
 345:   PetscCheck(bx != DM_BOUNDARY_PERIODIC || (m % col) == 0, PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "For coloring efficiency ensure the number of grid points in X is divisible\n\
 346:                  by 2*stencil_width + 1\n");
 347:   PetscCheck(by != DM_BOUNDARY_PERIODIC || (n % col) == 0, PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "For coloring efficiency ensure the number of grid points in Y is divisible\n\
 348:                  by 2*stencil_width + 1\n");
 349:   PetscCheck(bz != DM_BOUNDARY_PERIODIC || (p % col) == 0, PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "For coloring efficiency ensure the number of grid points in Z is divisible\n\
 350:                  by 2*stencil_width + 1\n");

352:   PetscCall(DMDAGetCorners(da, &xs, &ys, &zs, &nx, &ny, &nz));
353:   PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, &gzs, &gnx, &gny, &gnz));
354:   PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

356:   /* create the coloring */
357:   if (ctype == IS_COLORING_GLOBAL) {
358:     if (!dd->localcoloring) {
359:       PetscCall(PetscMalloc1(nc * nx * ny * nz, &colors));
360:       ii = 0;
361:       for (k = zs; k < zs + nz; k++) {
362:         for (j = ys; j < ys + ny; j++) {
363:           for (i = xs; i < xs + nx; i++) {
364:             for (l = 0; l < nc; l++) colors[ii++] = l + nc * ((i % col) + col * (j % col) + col * col * (k % col));
365:           }
366:         }
367:       }
368:       ncolors = nc + nc * (col - 1 + col * (col - 1) + col * col * (col - 1));
369:       PetscCall(ISColoringCreate(comm, ncolors, nc * nx * ny * nz, colors, PETSC_OWN_POINTER, &dd->localcoloring));
370:     }
371:     *coloring = dd->localcoloring;
372:   } else if (ctype == IS_COLORING_LOCAL) {
373:     if (!dd->ghostedcoloring) {
374:       PetscCall(PetscMalloc1(nc * gnx * gny * gnz, &colors));
375:       ii = 0;
376:       for (k = gzs; k < gzs + gnz; k++) {
377:         for (j = gys; j < gys + gny; j++) {
378:           for (i = gxs; i < gxs + gnx; i++) {
379:             for (l = 0; l < nc; l++) {
380:               /* the complicated stuff is to handle periodic boundaries */
381:               colors[ii++] = l + nc * ((SetInRange(i, m) % col) + col * (SetInRange(j, n) % col) + col * col * (SetInRange(k, p) % col));
382:             }
383:           }
384:         }
385:       }
386:       ncolors = nc + nc * (col - 1 + col * (col - 1) + col * col * (col - 1));
387:       PetscCall(ISColoringCreate(comm, ncolors, nc * gnx * gny * gnz, colors, PETSC_OWN_POINTER, &dd->ghostedcoloring));
388:       PetscCall(ISColoringSetType(dd->ghostedcoloring, IS_COLORING_LOCAL));
389:     }
390:     *coloring = dd->ghostedcoloring;
391:   } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_ARG_WRONG, "Unknown ISColoringType %d", (int)ctype);
392:   PetscCall(ISColoringReference(*coloring));
393:   PetscFunctionReturn(PETSC_SUCCESS);
394: }

396: PetscErrorCode DMCreateColoring_DA_1d_MPIAIJ(DM da, ISColoringType ctype, ISColoring *coloring)
397: {
398:   PetscInt         xs, nx, i, i1, gxs, gnx, l, m, M, dim, s, nc, col;
399:   PetscInt         ncolors;
400:   MPI_Comm         comm;
401:   DMBoundaryType   bx;
402:   ISColoringValue *colors;
403:   DM_DA           *dd = (DM_DA *)da->data;

405:   PetscFunctionBegin;
406:   /*
407:          nc - number of components per grid point
408:          col - number of colors needed in one direction for single component problem
409:   */
410:   PetscCall(DMDAGetInfo(da, &dim, &m, NULL, NULL, &M, NULL, NULL, &nc, &s, &bx, NULL, NULL, NULL));
411:   col = 2 * s + 1;
412:   PetscCall(DMDAGetCorners(da, &xs, NULL, NULL, &nx, NULL, NULL));
413:   PetscCall(DMDAGetGhostCorners(da, &gxs, NULL, NULL, &gnx, NULL, NULL));
414:   PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

416:   /* create the coloring */
417:   if (ctype == IS_COLORING_GLOBAL) {
418:     if (!dd->localcoloring) {
419:       PetscCall(PetscMalloc1(nc * nx, &colors));
420:       if (dd->ofillcols) {
421:         PetscInt tc = 0;
422:         for (i = 0; i < nc; i++) tc += (PetscInt)(dd->ofillcols[i] > 0);
423:         i1 = 0;
424:         for (i = xs; i < xs + nx; i++) {
425:           for (l = 0; l < nc; l++) {
426:             if (dd->ofillcols[l] && (i % col)) {
427:               colors[i1++] = nc - 1 + tc * ((i % col) - 1) + dd->ofillcols[l];
428:             } else {
429:               colors[i1++] = l;
430:             }
431:           }
432:         }
433:         ncolors = nc + 2 * s * tc;
434:       } else {
435:         i1 = 0;
436:         for (i = xs; i < xs + nx; i++) {
437:           for (l = 0; l < nc; l++) colors[i1++] = l + nc * (i % col);
438:         }
439:         ncolors = nc + nc * (col - 1);
440:       }
441:       PetscCall(ISColoringCreate(comm, ncolors, nc * nx, colors, PETSC_OWN_POINTER, &dd->localcoloring));
442:     }
443:     *coloring = dd->localcoloring;
444:   } else if (ctype == IS_COLORING_LOCAL) {
445:     if (!dd->ghostedcoloring) {
446:       PetscCall(PetscMalloc1(nc * gnx, &colors));
447:       i1 = 0;
448:       for (i = gxs; i < gxs + gnx; i++) {
449:         for (l = 0; l < nc; l++) {
450:           /* the complicated stuff is to handle periodic boundaries */
451:           colors[i1++] = l + nc * (SetInRange(i, m) % col);
452:         }
453:       }
454:       ncolors = nc + nc * (col - 1);
455:       PetscCall(ISColoringCreate(comm, ncolors, nc * gnx, colors, PETSC_OWN_POINTER, &dd->ghostedcoloring));
456:       PetscCall(ISColoringSetType(dd->ghostedcoloring, IS_COLORING_LOCAL));
457:     }
458:     *coloring = dd->ghostedcoloring;
459:   } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_ARG_WRONG, "Unknown ISColoringType %d", (int)ctype);
460:   PetscCall(ISColoringReference(*coloring));
461:   PetscFunctionReturn(PETSC_SUCCESS);
462: }

464: PetscErrorCode DMCreateColoring_DA_2d_5pt_MPIAIJ(DM da, ISColoringType ctype, ISColoring *coloring)
465: {
466:   PetscInt         xs, ys, nx, ny, i, j, ii, gxs, gys, gnx, gny, m, n, dim, s, k, nc;
467:   PetscInt         ncolors;
468:   MPI_Comm         comm;
469:   DMBoundaryType   bx, by;
470:   ISColoringValue *colors;
471:   DM_DA           *dd = (DM_DA *)da->data;

473:   PetscFunctionBegin;
474:   /*
475:          nc - number of components per grid point
476:          col - number of colors needed in one direction for single component problem
477:   */
478:   PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, NULL, NULL, NULL, &nc, &s, &bx, &by, NULL, NULL));
479:   PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL));
480:   PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL));
481:   PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
482:   /* create the coloring */
483:   if (ctype == IS_COLORING_GLOBAL) {
484:     if (!dd->localcoloring) {
485:       PetscCall(PetscMalloc1(nc * nx * ny, &colors));
486:       ii = 0;
487:       for (j = ys; j < ys + ny; j++) {
488:         for (i = xs; i < xs + nx; i++) {
489:           for (k = 0; k < nc; k++) colors[ii++] = k + nc * ((3 * j + i) % 5);
490:         }
491:       }
492:       ncolors = 5 * nc;
493:       PetscCall(ISColoringCreate(comm, ncolors, nc * nx * ny, colors, PETSC_OWN_POINTER, &dd->localcoloring));
494:     }
495:     *coloring = dd->localcoloring;
496:   } else if (ctype == IS_COLORING_LOCAL) {
497:     if (!dd->ghostedcoloring) {
498:       PetscCall(PetscMalloc1(nc * gnx * gny, &colors));
499:       ii = 0;
500:       for (j = gys; j < gys + gny; j++) {
501:         for (i = gxs; i < gxs + gnx; i++) {
502:           for (k = 0; k < nc; k++) colors[ii++] = k + nc * ((3 * SetInRange(j, n) + SetInRange(i, m)) % 5);
503:         }
504:       }
505:       ncolors = 5 * nc;
506:       PetscCall(ISColoringCreate(comm, ncolors, nc * gnx * gny, colors, PETSC_OWN_POINTER, &dd->ghostedcoloring));
507:       PetscCall(ISColoringSetType(dd->ghostedcoloring, IS_COLORING_LOCAL));
508:     }
509:     *coloring = dd->ghostedcoloring;
510:   } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_ARG_WRONG, "Unknown ISColoringType %d", (int)ctype);
511:   PetscFunctionReturn(PETSC_SUCCESS);
512: }
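
/*
   Why (3*j + i) % 5 is a valid coloring here (a short check, not in the original source):
   two columns of the 5-point operator conflict only when their grid offset (di,dj) is a
   difference of two star-stencil offsets, and for every such nonzero offset the quantity
   3*dj + di is not divisible by 5; hence the 5 colors per component never collide.
*/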

514: /* =========================================================================== */
515: extern PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ(DM, Mat);
516: extern PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ_Fill(DM, Mat);
517: extern PetscErrorCode DMCreateMatrix_DA_1d_SeqAIJ_NoPreallocation(DM, Mat);
518: extern PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ(DM, Mat);
519: extern PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ_Fill(DM, Mat);
520: extern PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ(DM, Mat);
521: extern PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ_Fill(DM, Mat);
522: extern PetscErrorCode DMCreateMatrix_DA_2d_MPIBAIJ(DM, Mat);
523: extern PetscErrorCode DMCreateMatrix_DA_3d_MPIBAIJ(DM, Mat);
524: extern PetscErrorCode DMCreateMatrix_DA_2d_MPISBAIJ(DM, Mat);
525: extern PetscErrorCode DMCreateMatrix_DA_3d_MPISBAIJ(DM, Mat);
526: extern PetscErrorCode DMCreateMatrix_DA_2d_MPISELL(DM, Mat);
527: extern PetscErrorCode DMCreateMatrix_DA_3d_MPISELL(DM, Mat);
528: extern PetscErrorCode DMCreateMatrix_DA_IS(DM, Mat);

530: static PetscErrorCode MatView_MPI_DA(Mat A, PetscViewer viewer)
531: {
532:   DM                da;
533:   const char       *prefix;
534:   Mat               AA, Anatural;
535:   AO                ao;
536:   PetscInt          rstart, rend, *petsc, i;
537:   IS                is;
538:   MPI_Comm          comm;
539:   PetscViewerFormat format;
540:   PetscBool         flag;

542:   PetscFunctionBegin;
543:   /* Check whether we are just printing info, in which case MatView() already viewed everything we wanted to view */
544:   PetscCall(PetscViewerGetFormat(viewer, &format));
545:   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) PetscFunctionReturn(PETSC_SUCCESS);

547:   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
548:   PetscCall(MatGetDM(A, &da));
549:   PetscCheck(da, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Matrix not generated from a DMDA");

551:   PetscCall(DMDAGetAO(da, &ao));
552:   PetscCall(MatGetOwnershipRange(A, &rstart, &rend));
553:   PetscCall(PetscMalloc1(rend - rstart, &petsc));
554:   for (i = rstart; i < rend; i++) petsc[i - rstart] = i;
555:   PetscCall(AOApplicationToPetsc(ao, rend - rstart, petsc));
556:   PetscCall(ISCreateGeneral(comm, rend - rstart, petsc, PETSC_OWN_POINTER, &is));

558:   /* call viewer on natural ordering */
559:   PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATMPISELL, &flag));
560:   if (flag) {
561:     PetscCall(MatConvert(A, MATAIJ, MAT_INITIAL_MATRIX, &AA));
562:     A = AA;
563:   }
564:   PetscCall(MatCreateSubMatrix(A, is, is, MAT_INITIAL_MATRIX, &Anatural));
565:   PetscCall(ISDestroy(&is));
566:   PetscCall(PetscObjectGetOptionsPrefix((PetscObject)A, &prefix));
567:   PetscCall(PetscObjectSetOptionsPrefix((PetscObject)Anatural, prefix));
568:   PetscCall(PetscObjectSetName((PetscObject)Anatural, ((PetscObject)A)->name));
569:   ((PetscObject)Anatural)->donotPetscObjectPrintClassNamePrefixType = PETSC_TRUE;
570:   PetscCall(MatView(Anatural, viewer));
571:   ((PetscObject)Anatural)->donotPetscObjectPrintClassNamePrefixType = PETSC_FALSE;
572:   PetscCall(MatDestroy(&Anatural));
573:   if (flag) PetscCall(MatDestroy(&AA));
574:   PetscFunctionReturn(PETSC_SUCCESS);
575: }

577: static PetscErrorCode MatLoad_MPI_DA(Mat A, PetscViewer viewer)
578: {
579:   DM       da;
580:   Mat      Anatural, Aapp;
581:   AO       ao;
582:   PetscInt rstart, rend, *app, i, m, n, M, N;
583:   IS       is;
584:   MPI_Comm comm;

586:   PetscFunctionBegin;
587:   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
588:   PetscCall(MatGetDM(A, &da));
589:   PetscCheck(da, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Matrix not generated from a DMDA");

591:   /* Load the matrix in natural ordering */
592:   PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &Anatural));
593:   PetscCall(MatSetType(Anatural, ((PetscObject)A)->type_name));
594:   PetscCall(MatGetSize(A, &M, &N));
595:   PetscCall(MatGetLocalSize(A, &m, &n));
596:   PetscCall(MatSetSizes(Anatural, m, n, M, N));
597:   PetscCall(MatLoad(Anatural, viewer));

599:   /* Map natural ordering to application ordering and create IS */
600:   PetscCall(DMDAGetAO(da, &ao));
601:   PetscCall(MatGetOwnershipRange(Anatural, &rstart, &rend));
602:   PetscCall(PetscMalloc1(rend - rstart, &app));
603:   for (i = rstart; i < rend; i++) app[i - rstart] = i;
604:   PetscCall(AOPetscToApplication(ao, rend - rstart, app));
605:   PetscCall(ISCreateGeneral(comm, rend - rstart, app, PETSC_OWN_POINTER, &is));

607:   /* Do permutation and replace header */
608:   PetscCall(MatCreateSubMatrix(Anatural, is, is, MAT_INITIAL_MATRIX, &Aapp));
609:   PetscCall(MatHeaderReplace(A, &Aapp));
610:   PetscCall(ISDestroy(&is));
611:   PetscCall(MatDestroy(&Anatural));
612:   PetscFunctionReturn(PETSC_SUCCESS);
613: }

615: PetscErrorCode DMCreateMatrix_DA(DM da, Mat *J)
616: {
617:   PetscInt    dim, dof, nx, ny, nz, dims[3], starts[3], M, N, P;
618:   Mat         A;
619:   MPI_Comm    comm;
620:   MatType     Atype;
621:   MatType     mtype;
622:   PetscMPIInt size;
623:   DM_DA      *dd    = (DM_DA *)da->data;
624:   void (*aij)(void) = NULL, (*baij)(void) = NULL, (*sbaij)(void) = NULL, (*sell)(void) = NULL, (*is)(void) = NULL;

626:   PetscFunctionBegin;
627:   PetscCall(MatInitializePackage());
628:   mtype = da->mattype;

630:   /*
631:                                   m
632:           ------------------------------------------------------
633:          |                                                     |
634:          |                                                     |
635:          |               ----------------------                |
636:          |               |                    |                |
637:       n  |           ny  |                    |                |
638:          |               |                    |                |
639:          |               .---------------------                |
640:          |             (xs,ys)     nx                          |
641:          |            .                                        |
642:          |         (gxs,gys)                                   |
643:          |                                                     |
644:           -----------------------------------------------------
645:   */

647:   /*
648:          nc - number of components per grid point
649:          col - number of colors needed in one direction for single component problem
650:   */
651:   M   = dd->M;
652:   N   = dd->N;
653:   P   = dd->P;
654:   dim = da->dim;
655:   dof = dd->w;
656:   /* PetscCall(DMDAGetInfo(da,&dim,&M,&N,&P,NULL,NULL,NULL,&dof,NULL,NULL,NULL,NULL,NULL)); */
657:   PetscCall(DMDAGetCorners(da, NULL, NULL, NULL, &nx, &ny, &nz));
658:   PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
659:   PetscCall(MatCreate(comm, &A));
660:   PetscCall(MatSetSizes(A, dof * nx * ny * nz, dof * nx * ny * nz, dof * M * N * P, dof * M * N * P));
661:   PetscCall(MatSetType(A, mtype));
662:   PetscCall(MatSetFromOptions(A));
663:   if (dof * nx * ny * nz < da->bind_below) {
664:     PetscCall(MatSetBindingPropagates(A, PETSC_TRUE));
665:     PetscCall(MatBindToCPU(A, PETSC_TRUE));
666:   }
667:   PetscCall(MatSetDM(A, da));
668:   if (da->structure_only) PetscCall(MatSetOption(A, MAT_STRUCTURE_ONLY, PETSC_TRUE));
669:   PetscCall(MatGetType(A, &Atype));
670:   /*
671:      We do not provide a getmatrix function in the DMDA operations because
 672:    the basic DMDA does not know about matrices. We think of DMDA as being
 673:    more low-level than matrices. This is somewhat inconsistent, since at other
 674:    times we think of DMDA as higher level than matrices.

676:      We could switch based on Atype (or mtype), but we do not since the
677:    specialized setting routines depend only on the particular preallocation
678:    details of the matrix, not the type itself.
679:   */
680:   if (!da->prealloc_skip) { // Flag is likely set when user intends to use MatSetPreallocationCOO()
681:     PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatMPIAIJSetPreallocation_C", &aij));
682:     if (!aij) PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatSeqAIJSetPreallocation_C", &aij));
683:     if (!aij) {
684:       PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatMPIBAIJSetPreallocation_C", &baij));
685:       if (!baij) PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatSeqBAIJSetPreallocation_C", &baij));
686:       if (!baij) {
687:         PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatMPISBAIJSetPreallocation_C", &sbaij));
688:         if (!sbaij) PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatSeqSBAIJSetPreallocation_C", &sbaij));
689:         if (!sbaij) {
690:           PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatMPISELLSetPreallocation_C", &sell));
691:           if (!sell) PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatSeqSELLSetPreallocation_C", &sell));
692:         }
693:         if (!sell) PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatISSetPreallocation_C", &is));
694:       }
695:     }
696:   }
697:   if (aij) {
698:     if (dim == 1) {
699:       if (dd->ofill) {
700:         PetscCall(DMCreateMatrix_DA_1d_MPIAIJ_Fill(da, A));
701:       } else {
702:         DMBoundaryType bx;
703:         PetscMPIInt    size;
704:         PetscCall(DMDAGetInfo(da, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, &bx, NULL, NULL, NULL));
705:         PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)da), &size));
706:         if (size == 1 && bx == DM_BOUNDARY_NONE) {
707:           PetscCall(DMCreateMatrix_DA_1d_SeqAIJ_NoPreallocation(da, A));
708:         } else {
709:           PetscCall(DMCreateMatrix_DA_1d_MPIAIJ(da, A));
710:         }
711:       }
712:     } else if (dim == 2) {
713:       if (dd->ofill) {
714:         PetscCall(DMCreateMatrix_DA_2d_MPIAIJ_Fill(da, A));
715:       } else {
716:         PetscCall(DMCreateMatrix_DA_2d_MPIAIJ(da, A));
717:       }
718:     } else if (dim == 3) {
719:       if (dd->ofill) {
720:         PetscCall(DMCreateMatrix_DA_3d_MPIAIJ_Fill(da, A));
721:       } else {
722:         PetscCall(DMCreateMatrix_DA_3d_MPIAIJ(da, A));
723:       }
724:     }
725:   } else if (baij) {
726:     if (dim == 2) {
727:       PetscCall(DMCreateMatrix_DA_2d_MPIBAIJ(da, A));
728:     } else if (dim == 3) {
729:       PetscCall(DMCreateMatrix_DA_3d_MPIBAIJ(da, A));
 730:     } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "Not implemented for %" PetscInt_FMT " dimension and matrix type %s! Send mail to petsc-maint@mcs.anl.gov for code", dim, Atype);
731:   } else if (sbaij) {
732:     if (dim == 2) {
733:       PetscCall(DMCreateMatrix_DA_2d_MPISBAIJ(da, A));
734:     } else if (dim == 3) {
735:       PetscCall(DMCreateMatrix_DA_3d_MPISBAIJ(da, A));
 736:     } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "Not implemented for %" PetscInt_FMT " dimension and matrix type %s! Send mail to petsc-maint@mcs.anl.gov for code", dim, Atype);
737:   } else if (sell) {
738:     if (dim == 2) {
739:       PetscCall(DMCreateMatrix_DA_2d_MPISELL(da, A));
740:     } else if (dim == 3) {
741:       PetscCall(DMCreateMatrix_DA_3d_MPISELL(da, A));
 742:     } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "Not implemented for %" PetscInt_FMT " dimension and matrix type %s! Send mail to petsc-maint@mcs.anl.gov for code", dim, Atype);
743:   } else if (is) {
744:     PetscCall(DMCreateMatrix_DA_IS(da, A));
745:   } else { // unknown type or da->prealloc_skip so structural information may be needed, but no prealloc
746:     ISLocalToGlobalMapping ltog;

748:     PetscCall(MatSetBlockSize(A, dof));
749:     PetscCall(MatSetUp(A));
750:     PetscCall(DMGetLocalToGlobalMapping(da, &ltog));
751:     PetscCall(MatSetLocalToGlobalMapping(A, ltog, ltog));
752:   }
753:   PetscCall(DMDAGetGhostCorners(da, &starts[0], &starts[1], &starts[2], &dims[0], &dims[1], &dims[2]));
754:   PetscCall(MatSetStencil(A, dim, dims, starts, dof));
755:   PetscCall(MatSetDM(A, da));
756:   PetscCallMPI(MPI_Comm_size(comm, &size));
757:   if (size > 1) {
758:     /* change viewer to display matrix in natural ordering */
759:     PetscCall(MatSetOperation(A, MATOP_VIEW, (void (*)(void))MatView_MPI_DA));
760:     PetscCall(MatSetOperation(A, MATOP_LOAD, (void (*)(void))MatLoad_MPI_DA));
761:   }
762:   *J = A;
763:   PetscFunctionReturn(PETSC_SUCCESS);
764: }
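
/*
   A minimal usage sketch (error handling elided): users normally reach this routine
   through the type-independent interface,

     Mat J;
     PetscCall(DMSetMatType(da, MATAIJ));
     PetscCall(DMCreateMatrix(da, &J));

   after which J is preallocated and carries the stencil and local-to-global information
   set above, unless preallocation was skipped with DMSetMatrixPreallocateSkip().
*/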

766: PETSC_EXTERN PetscErrorCode MatISSetPreallocation_IS(Mat, PetscInt, const PetscInt[], PetscInt, const PetscInt[]);

768: PetscErrorCode DMCreateMatrix_DA_IS(DM dm, Mat J)
769: {
770:   DM_DA                 *da = (DM_DA *)dm->data;
771:   Mat                    lJ, P;
772:   ISLocalToGlobalMapping ltog;
773:   IS                     is;
774:   PetscBT                bt;
775:   const PetscInt        *e_loc, *idx;
776:   PetscInt               i, nel, nen, nv, dof, *gidx, n, N;

 778:   /* The l2g map of the DMDA has all ghosted nodes, and e_loc is a subset of all the local nodes (including the ghosted ones).
 779:      We need to filter out the local indices that are not represented through the DMDAGetElements() decomposition */
780:   PetscFunctionBegin;
781:   dof = da->w;
782:   PetscCall(MatSetBlockSize(J, dof));
783:   PetscCall(DMGetLocalToGlobalMapping(dm, &ltog));

785:   /* flag local elements indices in local DMDA numbering */
786:   PetscCall(ISLocalToGlobalMappingGetSize(ltog, &nv));
787:   PetscCall(PetscBTCreate(nv / dof, &bt));
788:   PetscCall(DMDAGetElements(dm, &nel, &nen, &e_loc)); /* this will throw an error if the stencil type is not DMDA_STENCIL_BOX */
789:   for (i = 0; i < nel * nen; i++) PetscCall(PetscBTSet(bt, e_loc[i]));

791:   /* filter out (set to -1) the global indices not used by the local elements */
792:   PetscCall(PetscMalloc1(nv / dof, &gidx));
793:   PetscCall(ISLocalToGlobalMappingGetBlockIndices(ltog, &idx));
794:   PetscCall(PetscArraycpy(gidx, idx, nv / dof));
795:   PetscCall(ISLocalToGlobalMappingRestoreBlockIndices(ltog, &idx));
796:   for (i = 0; i < nv / dof; i++)
797:     if (!PetscBTLookup(bt, i)) gidx[i] = -1;
798:   PetscCall(PetscBTDestroy(&bt));
799:   PetscCall(ISCreateBlock(PetscObjectComm((PetscObject)dm), dof, nv / dof, gidx, PETSC_OWN_POINTER, &is));
800:   PetscCall(ISLocalToGlobalMappingCreateIS(is, &ltog));
801:   PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));
802:   PetscCall(ISLocalToGlobalMappingDestroy(&ltog));
803:   PetscCall(ISDestroy(&is));

805:   /* Preallocation */
806:   if (dm->prealloc_skip) {
807:     PetscCall(MatSetUp(J));
808:   } else {
809:     PetscCall(MatISGetLocalMat(J, &lJ));
810:     PetscCall(MatGetLocalToGlobalMapping(lJ, &ltog, NULL));
811:     PetscCall(MatCreate(PetscObjectComm((PetscObject)lJ), &P));
812:     PetscCall(MatSetType(P, MATPREALLOCATOR));
813:     PetscCall(MatSetLocalToGlobalMapping(P, ltog, ltog));
814:     PetscCall(MatGetSize(lJ, &N, NULL));
815:     PetscCall(MatGetLocalSize(lJ, &n, NULL));
816:     PetscCall(MatSetSizes(P, n, n, N, N));
817:     PetscCall(MatSetBlockSize(P, dof));
818:     PetscCall(MatSetUp(P));
819:     for (i = 0; i < nel; i++) PetscCall(MatSetValuesBlockedLocal(P, nen, e_loc + i * nen, nen, e_loc + i * nen, NULL, INSERT_VALUES));
820:     PetscCall(MatPreallocatorPreallocate(P, (PetscBool)!da->prealloc_only, lJ));
821:     PetscCall(MatISRestoreLocalMat(J, &lJ));
822:     PetscCall(DMDARestoreElements(dm, &nel, &nen, &e_loc));
823:     PetscCall(MatDestroy(&P));

825:     PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
826:     PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
827:   }
828:   PetscFunctionReturn(PETSC_SUCCESS);
829: }

831: PetscErrorCode DMCreateMatrix_DA_2d_MPISELL(DM da, Mat J)
832: {
833:   PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny, m, n, dim, s, *cols = NULL, k, nc, *rows = NULL, col, cnt, l, p;
834:   PetscInt               lstart, lend, pstart, pend, *dnz, *onz;
835:   MPI_Comm               comm;
836:   PetscScalar           *values;
837:   DMBoundaryType         bx, by;
838:   ISLocalToGlobalMapping ltog;
839:   DMDAStencilType        st;

841:   PetscFunctionBegin;
842:   /*
843:          nc - number of components per grid point
844:          col - number of colors needed in one direction for single component problem
845:   */
846:   PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, NULL, NULL, NULL, &nc, &s, &bx, &by, NULL, &st));
847:   col = 2 * s + 1;
848:   PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL));
849:   PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL));
850:   PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

852:   PetscCall(PetscMalloc2(nc, &rows, col * col * nc * nc, &cols));
853:   PetscCall(DMGetLocalToGlobalMapping(da, &ltog));

855:   PetscCall(MatSetBlockSize(J, nc));
856:   /* determine the matrix preallocation information */
857:   MatPreallocateBegin(comm, nc * nx * ny, nc * nx * ny, dnz, onz);
858:   for (i = xs; i < xs + nx; i++) {
859:     pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
860:     pend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));

862:     for (j = ys; j < ys + ny; j++) {
863:       slot = i - gxs + gnx * (j - gys);

865:       lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
866:       lend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));

868:       cnt = 0;
869:       for (k = 0; k < nc; k++) {
870:         for (l = lstart; l < lend + 1; l++) {
871:           for (p = pstart; p < pend + 1; p++) {
872:             if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star have either l = 0 or p = 0 */
873:               cols[cnt++] = k + nc * (slot + gnx * l + p);
874:             }
875:           }
876:         }
877:         rows[k] = k + nc * (slot);
878:       }
879:       PetscCall(MatPreallocateSetLocal(ltog, nc, rows, ltog, cnt, cols, dnz, onz));
880:     }
881:   }
882:   PetscCall(MatSetBlockSize(J, nc));
883:   PetscCall(MatSeqSELLSetPreallocation(J, 0, dnz));
884:   PetscCall(MatMPISELLSetPreallocation(J, 0, dnz, 0, onz));
885:   MatPreallocateEnd(dnz, onz);

887:   PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));

 889:   /*
 890:     For each node in the grid we get the neighbors in the local (on-process ordering
 891:     that includes the ghost points), then MatSetValuesLocal() maps those indices to the
 892:     global PETSc ordering.
 893:   */
894:   if (!da->prealloc_only) {
895:     PetscCall(PetscCalloc1(col * col * nc * nc, &values));
896:     for (i = xs; i < xs + nx; i++) {
897:       pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
898:       pend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));

900:       for (j = ys; j < ys + ny; j++) {
901:         slot = i - gxs + gnx * (j - gys);

903:         lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
904:         lend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));

906:         cnt = 0;
907:         for (k = 0; k < nc; k++) {
908:           for (l = lstart; l < lend + 1; l++) {
909:             for (p = pstart; p < pend + 1; p++) {
910:               if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star have either l = 0 or p = 0 */
911:                 cols[cnt++] = k + nc * (slot + gnx * l + p);
912:               }
913:             }
914:           }
915:           rows[k] = k + nc * (slot);
916:         }
917:         PetscCall(MatSetValuesLocal(J, nc, rows, cnt, cols, values, INSERT_VALUES));
918:       }
919:     }
920:     PetscCall(PetscFree(values));
921:     /* do not copy values to GPU since they are all zero and not yet needed there */
922:     PetscCall(MatBindToCPU(J, PETSC_TRUE));
923:     PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
924:     PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
925:     PetscCall(MatBindToCPU(J, PETSC_FALSE));
926:     PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
927:   }
928:   PetscCall(PetscFree2(rows, cols));
929:   PetscFunctionReturn(PETSC_SUCCESS);
930: }
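
/*
   Index bookkeeping in the loops above, worked for a small case (illustrative): with
   nc = 1, s = 1, and a box stencil, an interior node (i,j) has ghosted local index
   slot = (i - gxs) + gnx*(j - gys), and its nine stencil entries sit at the ghosted
   local columns slot + gnx*l + p for l,p in {-1,0,1}; MatPreallocateSetLocal() and
   MatSetValuesLocal() translate these through ltog to global PETSc indices.
*/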

932: PetscErrorCode DMCreateMatrix_DA_3d_MPISELL(DM da, Mat J)
933: {
934:   PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
935:   PetscInt               m, n, dim, s, *cols = NULL, k, nc, *rows = NULL, col, cnt, l, p, *dnz = NULL, *onz = NULL;
936:   PetscInt               istart, iend, jstart, jend, kstart, kend, zs, nz, gzs, gnz, ii, jj, kk, M, N, P;
937:   MPI_Comm               comm;
938:   PetscScalar           *values;
939:   DMBoundaryType         bx, by, bz;
940:   ISLocalToGlobalMapping ltog;
941:   DMDAStencilType        st;

943:   PetscFunctionBegin;
944:   /*
945:          nc - number of components per grid point
946:          col - number of colors needed in one direction for single component problem
947:   */
948:   PetscCall(DMDAGetInfo(da, &dim, &m, &n, &p, &M, &N, &P, &nc, &s, &bx, &by, &bz, &st));
949:   col = 2 * s + 1;
950:   PetscCall(DMDAGetCorners(da, &xs, &ys, &zs, &nx, &ny, &nz));
951:   PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, &gzs, &gnx, &gny, &gnz));
952:   PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

954:   PetscCall(PetscMalloc2(nc, &rows, col * col * col * nc * nc, &cols));
955:   PetscCall(DMGetLocalToGlobalMapping(da, &ltog));

957:   PetscCall(MatSetBlockSize(J, nc));
958:   /* determine the matrix preallocation information */
959:   MatPreallocateBegin(comm, nc * nx * ny * nz, nc * nx * ny * nz, dnz, onz);
960:   for (i = xs; i < xs + nx; i++) {
961:     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
962:     iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
963:     for (j = ys; j < ys + ny; j++) {
964:       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
965:       jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
966:       for (k = zs; k < zs + nz; k++) {
967:         kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
968:         kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));

970:         slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);

972:         cnt = 0;
973:         for (l = 0; l < nc; l++) {
974:           for (ii = istart; ii < iend + 1; ii++) {
975:             for (jj = jstart; jj < jend + 1; jj++) {
976:               for (kk = kstart; kk < kend + 1; kk++) {
 977:                 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star */
978:                   cols[cnt++] = l + nc * (slot + ii + gnx * jj + gnx * gny * kk);
979:                 }
980:               }
981:             }
982:           }
983:           rows[l] = l + nc * (slot);
984:         }
985:         PetscCall(MatPreallocateSetLocal(ltog, nc, rows, ltog, cnt, cols, dnz, onz));
986:       }
987:     }
988:   }
989:   PetscCall(MatSetBlockSize(J, nc));
990:   PetscCall(MatSeqSELLSetPreallocation(J, 0, dnz));
991:   PetscCall(MatMPISELLSetPreallocation(J, 0, dnz, 0, onz));
992:   MatPreallocateEnd(dnz, onz);
993:   PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));

 995:   /*
 996:     For each node in the grid we get the neighbors in the local (on-process ordering
 997:     that includes the ghost points), then MatSetValuesLocal() maps those indices to the
 998:     global PETSc ordering.
 999:   */
1000:   if (!da->prealloc_only) {
1001:     PetscCall(PetscCalloc1(col * col * col * nc * nc * nc, &values));
1002:     for (i = xs; i < xs + nx; i++) {
1003:       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
1004:       iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
1005:       for (j = ys; j < ys + ny; j++) {
1006:         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
1007:         jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
1008:         for (k = zs; k < zs + nz; k++) {
1009:           kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
1010:           kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));

1012:           slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);

1014:           cnt = 0;
1015:           for (l = 0; l < nc; l++) {
1016:             for (ii = istart; ii < iend + 1; ii++) {
1017:               for (jj = jstart; jj < jend + 1; jj++) {
1018:                 for (kk = kstart; kk < kend + 1; kk++) {
1019:                   if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star */
1020:                     cols[cnt++] = l + nc * (slot + ii + gnx * jj + gnx * gny * kk);
1021:                   }
1022:                 }
1023:               }
1024:             }
1025:             rows[l] = l + nc * (slot);
1026:           }
1027:           PetscCall(MatSetValuesLocal(J, nc, rows, cnt, cols, values, INSERT_VALUES));
1028:         }
1029:       }
1030:     }
1031:     PetscCall(PetscFree(values));
1032:     /* do not copy values to GPU since they are all zero and not yet needed there */
1033:     PetscCall(MatBindToCPU(J, PETSC_TRUE));
1034:     PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
1035:     PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
1036:     PetscCall(MatBindToCPU(J, PETSC_FALSE));
1037:     PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
1038:   }
1039:   PetscCall(PetscFree2(rows, cols));
1040:   PetscFunctionReturn(PETSC_SUCCESS);
1041: }

1043: PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ(DM da, Mat J)
1044: {
1045:   PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny, m, n, dim, s, *cols = NULL, k, nc, *rows = NULL, col, cnt, l, p, M, N;
1046:   PetscInt               lstart, lend, pstart, pend, *dnz, *onz;
1047:   MPI_Comm               comm;
1048:   DMBoundaryType         bx, by;
1049:   ISLocalToGlobalMapping ltog, mltog;
1050:   DMDAStencilType        st;
1051:   PetscBool              removedups = PETSC_FALSE, alreadyboundtocpu = PETSC_TRUE;

1053:   PetscFunctionBegin;
1054:   /*
1055:          nc - number of components per grid point
1056:          col - number of colors needed in one direction for single component problem
1057:   */
1058:   PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, &M, &N, NULL, &nc, &s, &bx, &by, NULL, &st));
1059:   if (bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_TRUE));
1060:   col = 2 * s + 1;
1061:   /*
1062:        With one process and a periodic domain in a skinny dimension, the code will label nonzero columns multiple times
1063:        because "wrapping" around the end of the domain hits an entry already counted in the other direction.
1064:   */
1065:   if (M == 1 && 2 * s >= m) removedups = PETSC_TRUE;
1066:   if (N == 1 && 2 * s >= n) removedups = PETSC_TRUE;
1067:   PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL));
1068:   PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL));
1069:   PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

1071:   PetscCall(PetscMalloc2(nc, &rows, col * col * nc * nc, &cols));
1072:   PetscCall(DMGetLocalToGlobalMapping(da, &ltog));

1074:   PetscCall(MatSetBlockSize(J, nc));
1075:   /* determine the matrix preallocation information */
1076:   MatPreallocateBegin(comm, nc * nx * ny, nc * nx * ny, dnz, onz);
1077:   for (i = xs; i < xs + nx; i++) {
1078:     pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
1079:     pend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));

1081:     for (j = ys; j < ys + ny; j++) {
1082:       slot = i - gxs + gnx * (j - gys);

1084:       lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
1085:       lend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));

1087:       cnt = 0;
1088:       for (k = 0; k < nc; k++) {
1089:         for (l = lstart; l < lend + 1; l++) {
1090:           for (p = pstart; p < pend + 1; p++) {
1091:             if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star have either l = 0 or p = 0 */
1092:               cols[cnt++] = k + nc * (slot + gnx * l + p);
1093:             }
1094:           }
1095:         }
1096:         rows[k] = k + nc * (slot);
1097:       }
1098:       if (removedups) PetscCall(MatPreallocateSetLocalRemoveDups(ltog, nc, rows, ltog, cnt, cols, dnz, onz));
1099:       else PetscCall(MatPreallocateSetLocal(ltog, nc, rows, ltog, cnt, cols, dnz, onz));
1100:     }
1101:   }
1102:   PetscCall(MatSetBlockSize(J, nc));
1103:   PetscCall(MatSeqAIJSetPreallocation(J, 0, dnz));
1104:   PetscCall(MatMPIAIJSetPreallocation(J, 0, dnz, 0, onz));
1105:   MatPreallocateEnd(dnz, onz);
1106:   PetscCall(MatGetLocalToGlobalMapping(J, &mltog, NULL));
1107:   if (!mltog) PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));

1109:   /*
1110:     For each node in the grid we get the neighbors in the local (on-process ordering
1111:     that includes the ghost points), then MatSetValuesLocal() maps those indices to the
1112:     global PETSc ordering.
1113:   */
1114:   if (!da->prealloc_only) {
1115:     for (i = xs; i < xs + nx; i++) {
1116:       pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
1117:       pend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));

1119:       for (j = ys; j < ys + ny; j++) {
1120:         slot = i - gxs + gnx * (j - gys);

1122:         lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
1123:         lend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));

1125:         cnt = 0;
1126:         for (l = lstart; l < lend + 1; l++) {
1127:           for (p = pstart; p < pend + 1; p++) {
1128:             if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star have either l = 0 or p = 0 */
1129:               cols[cnt++] = nc * (slot + gnx * l + p);
1130:               for (k = 1; k < nc; k++) {
1131:                 cols[cnt] = 1 + cols[cnt - 1];
1132:                 cnt++;
1133:               }
1134:             }
1135:           }
1136:         }
1137:         for (k = 0; k < nc; k++) rows[k] = k + nc * (slot);
1138:         PetscCall(MatSetValuesLocal(J, nc, rows, cnt, cols, NULL, INSERT_VALUES));
1139:       }
1140:     }
1141:     /* do not copy values to GPU since they are all zero and not yet needed there */
1142:     PetscCall(MatBoundToCPU(J, &alreadyboundtocpu));
1143:     PetscCall(MatBindToCPU(J, PETSC_TRUE));
1144:     PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
1145:     PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
1146:     if (!alreadyboundtocpu) PetscCall(MatBindToCPU(J, PETSC_FALSE));
1147:     PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
1148:     if (bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_FALSE));
1149:   }
1150:   PetscCall(PetscFree2(rows, cols));
1151:   PetscFunctionReturn(PETSC_SUCCESS);
1152: }
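
/*
   A note on the insertion loop above (explanatory, not in the original source): because
   the nc components of a grid node are numbered consecutively, the inner k loop extends
   cols[] with cols[cnt-1] + 1 instead of recomputing the full index, and one
   MatSetValuesLocal() call then inserts all nc rows of the node at once.
*/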

1154: PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ_Fill(DM da, Mat J)
1155: {
1156:   PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
1157:   PetscInt               m, n, dim, s, *cols, k, nc, row, col, cnt, maxcnt = 0, l, p, M, N;
1158:   PetscInt               lstart, lend, pstart, pend, *dnz, *onz;
1159:   DM_DA                 *dd = (DM_DA *)da->data;
1160:   PetscInt               ifill_col, *ofill = dd->ofill, *dfill = dd->dfill;
1161:   MPI_Comm               comm;
1162:   DMBoundaryType         bx, by;
1163:   ISLocalToGlobalMapping ltog;
1164:   DMDAStencilType        st;
1165:   PetscBool              removedups = PETSC_FALSE;

1167:   PetscFunctionBegin;
1168:   /*
1169:          nc - number of components per grid point
1170:          col - number of colors needed in one direction for single component problem
1171:   */
1172:   PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, &M, &N, NULL, &nc, &s, &bx, &by, NULL, &st));
1173:   col = 2 * s + 1;
1174:   /*
1175:        With one process and a periodic domain in a skinny dimension, the code will label nonzero columns multiple times
1176:        because "wrapping" around the end of the domain hits an entry already counted in the other direction.
1177:   */
1178:   if (M == 1 && 2 * s >= m) removedups = PETSC_TRUE;
1179:   if (N == 1 && 2 * s >= n) removedups = PETSC_TRUE;
1180:   PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL));
1181:   PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL));
1182:   PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

1184:   PetscCall(PetscMalloc1(col * col * nc, &cols));
1185:   PetscCall(DMGetLocalToGlobalMapping(da, &ltog));

1187:   PetscCall(MatSetBlockSize(J, nc));
1188:   /* determine the matrix preallocation information */
1189:   MatPreallocateBegin(comm, nc * nx * ny, nc * nx * ny, dnz, onz);
1190:   for (i = xs; i < xs + nx; i++) {
1191:     pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
1192:     pend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));

1194:     for (j = ys; j < ys + ny; j++) {
1195:       slot = i - gxs + gnx * (j - gys);

1197:       lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
1198:       lend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));

1200:       for (k = 0; k < nc; k++) {
1201:         cnt = 0;
1202:         for (l = lstart; l < lend + 1; l++) {
1203:           for (p = pstart; p < pend + 1; p++) {
1204:             if (l || p) {
1205:               if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star */
1206:                 for (ifill_col = ofill[k]; ifill_col < ofill[k + 1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc * (slot + gnx * l + p);
1207:               }
1208:             } else {
1209:               if (dfill) {
1210:                 for (ifill_col = dfill[k]; ifill_col < dfill[k + 1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc * (slot + gnx * l + p);
1211:               } else {
1212:                 for (ifill_col = 0; ifill_col < nc; ifill_col++) cols[cnt++] = ifill_col + nc * (slot + gnx * l + p);
1213:               }
1214:             }
1215:           }
1216:         }
1217:         row    = k + nc * (slot);
1218:         maxcnt = PetscMax(maxcnt, cnt);
1219:         if (removedups) PetscCall(MatPreallocateSetLocalRemoveDups(ltog, 1, &row, ltog, cnt, cols, dnz, onz));
1220:         else PetscCall(MatPreallocateSetLocal(ltog, 1, &row, ltog, cnt, cols, dnz, onz));
1221:       }
1222:     }
1223:   }
1224:   PetscCall(MatSeqAIJSetPreallocation(J, 0, dnz));
1225:   PetscCall(MatMPIAIJSetPreallocation(J, 0, dnz, 0, onz));
1226:   MatPreallocateEnd(dnz, onz);
1227:   PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));

1229:   /*
1230:     For each node in the grid we get the neighbors in the local (on-processor) ordering,
1231:     which includes the ghost points; MatSetValuesLocal() then maps those indices to the
1232:     global PETSc ordering.
1233:   */
1234:   if (!da->prealloc_only) {
1235:     for (i = xs; i < xs + nx; i++) {
1236:       pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
1237:       pend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));

1239:       for (j = ys; j < ys + ny; j++) {
1240:         slot = i - gxs + gnx * (j - gys);

1242:         lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
1243:         lend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));

1245:         for (k = 0; k < nc; k++) {
1246:           cnt = 0;
1247:           for (l = lstart; l < lend + 1; l++) {
1248:             for (p = pstart; p < pend + 1; p++) {
1249:               if (l || p) {
1250:                 if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star */
1251:                   for (ifill_col = ofill[k]; ifill_col < ofill[k + 1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc * (slot + gnx * l + p);
1252:                 }
1253:               } else {
1254:                 if (dfill) {
1255:                   for (ifill_col = dfill[k]; ifill_col < dfill[k + 1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc * (slot + gnx * l + p);
1256:                 } else {
1257:                   for (ifill_col = 0; ifill_col < nc; ifill_col++) cols[cnt++] = ifill_col + nc * (slot + gnx * l + p);
1258:                 }
1259:               }
1260:             }
1261:           }
1262:           row = k + nc * (slot);
1263:           PetscCall(MatSetValuesLocal(J, 1, &row, cnt, cols, NULL, INSERT_VALUES));
1264:         }
1265:       }
1266:     }
1267:     /* do not copy values to GPU since they are all zero and not yet needed there */
1268:     PetscCall(MatBindToCPU(J, PETSC_TRUE));
1269:     PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
1270:     PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
1271:     PetscCall(MatBindToCPU(J, PETSC_FALSE));
1272:     PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
1273:   }
1274:   PetscCall(PetscFree(cols));
1275:   PetscFunctionReturn(PETSC_SUCCESS);
1276: }
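/*
   A minimal sketch of how the _Fill path above is typically reached from user code.
   The 2-dof fill patterns are hypothetical; any 0/1 arrays of size dof*dof work:

     DM       da;
     Mat      J;
     PetscInt dfill[4] = {1, 0,
                          1, 1};
     PetscInt ofill[4] = {1, 0,
                          0, 1};

     PetscCall(DMDACreate2d(PETSC_COMM_WORLD, DM_BOUNDARY_NONE, DM_BOUNDARY_NONE, DMDA_STENCIL_STAR, 8, 8, PETSC_DECIDE, PETSC_DECIDE, 2, 1, NULL, NULL, &da));
     PetscCall(DMSetUp(da));
     PetscCall(DMDASetBlockFills(da, dfill, ofill));
     PetscCall(DMCreateMatrix(da, &J));
*/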

1278: PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ(DM da, Mat J)
1279: {
1280:   PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
1281:   PetscInt               m, n, dim, s, *cols = NULL, k, nc, *rows = NULL, col, cnt, l, p, *dnz = NULL, *onz = NULL;
1282:   PetscInt               istart, iend, jstart, jend, kstart, kend, zs, nz, gzs, gnz, ii, jj, kk, M, N, P;
1283:   MPI_Comm               comm;
1284:   DMBoundaryType         bx, by, bz;
1285:   ISLocalToGlobalMapping ltog, mltog;
1286:   DMDAStencilType        st;
1287:   PetscBool              removedups = PETSC_FALSE;

1289:   PetscFunctionBegin;
1290:   /*
1291:          nc - number of components per grid point
1292:          col - number of colors needed in one direction for a single-component problem
1293:   */
1294:   PetscCall(DMDAGetInfo(da, &dim, &m, &n, &p, &M, &N, &P, &nc, &s, &bx, &by, &bz, &st));
1295:   if (bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE && bz == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_TRUE));
1296:   col = 2 * s + 1;

1298:   /*
1299:        With one process in a periodic, skinny dimension the code can label the same nonzero column multiple times
1300:        because the stencil "wraps" around the end of the domain and hits an entry already counted from the other direction.
1301:   */
1302:   if (M == 1 && 2 * s >= m) removedups = PETSC_TRUE;
1303:   if (N == 1 && 2 * s >= n) removedups = PETSC_TRUE;
1304:   if (P == 1 && 2 * s >= p) removedups = PETSC_TRUE;

1306:   PetscCall(DMDAGetCorners(da, &xs, &ys, &zs, &nx, &ny, &nz));
1307:   PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, &gzs, &gnx, &gny, &gnz));
1308:   PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

1310:   PetscCall(PetscMalloc2(nc, &rows, col * col * col * nc * nc, &cols));
1311:   PetscCall(DMGetLocalToGlobalMapping(da, &ltog));

1313:   PetscCall(MatSetBlockSize(J, nc));
1314:   /* determine the matrix preallocation information */
1315:   MatPreallocateBegin(comm, nc * nx * ny * nz, nc * nx * ny * nz, dnz, onz);
1316:   for (i = xs; i < xs + nx; i++) {
1317:     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
1318:     iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
1319:     for (j = ys; j < ys + ny; j++) {
1320:       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
1321:       jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
1322:       for (k = zs; k < zs + nz; k++) {
1323:         kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
1324:         kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));

1326:         slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);

1328:         cnt = 0;
1329:         for (l = 0; l < nc; l++) {
1330:           for (ii = istart; ii < iend + 1; ii++) {
1331:             for (jj = jstart; jj < jend + 1; jj++) {
1332:               for (kk = kstart; kk < kend + 1; kk++) {
1333:                 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star */
1334:                   cols[cnt++] = l + nc * (slot + ii + gnx * jj + gnx * gny * kk);
1335:                 }
1336:               }
1337:             }
1338:           }
1339:           rows[l] = l + nc * (slot);
1340:         }
1341:         if (removedups) PetscCall(MatPreallocateSetLocalRemoveDups(ltog, nc, rows, ltog, cnt, cols, dnz, onz));
1342:         else PetscCall(MatPreallocateSetLocal(ltog, nc, rows, ltog, cnt, cols, dnz, onz));
1343:       }
1344:     }
1345:   }
1346:   PetscCall(MatSetBlockSize(J, nc));
1347:   PetscCall(MatSeqAIJSetPreallocation(J, 0, dnz));
1348:   PetscCall(MatMPIAIJSetPreallocation(J, 0, dnz, 0, onz));
1349:   MatPreallocateEnd(dnz, onz);
1350:   PetscCall(MatGetLocalToGlobalMapping(J, &mltog, NULL));
1351:   if (!mltog) PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));

1353:   /*
1354:     For each node in the grid we get the neighbors in the local (on-processor) ordering,
1355:     which includes the ghost points; MatSetValuesLocal() then maps those indices to the
1356:     global PETSc ordering.
1357:   */
1358:   if (!da->prealloc_only) {
1359:     for (i = xs; i < xs + nx; i++) {
1360:       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
1361:       iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
1362:       for (j = ys; j < ys + ny; j++) {
1363:         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
1364:         jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
1365:         for (k = zs; k < zs + nz; k++) {
1366:           kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
1367:           kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));

1369:           slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);

1371:           cnt = 0;
1372:           for (kk = kstart; kk < kend + 1; kk++) {
1373:             for (jj = jstart; jj < jend + 1; jj++) {
1374:               for (ii = istart; ii < iend + 1; ii++) {
1375:                 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star */
1376:                   cols[cnt++] = nc * (slot + ii + gnx * jj + gnx * gny * kk);
1377:                   for (l = 1; l < nc; l++) {
1378:                     cols[cnt] = 1 + cols[cnt - 1];
1379:                     cnt++;
1380:                   }
1381:                 }
1382:               }
1383:             }
1384:           }
1385:           rows[0] = nc * (slot);
1386:           for (l = 1; l < nc; l++) rows[l] = 1 + rows[l - 1];
1387:           PetscCall(MatSetValuesLocal(J, nc, rows, cnt, cols, NULL, INSERT_VALUES));
1388:         }
1389:       }
1390:     }
1391:     /* do not copy values to GPU since they are all zero and not yet needed there */
1392:     PetscCall(MatBindToCPU(J, PETSC_TRUE));
1393:     PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
1394:     PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
1395:     if (bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE && bz == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_FALSE));
1396:     PetscCall(MatBindToCPU(J, PETSC_FALSE));
1397:     PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
1398:   }
1399:   PetscCall(PetscFree2(rows, cols));
1400:   PetscFunctionReturn(PETSC_SUCCESS);
1401: }
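/*
   Per-row column counts generated by the loops above (interior rows; rows near a
   non-periodic boundary are clipped), for stencil width s and nc components:

     box  stencil: nc * (2s+1)^3   e.g. s = 1, nc = 1 gives 27 columns
     star stencil: nc * (6s+1)     e.g. s = 1, nc = 1 gives  7 columns
*/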

1403: PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ_Fill(DM da, Mat J)
1404: {
1405:   DM_DA                 *dd = (DM_DA *)da->data;
1406:   PetscInt               xs, nx, i, j, gxs, gnx, row, k, l;
1407:   PetscInt               m, dim, s, *cols = NULL, nc, cnt, maxcnt = 0, *ocols;
1408:   PetscInt              *ofill = dd->ofill, *dfill = dd->dfill;
1409:   DMBoundaryType         bx;
1410:   ISLocalToGlobalMapping ltog;
1411:   PetscMPIInt            rank, size;

1413:   PetscFunctionBegin;
1414:   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)da), &rank));
1415:   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)da), &size));

1417:   /*
1418:          nc - number of components per grid point
1419:   */
1420:   PetscCall(DMDAGetInfo(da, &dim, &m, NULL, NULL, NULL, NULL, NULL, &nc, &s, &bx, NULL, NULL, NULL));
1421:   PetscCheck(s <= 1, PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "Matrix creation for 1d not implemented correctly for stencil width larger than 1");
1422:   PetscCall(DMDAGetCorners(da, &xs, NULL, NULL, &nx, NULL, NULL));
1423:   PetscCall(DMDAGetGhostCorners(da, &gxs, NULL, NULL, &gnx, NULL, NULL));

1425:   PetscCall(MatSetBlockSize(J, nc));
1426:   PetscCall(PetscCalloc2(nx * nc, &cols, nx * nc, &ocols));

1428:   /*
1429:         note: the counts could be smaller for the first and last process when the boundary is not periodic;
1430:         the case of a NULL dfill (dense diagonal block) is not handled here
1431:   */
1432:   cnt = 0;
1433:   /* coupling with process to the left */
1434:   for (i = 0; i < s; i++) {
1435:     for (j = 0; j < nc; j++) {
1436:       ocols[cnt] = ((rank == 0) ? 0 : (s - i) * (ofill[j + 1] - ofill[j]));
1437:       cols[cnt]  = dfill[j + 1] - dfill[j] + (s + i) * (ofill[j + 1] - ofill[j]);
1438:       if (rank == 0 && (dd->bx == DM_BOUNDARY_PERIODIC)) {
1439:         if (size > 1) ocols[cnt] += (s - i) * (ofill[j + 1] - ofill[j]);
1440:         else cols[cnt] += (s - i) * (ofill[j + 1] - ofill[j]);
1441:       }
1442:       maxcnt = PetscMax(maxcnt, ocols[cnt] + cols[cnt]);
1443:       cnt++;
1444:     }
1445:   }
1446:   for (i = s; i < nx - s; i++) {
1447:     for (j = 0; j < nc; j++) {
1448:       cols[cnt] = dfill[j + 1] - dfill[j] + 2 * s * (ofill[j + 1] - ofill[j]);
1449:       maxcnt    = PetscMax(maxcnt, ocols[cnt] + cols[cnt]);
1450:       cnt++;
1451:     }
1452:   }
1453:   /* coupling with process to the right */
1454:   for (i = nx - s; i < nx; i++) {
1455:     for (j = 0; j < nc; j++) {
1456:       ocols[cnt] = ((rank == (size - 1)) ? 0 : (i - nx + s + 1) * (ofill[j + 1] - ofill[j]));
1457:       cols[cnt]  = dfill[j + 1] - dfill[j] + (s + nx - i - 1) * (ofill[j + 1] - ofill[j]);
1458:       if ((rank == size - 1) && (dd->bx == DM_BOUNDARY_PERIODIC)) {
1459:         if (size > 1) ocols[cnt] += (i - nx + s + 1) * (ofill[j + 1] - ofill[j]);
1460:         else cols[cnt] += (i - nx + s + 1) * (ofill[j + 1] - ofill[j]);
1461:       }
1462:       maxcnt = PetscMax(maxcnt, ocols[cnt] + cols[cnt]);
1463:       cnt++;
1464:     }
1465:   }

1467:   PetscCall(MatSeqAIJSetPreallocation(J, 0, cols));
1468:   PetscCall(MatMPIAIJSetPreallocation(J, 0, cols, 0, ocols));
1469:   PetscCall(PetscFree2(cols, ocols));

1471:   PetscCall(DMGetLocalToGlobalMapping(da, &ltog));
1472:   PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));

1474:   /*
1475:     For each node in the grid we compute the global indices of its neighbors directly
1476:     (simple in the 1d decomposition) and insert the entries with MatSetValues() in the
1477:     global PETSc ordering.
1478:   */
1479:   if (!da->prealloc_only) {
1480:     PetscCall(PetscMalloc1(maxcnt, &cols));
1481:     row = xs * nc;
1482:     /* coupling with process to the left */
1483:     for (i = xs; i < xs + s; i++) {
1484:       for (j = 0; j < nc; j++) {
1485:         cnt = 0;
1486:         if (rank) {
1487:           for (l = 0; l < s; l++) {
1488:             for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i - s + l) * nc + ofill[k];
1489:           }
1490:         }
1491:         if (rank == 0 && (dd->bx == DM_BOUNDARY_PERIODIC)) {
1492:           for (l = 0; l < s; l++) {
1493:             for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (m + i - s - l) * nc + ofill[k];
1494:           }
1495:         }
1496:         if (dfill) {
1497:           for (k = dfill[j]; k < dfill[j + 1]; k++) cols[cnt++] = i * nc + dfill[k];
1498:         } else {
1499:           for (k = 0; k < nc; k++) cols[cnt++] = i * nc + k;
1500:         }
1501:         for (l = 0; l < s; l++) {
1502:           for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i + s - l) * nc + ofill[k];
1503:         }
1504:         PetscCall(MatSetValues(J, 1, &row, cnt, cols, NULL, INSERT_VALUES));
1505:         row++;
1506:       }
1507:     }
1508:     for (i = xs + s; i < xs + nx - s; i++) {
1509:       for (j = 0; j < nc; j++) {
1510:         cnt = 0;
1511:         for (l = 0; l < s; l++) {
1512:           for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i - s + l) * nc + ofill[k];
1513:         }
1514:         if (dfill) {
1515:           for (k = dfill[j]; k < dfill[j + 1]; k++) cols[cnt++] = i * nc + dfill[k];
1516:         } else {
1517:           for (k = 0; k < nc; k++) cols[cnt++] = i * nc + k;
1518:         }
1519:         for (l = 0; l < s; l++) {
1520:           for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i + s - l) * nc + ofill[k];
1521:         }
1522:         PetscCall(MatSetValues(J, 1, &row, cnt, cols, NULL, INSERT_VALUES));
1523:         row++;
1524:       }
1525:     }
1526:     /* coupling with process to the right */
1527:     for (i = xs + nx - s; i < xs + nx; i++) {
1528:       for (j = 0; j < nc; j++) {
1529:         cnt = 0;
1530:         for (l = 0; l < s; l++) {
1531:           for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i - s + l) * nc + ofill[k];
1532:         }
1533:         if (dfill) {
1534:           for (k = dfill[j]; k < dfill[j + 1]; k++) cols[cnt++] = i * nc + dfill[k];
1535:         } else {
1536:           for (k = 0; k < nc; k++) cols[cnt++] = i * nc + k;
1537:         }
1538:         if (rank < size - 1) {
1539:           for (l = 0; l < s; l++) {
1540:             for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i + s - l) * nc + ofill[k];
1541:           }
1542:         }
1543:         if ((rank == size - 1) && (dd->bx == DM_BOUNDARY_PERIODIC)) {
1544:           for (l = 0; l < s; l++) {
1545:             for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i - s - l - m + 2) * nc + ofill[k];
1546:           }
1547:         }
1548:         PetscCall(MatSetValues(J, 1, &row, cnt, cols, NULL, INSERT_VALUES));
1549:         row++;
1550:       }
1551:     }
1552:     PetscCall(PetscFree(cols));
1553:     /* do not copy values to GPU since they are all zero and not yet needed there */
1554:     PetscCall(MatBindToCPU(J, PETSC_TRUE));
1555:     PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
1556:     PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
1557:     PetscCall(MatBindToCPU(J, PETSC_FALSE));
1558:     PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
1559:   }
1560:   PetscFunctionReturn(PETSC_SUCCESS);
1561: }
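/*
   Unlike the 2d and 3d _Fill routines, which count nonzeros through the
   MatPreallocate machinery, the 1d routine above computes exact per-row diagonal
   (cols) and off-diagonal (ocols) counts in closed form: with stencil width
   s <= 1 only the first and last s grid points of each process couple off-process.
*/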

1563: PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ(DM da, Mat J)
1564: {
1565:   PetscInt               xs, nx, i, i1, slot, gxs, gnx;
1566:   PetscInt               m, dim, s, *cols = NULL, nc, *rows = NULL, col, cnt, l;
1567:   PetscInt               istart, iend;
1568:   DMBoundaryType         bx;
1569:   ISLocalToGlobalMapping ltog, mltog;

1571:   PetscFunctionBegin;
1572:   /*
1573:          nc - number of components per grid point
1574:          col - number of colors needed in one direction for a single-component problem
1575:   */
1576:   PetscCall(DMDAGetInfo(da, &dim, &m, NULL, NULL, NULL, NULL, NULL, &nc, &s, &bx, NULL, NULL, NULL));
1577:   if (bx == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_TRUE));
1578:   col = 2 * s + 1;

1580:   PetscCall(DMDAGetCorners(da, &xs, NULL, NULL, &nx, NULL, NULL));
1581:   PetscCall(DMDAGetGhostCorners(da, &gxs, NULL, NULL, &gnx, NULL, NULL));

1583:   PetscCall(MatSetBlockSize(J, nc));
1584:   PetscCall(MatSeqAIJSetPreallocation(J, col * nc, NULL));
1585:   PetscCall(MatMPIAIJSetPreallocation(J, col * nc, NULL, col * nc, NULL));

1587:   PetscCall(DMGetLocalToGlobalMapping(da, &ltog));
1588:   PetscCall(MatGetLocalToGlobalMapping(J, &mltog, NULL));
1589:   if (!mltog) PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));

1591:   /*
1592:     For each node in the grid we get the neighbors in the local (on-processor) ordering,
1593:     which includes the ghost points; MatSetValuesLocal() then maps those indices to the
1594:     global PETSc ordering.
1595:   */
1596:   if (!da->prealloc_only) {
1597:     PetscCall(PetscMalloc2(nc, &rows, col * nc * nc, &cols));
1598:     for (i = xs; i < xs + nx; i++) {
1599:       istart = PetscMax(-s, gxs - i);
1600:       iend   = PetscMin(s, gxs + gnx - i - 1);
1601:       slot   = i - gxs;

1603:       cnt = 0;
1604:       for (i1 = istart; i1 < iend + 1; i1++) {
1605:         cols[cnt++] = nc * (slot + i1);
1606:         for (l = 1; l < nc; l++) {
1607:           cols[cnt] = 1 + cols[cnt - 1];
1608:           cnt++;
1609:         }
1610:       }
1611:       rows[0] = nc * (slot);
1612:       for (l = 1; l < nc; l++) rows[l] = 1 + rows[l - 1];
1613:       PetscCall(MatSetValuesLocal(J, nc, rows, cnt, cols, NULL, INSERT_VALUES));
1614:     }
1615:     /* do not copy values to GPU since they are all zero and not yet needed there */
1616:     PetscCall(MatBindToCPU(J, PETSC_TRUE));
1617:     PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
1618:     PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
1619:     if (bx == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_FALSE));
1620:     PetscCall(MatBindToCPU(J, PETSC_FALSE));
1621:     PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
1622:     PetscCall(PetscFree2(rows, cols));
1623:   }
1624:   PetscFunctionReturn(PETSC_SUCCESS);
1625: }

1627: PetscErrorCode DMCreateMatrix_DA_1d_SeqAIJ_NoPreallocation(DM da, Mat J)
1628: {
1629:   PetscInt               xs, nx, i, i1, slot, gxs, gnx;
1630:   PetscInt               m, dim, s, *cols = NULL, nc, *rows = NULL, col, cnt, l;
1631:   PetscInt               istart, iend;
1632:   DMBoundaryType         bx;
1633:   ISLocalToGlobalMapping ltog, mltog;

1635:   PetscFunctionBegin;
1636:   /*
1637:          nc - number of components per grid point
1638:          col - number of colors needed in one direction for a single-component problem
1639:   */
1640:   PetscCall(DMDAGetInfo(da, &dim, &m, NULL, NULL, NULL, NULL, NULL, &nc, &s, &bx, NULL, NULL, NULL));
1641:   col = 2 * s + 1;

1643:   PetscCall(DMDAGetCorners(da, &xs, NULL, NULL, &nx, NULL, NULL));
1644:   PetscCall(DMDAGetGhostCorners(da, &gxs, NULL, NULL, &gnx, NULL, NULL));

1646:   PetscCall(MatSetBlockSize(J, nc));
1647:   PetscCall(MatSeqAIJSetTotalPreallocation(J, nx * nc * col * nc));

1649:   PetscCall(DMGetLocalToGlobalMapping(da, &ltog));
1650:   PetscCall(MatGetLocalToGlobalMapping(J, &mltog, NULL));
1651:   if (!mltog) PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));

1653:   /*
1654:     For each node in the grid we get the neighbors in the local (on-processor) ordering,
1655:     which includes the ghost points; MatSetValuesLocal() then maps those indices to the
1656:     global PETSc ordering.
1657:   */
1658:   if (!da->prealloc_only) {
1659:     PetscCall(PetscMalloc2(nc, &rows, col * nc * nc, &cols));
1660:     for (i = xs; i < xs + nx; i++) {
1661:       istart = PetscMax(-s, gxs - i);
1662:       iend   = PetscMin(s, gxs + gnx - i - 1);
1663:       slot   = i - gxs;

1665:       cnt = 0;
1666:       for (i1 = istart; i1 < iend + 1; i1++) {
1667:         cols[cnt++] = nc * (slot + i1);
1668:         for (l = 1; l < nc; l++) {
1669:           cols[cnt] = 1 + cols[cnt - 1];
1670:           cnt++;
1671:         }
1672:       }
1673:       rows[0] = nc * (slot);
1674:       for (l = 1; l < nc; l++) rows[l] = 1 + rows[l - 1];
1675:       PetscCall(MatSetValuesLocal(J, nc, rows, cnt, cols, NULL, INSERT_VALUES));
1676:     }
1677:     /* do not copy values to GPU since they are all zero and not yet needed there */
1678:     PetscCall(MatBindToCPU(J, PETSC_TRUE));
1679:     PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
1680:     PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
1681:     if (bx == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_FALSE));
1682:     PetscCall(MatBindToCPU(J, PETSC_FALSE));
1683:     PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
1684:     PetscCall(PetscFree2(rows, cols));
1685:   }
1686:   PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_FALSE));
1687:   PetscFunctionReturn(PETSC_SUCCESS);
1688: }
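/*
   MatSeqAIJSetTotalPreallocation() above reserves one slab big enough for
   nx*nc rows of col*nc entries each; this is an upper bound on the true row
   lengths, traded for skipping the per-row counting pass used by the other
   routines in this file.
*/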

1690: PetscErrorCode DMCreateMatrix_DA_2d_MPIBAIJ(DM da, Mat J)
1691: {
1692:   PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
1693:   PetscInt               m, n, dim, s, *cols, nc, col, cnt, *dnz, *onz;
1694:   PetscInt               istart, iend, jstart, jend, ii, jj;
1695:   MPI_Comm               comm;
1696:   PetscScalar           *values;
1697:   DMBoundaryType         bx, by;
1698:   DMDAStencilType        st;
1699:   ISLocalToGlobalMapping ltog;

1701:   PetscFunctionBegin;
1702:   /*
1703:      nc - number of components per grid point
1704:      col - number of colors needed in one direction for a single-component problem
1705:   */
1706:   PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, NULL, NULL, NULL, &nc, &s, &bx, &by, NULL, &st));
1707:   col = 2 * s + 1;

1709:   PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL));
1710:   PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL));
1711:   PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

1713:   PetscCall(PetscMalloc1(col * col * nc * nc, &cols));

1715:   PetscCall(DMGetLocalToGlobalMapping(da, &ltog));

1717:   /* determine the matrix preallocation information */
1718:   MatPreallocateBegin(comm, nx * ny, nx * ny, dnz, onz);
1719:   for (i = xs; i < xs + nx; i++) {
1720:     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
1721:     iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
1722:     for (j = ys; j < ys + ny; j++) {
1723:       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
1724:       jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
1725:       slot   = i - gxs + gnx * (j - gys);

1727:       /* Find block columns in block row */
1728:       cnt = 0;
1729:       for (ii = istart; ii < iend + 1; ii++) {
1730:         for (jj = jstart; jj < jend + 1; jj++) {
1731:           if (st == DMDA_STENCIL_BOX || !ii || !jj) { /* BOX or on the STAR */
1732:             cols[cnt++] = slot + ii + gnx * jj;
1733:           }
1734:         }
1735:       }
1736:       PetscCall(MatPreallocateSetLocalBlock(ltog, 1, &slot, ltog, cnt, cols, dnz, onz));
1737:     }
1738:   }
1739:   PetscCall(MatSeqBAIJSetPreallocation(J, nc, 0, dnz));
1740:   PetscCall(MatMPIBAIJSetPreallocation(J, nc, 0, dnz, 0, onz));
1741:   MatPreallocateEnd(dnz, onz);

1743:   PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));

1745:   /*
1746:     For each node in the grid we get the neighbors in the local (on-processor) ordering,
1747:     which includes the ghost points; MatSetValuesBlockedLocal() then maps those block
1748:     indices to the global PETSc ordering.
1749:   */
1750:   if (!da->prealloc_only) {
1751:     PetscCall(PetscCalloc1(col * col * nc * nc, &values));
1752:     for (i = xs; i < xs + nx; i++) {
1753:       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
1754:       iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
1755:       for (j = ys; j < ys + ny; j++) {
1756:         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
1757:         jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
1758:         slot   = i - gxs + gnx * (j - gys);
1759:         cnt    = 0;
1760:         for (ii = istart; ii < iend + 1; ii++) {
1761:           for (jj = jstart; jj < jend + 1; jj++) {
1762:             if (st == DMDA_STENCIL_BOX || !ii || !jj) { /* BOX or on the STAR */
1763:               cols[cnt++] = slot + ii + gnx * jj;
1764:             }
1765:           }
1766:         }
1767:         PetscCall(MatSetValuesBlockedLocal(J, 1, &slot, cnt, cols, values, INSERT_VALUES));
1768:       }
1769:     }
1770:     PetscCall(PetscFree(values));
1771:     /* do not copy values to GPU since they are all zero and not yet needed there */
1772:     PetscCall(MatBindToCPU(J, PETSC_TRUE));
1773:     PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
1774:     PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
1775:     PetscCall(MatBindToCPU(J, PETSC_FALSE));
1776:     PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
1777:   }
1778:   PetscCall(PetscFree(cols));
1779:   PetscFunctionReturn(PETSC_SUCCESS);
1780: }
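/*
   A minimal sketch of requesting this blocked path from user code, where da is
   assumed to be a 2d DMDA with dof > 1:

     Mat J;
     PetscCall(DMSetMatType(da, MATBAIJ));
     PetscCall(DMCreateMatrix(da, &J));   // block size equals the DMDA dof
*/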

1782: PetscErrorCode DMCreateMatrix_DA_3d_MPIBAIJ(DM da, Mat J)
1783: {
1784:   PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
1785:   PetscInt               m, n, dim, s, *cols, k, nc, col, cnt, p, *dnz, *onz;
1786:   PetscInt               istart, iend, jstart, jend, kstart, kend, zs, nz, gzs, gnz, ii, jj, kk;
1787:   MPI_Comm               comm;
1788:   PetscScalar           *values;
1789:   DMBoundaryType         bx, by, bz;
1790:   DMDAStencilType        st;
1791:   ISLocalToGlobalMapping ltog;

1793:   PetscFunctionBegin;
1794:   /*
1795:          nc - number of components per grid point
1796:          col - number of colors needed in one direction for a single-component problem
1797:   */
1798:   PetscCall(DMDAGetInfo(da, &dim, &m, &n, &p, NULL, NULL, NULL, &nc, &s, &bx, &by, &bz, &st));
1799:   col = 2 * s + 1;

1801:   PetscCall(DMDAGetCorners(da, &xs, &ys, &zs, &nx, &ny, &nz));
1802:   PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, &gzs, &gnx, &gny, &gnz));
1803:   PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

1805:   PetscCall(PetscMalloc1(col * col * col, &cols));

1807:   PetscCall(DMGetLocalToGlobalMapping(da, &ltog));

1809:   /* determine the matrix preallocation information */
1810:   MatPreallocateBegin(comm, nx * ny * nz, nx * ny * nz, dnz, onz);
1811:   for (i = xs; i < xs + nx; i++) {
1812:     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
1813:     iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
1814:     for (j = ys; j < ys + ny; j++) {
1815:       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
1816:       jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
1817:       for (k = zs; k < zs + nz; k++) {
1818:         kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
1819:         kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));

1821:         slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);

1823:         /* Find block columns in block row */
1824:         cnt = 0;
1825:         for (ii = istart; ii < iend + 1; ii++) {
1826:           for (jj = jstart; jj < jend + 1; jj++) {
1827:             for (kk = kstart; kk < kend + 1; kk++) {
1828:               if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star */
1829:                 cols[cnt++] = slot + ii + gnx * jj + gnx * gny * kk;
1830:               }
1831:             }
1832:           }
1833:         }
1834:         PetscCall(MatPreallocateSetLocalBlock(ltog, 1, &slot, ltog, cnt, cols, dnz, onz));
1835:       }
1836:     }
1837:   }
1838:   PetscCall(MatSeqBAIJSetPreallocation(J, nc, 0, dnz));
1839:   PetscCall(MatMPIBAIJSetPreallocation(J, nc, 0, dnz, 0, onz));
1840:   MatPreallocateEnd(dnz, onz);

1842:   PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));

1844:   /*
1845:     For each node in the grid we get the neighbors in the local (on-processor) ordering,
1846:     which includes the ghost points; MatSetValuesBlockedLocal() then maps those block
1847:     indices to the global PETSc ordering.
1848:   */
1849:   if (!da->prealloc_only) {
1850:     PetscCall(PetscCalloc1(col * col * col * nc * nc, &values));
1851:     for (i = xs; i < xs + nx; i++) {
1852:       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
1853:       iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
1854:       for (j = ys; j < ys + ny; j++) {
1855:         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
1856:         jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
1857:         for (k = zs; k < zs + nz; k++) {
1858:           kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
1859:           kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));

1861:           slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);

1863:           cnt = 0;
1864:           for (ii = istart; ii < iend + 1; ii++) {
1865:             for (jj = jstart; jj < jend + 1; jj++) {
1866:               for (kk = kstart; kk < kend + 1; kk++) {
1867:                 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star */
1868:                   cols[cnt++] = slot + ii + gnx * jj + gnx * gny * kk;
1869:                 }
1870:               }
1871:             }
1872:           }
1873:           PetscCall(MatSetValuesBlockedLocal(J, 1, &slot, cnt, cols, values, INSERT_VALUES));
1874:         }
1875:       }
1876:     }
1877:     PetscCall(PetscFree(values));
1878:     /* do not copy values to GPU since they are all zero and not yet needed there */
1879:     PetscCall(MatBindToCPU(J, PETSC_TRUE));
1880:     PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
1881:     PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
1882:     PetscCall(MatBindToCPU(J, PETSC_FALSE));
1883:     PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
1884:   }
1885:   PetscCall(PetscFree(cols));
1886:   PetscFunctionReturn(PETSC_SUCCESS);
1887: }

1889: /*
1890:   This helper is for SBAIJ preallocation; it discards the lower-triangular entries, which are
1891:   difficult to identify in the local ordering when the domain is periodic.
1892: */
1893: static PetscErrorCode L2GFilterUpperTriangular(ISLocalToGlobalMapping ltog, PetscInt *row, PetscInt *cnt, PetscInt col[])
1894: {
1895:   PetscInt i, n;

1897:   PetscFunctionBegin;
1898:   PetscCall(ISLocalToGlobalMappingApplyBlock(ltog, 1, row, row));
1899:   PetscCall(ISLocalToGlobalMappingApplyBlock(ltog, *cnt, col, col));
1900:   for (i = 0, n = 0; i < *cnt; i++) {
1901:     if (col[i] >= *row) col[n++] = col[i];
1902:   }
1903:   *cnt = n;
1904:   PetscFunctionReturn(PETSC_SUCCESS);
1905: }
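/*
   Example: if the block row maps to global block 5 and the block columns map to
   {3, 5, 7}, the surviving columns are {5, 7} and cnt becomes 2, matching the
   upper-triangular storage used by SBAIJ.
*/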

1907: PetscErrorCode DMCreateMatrix_DA_2d_MPISBAIJ(DM da, Mat J)
1908: {
1909:   PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
1910:   PetscInt               m, n, dim, s, *cols, nc, col, cnt, *dnz, *onz;
1911:   PetscInt               istart, iend, jstart, jend, ii, jj;
1912:   MPI_Comm               comm;
1913:   PetscScalar           *values;
1914:   DMBoundaryType         bx, by;
1915:   DMDAStencilType        st;
1916:   ISLocalToGlobalMapping ltog;

1918:   PetscFunctionBegin;
1919:   /*
1920:      nc - number of components per grid point
1921:      col - number of colors needed in one direction for a single-component problem
1922:   */
1923:   PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, NULL, NULL, NULL, &nc, &s, &bx, &by, NULL, &st));
1924:   col = 2 * s + 1;

1926:   PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL));
1927:   PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL));
1928:   PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

1930:   PetscCall(PetscMalloc1(col * col * nc * nc, &cols));

1932:   PetscCall(DMGetLocalToGlobalMapping(da, &ltog));

1934:   /* determine the matrix preallocation information */
1935:   MatPreallocateBegin(comm, nx * ny, nx * ny, dnz, onz);
1936:   for (i = xs; i < xs + nx; i++) {
1937:     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
1938:     iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
1939:     for (j = ys; j < ys + ny; j++) {
1940:       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
1941:       jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
1942:       slot   = i - gxs + gnx * (j - gys);

1944:       /* Find block columns in block row */
1945:       cnt = 0;
1946:       for (ii = istart; ii < iend + 1; ii++) {
1947:         for (jj = jstart; jj < jend + 1; jj++) {
1948:           if (st == DMDA_STENCIL_BOX || !ii || !jj) cols[cnt++] = slot + ii + gnx * jj;
1949:         }
1950:       }
1951:       PetscCall(L2GFilterUpperTriangular(ltog, &slot, &cnt, cols));
1952:       PetscCall(MatPreallocateSymmetricSetBlock(slot, cnt, cols, dnz, onz));
1953:     }
1954:   }
1955:   PetscCall(MatSeqSBAIJSetPreallocation(J, nc, 0, dnz));
1956:   PetscCall(MatMPISBAIJSetPreallocation(J, nc, 0, dnz, 0, onz));
1957:   MatPreallocateEnd(dnz, onz);

1959:   PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));

1961:   /*
1962:     For each node in the grid we get the neighbors in the local (on-processor) ordering,
1963:     which includes the ghost points; L2GFilterUpperTriangular() maps those block indices
1964:     to the global PETSc ordering (dropping the lower triangle) before MatSetValuesBlocked().
1965:   */
1966:   if (!da->prealloc_only) {
1967:     PetscCall(PetscCalloc1(col * col * nc * nc, &values));
1968:     for (i = xs; i < xs + nx; i++) {
1969:       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
1970:       iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
1971:       for (j = ys; j < ys + ny; j++) {
1972:         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
1973:         jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
1974:         slot   = i - gxs + gnx * (j - gys);

1976:         /* Find block columns in block row */
1977:         cnt = 0;
1978:         for (ii = istart; ii < iend + 1; ii++) {
1979:           for (jj = jstart; jj < jend + 1; jj++) {
1980:             if (st == DMDA_STENCIL_BOX || !ii || !jj) cols[cnt++] = slot + ii + gnx * jj;
1981:           }
1982:         }
1983:         PetscCall(L2GFilterUpperTriangular(ltog, &slot, &cnt, cols));
1984:         PetscCall(MatSetValuesBlocked(J, 1, &slot, cnt, cols, values, INSERT_VALUES));
1985:       }
1986:     }
1987:     PetscCall(PetscFree(values));
1988:     /* do not copy values to GPU since they are all zero and not yet needed there */
1989:     PetscCall(MatBindToCPU(J, PETSC_TRUE));
1990:     PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
1991:     PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
1992:     PetscCall(MatBindToCPU(J, PETSC_FALSE));
1993:     PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
1994:   }
1995:   PetscCall(PetscFree(cols));
1996:   PetscFunctionReturn(PETSC_SUCCESS);
1997: }

1999: PetscErrorCode DMCreateMatrix_DA_3d_MPISBAIJ(DM da, Mat J)
2000: {
2001:   PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
2002:   PetscInt               m, n, dim, s, *cols, k, nc, col, cnt, p, *dnz, *onz;
2003:   PetscInt               istart, iend, jstart, jend, kstart, kend, zs, nz, gzs, gnz, ii, jj, kk;
2004:   MPI_Comm               comm;
2005:   PetscScalar           *values;
2006:   DMBoundaryType         bx, by, bz;
2007:   DMDAStencilType        st;
2008:   ISLocalToGlobalMapping ltog;

2010:   PetscFunctionBegin;
2011:   /*
2012:      nc - number of components per grid point
2013:      col - number of colors needed in one direction for a single-component problem
2014:   */
2015:   PetscCall(DMDAGetInfo(da, &dim, &m, &n, &p, NULL, NULL, NULL, &nc, &s, &bx, &by, &bz, &st));
2016:   col = 2 * s + 1;

2018:   PetscCall(DMDAGetCorners(da, &xs, &ys, &zs, &nx, &ny, &nz));
2019:   PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, &gzs, &gnx, &gny, &gnz));
2020:   PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

2022:   /* work array for the block column indices of one block row */
2023:   PetscCall(PetscMalloc1(col * col * col, &cols));

2025:   PetscCall(DMGetLocalToGlobalMapping(da, &ltog));

2027:   /* determine the matrix preallocation information */
2028:   MatPreallocateBegin(comm, nx * ny * nz, nx * ny * nz, dnz, onz);
2029:   for (i = xs; i < xs + nx; i++) {
2030:     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
2031:     iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
2032:     for (j = ys; j < ys + ny; j++) {
2033:       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
2034:       jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
2035:       for (k = zs; k < zs + nz; k++) {
2036:         kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
2037:         kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));

2039:         slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);

2041:         /* Find block columns in block row */
2042:         cnt = 0;
2043:         for (ii = istart; ii < iend + 1; ii++) {
2044:           for (jj = jstart; jj < jend + 1; jj++) {
2045:             for (kk = kstart; kk < kend + 1; kk++) {
2046:               if ((st == DMDA_STENCIL_BOX) || (!ii && !jj) || (!jj && !kk) || (!ii && !kk)) cols[cnt++] = slot + ii + gnx * jj + gnx * gny * kk;
2047:             }
2048:           }
2049:         }
2050:         PetscCall(L2GFilterUpperTriangular(ltog, &slot, &cnt, cols));
2051:         PetscCall(MatPreallocateSymmetricSetBlock(slot, cnt, cols, dnz, onz));
2052:       }
2053:     }
2054:   }
2055:   PetscCall(MatSeqSBAIJSetPreallocation(J, nc, 0, dnz));
2056:   PetscCall(MatMPISBAIJSetPreallocation(J, nc, 0, dnz, 0, onz));
2057:   MatPreallocateEnd(dnz, onz);

2059:   PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));

2061:   /*
2062:     For each node in the grid we get the neighbors in the local (on-processor) ordering,
2063:     which includes the ghost points; L2GFilterUpperTriangular() maps those block indices
2064:     to the global PETSc ordering (dropping the lower triangle) before MatSetValuesBlocked().
2065:   */
2066:   if (!da->prealloc_only) {
2067:     PetscCall(PetscCalloc1(col * col * col * nc * nc, &values));
2068:     for (i = xs; i < xs + nx; i++) {
2069:       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
2070:       iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
2071:       for (j = ys; j < ys + ny; j++) {
2072:         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
2073:         jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
2074:         for (k = zs; k < zs + nz; k++) {
2075:           kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
2076:           kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));

2078:           slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);

2080:           cnt = 0;
2081:           for (ii = istart; ii < iend + 1; ii++) {
2082:             for (jj = jstart; jj < jend + 1; jj++) {
2083:               for (kk = kstart; kk < kend + 1; kk++) {
2084:                 if ((st == DMDA_STENCIL_BOX) || (!ii && !jj) || (!jj && !kk) || (!ii && !kk)) cols[cnt++] = slot + ii + gnx * jj + gnx * gny * kk;
2085:               }
2086:             }
2087:           }
2088:           PetscCall(L2GFilterUpperTriangular(ltog, &slot, &cnt, cols));
2089:           PetscCall(MatSetValuesBlocked(J, 1, &slot, cnt, cols, values, INSERT_VALUES));
2090:         }
2091:       }
2092:     }
2093:     PetscCall(PetscFree(values));
2094:     /* do not copy values to GPU since they are all zero and not yet needed there */
2095:     PetscCall(MatBindToCPU(J, PETSC_TRUE));
2096:     PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
2097:     PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
2098:     PetscCall(MatBindToCPU(J, PETSC_FALSE));
2099:     PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
2100:   }
2101:   PetscCall(PetscFree(cols));
2102:   PetscFunctionReturn(PETSC_SUCCESS);
2103: }

2105: PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ_Fill(DM da, Mat J)
2106: {
2107:   PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
2108:   PetscInt               m, n, dim, s, *cols, k, nc, row, col, cnt, maxcnt = 0, l, p, *dnz, *onz;
2109:   PetscInt               istart, iend, jstart, jend, kstart, kend, zs, nz, gzs, gnz, ii, jj, kk, M, N, P;
2110:   DM_DA                 *dd = (DM_DA *)da->data;
2111:   PetscInt               ifill_col, *dfill = dd->dfill, *ofill = dd->ofill;
2112:   MPI_Comm               comm;
2113:   PetscScalar           *values;
2114:   DMBoundaryType         bx, by, bz;
2115:   ISLocalToGlobalMapping ltog;
2116:   DMDAStencilType        st;
2117:   PetscBool              removedups = PETSC_FALSE;

2119:   PetscFunctionBegin;
2120:   /*
2121:          nc - number of components per grid point
2122:          col - number of colors needed in one direction for a single-component problem
2123:   */
2124:   PetscCall(DMDAGetInfo(da, &dim, &m, &n, &p, &M, &N, &P, &nc, &s, &bx, &by, &bz, &st));
2125:   col = 2 * s + 1;

2127:   /*
2128:        With one process in a periodic, skinny dimension the code can label the same nonzero column multiple times
2129:        because the stencil "wraps" around the end of the domain and hits an entry already counted from the other direction.
2130:   */
2131:   if (M == 1 && 2 * s >= m) removedups = PETSC_TRUE;
2132:   if (N == 1 && 2 * s >= n) removedups = PETSC_TRUE;
2133:   if (P == 1 && 2 * s >= p) removedups = PETSC_TRUE;

2135:   PetscCall(DMDAGetCorners(da, &xs, &ys, &zs, &nx, &ny, &nz));
2136:   PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, &gzs, &gnx, &gny, &gnz));
2137:   PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

2139:   PetscCall(PetscMalloc1(col * col * col * nc, &cols));
2140:   PetscCall(DMGetLocalToGlobalMapping(da, &ltog));

2142:   /* determine the matrix preallocation information */
2143:   MatPreallocateBegin(comm, nc * nx * ny * nz, nc * nx * ny * nz, dnz, onz);

2145:   PetscCall(MatSetBlockSize(J, nc));
2146:   for (i = xs; i < xs + nx; i++) {
2147:     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
2148:     iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
2149:     for (j = ys; j < ys + ny; j++) {
2150:       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
2151:       jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
2152:       for (k = zs; k < zs + nz; k++) {
2153:         kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
2154:         kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));

2156:         slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);

2158:         for (l = 0; l < nc; l++) {
2159:           cnt = 0;
2160:           for (ii = istart; ii < iend + 1; ii++) {
2161:             for (jj = jstart; jj < jend + 1; jj++) {
2162:               for (kk = kstart; kk < kend + 1; kk++) {
2163:                 if (ii || jj || kk) {
2164:                   if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star */
2165:                     for (ifill_col = ofill[l]; ifill_col < ofill[l + 1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc * (slot + ii + gnx * jj + gnx * gny * kk);
2166:                   }
2167:                 } else {
2168:                   if (dfill) {
2169:                     for (ifill_col = dfill[l]; ifill_col < dfill[l + 1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc * (slot + ii + gnx * jj + gnx * gny * kk);
2170:                   } else {
2171:                     for (ifill_col = 0; ifill_col < nc; ifill_col++) cols[cnt++] = ifill_col + nc * (slot + ii + gnx * jj + gnx * gny * kk);
2172:                   }
2173:                 }
2174:               }
2175:             }
2176:           }
2177:           row    = l + nc * (slot);
2178:           maxcnt = PetscMax(maxcnt, cnt);
2179:           if (removedups) PetscCall(MatPreallocateSetLocalRemoveDups(ltog, 1, &row, ltog, cnt, cols, dnz, onz));
2180:           else PetscCall(MatPreallocateSetLocal(ltog, 1, &row, ltog, cnt, cols, dnz, onz));
2181:         }
2182:       }
2183:     }
2184:   }
2185:   PetscCall(MatSeqAIJSetPreallocation(J, 0, dnz));
2186:   PetscCall(MatMPIAIJSetPreallocation(J, 0, dnz, 0, onz));
2187:   MatPreallocateEnd(dnz, onz);
2188:   PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));

2190:   /*
2191:     For each node in the grid we get the neighbors in the local (on-processor) ordering,
2192:     which includes the ghost points; MatSetValuesLocal() then maps those indices to the
2193:     global PETSc ordering.
2194:   */
2195:   if (!da->prealloc_only) {
2196:     PetscCall(PetscCalloc1(maxcnt, &values));
2197:     for (i = xs; i < xs + nx; i++) {
2198:       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
2199:       iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
2200:       for (j = ys; j < ys + ny; j++) {
2201:         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
2202:         jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
2203:         for (k = zs; k < zs + nz; k++) {
2204:           kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
2205:           kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));

2207:           slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);

2209:           for (l = 0; l < nc; l++) {
2210:             cnt = 0;
2211:             for (ii = istart; ii < iend + 1; ii++) {
2212:               for (jj = jstart; jj < jend + 1; jj++) {
2213:                 for (kk = kstart; kk < kend + 1; kk++) {
2214:                   if (ii || jj || kk) {
2215:                     if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star */
2216:                       for (ifill_col = ofill[l]; ifill_col < ofill[l + 1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc * (slot + ii + gnx * jj + gnx * gny * kk);
2217:                     }
2218:                   } else {
2219:                     if (dfill) {
2220:                       for (ifill_col = dfill[l]; ifill_col < dfill[l + 1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc * (slot + ii + gnx * jj + gnx * gny * kk);
2221:                     } else {
2222:                       for (ifill_col = 0; ifill_col < nc; ifill_col++) cols[cnt++] = ifill_col + nc * (slot + ii + gnx * jj + gnx * gny * kk);
2223:                     }
2224:                   }
2225:                 }
2226:               }
2227:             }
2228:             row = l + nc * (slot);
2229:             PetscCall(MatSetValuesLocal(J, 1, &row, cnt, cols, values, INSERT_VALUES));
2230:           }
2231:         }
2232:       }
2233:     }
2234:     PetscCall(PetscFree(values));
2235:     /* do not copy values to GPU since they are all zero and not yet needed there */
2236:     PetscCall(MatBindToCPU(J, PETSC_TRUE));
2237:     PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
2238:     PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
2239:     PetscCall(MatBindToCPU(J, PETSC_FALSE));
2240:     PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
2241:   }
2242:   PetscCall(PetscFree(cols));
2243:   PetscFunctionReturn(PETSC_SUCCESS);
2244: }
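/*
   The dfill/ofill loops above walk a compressed-row encoding: entries 0..nc hold
   the start of each block row's column list (entry nc is one past the last), and
   the column indices follow.  For example, with nc = 2 the dense pattern
     1 0
     1 1
   is stored as {3, 4, 6, 0, 0, 1}: row 0 has the single column fill[3] = 0 and
   row 1 has the columns fill[4] = 0 and fill[5] = 1.
*/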