Actual source code: fdda.c

#include <petsc/private/dmdaimpl.h>
#include <petscmat.h>
#include <petscbt.h>

extern PetscErrorCode DMCreateColoring_DA_1d_MPIAIJ(DM, ISColoringType, ISColoring *);
extern PetscErrorCode DMCreateColoring_DA_2d_MPIAIJ(DM, ISColoringType, ISColoring *);
extern PetscErrorCode DMCreateColoring_DA_2d_5pt_MPIAIJ(DM, ISColoringType, ISColoring *);
extern PetscErrorCode DMCreateColoring_DA_3d_MPIAIJ(DM, ISColoringType, ISColoring *);

/*
  For a ghost index i that may be negative or greater than the upper bound, this
  maps it into the 0:m-1 range using periodicity
*/
#define SetInRange(i, m) (((i) < 0) ? (m) + (i) : (((i) >= (m)) ? (i) - (m) : (i)))
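/* For example, with m = 10: SetInRange(-1, 10) == 9, SetInRange(10, 10) == 0, and
   SetInRange(3, 10) == 3. This assumes the ghost index lies within one period of
   the 0:m-1 range. */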

static PetscErrorCode DMDASetBlockFills_Private(const PetscInt *dfill, PetscInt w, PetscInt **rfill)
{
  PetscInt i, j, nz, *fill;

  PetscFunctionBegin;
  if (!dfill) PetscFunctionReturn(PETSC_SUCCESS);

  /* count the number of nonzeros */
  nz = 0;
  for (i = 0; i < w; i++) {
    for (j = 0; j < w; j++) {
      if (dfill[w * i + j]) nz++;
    }
  }
  PetscCall(PetscMalloc1(nz + w + 1, &fill));
  /* construct modified CSR storage of the nonzero structure */
  /* fill[0 -- w] marks the start of each row of column indices (and the end of the last row),
     so fill[1] - fill[0] gives the number of nonzeros in the first row, etc. */
  nz = w + 1;
  for (i = 0; i < w; i++) {
    fill[i] = nz;
    for (j = 0; j < w; j++) {
      if (dfill[w * i + j]) {
        fill[nz] = j;
        nz++;
      }
    }
  }
  fill[w] = nz;

  *rfill = fill;
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode DMDASetBlockFillsSparse_Private(const PetscInt *dfillsparse, PetscInt w, PetscInt **rfill)
{
  PetscInt nz;

  PetscFunctionBegin;
  if (!dfillsparse) PetscFunctionReturn(PETSC_SUCCESS);

  /* Determine number of non-zeros */
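  /* (the sparse format stores the total array length nz + w + 1 at index w, see DMDASetBlockFillsSparse()) */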
  nz = (dfillsparse[w] - w - 1);

  /* Allocate space for our copy of the given sparse matrix representation. */
  PetscCall(PetscMalloc1(nz + w + 1, rfill));
  PetscCall(PetscArraycpy(*rfill, dfillsparse, nz + w + 1));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode DMDASetBlockFills_Private2(DM_DA *dd)
{
  PetscInt i, k, cnt = 1;

  PetscFunctionBegin;

  /* ofillcols marks the columns of ofill that contain any nonzero; the value stored
     for each such column is the number of nonzero columns to its left plus 1 */
  PetscCall(PetscCalloc1(dd->w, &dd->ofillcols));
  for (i = 0; i < dd->w; i++) {
    for (k = dd->ofill[i]; k < dd->ofill[i + 1]; k++) dd->ofillcols[dd->ofill[k]] = 1;
  }
  for (i = 0; i < dd->w; i++) {
    if (dd->ofillcols[i]) dd->ofillcols[i] = cnt++;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  DMDASetBlockFills - Sets the fill pattern in each block for a multi-component problem
  of the matrix returned by `DMCreateMatrix()`.

  Logically Collective

  Input Parameters:
+ da    - the distributed array
. dfill - the fill pattern in the diagonal block (may be `NULL`, means use dense block)
- ofill - the fill pattern in the off-diagonal blocks

  Level: developer

  Notes:
  This only makes sense when you are doing multicomponent problems but using the
  `MATMPIAIJ` matrix format

  The format for `dfill` and `ofill` is a 2 dimensional dof by dof matrix with 1 entries
  representing coupling and 0 entries for missing coupling. For example
.vb
  dfill[9] = {1, 0, 0,
              1, 1, 0,
              0, 1, 1}
.ve
  means that row 0 is coupled only with itself in the diagonal block, row 1 is coupled with
  itself and row 0 (in the diagonal block), and row 2 is coupled with itself and row 1 (in the
  diagonal block).

  `DMDASetGetMatrix()` allows you to provide general code for those more complicated nonzero patterns than
  can be represented in the `dfill`, `ofill` format

  Contributed by\:
  Glenn Hammond

.seealso: [](sec_struct), `DM`, `DMDA`, `DMCreateMatrix()`, `DMDASetGetMatrix()`, `DMSetMatrixPreallocateOnly()`, `DMDASetBlockFillsSparse()`
@*/
PetscErrorCode DMDASetBlockFills(DM da, const PetscInt *dfill, const PetscInt *ofill)
{
  DM_DA *dd = (DM_DA *)da->data;

  PetscFunctionBegin;
  /* save the given dfill and ofill information */
  PetscCall(DMDASetBlockFills_Private(dfill, dd->w, &dd->dfill));
  PetscCall(DMDASetBlockFills_Private(ofill, dd->w, &dd->ofill));

  /* count nonzeros in ofill columns */
  PetscCall(DMDASetBlockFills_Private2(dd));
  PetscFunctionReturn(PETSC_SUCCESS);
}
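
/*
   A minimal calling sketch (hypothetical user code, not part of this file): for a
   3-component problem whose off-diagonal blocks couple each component only with
   itself, one might write

     PetscInt dfill[9] = {1, 0, 0,
                          1, 1, 0,
                          0, 1, 1};
     PetscInt ofill[9] = {1, 0, 0,   // hypothetical diagonal-only off-process coupling
                          0, 1, 0,
                          0, 0, 1};
     PetscCall(DMDASetBlockFills(da, dfill, ofill));
*/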

/*@
  DMDASetBlockFillsSparse - Sets the fill pattern in each block for a multi-component problem
  of the matrix returned by `DMCreateMatrix()`, using sparse representations
  of fill patterns.

  Logically Collective

  Input Parameters:
+ da          - the distributed array
. dfillsparse - the sparse fill pattern in the diagonal block (may be `NULL`, means use dense block)
- ofillsparse - the sparse fill pattern in the off-diagonal blocks

  Level: developer

  Notes:
  This only makes sense when you are doing multicomponent problems but using the
  `MATMPIAIJ` matrix format

  The format for `dfillsparse` and `ofillsparse` is a sparse representation of a
  dof-by-dof matrix with 1 entries representing coupling and 0 entries
  for missing coupling. The sparse representation is a 1 dimensional
  array of length nz + dof + 1, where nz is the number of non-zeros in
  the matrix. The first dof entries in the array give the
  starting array indices of each row's items in the rest of the array,
  the dof+1st item contains the value nz + dof + 1 (i.e. the entire length of the array),
  and the remaining nz items give the column indices of each of
  the 1s within the logical 2D matrix. Each row's items within
  the array are the column indices of the 1s within that row
  of the 2D matrix. PETSc developers may recognize that this is the
  same format as that computed by the `DMDASetBlockFills_Private()`
  function from a dense 2D matrix representation.
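
  For example, the dense `dfill` pattern shown in `DMDASetBlockFills()` has dof = 3
  and nz = 5, and would be encoded here as
.vb
  dfillsparse[9] = {4, 5, 7,  9,  0,  0, 1,  1, 2}
.ve
  where the first three entries are the row starts, the fourth is nz + dof + 1 = 9,
  and the remaining entries are the column indices of rows 0, 1, and 2 in order.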

  `DMDASetGetMatrix()` allows you to provide general code for those more complicated nonzero patterns than
  can be represented in the `dfill`, `ofill` format

  Contributed by\:
  Philip C. Roth

.seealso: [](sec_struct), `DM`, `DMDA`, `DMDASetBlockFills()`, `DMCreateMatrix()`, `DMDASetGetMatrix()`, `DMSetMatrixPreallocateOnly()`
@*/
PetscErrorCode DMDASetBlockFillsSparse(DM da, const PetscInt *dfillsparse, const PetscInt *ofillsparse)
{
  DM_DA *dd = (DM_DA *)da->data;

  PetscFunctionBegin;
  /* save the given dfill and ofill information */
  PetscCall(DMDASetBlockFillsSparse_Private(dfillsparse, dd->w, &dd->dfill));
  PetscCall(DMDASetBlockFillsSparse_Private(ofillsparse, dd->w, &dd->ofill));

  /* count nonzeros in ofill columns */
  PetscCall(DMDASetBlockFills_Private2(dd));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode DMCreateColoring_DA(DM da, ISColoringType ctype, ISColoring *coloring)
{
  PetscInt dim, m, n, p, nc;
  DMBoundaryType bx, by, bz;
  MPI_Comm comm;
  PetscMPIInt size;
  PetscBool isBAIJ;
  DM_DA *dd = (DM_DA *)da->data;

  PetscFunctionBegin;
  /*
                                      m
          ------------------------------------------------------
         |                                                      |
         |                                                      |
         |               ----------------------                 |
         |               |                    |                 |
      n  |           yn  |                    |                 |
         |               |                    |                 |
         |               .---------------------                 |
         |             (xs,ys)     xn                           |
         |            .                                         |
         |         (gxs,gys)                                    |
         |                                                      |
          ------------------------------------------------------
  */

  /*
    nc - number of components per grid point
  */
  PetscCall(DMDAGetInfo(da, &dim, NULL, NULL, NULL, &m, &n, &p, &nc, NULL, &bx, &by, &bz, NULL));

  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  if (ctype == IS_COLORING_LOCAL) {
    if (size == 1) {
      ctype = IS_COLORING_GLOBAL;
    } else {
      PetscCheck((dim == 1) || !((m == 1 && bx == DM_BOUNDARY_PERIODIC) || (n == 1 && by == DM_BOUNDARY_PERIODIC) || (p == 1 && bz == DM_BOUNDARY_PERIODIC)), PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "IS_COLORING_LOCAL cannot be used with a periodic boundary condition when both ends of the domain are on the same process");
    }
  }

  /* Tell the DMDA it has 1 degree of freedom per grid point so that the coloring for BAIJ
     matrices is for the blocks, not the individual matrix elements */
  PetscCall(PetscStrbeginswith(da->mattype, MATBAIJ, &isBAIJ));
  if (!isBAIJ) PetscCall(PetscStrbeginswith(da->mattype, MATMPIBAIJ, &isBAIJ));
  if (!isBAIJ) PetscCall(PetscStrbeginswith(da->mattype, MATSEQBAIJ, &isBAIJ));
  if (isBAIJ) {
    dd->w = 1;
    dd->xs = dd->xs / nc;
    dd->xe = dd->xe / nc;
    dd->Xs = dd->Xs / nc;
    dd->Xe = dd->Xe / nc;
  }

  /*
    We do not provide a getcoloring function in the DMDA operations because
    the basic DMDA does not know about matrices. We think of DMDA as being
    more low-level than matrices.
  */
  if (dim == 1) PetscCall(DMCreateColoring_DA_1d_MPIAIJ(da, ctype, coloring));
  else if (dim == 2) PetscCall(DMCreateColoring_DA_2d_MPIAIJ(da, ctype, coloring));
  else if (dim == 3) PetscCall(DMCreateColoring_DA_3d_MPIAIJ(da, ctype, coloring));
  else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "Not implemented for %" PetscInt_FMT " dimensions; send mail to petsc-maint@mcs.anl.gov for code", dim);
  if (isBAIJ) {
    dd->w = nc;
    dd->xs = dd->xs * nc;
    dd->xe = dd->xe * nc;
    dd->Xs = dd->Xs * nc;
    dd->Xe = dd->Xe * nc;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
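
/*
   A minimal usage sketch (hypothetical caller, shown for orientation): this
   function is normally reached through the type-dispatched DMCreateColoring()
   interface, e.g.

     ISColoring coloring;
     PetscCall(DMCreateColoring(da, IS_COLORING_GLOBAL, &coloring));
     ...
     PetscCall(ISColoringDestroy(&coloring));
*/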

PetscErrorCode DMCreateColoring_DA_2d_MPIAIJ(DM da, ISColoringType ctype, ISColoring *coloring)
{
  PetscInt xs, ys, nx, ny, i, j, ii, gxs, gys, gnx, gny, m, n, M, N, dim, s, k, nc, col;
  PetscInt ncolors = 0;
  MPI_Comm comm;
  DMBoundaryType bx, by;
  DMDAStencilType st;
  ISColoringValue *colors;
  DM_DA *dd = (DM_DA *)da->data;

  PetscFunctionBegin;
  /*
    nc  - number of components per grid point
    col - number of colors needed in one direction for a single-component problem
  */
  PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, &M, &N, NULL, &nc, &s, &bx, &by, NULL, &st));
  col = 2 * s + 1;
  PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL));
  PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL));
  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

  /* special case as taught to us by Paul Hovland */
  if (st == DMDA_STENCIL_STAR && s == 1) {
    PetscCall(DMCreateColoring_DA_2d_5pt_MPIAIJ(da, ctype, coloring));
  } else {
    if (ctype == IS_COLORING_GLOBAL) {
      if (!dd->localcoloring) {
        PetscCall(PetscMalloc1(nc * nx * ny, &colors));
        ii = 0;
        for (j = ys; j < ys + ny; j++) {
          for (i = xs; i < xs + nx; i++) {
            for (k = 0; k < nc; k++) colors[ii++] = k + nc * ((i % col) + col * (j % col));
          }
        }
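        /* i.e. ncolors = nc * col^2: one color per component for each of the col*col stencil offsets */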
        ncolors = nc + nc * (col - 1 + col * (col - 1));
        PetscCall(ISColoringCreate(comm, ncolors, nc * nx * ny, colors, PETSC_OWN_POINTER, &dd->localcoloring));
      }
      *coloring = dd->localcoloring;
    } else if (ctype == IS_COLORING_LOCAL) {
      if (!dd->ghostedcoloring) {
        PetscCall(PetscMalloc1(nc * gnx * gny, &colors));
        ii = 0;
        for (j = gys; j < gys + gny; j++) {
          for (i = gxs; i < gxs + gnx; i++) {
            for (k = 0; k < nc; k++) {
              /* the complicated stuff is to handle periodic boundaries */
              colors[ii++] = k + nc * ((SetInRange(i, m) % col) + col * (SetInRange(j, n) % col));
            }
          }
        }
        ncolors = nc + nc * (col - 1 + col * (col - 1));
        PetscCall(ISColoringCreate(comm, ncolors, nc * gnx * gny, colors, PETSC_OWN_POINTER, &dd->ghostedcoloring));
        /* PetscIntView(ncolors,(PetscInt*)colors,0); */

        PetscCall(ISColoringSetType(dd->ghostedcoloring, IS_COLORING_LOCAL));
      }
      *coloring = dd->ghostedcoloring;
    } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_ARG_WRONG, "Unknown ISColoringType %d", (int)ctype);
  }
  PetscCall(ISColoringReference(*coloring));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode DMCreateColoring_DA_3d_MPIAIJ(DM da, ISColoringType ctype, ISColoring *coloring)
{
  PetscInt xs, ys, nx, ny, i, j, gxs, gys, gnx, gny, m, n, p, dim, s, k, nc, col, zs, gzs, ii, l, nz, gnz, M, N, P;
  PetscInt ncolors;
  MPI_Comm comm;
  DMBoundaryType bx, by, bz;
  DMDAStencilType st;
  ISColoringValue *colors;
  DM_DA *dd = (DM_DA *)da->data;

  PetscFunctionBegin;
  /*
    nc  - number of components per grid point
    col - number of colors needed in one direction for a single-component problem
  */
  PetscCall(DMDAGetInfo(da, &dim, &m, &n, &p, &M, &N, &P, &nc, &s, &bx, &by, &bz, &st));
  col = 2 * s + 1;
  PetscCheck(bx != DM_BOUNDARY_PERIODIC || (m % col) == 0, PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "For coloring efficiency ensure the number of grid points in X is divisible by 2*stencil_width + 1");
  PetscCheck(by != DM_BOUNDARY_PERIODIC || (n % col) == 0, PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "For coloring efficiency ensure the number of grid points in Y is divisible by 2*stencil_width + 1");
  PetscCheck(bz != DM_BOUNDARY_PERIODIC || (p % col) == 0, PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "For coloring efficiency ensure the number of grid points in Z is divisible by 2*stencil_width + 1");

  PetscCall(DMDAGetCorners(da, &xs, &ys, &zs, &nx, &ny, &nz));
  PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, &gzs, &gnx, &gny, &gnz));
  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

  /* create the coloring */
  if (ctype == IS_COLORING_GLOBAL) {
    if (!dd->localcoloring) {
      PetscCall(PetscMalloc1(nc * nx * ny * nz, &colors));
      ii = 0;
      for (k = zs; k < zs + nz; k++) {
        for (j = ys; j < ys + ny; j++) {
          for (i = xs; i < xs + nx; i++) {
            for (l = 0; l < nc; l++) colors[ii++] = l + nc * ((i % col) + col * (j % col) + col * col * (k % col));
          }
        }
      }
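      /* i.e. ncolors = nc * col^3 */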
      ncolors = nc + nc * (col - 1 + col * (col - 1) + col * col * (col - 1));
      PetscCall(ISColoringCreate(comm, ncolors, nc * nx * ny * nz, colors, PETSC_OWN_POINTER, &dd->localcoloring));
    }
    *coloring = dd->localcoloring;
  } else if (ctype == IS_COLORING_LOCAL) {
    if (!dd->ghostedcoloring) {
      PetscCall(PetscMalloc1(nc * gnx * gny * gnz, &colors));
      ii = 0;
      for (k = gzs; k < gzs + gnz; k++) {
        for (j = gys; j < gys + gny; j++) {
          for (i = gxs; i < gxs + gnx; i++) {
            for (l = 0; l < nc; l++) {
              /* the complicated stuff is to handle periodic boundaries */
              colors[ii++] = l + nc * ((SetInRange(i, m) % col) + col * (SetInRange(j, n) % col) + col * col * (SetInRange(k, p) % col));
            }
          }
        }
      }
      ncolors = nc + nc * (col - 1 + col * (col - 1) + col * col * (col - 1));
      PetscCall(ISColoringCreate(comm, ncolors, nc * gnx * gny * gnz, colors, PETSC_OWN_POINTER, &dd->ghostedcoloring));
      PetscCall(ISColoringSetType(dd->ghostedcoloring, IS_COLORING_LOCAL));
    }
    *coloring = dd->ghostedcoloring;
  } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_ARG_WRONG, "Unknown ISColoringType %d", (int)ctype);
  PetscCall(ISColoringReference(*coloring));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode DMCreateColoring_DA_1d_MPIAIJ(DM da, ISColoringType ctype, ISColoring *coloring)
{
  PetscInt xs, nx, i, i1, gxs, gnx, l, m, M, dim, s, nc, col;
  PetscInt ncolors;
  MPI_Comm comm;
  DMBoundaryType bx;
  ISColoringValue *colors;
  DM_DA *dd = (DM_DA *)da->data;

  PetscFunctionBegin;
  /*
    nc  - number of components per grid point
    col - number of colors needed in one direction for a single-component problem
  */
  PetscCall(DMDAGetInfo(da, &dim, &m, NULL, NULL, &M, NULL, NULL, &nc, &s, &bx, NULL, NULL, NULL));
  col = 2 * s + 1;
  PetscCall(DMDAGetCorners(da, &xs, NULL, NULL, &nx, NULL, NULL));
  PetscCall(DMDAGetGhostCorners(da, &gxs, NULL, NULL, &gnx, NULL, NULL));
  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

  /* create the coloring */
  if (ctype == IS_COLORING_GLOBAL) {
    if (!dd->localcoloring) {
      PetscCall(PetscMalloc1(nc * nx, &colors));
      if (dd->ofillcols) {
        PetscInt tc = 0;
        for (i = 0; i < nc; i++) tc += (PetscInt)(dd->ofillcols[i] > 0);
        i1 = 0;
        for (i = xs; i < xs + nx; i++) {
          for (l = 0; l < nc; l++) {
            if (dd->ofillcols[l] && (i % col)) {
              colors[i1++] = nc - 1 + tc * ((i % col) - 1) + dd->ofillcols[l];
            } else {
              colors[i1++] = l;
            }
          }
        }
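        /* components with no off-diagonal coupling can reuse their base color at every
           grid point, so only nc + 2*s*tc colors are needed rather than nc*col */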
        ncolors = nc + 2 * s * tc;
      } else {
        i1 = 0;
        for (i = xs; i < xs + nx; i++) {
          for (l = 0; l < nc; l++) colors[i1++] = l + nc * (i % col);
        }
        ncolors = nc + nc * (col - 1);
      }
      PetscCall(ISColoringCreate(comm, ncolors, nc * nx, colors, PETSC_OWN_POINTER, &dd->localcoloring));
    }
    *coloring = dd->localcoloring;
  } else if (ctype == IS_COLORING_LOCAL) {
    if (!dd->ghostedcoloring) {
      PetscCall(PetscMalloc1(nc * gnx, &colors));
      i1 = 0;
      for (i = gxs; i < gxs + gnx; i++) {
        for (l = 0; l < nc; l++) {
          /* the complicated stuff is to handle periodic boundaries */
          colors[i1++] = l + nc * (SetInRange(i, m) % col);
        }
      }
      ncolors = nc + nc * (col - 1);
      PetscCall(ISColoringCreate(comm, ncolors, nc * gnx, colors, PETSC_OWN_POINTER, &dd->ghostedcoloring));
      PetscCall(ISColoringSetType(dd->ghostedcoloring, IS_COLORING_LOCAL));
    }
    *coloring = dd->ghostedcoloring;
  } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_ARG_WRONG, "Unknown ISColoringType %d", (int)ctype);
  PetscCall(ISColoringReference(*coloring));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode DMCreateColoring_DA_2d_5pt_MPIAIJ(DM da, ISColoringType ctype, ISColoring *coloring)
{
  PetscInt xs, ys, nx, ny, i, j, ii, gxs, gys, gnx, gny, m, n, dim, s, k, nc;
  PetscInt ncolors;
  MPI_Comm comm;
  DMBoundaryType bx, by;
  ISColoringValue *colors;
  DM_DA *dd = (DM_DA *)da->data;

  PetscFunctionBegin;
  /*
    nc - number of components per grid point
  */
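  /*
     Why the (3*j + i) % 5 color below works: the five 5-point stencil offsets
     (0,0), (+1,0), (-1,0), (0,+1), (0,-1) shift 3*j + i by 0, +1, -1, +3, -3,
     which are five distinct values mod 5, so the columns appearing in any one
     row of the matrix all receive different colors.
  */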
  PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, NULL, NULL, NULL, &nc, &s, &bx, &by, NULL, NULL));
  PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL));
  PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL));
  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
  /* create the coloring */
  if (ctype == IS_COLORING_GLOBAL) {
    if (!dd->localcoloring) {
      PetscCall(PetscMalloc1(nc * nx * ny, &colors));
      ii = 0;
      for (j = ys; j < ys + ny; j++) {
        for (i = xs; i < xs + nx; i++) {
          for (k = 0; k < nc; k++) colors[ii++] = k + nc * ((3 * j + i) % 5);
        }
      }
      ncolors = 5 * nc;
      PetscCall(ISColoringCreate(comm, ncolors, nc * nx * ny, colors, PETSC_OWN_POINTER, &dd->localcoloring));
    }
    *coloring = dd->localcoloring;
  } else if (ctype == IS_COLORING_LOCAL) {
    if (!dd->ghostedcoloring) {
      PetscCall(PetscMalloc1(nc * gnx * gny, &colors));
      ii = 0;
      for (j = gys; j < gys + gny; j++) {
        for (i = gxs; i < gxs + gnx; i++) {
          for (k = 0; k < nc; k++) colors[ii++] = k + nc * ((3 * SetInRange(j, n) + SetInRange(i, m)) % 5);
        }
      }
      ncolors = 5 * nc;
      PetscCall(ISColoringCreate(comm, ncolors, nc * gnx * gny, colors, PETSC_OWN_POINTER, &dd->ghostedcoloring));
      PetscCall(ISColoringSetType(dd->ghostedcoloring, IS_COLORING_LOCAL));
    }
    *coloring = dd->ghostedcoloring;
  } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_ARG_WRONG, "Unknown ISColoringType %d", (int)ctype);
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* =========================================================================== */
extern PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ(DM, Mat);
extern PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ_Fill(DM, Mat);
extern PetscErrorCode DMCreateMatrix_DA_1d_SeqAIJ_NoPreallocation(DM, Mat);
extern PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ(DM, Mat);
extern PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ_Fill(DM, Mat);
extern PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ(DM, Mat);
extern PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ_Fill(DM, Mat);
extern PetscErrorCode DMCreateMatrix_DA_2d_MPIBAIJ(DM, Mat);
extern PetscErrorCode DMCreateMatrix_DA_3d_MPIBAIJ(DM, Mat);
extern PetscErrorCode DMCreateMatrix_DA_2d_MPISBAIJ(DM, Mat);
extern PetscErrorCode DMCreateMatrix_DA_3d_MPISBAIJ(DM, Mat);
extern PetscErrorCode DMCreateMatrix_DA_2d_MPISELL(DM, Mat);
extern PetscErrorCode DMCreateMatrix_DA_3d_MPISELL(DM, Mat);
extern PetscErrorCode DMCreateMatrix_DA_IS(DM, Mat);

static PetscErrorCode MatView_MPI_DA(Mat A, PetscViewer viewer)
{
  DM da;
  const char *prefix;
  Mat AA, Anatural;
  AO ao;
  PetscInt rstart, rend, *petsc, i;
  IS is;
  MPI_Comm comm;
  PetscViewerFormat format;
  PetscBool flag;

  PetscFunctionBegin;
  /* Check whether we are just printing info, in which case MatView() already viewed everything we wanted to view */
  PetscCall(PetscViewerGetFormat(viewer, &format));
  if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) PetscFunctionReturn(PETSC_SUCCESS);

  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
  PetscCall(MatGetDM(A, &da));
  PetscCheck(da, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Matrix not generated from a DMDA");

  PetscCall(DMDAGetAO(da, &ao));
  PetscCall(MatGetOwnershipRange(A, &rstart, &rend));
  PetscCall(PetscMalloc1(rend - rstart, &petsc));
  for (i = rstart; i < rend; i++) petsc[i - rstart] = i;
  PetscCall(AOApplicationToPetsc(ao, rend - rstart, petsc));
  PetscCall(ISCreateGeneral(comm, rend - rstart, petsc, PETSC_OWN_POINTER, &is));

  /* call viewer on natural ordering */
  PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATMPISELL, &flag));
  if (flag) {
    PetscCall(MatConvert(A, MATAIJ, MAT_INITIAL_MATRIX, &AA));
    A = AA;
  }
  PetscCall(MatCreateSubMatrix(A, is, is, MAT_INITIAL_MATRIX, &Anatural));
  PetscCall(ISDestroy(&is));
  PetscCall(PetscObjectGetOptionsPrefix((PetscObject)A, &prefix));
  PetscCall(PetscObjectSetOptionsPrefix((PetscObject)Anatural, prefix));
  PetscCall(PetscObjectSetName((PetscObject)Anatural, ((PetscObject)A)->name));
  ((PetscObject)Anatural)->donotPetscObjectPrintClassNamePrefixType = PETSC_TRUE;
  PetscCall(MatView(Anatural, viewer));
  ((PetscObject)Anatural)->donotPetscObjectPrintClassNamePrefixType = PETSC_FALSE;
  PetscCall(MatDestroy(&Anatural));
  if (flag) PetscCall(MatDestroy(&AA));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatLoad_MPI_DA(Mat A, PetscViewer viewer)
{
  DM da;
  Mat Anatural, Aapp;
  AO ao;
  PetscInt rstart, rend, *app, i, m, n, M, N;
  IS is;
  MPI_Comm comm;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
  PetscCall(MatGetDM(A, &da));
  PetscCheck(da, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Matrix not generated from a DMDA");

  /* Load the matrix in natural ordering */
  PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &Anatural));
  PetscCall(MatSetType(Anatural, ((PetscObject)A)->type_name));
  PetscCall(MatGetSize(A, &M, &N));
  PetscCall(MatGetLocalSize(A, &m, &n));
  PetscCall(MatSetSizes(Anatural, m, n, M, N));
  PetscCall(MatLoad(Anatural, viewer));

  /* Map natural ordering to application ordering and create IS */
  PetscCall(DMDAGetAO(da, &ao));
  PetscCall(MatGetOwnershipRange(Anatural, &rstart, &rend));
  PetscCall(PetscMalloc1(rend - rstart, &app));
  for (i = rstart; i < rend; i++) app[i - rstart] = i;
  PetscCall(AOPetscToApplication(ao, rend - rstart, app));
  PetscCall(ISCreateGeneral(comm, rend - rstart, app, PETSC_OWN_POINTER, &is));

  /* Do permutation and replace header */
  PetscCall(MatCreateSubMatrix(Anatural, is, is, MAT_INITIAL_MATRIX, &Aapp));
  PetscCall(MatHeaderReplace(A, &Aapp));
  PetscCall(ISDestroy(&is));
  PetscCall(MatDestroy(&Anatural));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode DMCreateMatrix_DA(DM da, Mat *J)
{
  PetscInt dim, dof, nx, ny, nz, dims[3], starts[3], M, N, P;
  Mat A;
  MPI_Comm comm;
  MatType Atype;
  MatType mtype;
  PetscMPIInt size;
  DM_DA *dd = (DM_DA *)da->data;
  void (*aij)(void) = NULL, (*baij)(void) = NULL, (*sbaij)(void) = NULL, (*sell)(void) = NULL, (*is)(void) = NULL;

  PetscFunctionBegin;
  PetscCall(MatInitializePackage());
  mtype = da->mattype;

  /*
                                      m
          ------------------------------------------------------
         |                                                      |
         |                                                      |
         |               ----------------------                 |
         |               |                    |                 |
      n  |           ny  |                    |                 |
         |               |                    |                 |
         |               .---------------------                 |
         |             (xs,ys)     nx                           |
         |            .                                         |
         |         (gxs,gys)                                    |
         |                                                      |
          ------------------------------------------------------
  */

  M = dd->M;
  N = dd->N;
  P = dd->P;
  dim = da->dim;
  dof = dd->w;
  /* PetscCall(DMDAGetInfo(da,&dim,&M,&N,&P,NULL,NULL,NULL,&dof,NULL,NULL,NULL,NULL,NULL)); */
  PetscCall(DMDAGetCorners(da, NULL, NULL, NULL, &nx, &ny, &nz));
  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
  PetscCall(MatCreate(comm, &A));
  PetscCall(MatSetSizes(A, dof * nx * ny * nz, dof * nx * ny * nz, dof * M * N * P, dof * M * N * P));
  PetscCall(MatSetType(A, mtype));
  PetscCall(MatSetFromOptions(A));
  if (dof * nx * ny * nz < da->bind_below) {
    PetscCall(MatSetBindingPropagates(A, PETSC_TRUE));
    PetscCall(MatBindToCPU(A, PETSC_TRUE));
  }
  PetscCall(MatSetDM(A, da));
  if (da->structure_only) PetscCall(MatSetOption(A, MAT_STRUCTURE_ONLY, PETSC_TRUE));
  PetscCall(MatGetType(A, &Atype));
  /*
    We do not provide a getmatrix function in the DMDA operations because
    the basic DMDA does not know about matrices. We think of DMDA as being
    more low-level than matrices. This is kind of cheating, because sometimes
    we think of DMDA as being higher level than matrices.

    We could switch based on Atype (or mtype), but we do not since the
    specialized setting routines depend only on the particular preallocation
    details of the matrix, not the type itself.
  */
  if (!da->prealloc_skip) { // Flag is likely set when user intends to use MatSetPreallocationCOO()
    PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatMPIAIJSetPreallocation_C", &aij));
    if (!aij) PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatSeqAIJSetPreallocation_C", &aij));
    if (!aij) {
      PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatMPIBAIJSetPreallocation_C", &baij));
      if (!baij) PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatSeqBAIJSetPreallocation_C", &baij));
      if (!baij) {
        PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatMPISBAIJSetPreallocation_C", &sbaij));
        if (!sbaij) PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatSeqSBAIJSetPreallocation_C", &sbaij));
        if (!sbaij) {
          PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatMPISELLSetPreallocation_C", &sell));
          if (!sell) PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatSeqSELLSetPreallocation_C", &sell));
        }
        if (!sell) PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatISSetPreallocation_C", &is));
      }
    }
  }
  if (aij) {
    if (dim == 1) {
      if (dd->ofill) {
        PetscCall(DMCreateMatrix_DA_1d_MPIAIJ_Fill(da, A));
      } else {
        DMBoundaryType bx;
        PetscMPIInt size;
        PetscCall(DMDAGetInfo(da, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, &bx, NULL, NULL, NULL));
        PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)da), &size));
        if (size == 1 && bx == DM_BOUNDARY_NONE) {
          PetscCall(DMCreateMatrix_DA_1d_SeqAIJ_NoPreallocation(da, A));
        } else {
          PetscCall(DMCreateMatrix_DA_1d_MPIAIJ(da, A));
        }
      }
    } else if (dim == 2) {
      if (dd->ofill) {
        PetscCall(DMCreateMatrix_DA_2d_MPIAIJ_Fill(da, A));
      } else {
        PetscCall(DMCreateMatrix_DA_2d_MPIAIJ(da, A));
      }
    } else if (dim == 3) {
      if (dd->ofill) {
        PetscCall(DMCreateMatrix_DA_3d_MPIAIJ_Fill(da, A));
      } else {
        PetscCall(DMCreateMatrix_DA_3d_MPIAIJ(da, A));
      }
    }
  } else if (baij) {
    if (dim == 2) {
      PetscCall(DMCreateMatrix_DA_2d_MPIBAIJ(da, A));
    } else if (dim == 3) {
      PetscCall(DMCreateMatrix_DA_3d_MPIBAIJ(da, A));
    } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "Not implemented for matrix type %s in %" PetscInt_FMT " dimensions! Send mail to petsc-maint@mcs.anl.gov for code", Atype, dim);
  } else if (sbaij) {
    if (dim == 2) {
      PetscCall(DMCreateMatrix_DA_2d_MPISBAIJ(da, A));
    } else if (dim == 3) {
      PetscCall(DMCreateMatrix_DA_3d_MPISBAIJ(da, A));
    } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "Not implemented for matrix type %s in %" PetscInt_FMT " dimensions! Send mail to petsc-maint@mcs.anl.gov for code", Atype, dim);
  } else if (sell) {
    if (dim == 2) {
      PetscCall(DMCreateMatrix_DA_2d_MPISELL(da, A));
    } else if (dim == 3) {
      PetscCall(DMCreateMatrix_DA_3d_MPISELL(da, A));
    } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "Not implemented for matrix type %s in %" PetscInt_FMT " dimensions! Send mail to petsc-maint@mcs.anl.gov for code", Atype, dim);
  } else if (is) {
    PetscCall(DMCreateMatrix_DA_IS(da, A));
  } else { // unknown type or da->prealloc_skip, so structural information may be needed, but no preallocation
    ISLocalToGlobalMapping ltog;

    PetscCall(MatSetBlockSize(A, dof));
    PetscCall(MatSetUp(A));
    PetscCall(DMGetLocalToGlobalMapping(da, &ltog));
    PetscCall(MatSetLocalToGlobalMapping(A, ltog, ltog));
  }
  PetscCall(DMDAGetGhostCorners(da, &starts[0], &starts[1], &starts[2], &dims[0], &dims[1], &dims[2]));
  PetscCall(MatSetStencil(A, dim, dims, starts, dof));
  PetscCall(MatSetDM(A, da));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  if (size > 1) {
    /* change viewer to display matrix in natural ordering */
    PetscCall(MatSetOperation(A, MATOP_VIEW, (void (*)(void))MatView_MPI_DA));
    PetscCall(MatSetOperation(A, MATOP_LOAD, (void (*)(void))MatLoad_MPI_DA));
  }
  *J = A;
  PetscFunctionReturn(PETSC_SUCCESS);
}
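
/*
   Typical usage (hypothetical caller, shown for orientation):

     Mat J;
     PetscCall(DMSetMatType(da, MATAIJ));
     PetscCall(DMCreateMatrix(da, &J));
     ...
     PetscCall(MatDestroy(&J));
*/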

PETSC_EXTERN PetscErrorCode MatISSetPreallocation_IS(Mat, PetscInt, const PetscInt[], PetscInt, const PetscInt[]);

PetscErrorCode DMCreateMatrix_DA_IS(DM dm, Mat J)
{
  DM_DA *da = (DM_DA *)dm->data;
  Mat lJ, P;
  ISLocalToGlobalMapping ltog;
  IS is;
  PetscBT bt;
  const PetscInt *e_loc, *idx;
  PetscInt i, nel, nen, nv, dof, *gidx, n, N;

  /* The l2g map of the DMDA has all the ghosted nodes, and e_loc is a subset of all the local nodes (including the ghosted ones).
     We need to filter out the local indices that are not represented through the DMDAGetElements() decomposition. */
  PetscFunctionBegin;
  dof = da->w;
  PetscCall(MatSetBlockSize(J, dof));
  PetscCall(DMGetLocalToGlobalMapping(dm, &ltog));

  /* flag local element indices in the local DMDA numbering */
  PetscCall(ISLocalToGlobalMappingGetSize(ltog, &nv));
  PetscCall(PetscBTCreate(nv / dof, &bt));
  PetscCall(DMDAGetElements(dm, &nel, &nen, &e_loc)); /* this will throw an error if the stencil type is not DMDA_STENCIL_BOX */
  for (i = 0; i < nel * nen; i++) PetscCall(PetscBTSet(bt, e_loc[i]));

  /* filter out (set to -1) the global indices not used by the local elements */
  PetscCall(PetscMalloc1(nv / dof, &gidx));
  PetscCall(ISLocalToGlobalMappingGetBlockIndices(ltog, &idx));
  PetscCall(PetscArraycpy(gidx, idx, nv / dof));
  PetscCall(ISLocalToGlobalMappingRestoreBlockIndices(ltog, &idx));
  for (i = 0; i < nv / dof; i++)
    if (!PetscBTLookup(bt, i)) gidx[i] = -1;
  PetscCall(PetscBTDestroy(&bt));
  PetscCall(ISCreateBlock(PetscObjectComm((PetscObject)dm), dof, nv / dof, gidx, PETSC_OWN_POINTER, &is));
  PetscCall(ISLocalToGlobalMappingCreateIS(is, &ltog));
  PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));
  PetscCall(ISLocalToGlobalMappingDestroy(&ltog));
  PetscCall(ISDestroy(&is));

  /* Preallocation */
  if (dm->prealloc_skip) {
    PetscCall(MatSetUp(J));
  } else {
    PetscCall(MatISGetLocalMat(J, &lJ));
    PetscCall(MatGetLocalToGlobalMapping(lJ, &ltog, NULL));
    PetscCall(MatCreate(PetscObjectComm((PetscObject)lJ), &P));
    PetscCall(MatSetType(P, MATPREALLOCATOR));
    PetscCall(MatSetLocalToGlobalMapping(P, ltog, ltog));
    PetscCall(MatGetSize(lJ, &N, NULL));
    PetscCall(MatGetLocalSize(lJ, &n, NULL));
    PetscCall(MatSetSizes(P, n, n, N, N));
    PetscCall(MatSetBlockSize(P, dof));
    PetscCall(MatSetUp(P));
    for (i = 0; i < nel; i++) PetscCall(MatSetValuesBlockedLocal(P, nen, e_loc + i * nen, nen, e_loc + i * nen, NULL, INSERT_VALUES));
    PetscCall(MatPreallocatorPreallocate(P, (PetscBool)!da->prealloc_only, lJ));
    PetscCall(MatISRestoreLocalMat(J, &lJ));
    PetscCall(DMDARestoreElements(dm, &nel, &nen, &e_loc));
    PetscCall(MatDestroy(&P));

    PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode DMCreateMatrix_DA_2d_MPISELL(DM da, Mat J)
{
  PetscInt xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny, m, n, dim, s, *cols = NULL, k, nc, *rows = NULL, col, cnt, l, p;
  PetscInt lstart, lend, pstart, pend, *dnz, *onz;
  MPI_Comm comm;
  PetscScalar *values;
  DMBoundaryType bx, by;
  ISLocalToGlobalMapping ltog;
  DMDAStencilType st;

  PetscFunctionBegin;
  /*
    nc  - number of components per grid point
    col - number of colors needed in one direction for a single-component problem
  */
  PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, NULL, NULL, NULL, &nc, &s, &bx, &by, NULL, &st));
  col = 2 * s + 1;
  PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL));
  PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL));
  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

  PetscCall(PetscMalloc2(nc, &rows, col * col * nc * nc, &cols));
  PetscCall(DMGetLocalToGlobalMapping(da, &ltog));

  PetscCall(MatSetBlockSize(J, nc));
  /* determine the matrix preallocation information */
  MatPreallocateBegin(comm, nc * nx * ny, nc * nx * ny, dnz, onz);
  for (i = xs; i < xs + nx; i++) {
    pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
    pend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));

    for (j = ys; j < ys + ny; j++) {
      slot = i - gxs + gnx * (j - gys);

      lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
      lend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));

      cnt = 0;
      for (k = 0; k < nc; k++) {
        for (l = lstart; l < lend + 1; l++) {
          for (p = pstart; p < pend + 1; p++) {
            if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star have either l = 0 or p = 0 */
              cols[cnt++] = k + nc * (slot + gnx * l + p);
            }
          }
        }
        rows[k] = k + nc * (slot);
      }
      PetscCall(MatPreallocateSetLocal(ltog, nc, rows, ltog, cnt, cols, dnz, onz));
    }
  }
  PetscCall(MatSetBlockSize(J, nc));
  PetscCall(MatSeqSELLSetPreallocation(J, 0, dnz));
  PetscCall(MatMPISELLSetPreallocation(J, 0, dnz, 0, onz));
  MatPreallocateEnd(dnz, onz);

  PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));

  /*
    For each node in the grid: we get the neighbors in the local (on-process) ordering
    that includes the ghost points; MatSetValuesLocal() then maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    PetscCall(PetscCalloc1(col * col * nc * nc, &values));
    for (i = xs; i < xs + nx; i++) {
      pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
      pend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));

      for (j = ys; j < ys + ny; j++) {
        slot = i - gxs + gnx * (j - gys);

        lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
        lend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));

        cnt = 0;
        for (k = 0; k < nc; k++) {
          for (l = lstart; l < lend + 1; l++) {
            for (p = pstart; p < pend + 1; p++) {
              if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star have either l = 0 or p = 0 */
                cols[cnt++] = k + nc * (slot + gnx * l + p);
              }
            }
          }
          rows[k] = k + nc * (slot);
        }
        PetscCall(MatSetValuesLocal(J, nc, rows, cnt, cols, values, INSERT_VALUES));
      }
    }
    PetscCall(PetscFree(values));
    /* do not copy values to the GPU since they are all zero and not yet needed there */
    PetscCall(MatBindToCPU(J, PETSC_TRUE));
    PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatBindToCPU(J, PETSC_FALSE));
    PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  }
  PetscCall(PetscFree2(rows, cols));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode DMCreateMatrix_DA_3d_MPISELL(DM da, Mat J)
{
  PetscInt xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
  PetscInt m, n, dim, s, *cols = NULL, k, nc, *rows = NULL, col, cnt, l, p, *dnz = NULL, *onz = NULL;
  PetscInt istart, iend, jstart, jend, kstart, kend, zs, nz, gzs, gnz, ii, jj, kk, M, N, P;
  MPI_Comm comm;
  PetscScalar *values;
  DMBoundaryType bx, by, bz;
  ISLocalToGlobalMapping ltog;
  DMDAStencilType st;

  PetscFunctionBegin;
  /*
    nc  - number of components per grid point
    col - number of colors needed in one direction for a single-component problem
  */
  PetscCall(DMDAGetInfo(da, &dim, &m, &n, &p, &M, &N, &P, &nc, &s, &bx, &by, &bz, &st));
  col = 2 * s + 1;
  PetscCall(DMDAGetCorners(da, &xs, &ys, &zs, &nx, &ny, &nz));
  PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, &gzs, &gnx, &gny, &gnz));
  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

  PetscCall(PetscMalloc2(nc, &rows, col * col * col * nc * nc, &cols));
  PetscCall(DMGetLocalToGlobalMapping(da, &ltog));

  PetscCall(MatSetBlockSize(J, nc));
  /* determine the matrix preallocation information */
  MatPreallocateBegin(comm, nc * nx * ny * nz, nc * nx * ny * nz, dnz, onz);
  for (i = xs; i < xs + nx; i++) {
    istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
    iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
    for (j = ys; j < ys + ny; j++) {
      jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
      jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
      for (k = zs; k < zs + nz; k++) {
        kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
        kend = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));

        slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);

        cnt = 0;
        for (l = 0; l < nc; l++) {
          for (ii = istart; ii < iend + 1; ii++) {
            for (jj = jstart; jj < jend + 1; jj++) {
              for (kk = kstart; kk < kend + 1; kk++) {
                if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star */
                  cols[cnt++] = l + nc * (slot + ii + gnx * jj + gnx * gny * kk);
                }
              }
            }
          }
          rows[l] = l + nc * (slot);
        }
        PetscCall(MatPreallocateSetLocal(ltog, nc, rows, ltog, cnt, cols, dnz, onz));
      }
    }
  }
  PetscCall(MatSetBlockSize(J, nc));
  PetscCall(MatSeqSELLSetPreallocation(J, 0, dnz));
  PetscCall(MatMPISELLSetPreallocation(J, 0, dnz, 0, onz));
  MatPreallocateEnd(dnz, onz);
  PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));

  /*
    For each node in the grid: we get the neighbors in the local (on-process) ordering
    that includes the ghost points; MatSetValuesLocal() then maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    PetscCall(PetscCalloc1(col * col * col * nc * nc * nc, &values));
    for (i = xs; i < xs + nx; i++) {
      istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
      iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
      for (j = ys; j < ys + ny; j++) {
        jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
        jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
        for (k = zs; k < zs + nz; k++) {
          kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
          kend = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));

          slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);

          cnt = 0;
          for (l = 0; l < nc; l++) {
            for (ii = istart; ii < iend + 1; ii++) {
              for (jj = jstart; jj < jend + 1; jj++) {
                for (kk = kstart; kk < kend + 1; kk++) {
                  if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star */
                    cols[cnt++] = l + nc * (slot + ii + gnx * jj + gnx * gny * kk);
                  }
                }
              }
            }
            rows[l] = l + nc * (slot);
          }
          PetscCall(MatSetValuesLocal(J, nc, rows, cnt, cols, values, INSERT_VALUES));
        }
      }
    }
    PetscCall(PetscFree(values));
    /* do not copy values to the GPU since they are all zero and not yet needed there */
    PetscCall(MatBindToCPU(J, PETSC_TRUE));
    PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatBindToCPU(J, PETSC_FALSE));
    PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  }
  PetscCall(PetscFree2(rows, cols));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ(DM da, Mat J)
{
  PetscInt xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny, m, n, dim, s, *cols = NULL, k, nc, *rows = NULL, col, cnt, l, p, M, N;
  PetscInt lstart, lend, pstart, pend, *dnz, *onz;
  MPI_Comm comm;
  DMBoundaryType bx, by;
  ISLocalToGlobalMapping ltog, mltog;
  DMDAStencilType st;
  PetscBool removedups = PETSC_FALSE, alreadyboundtocpu = PETSC_TRUE;

  PetscFunctionBegin;
  /*
    nc  - number of components per grid point
    col - number of colors needed in one direction for a single-component problem
  */
  PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, &M, &N, NULL, &nc, &s, &bx, &by, NULL, &st));
  if (bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_TRUE));
  col = 2 * s + 1;
  /*
    With one process and a periodic domain in a skinny dimension the code will label nonzero columns multiple times
    because "wrapping" around the end of the domain hits entries already counted in the other direction.
  */
  if (M == 1 && 2 * s >= m) removedups = PETSC_TRUE;
  if (N == 1 && 2 * s >= n) removedups = PETSC_TRUE;
  PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL));
  PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL));
  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

  PetscCall(PetscMalloc2(nc, &rows, col * col * nc * nc, &cols));
  PetscCall(DMGetLocalToGlobalMapping(da, &ltog));

  PetscCall(MatSetBlockSize(J, nc));
  /* determine the matrix preallocation information */
  MatPreallocateBegin(comm, nc * nx * ny, nc * nx * ny, dnz, onz);
  for (i = xs; i < xs + nx; i++) {
    pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
    pend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));

    for (j = ys; j < ys + ny; j++) {
      slot = i - gxs + gnx * (j - gys);

      lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
      lend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));

      cnt = 0;
      for (k = 0; k < nc; k++) {
        for (l = lstart; l < lend + 1; l++) {
          for (p = pstart; p < pend + 1; p++) {
            if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star have either l = 0 or p = 0 */
              cols[cnt++] = k + nc * (slot + gnx * l + p);
            }
          }
        }
        rows[k] = k + nc * (slot);
      }
      if (removedups) PetscCall(MatPreallocateSetLocalRemoveDups(ltog, nc, rows, ltog, cnt, cols, dnz, onz));
      else PetscCall(MatPreallocateSetLocal(ltog, nc, rows, ltog, cnt, cols, dnz, onz));
    }
  }
  PetscCall(MatSetBlockSize(J, nc));
  PetscCall(MatSeqAIJSetPreallocation(J, 0, dnz));
  PetscCall(MatMPIAIJSetPreallocation(J, 0, dnz, 0, onz));
  MatPreallocateEnd(dnz, onz);
  PetscCall(MatGetLocalToGlobalMapping(J, &mltog, NULL));
  if (!mltog) PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));

  /*
    For each node in the grid: we get the neighbors in the local (on-process) ordering
    that includes the ghost points; MatSetValuesLocal() then maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    for (i = xs; i < xs + nx; i++) {
      pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
      pend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));

      for (j = ys; j < ys + ny; j++) {
        slot = i - gxs + gnx * (j - gys);

        lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
        lend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));

        cnt = 0;
        for (l = lstart; l < lend + 1; l++) {
          for (p = pstart; p < pend + 1; p++) {
            if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star have either l = 0 or p = 0 */
              cols[cnt++] = nc * (slot + gnx * l + p);
              for (k = 1; k < nc; k++) {
                cols[cnt] = 1 + cols[cnt - 1];
                cnt++;
              }
            }
          }
        }
        for (k = 0; k < nc; k++) rows[k] = k + nc * (slot);
        PetscCall(MatSetValuesLocal(J, nc, rows, cnt, cols, NULL, INSERT_VALUES));
      }
    }
    /* do not copy values to the GPU since they are all zero and not yet needed there */
    PetscCall(MatBoundToCPU(J, &alreadyboundtocpu));
    PetscCall(MatBindToCPU(J, PETSC_TRUE));
    PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
    if (!alreadyboundtocpu) PetscCall(MatBindToCPU(J, PETSC_FALSE));
    PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
    if (bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_FALSE));
  }
  PetscCall(PetscFree2(rows, cols));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ_Fill(DM da, Mat J)
{
  PetscInt xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
  PetscInt m, n, dim, s, *cols, k, nc, row, col, cnt, maxcnt = 0, l, p, M, N;
  PetscInt lstart, lend, pstart, pend, *dnz, *onz;
  DM_DA *dd = (DM_DA *)da->data;
  PetscInt ifill_col, *ofill = dd->ofill, *dfill = dd->dfill;
  MPI_Comm comm;
  DMBoundaryType bx, by;
  ISLocalToGlobalMapping ltog;
  DMDAStencilType st;
  PetscBool removedups = PETSC_FALSE;

  PetscFunctionBegin;
  /*
    nc  - number of components per grid point
    col - number of colors needed in one direction for a single-component problem
  */
  PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, &M, &N, NULL, &nc, &s, &bx, &by, NULL, &st));
  col = 2 * s + 1;
  /*
    With one process and a periodic domain in a skinny dimension the code will label nonzero columns multiple times
    because "wrapping" around the end of the domain hits entries already counted in the other direction.
  */
  if (M == 1 && 2 * s >= m) removedups = PETSC_TRUE;
  if (N == 1 && 2 * s >= n) removedups = PETSC_TRUE;
  PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL));
  PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL));
  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

  PetscCall(PetscMalloc1(col * col * nc, &cols));
  PetscCall(DMGetLocalToGlobalMapping(da, &ltog));

  PetscCall(MatSetBlockSize(J, nc));
  /* determine the matrix preallocation information */
  MatPreallocateBegin(comm, nc * nx * ny, nc * nx * ny, dnz, onz);
  for (i = xs; i < xs + nx; i++) {
    pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
    pend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));

    for (j = ys; j < ys + ny; j++) {
      slot = i - gxs + gnx * (j - gys);

      lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
      lend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));

      for (k = 0; k < nc; k++) {
        cnt = 0;
        for (l = lstart; l < lend + 1; l++) {
          for (p = pstart; p < pend + 1; p++) {
            if (l || p) {
              if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star */
                for (ifill_col = ofill[k]; ifill_col < ofill[k + 1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc * (slot + gnx * l + p);
              }
            } else {
              if (dfill) {
                for (ifill_col = dfill[k]; ifill_col < dfill[k + 1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc * (slot + gnx * l + p);
              } else {
                for (ifill_col = 0; ifill_col < nc; ifill_col++) cols[cnt++] = ifill_col + nc * (slot + gnx * l + p);
              }
            }
          }
        }
        row = k + nc * (slot);
        maxcnt = PetscMax(maxcnt, cnt);
        if (removedups) PetscCall(MatPreallocateSetLocalRemoveDups(ltog, 1, &row, ltog, cnt, cols, dnz, onz));
        else PetscCall(MatPreallocateSetLocal(ltog, 1, &row, ltog, cnt, cols, dnz, onz));
      }
    }
  }
  PetscCall(MatSeqAIJSetPreallocation(J, 0, dnz));
  PetscCall(MatMPIAIJSetPreallocation(J, 0, dnz, 0, onz));
  MatPreallocateEnd(dnz, onz);
  PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));

  /*
    For each node in the grid: we get the neighbors in the local (on-process) ordering
    that includes the ghost points; MatSetValuesLocal() then maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    for (i = xs; i < xs + nx; i++) {
      pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
      pend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));

      for (j = ys; j < ys + ny; j++) {
        slot = i - gxs + gnx * (j - gys);

        lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
        lend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));

        for (k = 0; k < nc; k++) {
          cnt = 0;
          for (l = lstart; l < lend + 1; l++) {
            for (p = pstart; p < pend + 1; p++) {
              if (l || p) {
                if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star */
                  for (ifill_col = ofill[k]; ifill_col < ofill[k + 1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc * (slot + gnx * l + p);
                }
              } else {
                if (dfill) {
                  for (ifill_col = dfill[k]; ifill_col < dfill[k + 1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc * (slot + gnx * l + p);
                } else {
                  for (ifill_col = 0; ifill_col < nc; ifill_col++) cols[cnt++] = ifill_col + nc * (slot + gnx * l + p);
                }
              }
            }
          }
          row = k + nc * (slot);
          PetscCall(MatSetValuesLocal(J, 1, &row, cnt, cols, NULL, INSERT_VALUES));
        }
      }
    }
    /* do not copy values to the GPU since they are all zero and not yet needed there */
    PetscCall(MatBindToCPU(J, PETSC_TRUE));
    PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatBindToCPU(J, PETSC_FALSE));
    PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  }
  PetscCall(PetscFree(cols));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1278: PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ(DM da, Mat J)
1279: {
1280: PetscInt xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
1281: PetscInt m, n, dim, s, *cols = NULL, k, nc, *rows = NULL, col, cnt, l, p, *dnz = NULL, *onz = NULL;
1282: PetscInt istart, iend, jstart, jend, kstart, kend, zs, nz, gzs, gnz, ii, jj, kk, M, N, P;
1283: MPI_Comm comm;
1284: DMBoundaryType bx, by, bz;
1285: ISLocalToGlobalMapping ltog, mltog;
1286: DMDAStencilType st;
1287: PetscBool removedups = PETSC_FALSE;
1289: PetscFunctionBegin;
1290: /*
1291: nc - number of components per grid point
1292: col - number of colors needed in one direction for single component problem
1293: */
1294: PetscCall(DMDAGetInfo(da, &dim, &m, &n, &p, &M, &N, &P, &nc, &s, &bx, &by, &bz, &st));
1295: if (bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE && bz == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_TRUE));
1296: col = 2 * s + 1;
1298: /*
1299: With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times
1300: because of "wrapping" around the end of the domain hitting an entry already counted in the other direction.
1301: */
1302: if (M == 1 && 2 * s >= m) removedups = PETSC_TRUE;
1303: if (N == 1 && 2 * s >= n) removedups = PETSC_TRUE;
1304: if (P == 1 && 2 * s >= p) removedups = PETSC_TRUE;
1306: PetscCall(DMDAGetCorners(da, &xs, &ys, &zs, &nx, &ny, &nz));
1307: PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, &gzs, &gnx, &gny, &gnz));
1308: PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
1310: PetscCall(PetscMalloc2(nc, &rows, col * col * col * nc * nc, &cols));
1311: PetscCall(DMGetLocalToGlobalMapping(da, <og));
1313: PetscCall(MatSetBlockSize(J, nc));
1314: /* determine the matrix preallocation information */
1315: MatPreallocateBegin(comm, nc * nx * ny * nz, nc * nx * ny * nz, dnz, onz);
1316: for (i = xs; i < xs + nx; i++) {
1317: istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
1318: iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
1319: for (j = ys; j < ys + ny; j++) {
1320: jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
1321: jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
1322: for (k = zs; k < zs + nz; k++) {
1323: kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
1324: kend = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));
1326: slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);
1328: cnt = 0;
1329: for (l = 0; l < nc; l++) {
1330: for (ii = istart; ii < iend + 1; ii++) {
1331: for (jj = jstart; jj < jend + 1; jj++) {
1332: for (kk = kstart; kk < kend + 1; kk++) {
1333: if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/
1334: cols[cnt++] = l + nc * (slot + ii + gnx * jj + gnx * gny * kk);
1335: }
1336: }
1337: }
1338: }
1339: rows[l] = l + nc * (slot);
1340: }
1341: if (removedups) PetscCall(MatPreallocateSetLocalRemoveDups(ltog, nc, rows, ltog, cnt, cols, dnz, onz));
1342: else PetscCall(MatPreallocateSetLocal(ltog, nc, rows, ltog, cnt, cols, dnz, onz));
1343: }
1344: }
1345: }
1346: PetscCall(MatSetBlockSize(J, nc));
1347: PetscCall(MatSeqAIJSetPreallocation(J, 0, dnz));
1348: PetscCall(MatMPIAIJSetPreallocation(J, 0, dnz, 0, onz));
1349: MatPreallocateEnd(dnz, onz);
1350: PetscCall(MatGetLocalToGlobalMapping(J, &mltog, NULL));
1351: if (!mltog) PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));
1353: /*
1354: For each node in the grid: we get the neighbors in the local (on processor ordering
1355: that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1356: PETSc ordering.
1357: */
1358: if (!da->prealloc_only) {
1359: for (i = xs; i < xs + nx; i++) {
1360: istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
1361: iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
1362: for (j = ys; j < ys + ny; j++) {
1363: jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
1364: jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
1365: for (k = zs; k < zs + nz; k++) {
1366: kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
1367: kend = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));
1369: slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);
1371: cnt = 0;
1372: for (kk = kstart; kk < kend + 1; kk++) {
1373: for (jj = jstart; jj < jend + 1; jj++) {
1374: for (ii = istart; ii < iend + 1; ii++) {
1375: if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star */
1376: cols[cnt++] = nc * (slot + ii + gnx * jj + gnx * gny * kk);
1377: for (l = 1; l < nc; l++) {
1378: cols[cnt] = 1 + cols[cnt - 1];
1379: cnt++;
1380: }
1381: }
1382: }
1383: }
1384: }
1385: rows[0] = nc * (slot);
1386: for (l = 1; l < nc; l++) rows[l] = 1 + rows[l - 1];
1387: PetscCall(MatSetValuesLocal(J, nc, rows, cnt, cols, NULL, INSERT_VALUES));
1388: }
1389: }
1390: }
1391: /* do not copy values to GPU since they are all zero and not yet needed there */
1392: PetscCall(MatBindToCPU(J, PETSC_TRUE));
1393: PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
1394: PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
1395: if (bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE && bz == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_FALSE));
1396: PetscCall(MatBindToCPU(J, PETSC_FALSE));
1397: PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
1398: }
1399: PetscCall(PetscFree2(rows, cols));
1400: PetscFunctionReturn(PETSC_SUCCESS);
1401: }
1403: PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ_Fill(DM da, Mat J)
1404: {
1405: DM_DA *dd = (DM_DA *)da->data;
1406: PetscInt xs, nx, i, j, gxs, gnx, row, k, l;
1407: PetscInt m, dim, s, *cols = NULL, nc, cnt, maxcnt = 0, *ocols;
1408: PetscInt *ofill = dd->ofill, *dfill = dd->dfill;
1409: DMBoundaryType bx;
1410: ISLocalToGlobalMapping ltog;
1411: PetscMPIInt rank, size;
1413: PetscFunctionBegin;
1414: PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)da), &rank));
1415: PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)da), &size));
1417: /*
1418: nc - number of components per grid point
1419: */
1420: PetscCall(DMDAGetInfo(da, &dim, &m, NULL, NULL, NULL, NULL, NULL, &nc, &s, &bx, NULL, NULL, NULL));
1421: PetscCheck(s <= 1, PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "Matrix creation for 1d not implemented correctly for stencil width larger than 1");
1422: PetscCall(DMDAGetCorners(da, &xs, NULL, NULL, &nx, NULL, NULL));
1423: PetscCall(DMDAGetGhostCorners(da, &gxs, NULL, NULL, &gnx, NULL, NULL));
1425: PetscCall(MatSetBlockSize(J, nc));
1426: PetscCall(PetscCalloc2(nx * nc, &cols, nx * nc, &ocols));
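/* cols[] accumulates, for each local row, the nonzero count in the diagonal
   (on-process) block; ocols[] the count in the off-diagonal (off-process) block */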
1428: /*
1429: note: the counts could be smaller for the first and last rank when the boundary is
1430: not periodic; the counting below assumes dfill is non-NULL
1431: */
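/*
A reminder of the modified-CSR convention used below, on a small example: for nc = 2
with only component 0 coupled off-process to component 1, ofill = {3, 4, 4, 1}; thus
ofill[j+1] - ofill[j] is the number of off-process couplings of component j (1 for
j = 0, 0 for j = 1), and ofill[k], ofill[j] <= k < ofill[j+1], lists the coupled components.
*/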
1432: cnt = 0;
1433: /* coupling with process to the left */
1434: for (i = 0; i < s; i++) {
1435: for (j = 0; j < nc; j++) {
1436: ocols[cnt] = ((rank == 0) ? 0 : (s - i) * (ofill[j + 1] - ofill[j]));
1437: cols[cnt] = dfill[j + 1] - dfill[j] + (s + i) * (ofill[j + 1] - ofill[j]);
1438: if (rank == 0 && (dd->bx == DM_BOUNDARY_PERIODIC)) {
1439: if (size > 1) ocols[cnt] += (s - i) * (ofill[j + 1] - ofill[j]);
1440: else cols[cnt] += (s - i) * (ofill[j + 1] - ofill[j]);
1441: }
1442: maxcnt = PetscMax(maxcnt, ocols[cnt] + cols[cnt]);
1443: cnt++;
1444: }
1445: }
1446: for (i = s; i < nx - s; i++) {
1447: for (j = 0; j < nc; j++) {
1448: cols[cnt] = dfill[j + 1] - dfill[j] + 2 * s * (ofill[j + 1] - ofill[j]);
1449: maxcnt = PetscMax(maxcnt, ocols[cnt] + cols[cnt]);
1450: cnt++;
1451: }
1452: }
1453: /* coupling with process to the right */
1454: for (i = nx - s; i < nx; i++) {
1455: for (j = 0; j < nc; j++) {
1456: ocols[cnt] = ((rank == (size - 1)) ? 0 : (i - nx + s + 1) * (ofill[j + 1] - ofill[j]));
1457: cols[cnt] = dfill[j + 1] - dfill[j] + (s + nx - i - 1) * (ofill[j + 1] - ofill[j]);
1458: if ((rank == size - 1) && (dd->bx == DM_BOUNDARY_PERIODIC)) {
1459: if (size > 1) ocols[cnt] += (i - nx + s + 1) * (ofill[j + 1] - ofill[j]);
1460: else cols[cnt] += (i - nx + s + 1) * (ofill[j + 1] - ofill[j]);
1461: }
1462: maxcnt = PetscMax(maxcnt, ocols[cnt] + cols[cnt]);
1463: cnt++;
1464: }
1465: }
1467: PetscCall(MatSeqAIJSetPreallocation(J, 0, cols));
1468: PetscCall(MatMPIAIJSetPreallocation(J, 0, cols, 0, ocols));
1469: PetscCall(PetscFree2(cols, ocols));
1471: PetscCall(DMGetLocalToGlobalMapping(da, <og));
1472: PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));
1474: /*
1475: For each node in the grid we compute its global rows and the global columns they
1476: couple to, and insert the structure directly with MatSetValues() in the global
1477: PETSc ordering.
1478: */
1479: if (!da->prealloc_only) {
1480: PetscCall(PetscMalloc1(maxcnt, &cols));
1481: row = xs * nc;
1482: /* coupling with process to the left */
1483: for (i = xs; i < xs + s; i++) {
1484: for (j = 0; j < nc; j++) {
1485: cnt = 0;
1486: if (rank) {
1487: for (l = 0; l < s; l++) {
1488: for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i - s + l) * nc + ofill[k];
1489: }
1490: }
1491: if (rank == 0 && (dd->bx == DM_BOUNDARY_PERIODIC)) {
1492: for (l = 0; l < s; l++) {
1493: for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (m + i - s - l) * nc + ofill[k];
1494: }
1495: }
1496: if (dfill) {
1497: for (k = dfill[j]; k < dfill[j + 1]; k++) cols[cnt++] = i * nc + dfill[k];
1498: } else {
1499: for (k = 0; k < nc; k++) cols[cnt++] = i * nc + k;
1500: }
1501: for (l = 0; l < s; l++) {
1502: for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i + s - l) * nc + ofill[k];
1503: }
1504: PetscCall(MatSetValues(J, 1, &row, cnt, cols, NULL, INSERT_VALUES));
1505: row++;
1506: }
1507: }
1508: for (i = xs + s; i < xs + nx - s; i++) {
1509: for (j = 0; j < nc; j++) {
1510: cnt = 0;
1511: for (l = 0; l < s; l++) {
1512: for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i - s + l) * nc + ofill[k];
1513: }
1514: if (dfill) {
1515: for (k = dfill[j]; k < dfill[j + 1]; k++) cols[cnt++] = i * nc + dfill[k];
1516: } else {
1517: for (k = 0; k < nc; k++) cols[cnt++] = i * nc + k;
1518: }
1519: for (l = 0; l < s; l++) {
1520: for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i + s - l) * nc + ofill[k];
1521: }
1522: PetscCall(MatSetValues(J, 1, &row, cnt, cols, NULL, INSERT_VALUES));
1523: row++;
1524: }
1525: }
1526: /* coupling with process to the right */
1527: for (i = xs + nx - s; i < xs + nx; i++) {
1528: for (j = 0; j < nc; j++) {
1529: cnt = 0;
1530: for (l = 0; l < s; l++) {
1531: for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i - s + l) * nc + ofill[k];
1532: }
1533: if (dfill) {
1534: for (k = dfill[j]; k < dfill[j + 1]; k++) cols[cnt++] = i * nc + dfill[k];
1535: } else {
1536: for (k = 0; k < nc; k++) cols[cnt++] = i * nc + k;
1537: }
1538: if (rank < size - 1) {
1539: for (l = 0; l < s; l++) {
1540: for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i + s - l) * nc + ofill[k];
1541: }
1542: }
1543: if ((rank == size - 1) && (dd->bx == DM_BOUNDARY_PERIODIC)) {
1544: for (l = 0; l < s; l++) {
1545: for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i - s - l - m + 2) * nc + ofill[k];
1546: }
1547: }
1548: PetscCall(MatSetValues(J, 1, &row, cnt, cols, NULL, INSERT_VALUES));
1549: row++;
1550: }
1551: }
1552: PetscCall(PetscFree(cols));
1553: /* do not copy values to GPU since they are all zero and not yet needed there */
1554: PetscCall(MatBindToCPU(J, PETSC_TRUE));
1555: PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
1556: PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
1557: PetscCall(MatBindToCPU(J, PETSC_FALSE));
1558: PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
1559: }
1560: PetscFunctionReturn(PETSC_SUCCESS);
1561: }
1563: PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ(DM da, Mat J)
1564: {
1565: PetscInt xs, nx, i, i1, slot, gxs, gnx;
1566: PetscInt m, dim, s, *cols = NULL, nc, *rows = NULL, col, cnt, l;
1567: PetscInt istart, iend;
1568: DMBoundaryType bx;
1569: ISLocalToGlobalMapping ltog, mltog;
1571: PetscFunctionBegin;
1572: /*
1573: nc - number of components per grid point
1574: col - number of colors needed in one direction for single component problem
1575: */
1576: PetscCall(DMDAGetInfo(da, &dim, &m, NULL, NULL, NULL, NULL, NULL, &nc, &s, &bx, NULL, NULL, NULL));
1577: if (bx == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_TRUE));
1578: col = 2 * s + 1;
1580: PetscCall(DMDAGetCorners(da, &xs, NULL, NULL, &nx, NULL, NULL));
1581: PetscCall(DMDAGetGhostCorners(da, &gxs, NULL, NULL, &gnx, NULL, NULL));
1583: PetscCall(MatSetBlockSize(J, nc));
1584: PetscCall(MatSeqAIJSetPreallocation(J, col * nc, NULL));
1585: PetscCall(MatMPIAIJSetPreallocation(J, col * nc, NULL, col * nc, NULL));
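/* with no fill pattern each row couples to at most 2*s+1 nodes times nc components,
   so a uniform row length of col*nc suffices (an overestimate near non-periodic boundaries) */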
1587: PetscCall(DMGetLocalToGlobalMapping(da, <og));
1588: PetscCall(MatGetLocalToGlobalMapping(J, &mltog, NULL));
1589: if (!mltog) PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));
1591: /*
1592: For each node in the grid we get its neighbors in the local (on-process) ordering
1593: that includes the ghost points; MatSetValuesLocal() then maps those indices to the
1594: global PETSc ordering.
1595: */
1596: if (!da->prealloc_only) {
1597: PetscCall(PetscMalloc2(nc, &rows, col * nc * nc, &cols));
1598: for (i = xs; i < xs + nx; i++) {
1599: istart = PetscMax(-s, gxs - i);
1600: iend = PetscMin(s, gxs + gnx - i - 1);
1601: slot = i - gxs;
1603: cnt = 0;
1604: for (i1 = istart; i1 < iend + 1; i1++) {
1605: cols[cnt++] = nc * (slot + i1);
1606: for (l = 1; l < nc; l++) {
1607: cols[cnt] = 1 + cols[cnt - 1];
1608: cnt++;
1609: }
1610: }
1611: rows[0] = nc * (slot);
1612: for (l = 1; l < nc; l++) rows[l] = 1 + rows[l - 1];
1613: PetscCall(MatSetValuesLocal(J, nc, rows, cnt, cols, NULL, INSERT_VALUES));
1614: }
1615: /* do not copy values to GPU since they are all zero and not yet needed there */
1616: PetscCall(MatBindToCPU(J, PETSC_TRUE));
1617: PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
1618: PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
1619: if (bx == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_FALSE));
1620: PetscCall(MatBindToCPU(J, PETSC_FALSE));
1621: PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
1622: PetscCall(PetscFree2(rows, cols));
1623: }
1624: PetscFunctionReturn(PETSC_SUCCESS);
1625: }
1627: PetscErrorCode DMCreateMatrix_DA_1d_SeqAIJ_NoPreallocation(DM da, Mat J)
1628: {
1629: PetscInt xs, nx, i, i1, slot, gxs, gnx;
1630: PetscInt m, dim, s, *cols = NULL, nc, *rows = NULL, col, cnt, l;
1631: PetscInt istart, iend;
1632: DMBoundaryType bx;
1633: ISLocalToGlobalMapping ltog, mltog;
1635: PetscFunctionBegin;
1636: /*
1637: nc - number of components per grid point
1638: col - number of colors needed in one direction for single component problem
1639: */
1640: PetscCall(DMDAGetInfo(da, &dim, &m, NULL, NULL, NULL, NULL, NULL, &nc, &s, &bx, NULL, NULL, NULL));
1641: col = 2 * s + 1;
1643: PetscCall(DMDAGetCorners(da, &xs, NULL, NULL, &nx, NULL, NULL));
1644: PetscCall(DMDAGetGhostCorners(da, &gxs, NULL, NULL, &gnx, NULL, NULL));
1646: PetscCall(MatSetBlockSize(J, nc));
1647: PetscCall(MatSeqAIJSetTotalPreallocation(J, nx * nc * col * nc));
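/* MatSeqAIJSetTotalPreallocation() reserves a single slab of nonzeros to be filled one
   full, sorted row at a time and turns on MAT_SORTED_FULL; the option is reset below
   once assembly is complete */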
1649: PetscCall(DMGetLocalToGlobalMapping(da, <og));
1650: PetscCall(MatGetLocalToGlobalMapping(J, &mltog, NULL));
1651: if (!mltog) PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));
1653: /*
1654: For each node in the grid we get its neighbors in the local (on-process) ordering
1655: that includes the ghost points; MatSetValuesLocal() then maps those indices to the
1656: global PETSc ordering.
1657: */
1658: if (!da->prealloc_only) {
1659: PetscCall(PetscMalloc2(nc, &rows, col * nc * nc, &cols));
1660: for (i = xs; i < xs + nx; i++) {
1661: istart = PetscMax(-s, gxs - i);
1662: iend = PetscMin(s, gxs + gnx - i - 1);
1663: slot = i - gxs;
1665: cnt = 0;
1666: for (i1 = istart; i1 < iend + 1; i1++) {
1667: cols[cnt++] = nc * (slot + i1);
1668: for (l = 1; l < nc; l++) {
1669: cols[cnt] = 1 + cols[cnt - 1];
1670: cnt++;
1671: }
1672: }
1673: rows[0] = nc * (slot);
1674: for (l = 1; l < nc; l++) rows[l] = 1 + rows[l - 1];
1675: PetscCall(MatSetValuesLocal(J, nc, rows, cnt, cols, NULL, INSERT_VALUES));
1676: }
1677: /* do not copy values to GPU since they are all zero and not yet needed there */
1678: PetscCall(MatBindToCPU(J, PETSC_TRUE));
1679: PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
1680: PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
1681: if (bx == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_FALSE));
1682: PetscCall(MatBindToCPU(J, PETSC_FALSE));
1683: PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
1684: PetscCall(PetscFree2(rows, cols));
1685: }
1686: PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_FALSE));
1687: PetscFunctionReturn(PETSC_SUCCESS);
1688: }
1690: PetscErrorCode DMCreateMatrix_DA_2d_MPIBAIJ(DM da, Mat J)
1691: {
1692: PetscInt xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
1693: PetscInt m, n, dim, s, *cols, nc, col, cnt, *dnz, *onz;
1694: PetscInt istart, iend, jstart, jend, ii, jj;
1695: MPI_Comm comm;
1696: PetscScalar *values;
1697: DMBoundaryType bx, by;
1698: DMDAStencilType st;
1699: ISLocalToGlobalMapping ltog;
1701: PetscFunctionBegin;
1702: /*
1703: nc - number of components per grid point
1704: col - number of colors needed in one direction for single component problem
1705: */
1706: PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, NULL, NULL, NULL, &nc, &s, &bx, &by, NULL, &st));
1707: col = 2 * s + 1;
1709: PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL));
1710: PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL));
1711: PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
1713: PetscCall(PetscMalloc1(col * col * nc * nc, &cols));
1715: PetscCall(DMGetLocalToGlobalMapping(da, <og));
1717: /* determine the matrix preallocation information */
1718: MatPreallocateBegin(comm, nx * ny, nx * ny, dnz, onz);
1719: for (i = xs; i < xs + nx; i++) {
1720: istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
1721: iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
1722: for (j = ys; j < ys + ny; j++) {
1723: jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
1724: jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
1725: slot = i - gxs + gnx * (j - gys);
1727: /* Find block columns in block row */
1728: cnt = 0;
1729: for (ii = istart; ii < iend + 1; ii++) {
1730: for (jj = jstart; jj < jend + 1; jj++) {
1731: if (st == DMDA_STENCIL_BOX || !ii || !jj) { /* BOX or on the STAR */
1732: cols[cnt++] = slot + ii + gnx * jj;
1733: }
1734: }
1735: }
1736: PetscCall(MatPreallocateSetLocalBlock(ltog, 1, &slot, ltog, cnt, cols, dnz, onz));
1737: }
1738: }
1739: PetscCall(MatSeqBAIJSetPreallocation(J, nc, 0, dnz));
1740: PetscCall(MatMPIBAIJSetPreallocation(J, nc, 0, dnz, 0, onz));
1741: MatPreallocateEnd(dnz, onz);
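/* BAIJ preallocation is in terms of nc x nc blocks: one block row per grid node, so
   dnz/onz count neighboring nodes rather than scalar columns */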
1743: PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));
1745: /*
1746: For each node in the grid we get its neighbors in the local (on-process) ordering
1747: that includes the ghost points; MatSetValuesBlockedLocal() then maps those block
1748: indices to the global PETSc ordering.
1749: */
1750: if (!da->prealloc_only) {
1751: PetscCall(PetscCalloc1(col * col * nc * nc, &values));
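/* zeroed blocks: inserting them instantiates the nonzero structure without affecting values */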
1752: for (i = xs; i < xs + nx; i++) {
1753: istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
1754: iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
1755: for (j = ys; j < ys + ny; j++) {
1756: jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
1757: jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
1758: slot = i - gxs + gnx * (j - gys);
1759: cnt = 0;
1760: for (ii = istart; ii < iend + 1; ii++) {
1761: for (jj = jstart; jj < jend + 1; jj++) {
1762: if (st == DMDA_STENCIL_BOX || !ii || !jj) { /* BOX or on the STAR */
1763: cols[cnt++] = slot + ii + gnx * jj;
1764: }
1765: }
1766: }
1767: PetscCall(MatSetValuesBlockedLocal(J, 1, &slot, cnt, cols, values, INSERT_VALUES));
1768: }
1769: }
1770: PetscCall(PetscFree(values));
1771: /* do not copy values to GPU since they are all zero and not yet needed there */
1772: PetscCall(MatBindToCPU(J, PETSC_TRUE));
1773: PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
1774: PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
1775: PetscCall(MatBindToCPU(J, PETSC_FALSE));
1776: PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
1777: }
1778: PetscCall(PetscFree(cols));
1779: PetscFunctionReturn(PETSC_SUCCESS);
1780: }
1782: PetscErrorCode DMCreateMatrix_DA_3d_MPIBAIJ(DM da, Mat J)
1783: {
1784: PetscInt xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
1785: PetscInt m, n, dim, s, *cols, k, nc, col, cnt, p, *dnz, *onz;
1786: PetscInt istart, iend, jstart, jend, kstart, kend, zs, nz, gzs, gnz, ii, jj, kk;
1787: MPI_Comm comm;
1788: PetscScalar *values;
1789: DMBoundaryType bx, by, bz;
1790: DMDAStencilType st;
1791: ISLocalToGlobalMapping ltog;
1793: PetscFunctionBegin;
1794: /*
1795: nc - number of components per grid point
1796: col - number of colors needed in one direction for single component problem
1797: */
1798: PetscCall(DMDAGetInfo(da, &dim, &m, &n, &p, NULL, NULL, NULL, &nc, &s, &bx, &by, &bz, &st));
1799: col = 2 * s + 1;
1801: PetscCall(DMDAGetCorners(da, &xs, &ys, &zs, &nx, &ny, &nz));
1802: PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, &gzs, &gnx, &gny, &gnz));
1803: PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
1805: PetscCall(PetscMalloc1(col * col * col, &cols));
1807: PetscCall(DMGetLocalToGlobalMapping(da, <og));
1809: /* determine the matrix preallocation information */
1810: MatPreallocateBegin(comm, nx * ny * nz, nx * ny * nz, dnz, onz);
1811: for (i = xs; i < xs + nx; i++) {
1812: istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
1813: iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
1814: for (j = ys; j < ys + ny; j++) {
1815: jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
1816: jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
1817: for (k = zs; k < zs + nz; k++) {
1818: kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
1819: kend = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));
1821: slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);
1823: /* Find block columns in block row */
1824: cnt = 0;
1825: for (ii = istart; ii < iend + 1; ii++) {
1826: for (jj = jstart; jj < jend + 1; jj++) {
1827: for (kk = kstart; kk < kend + 1; kk++) {
1828: if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star */
1829: cols[cnt++] = slot + ii + gnx * jj + gnx * gny * kk;
1830: }
1831: }
1832: }
1833: }
1834: PetscCall(MatPreallocateSetLocalBlock(ltog, 1, &slot, ltog, cnt, cols, dnz, onz));
1835: }
1836: }
1837: }
1838: PetscCall(MatSeqBAIJSetPreallocation(J, nc, 0, dnz));
1839: PetscCall(MatMPIBAIJSetPreallocation(J, nc, 0, dnz, 0, onz));
1840: MatPreallocateEnd(dnz, onz);
1842: PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));
1844: /*
1845: For each node in the grid we get its neighbors in the local (on-process) ordering
1846: that includes the ghost points; MatSetValuesBlockedLocal() then maps those block
1847: indices to the global PETSc ordering.
1848: */
1849: if (!da->prealloc_only) {
1850: PetscCall(PetscCalloc1(col * col * col * nc * nc, &values));
1851: for (i = xs; i < xs + nx; i++) {
1852: istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
1853: iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
1854: for (j = ys; j < ys + ny; j++) {
1855: jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
1856: jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
1857: for (k = zs; k < zs + nz; k++) {
1858: kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
1859: kend = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));
1861: slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);
1863: cnt = 0;
1864: for (ii = istart; ii < iend + 1; ii++) {
1865: for (jj = jstart; jj < jend + 1; jj++) {
1866: for (kk = kstart; kk < kend + 1; kk++) {
1867: if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star */
1868: cols[cnt++] = slot + ii + gnx * jj + gnx * gny * kk;
1869: }
1870: }
1871: }
1872: }
1873: PetscCall(MatSetValuesBlockedLocal(J, 1, &slot, cnt, cols, values, INSERT_VALUES));
1874: }
1875: }
1876: }
1877: PetscCall(PetscFree(values));
1878: /* do not copy values to GPU since they are all zero and not yet needed there */
1879: PetscCall(MatBindToCPU(J, PETSC_TRUE));
1880: PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
1881: PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
1882: PetscCall(MatBindToCPU(J, PETSC_FALSE));
1883: PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
1884: }
1885: PetscCall(PetscFree(cols));
1886: PetscFunctionReturn(PETSC_SUCCESS);
1887: }
1889: /*
1890: This helper is for SBAIJ preallocation: it discards the lower-triangular entries, which are
1891: difficult to identify in the local ordering with a periodic domain.
1892: */
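/*
For example, with global block row 5 and mapped block columns {3, 5, 7}, only {5, 7}
are kept, since SBAIJ stores just the upper triangle of the matrix.
*/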
1893: static PetscErrorCode L2GFilterUpperTriangular(ISLocalToGlobalMapping ltog, PetscInt *row, PetscInt *cnt, PetscInt col[])
1894: {
1895: PetscInt i, n;
1897: PetscFunctionBegin;
1898: PetscCall(ISLocalToGlobalMappingApplyBlock(ltog, 1, row, row));
1899: PetscCall(ISLocalToGlobalMappingApplyBlock(ltog, *cnt, col, col));
1900: for (i = 0, n = 0; i < *cnt; i++) {
1901: if (col[i] >= *row) col[n++] = col[i];
1902: }
1903: *cnt = n;
1904: PetscFunctionReturn(PETSC_SUCCESS);
1905: }
1907: PetscErrorCode DMCreateMatrix_DA_2d_MPISBAIJ(DM da, Mat J)
1908: {
1909: PetscInt xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
1910: PetscInt m, n, dim, s, *cols, nc, col, cnt, *dnz, *onz;
1911: PetscInt istart, iend, jstart, jend, ii, jj;
1912: MPI_Comm comm;
1913: PetscScalar *values;
1914: DMBoundaryType bx, by;
1915: DMDAStencilType st;
1916: ISLocalToGlobalMapping ltog;
1918: PetscFunctionBegin;
1919: /*
1920: nc - number of components per grid point
1921: col - number of colors needed in one direction for single component problem
1922: */
1923: PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, NULL, NULL, NULL, &nc, &s, &bx, &by, NULL, &st));
1924: col = 2 * s + 1;
1926: PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL));
1927: PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL));
1928: PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
1930: PetscCall(PetscMalloc1(col * col * nc * nc, &cols));
1932: PetscCall(DMGetLocalToGlobalMapping(da, <og));
1934: /* determine the matrix preallocation information */
1935: MatPreallocateBegin(comm, nx * ny, nx * ny, dnz, onz);
1936: for (i = xs; i < xs + nx; i++) {
1937: istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
1938: iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
1939: for (j = ys; j < ys + ny; j++) {
1940: jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
1941: jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
1942: slot = i - gxs + gnx * (j - gys);
1944: /* Find block columns in block row */
1945: cnt = 0;
1946: for (ii = istart; ii < iend + 1; ii++) {
1947: for (jj = jstart; jj < jend + 1; jj++) {
1948: if (st == DMDA_STENCIL_BOX || !ii || !jj) cols[cnt++] = slot + ii + gnx * jj;
1949: }
1950: }
1951: PetscCall(L2GFilterUpperTriangular(ltog, &slot, &cnt, cols));
1952: PetscCall(MatPreallocateSymmetricSetBlock(slot, cnt, cols, dnz, onz));
1953: }
1954: }
1955: PetscCall(MatSeqSBAIJSetPreallocation(J, nc, 0, dnz));
1956: PetscCall(MatMPISBAIJSetPreallocation(J, nc, 0, dnz, 0, onz));
1957: MatPreallocateEnd(dnz, onz);
1959: PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));
1961: /*
1962: For each node in the grid we get its neighbors in the local (on-process) ordering
1963: that includes the ghost points; L2GFilterUpperTriangular() then maps those block
1964: indices to the global PETSc ordering and keeps only the upper triangle.
1965: */
1966: if (!da->prealloc_only) {
1967: PetscCall(PetscCalloc1(col * col * nc * nc, &values));
1968: for (i = xs; i < xs + nx; i++) {
1969: istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
1970: iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
1971: for (j = ys; j < ys + ny; j++) {
1972: jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
1973: jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
1974: slot = i - gxs + gnx * (j - gys);
1976: /* Find block columns in block row */
1977: cnt = 0;
1978: for (ii = istart; ii < iend + 1; ii++) {
1979: for (jj = jstart; jj < jend + 1; jj++) {
1980: if (st == DMDA_STENCIL_BOX || !ii || !jj) cols[cnt++] = slot + ii + gnx * jj;
1981: }
1982: }
1983: PetscCall(L2GFilterUpperTriangular(ltog, &slot, &cnt, cols));
1984: PetscCall(MatSetValuesBlocked(J, 1, &slot, cnt, cols, values, INSERT_VALUES));
1985: }
1986: }
1987: PetscCall(PetscFree(values));
1988: /* do not copy values to GPU since they are all zero and not yet needed there */
1989: PetscCall(MatBindToCPU(J, PETSC_TRUE));
1990: PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
1991: PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
1992: PetscCall(MatBindToCPU(J, PETSC_FALSE));
1993: PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
1994: }
1995: PetscCall(PetscFree(cols));
1996: PetscFunctionReturn(PETSC_SUCCESS);
1997: }
1999: PetscErrorCode DMCreateMatrix_DA_3d_MPISBAIJ(DM da, Mat J)
2000: {
2001: PetscInt xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
2002: PetscInt m, n, dim, s, *cols, k, nc, col, cnt, p, *dnz, *onz;
2003: PetscInt istart, iend, jstart, jend, kstart, kend, zs, nz, gzs, gnz, ii, jj, kk;
2004: MPI_Comm comm;
2005: PetscScalar *values;
2006: DMBoundaryType bx, by, bz;
2007: DMDAStencilType st;
2008: ISLocalToGlobalMapping ltog;
2010: PetscFunctionBegin;
2011: /*
2012: nc - number of components per grid point
2013: col - number of colors needed in one direction for single component problem
2014: */
2015: PetscCall(DMDAGetInfo(da, &dim, &m, &n, &p, NULL, NULL, NULL, &nc, &s, &bx, &by, &bz, &st));
2016: col = 2 * s + 1;
2018: PetscCall(DMDAGetCorners(da, &xs, &ys, &zs, &nx, &ny, &nz));
2019: PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, &gzs, &gnx, &gny, &gnz));
2020: PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
2022: /* work array for the block column indices of one block row */
2023: PetscCall(PetscMalloc1(col * col * col, &cols));
2025: PetscCall(DMGetLocalToGlobalMapping(da, <og));
2027: /* determine the matrix preallocation information */
2028: MatPreallocateBegin(comm, nx * ny * nz, nx * ny * nz, dnz, onz);
2029: for (i = xs; i < xs + nx; i++) {
2030: istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
2031: iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
2032: for (j = ys; j < ys + ny; j++) {
2033: jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
2034: jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
2035: for (k = zs; k < zs + nz; k++) {
2036: kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
2037: kend = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));
2039: slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);
2041: /* Find block columns in block row */
2042: cnt = 0;
2043: for (ii = istart; ii < iend + 1; ii++) {
2044: for (jj = jstart; jj < jend + 1; jj++) {
2045: for (kk = kstart; kk < kend + 1; kk++) {
2046: if ((st == DMDA_STENCIL_BOX) || (!ii && !jj) || (!jj && !kk) || (!ii && !kk)) cols[cnt++] = slot + ii + gnx * jj + gnx * gny * kk;
2047: }
2048: }
2049: }
2050: PetscCall(L2GFilterUpperTriangular(ltog, &slot, &cnt, cols));
2051: PetscCall(MatPreallocateSymmetricSetBlock(slot, cnt, cols, dnz, onz));
2052: }
2053: }
2054: }
2055: PetscCall(MatSeqSBAIJSetPreallocation(J, nc, 0, dnz));
2056: PetscCall(MatMPISBAIJSetPreallocation(J, nc, 0, dnz, 0, onz));
2057: MatPreallocateEnd(dnz, onz);
2059: PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));
2061: /*
2062: For each node in the grid we get its neighbors in the local (on-process) ordering
2063: that includes the ghost points; L2GFilterUpperTriangular() then maps those block
2064: indices to the global PETSc ordering and keeps only the upper triangle.
2065: */
2066: if (!da->prealloc_only) {
2067: PetscCall(PetscCalloc1(col * col * col * nc * nc, &values));
2068: for (i = xs; i < xs + nx; i++) {
2069: istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
2070: iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
2071: for (j = ys; j < ys + ny; j++) {
2072: jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
2073: jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
2074: for (k = zs; k < zs + nz; k++) {
2075: kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
2076: kend = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));
2078: slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);
2080: cnt = 0;
2081: for (ii = istart; ii < iend + 1; ii++) {
2082: for (jj = jstart; jj < jend + 1; jj++) {
2083: for (kk = kstart; kk < kend + 1; kk++) {
2084: if ((st == DMDA_STENCIL_BOX) || (!ii && !jj) || (!jj && !kk) || (!ii && !kk)) cols[cnt++] = slot + ii + gnx * jj + gnx * gny * kk;
2085: }
2086: }
2087: }
2088: PetscCall(L2GFilterUpperTriangular(ltog, &slot, &cnt, cols));
2089: PetscCall(MatSetValuesBlocked(J, 1, &slot, cnt, cols, values, INSERT_VALUES));
2090: }
2091: }
2092: }
2093: PetscCall(PetscFree(values));
2094: /* do not copy values to GPU since they are all zero and not yet needed there */
2095: PetscCall(MatBindToCPU(J, PETSC_TRUE));
2096: PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
2097: PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
2098: PetscCall(MatBindToCPU(J, PETSC_FALSE));
2099: PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
2100: }
2101: PetscCall(PetscFree(cols));
2102: PetscFunctionReturn(PETSC_SUCCESS);
2103: }
2105: PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ_Fill(DM da, Mat J)
2106: {
2107: PetscInt xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
2108: PetscInt m, n, dim, s, *cols, k, nc, row, col, cnt, maxcnt = 0, l, p, *dnz, *onz;
2109: PetscInt istart, iend, jstart, jend, kstart, kend, zs, nz, gzs, gnz, ii, jj, kk, M, N, P;
2110: DM_DA *dd = (DM_DA *)da->data;
2111: PetscInt ifill_col, *dfill = dd->dfill, *ofill = dd->ofill;
2112: MPI_Comm comm;
2113: PetscScalar *values;
2114: DMBoundaryType bx, by, bz;
2115: ISLocalToGlobalMapping ltog;
2116: DMDAStencilType st;
2117: PetscBool removedups = PETSC_FALSE;
2119: PetscFunctionBegin;
2120: /*
2121: nc - number of components per grid point
2122: col - number of colors needed in one direction for single component problem
2123: */
2124: PetscCall(DMDAGetInfo(da, &dim, &m, &n, &p, &M, &N, &P, &nc, &s, &bx, &by, &bz, &st));
2125: col = 2 * s + 1;
2127: /*
2128: With one process in a periodic, skinny dimension the code will label nonzero columns multiple
2129: times because the stencil "wraps" around the end of the domain and hits entries already counted from the other direction.
2130: */
2131: if (M == 1 && 2 * s >= m) removedups = PETSC_TRUE;
2132: if (N == 1 && 2 * s >= n) removedups = PETSC_TRUE;
2133: if (P == 1 && 2 * s >= p) removedups = PETSC_TRUE;
2135: PetscCall(DMDAGetCorners(da, &xs, &ys, &zs, &nx, &ny, &nz));
2136: PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, &gzs, &gnx, &gny, &gnz));
2137: PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
2139: PetscCall(PetscMalloc1(col * col * col * nc, &cols));
2140: PetscCall(DMGetLocalToGlobalMapping(da, <og));
2142: /* determine the matrix preallocation information */
2143: MatPreallocateBegin(comm, nc * nx * ny * nz, nc * nx * ny * nz, dnz, onz);
2145: PetscCall(MatSetBlockSize(J, nc));
2146: for (i = xs; i < xs + nx; i++) {
2147: istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
2148: iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
2149: for (j = ys; j < ys + ny; j++) {
2150: jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
2151: jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
2152: for (k = zs; k < zs + nz; k++) {
2153: kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
2154: kend = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));
2156: slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);
2158: for (l = 0; l < nc; l++) {
2159: cnt = 0;
2160: for (ii = istart; ii < iend + 1; ii++) {
2161: for (jj = jstart; jj < jend + 1; jj++) {
2162: for (kk = kstart; kk < kend + 1; kk++) {
2163: if (ii || jj || kk) {
2164: if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star */
2165: for (ifill_col = ofill[l]; ifill_col < ofill[l + 1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc * (slot + ii + gnx * jj + gnx * gny * kk);
2166: }
2167: } else {
2168: if (dfill) {
2169: for (ifill_col = dfill[l]; ifill_col < dfill[l + 1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc * (slot + ii + gnx * jj + gnx * gny * kk);
2170: } else {
2171: for (ifill_col = 0; ifill_col < nc; ifill_col++) cols[cnt++] = ifill_col + nc * (slot + ii + gnx * jj + gnx * gny * kk);
2172: }
2173: }
2174: }
2175: }
2176: }
2177: row = l + nc * (slot);
2178: maxcnt = PetscMax(maxcnt, cnt);
2179: if (removedups) PetscCall(MatPreallocateSetLocalRemoveDups(ltog, 1, &row, ltog, cnt, cols, dnz, onz));
2180: else PetscCall(MatPreallocateSetLocal(ltog, 1, &row, ltog, cnt, cols, dnz, onz));
2181: }
2182: }
2183: }
2184: }
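/* maxcnt is the length of the longest row encountered; it sizes the zero-valued work
   array used when inserting the nonzero structure below */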
2185: PetscCall(MatSeqAIJSetPreallocation(J, 0, dnz));
2186: PetscCall(MatMPIAIJSetPreallocation(J, 0, dnz, 0, onz));
2187: MatPreallocateEnd(dnz, onz);
2188: PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));
2190: /*
2191: For each node in the grid we get its neighbors in the local (on-process) ordering
2192: that includes the ghost points; MatSetValuesLocal() then maps those indices to the
2193: global PETSc ordering.
2194: */
2195: if (!da->prealloc_only) {
2196: PetscCall(PetscCalloc1(maxcnt, &values));
2197: for (i = xs; i < xs + nx; i++) {
2198: istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
2199: iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
2200: for (j = ys; j < ys + ny; j++) {
2201: jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
2202: jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
2203: for (k = zs; k < zs + nz; k++) {
2204: kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
2205: kend = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));
2207: slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);
2209: for (l = 0; l < nc; l++) {
2210: cnt = 0;
2211: for (ii = istart; ii < iend + 1; ii++) {
2212: for (jj = jstart; jj < jend + 1; jj++) {
2213: for (kk = kstart; kk < kend + 1; kk++) {
2214: if (ii || jj || kk) {
2215: if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star */
2216: for (ifill_col = ofill[l]; ifill_col < ofill[l + 1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc * (slot + ii + gnx * jj + gnx * gny * kk);
2217: }
2218: } else {
2219: if (dfill) {
2220: for (ifill_col = dfill[l]; ifill_col < dfill[l + 1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc * (slot + ii + gnx * jj + gnx * gny * kk);
2221: } else {
2222: for (ifill_col = 0; ifill_col < nc; ifill_col++) cols[cnt++] = ifill_col + nc * (slot + ii + gnx * jj + gnx * gny * kk);
2223: }
2224: }
2225: }
2226: }
2227: }
2228: row = l + nc * (slot);
2229: PetscCall(MatSetValuesLocal(J, 1, &row, cnt, cols, values, INSERT_VALUES));
2230: }
2231: }
2232: }
2233: }
2234: PetscCall(PetscFree(values));
2235: /* do not copy values to GPU since they are all zero and not yet needed there */
2236: PetscCall(MatBindToCPU(J, PETSC_TRUE));
2237: PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
2238: PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
2239: PetscCall(MatBindToCPU(J, PETSC_FALSE));
2240: PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
2241: }
2242: PetscCall(PetscFree(cols));
2243: PetscFunctionReturn(PETSC_SUCCESS);
2244: }