Actual source code: fdda.c
1: #include <petsc/private/dmdaimpl.h>
2: #include <petscmat.h>
3: #include <petscbt.h>
5: extern PetscErrorCode DMCreateColoring_DA_1d_MPIAIJ(DM, ISColoringType, ISColoring *);
6: extern PetscErrorCode DMCreateColoring_DA_2d_MPIAIJ(DM, ISColoringType, ISColoring *);
7: extern PetscErrorCode DMCreateColoring_DA_2d_5pt_MPIAIJ(DM, ISColoringType, ISColoring *);
8: extern PetscErrorCode DMCreateColoring_DA_3d_MPIAIJ(DM, ISColoringType, ISColoring *);
10: /*
11:    For a ghost index i that may be negative or greater than the upper bound, this
12:    maps it into the range 0:m-1 using periodicity
13: */
14: #define SetInRange(i, m) ((i < 0) ? m + i : ((i >= m) ? i - m : i))
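/* For example, with m = 10: SetInRange(-2, 10) = 8 and SetInRange(12, 10) = 2; note the macro only wraps by a single period */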
16: static PetscErrorCode DMDASetBlockFills_Private(const PetscInt *dfill, PetscInt w, PetscInt **rfill)
17: {
18: PetscInt i, j, nz, *fill;
20: PetscFunctionBegin;
21: if (!dfill) PetscFunctionReturn(PETSC_SUCCESS);
23: /* count the number of nonzeros */
24: nz = 0;
25: for (i = 0; i < w; i++) {
26: for (j = 0; j < w; j++) {
27: if (dfill[w * i + j]) nz++;
28: }
29: }
30: PetscCall(PetscMalloc1(nz + w + 1, &fill));
31: /* construct modified CSR storage of nonzero structure */
32: /* fill[0 .. w] marks the start of each row of column indices (and the end of the last row)
33: so fill[1] - fill[0] gives the number of nonzeros in the first row, etc. */
34: nz = w + 1;
35: for (i = 0; i < w; i++) {
36: fill[i] = nz;
37: for (j = 0; j < w; j++) {
38: if (dfill[w * i + j]) {
39: fill[nz] = j;
40: nz++;
41: }
42: }
43: }
44: fill[w] = nz;
46: *rfill = fill;
47: PetscFunctionReturn(PETSC_SUCCESS);
48: }
50: static PetscErrorCode DMDASetBlockFillsSparse_Private(const PetscInt *dfillsparse, PetscInt w, PetscInt **rfill)
51: {
52: PetscInt nz;
54: PetscFunctionBegin;
55: if (!dfillsparse) PetscFunctionReturn(PETSC_SUCCESS);
57: /* Determine number of non-zeros */
58: nz = (dfillsparse[w] - w - 1);
60: /* Allocate space for our copy of the given sparse matrix representation. */
61: PetscCall(PetscMalloc1(nz + w + 1, rfill));
62: PetscCall(PetscArraycpy(*rfill, dfillsparse, nz + w + 1));
63: PetscFunctionReturn(PETSC_SUCCESS);
64: }
66: static PetscErrorCode DMDASetBlockFills_Private2(DM_DA *dd)
67: {
68: PetscInt i, k, cnt = 1;
70: PetscFunctionBegin;
71: /* ofillcols tracks the columns of ofill that have any nonzero in them; the value in each location is the number of
72: columns to the left with any nonzeros in them plus 1 */
73: PetscCall(PetscCalloc1(dd->w, &dd->ofillcols));
74: for (i = 0; i < dd->w; i++) {
75: for (k = dd->ofill[i]; k < dd->ofill[i + 1]; k++) dd->ofillcols[dd->ofill[k]] = 1;
76: }
77: for (i = 0; i < dd->w; i++) {
78: if (dd->ofillcols[i]) dd->ofillcols[i] = cnt++;
79: }
80: PetscFunctionReturn(PETSC_SUCCESS);
81: }
83: /*@
84: DMDASetBlockFills - Sets the fill pattern in each block for a multi-component problem
85: of the matrix returned by `DMCreateMatrix()`.
87: Logically Collective
89: Input Parameters:
90: + da - the `DMDA`
91: . dfill - the fill pattern in the diagonal block (may be `NULL`, means use dense block)
92: - ofill - the fill pattern in the off-diagonal blocks
94: Level: developer
96: Notes:
97: This only makes sense when you have a multicomponent problem but are using the
98: `MATMPIAIJ` matrix format
100: The format for `dfill` and `ofill` is a 2 dimensional dof by dof matrix with 1 entries
101: representing coupling and 0 entries for missing coupling. For example
102: .vb
103: dfill[9] = {1, 0, 0,
104: 1, 1, 0,
105: 0, 1, 1}
106: .ve
107: means that row 0 is coupled with only itself in the diagonal block, row 1 is coupled with
108: itself and row 0 (in the diagonal block) and row 2 is coupled with itself and row 1 (in the
109: diagonal block).
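For example, assuming `da` is a `DMDA` with 3 degrees of freedom per grid point, the `dfill` above could be
combined with an `ofill` that couples each component only with itself in the off-diagonal (neighbor) blocks;
the `ofill` values here are purely illustrative
.vb
   PetscInt dfill[9] = {1, 0, 0,
                        1, 1, 0,
                        0, 1, 1};
   PetscInt ofill[9] = {1, 0, 0,
                        0, 1, 0,
                        0, 0, 1};
   PetscCall(DMDASetBlockFills(da, dfill, ofill));
.ve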
111: `DMDASetGetMatrix()` allows you to provide general code for those more complicated nonzero patterns than
112: can be represented in the `dfill`, `ofill` format
114: Contributed by\:
115: Glenn Hammond
117: .seealso: [](sec_struct), `DM`, `DMDA`, `DMCreateMatrix()`, `DMDASetGetMatrix()`, `DMSetMatrixPreallocateOnly()`, `DMDASetBlockFillsSparse()`
118: @*/
119: PetscErrorCode DMDASetBlockFills(DM da, const PetscInt *dfill, const PetscInt *ofill)
120: {
121: DM_DA *dd = (DM_DA *)da->data;
123: PetscFunctionBegin;
124: /* save the given dfill and ofill information */
125: PetscCall(DMDASetBlockFills_Private(dfill, dd->w, &dd->dfill));
126: PetscCall(DMDASetBlockFills_Private(ofill, dd->w, &dd->ofill));
128: /* count nonzeros in ofill columns */
129: PetscCall(DMDASetBlockFills_Private2(dd));
130: PetscFunctionReturn(PETSC_SUCCESS);
131: }
133: /*@
134: DMDASetBlockFillsSparse - Sets the fill pattern in each block for a multi-component problem
135: of the matrix returned by `DMCreateMatrix()`, using sparse representations
136: of fill patterns.
138: Logically Collective
140: Input Parameters:
141: + da - the `DMDA`
142: . dfillsparse - the sparse fill pattern in the diagonal block (may be `NULL`, means use dense block)
143: - ofillsparse - the sparse fill pattern in the off-diagonal blocks
145: Level: developer
147: Notes:
148: This only makes sense when you have a multicomponent problem but are using the
149: `MATMPIAIJ` matrix format
151: The format for `dfillsparse` and `ofillsparse` is a sparse representation of a
152: dof-by-dof matrix with 1 entries representing coupling and 0 entries
153: for missing coupling. The sparse representation is a one-dimensional
154: array of length nz + dof + 1, where nz is the number of nonzeros in
155: the matrix. The first dof entries in the array give the
156: starting array indices of each row's items in the rest of the array,
157: the (dof+1)st entry contains the value nz + dof + 1 (i.e. the entire length of the array),
158: and the remaining nz entries give the column indices of each of
159: the 1s within the logical 2D matrix. Each row's items within
160: the array are the column indices of the 1s within that row
161: of the 2D matrix. PETSc developers may recognize that this is the
162: same format as that computed by the `DMDASetBlockFills_Private()`
163: function from a dense 2D matrix representation.
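For illustration, the dense `dfill` example shown in `DMDASetBlockFills()` (dof = 3, nz = 5) has the sparse
representation
.vb
   dfillsparse[9] = {4, 5, 7,   /* start of each row's column indices   */
                     9,         /* nz + dof + 1, the total array length */
                     0,         /* row 0 couples with column 0          */
                     0, 1,      /* row 1 couples with columns 0 and 1   */
                     1, 2};     /* row 2 couples with columns 1 and 2   */
.ve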
165: `DMDASetGetMatrix()` allows you to provide general code for those more complicated nonzero patterns than
166: can be represented in the `dfill`, `ofill` format
168: Contributed by\:
169: Philip C. Roth
171: .seealso: [](sec_struct), `DM`, `DMDA`, `DMDASetBlockFills()`, `DMCreateMatrix()`, `DMDASetGetMatrix()`, `DMSetMatrixPreallocateOnly()`
172: @*/
173: PetscErrorCode DMDASetBlockFillsSparse(DM da, const PetscInt *dfillsparse, const PetscInt *ofillsparse)
174: {
175: DM_DA *dd = (DM_DA *)da->data;
177: PetscFunctionBegin;
178: /* save the given dfill and ofill information */
179: PetscCall(DMDASetBlockFillsSparse_Private(dfillsparse, dd->w, &dd->dfill));
180: PetscCall(DMDASetBlockFillsSparse_Private(ofillsparse, dd->w, &dd->ofill));
182: /* count nonzeros in ofill columns */
183: PetscCall(DMDASetBlockFills_Private2(dd));
184: PetscFunctionReturn(PETSC_SUCCESS);
185: }
187: PetscErrorCode DMCreateColoring_DA(DM da, ISColoringType ctype, ISColoring *coloring)
188: {
189: PetscInt dim, m, n, p, nc;
190: DMBoundaryType bx, by, bz;
191: MPI_Comm comm;
192: PetscMPIInt size;
193: PetscBool isBAIJ;
194: DM_DA *dd = (DM_DA *)da->data;
196: PetscFunctionBegin;
197: /*
198: m
199: ------------------------------------------------------
200: | |
201: | |
202: | ---------------------- |
203: | | | |
204: n | yn | | |
205: | | | |
206: | .--------------------- |
207: | (xs,ys) xn |
208: | . |
209: | (gxs,gys) |
210: | |
211: -----------------------------------------------------
212: */
214: /*
215: nc - number of components per grid point
216: col - number of colors needed in one direction for single component problem
218: */
219: PetscCall(DMDAGetInfo(da, &dim, NULL, NULL, NULL, &m, &n, &p, &nc, NULL, &bx, &by, &bz, NULL));
221: PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
222: PetscCallMPI(MPI_Comm_size(comm, &size));
223: if (ctype == IS_COLORING_LOCAL) {
224: PetscCheck(!((m == 1 && bx == DM_BOUNDARY_PERIODIC) || (n == 1 && by == DM_BOUNDARY_PERIODIC) || (p == 1 && bz == DM_BOUNDARY_PERIODIC)), PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "IS_COLORING_LOCAL cannot be used for a periodic boundary condition when both sides of the domain are on the same process");
225: }
227: /* Tell the DMDA it has 1 degree of freedom per grid point so that the coloring for BAIJ
228: matrices is for the blocks, not the individual matrix elements */
229: PetscCall(PetscStrbeginswith(da->mattype, MATBAIJ, &isBAIJ));
230: if (!isBAIJ) PetscCall(PetscStrbeginswith(da->mattype, MATMPIBAIJ, &isBAIJ));
231: if (!isBAIJ) PetscCall(PetscStrbeginswith(da->mattype, MATSEQBAIJ, &isBAIJ));
232: if (isBAIJ) {
233: dd->w = 1;
234: dd->xs = dd->xs / nc;
235: dd->xe = dd->xe / nc;
236: dd->Xs = dd->Xs / nc;
237: dd->Xe = dd->Xe / nc;
238: }
240: /*
241: We do not provide a getcoloring function in the DMDA operations because
242: the basic DMDA does not know about matrices. We think of DMDA as being
243: more low-level than matrices.
244: */
245: if (dim == 1) PetscCall(DMCreateColoring_DA_1d_MPIAIJ(da, ctype, coloring));
246: else if (dim == 2) PetscCall(DMCreateColoring_DA_2d_MPIAIJ(da, ctype, coloring));
247: else if (dim == 3) PetscCall(DMCreateColoring_DA_3d_MPIAIJ(da, ctype, coloring));
248: else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "Not done for %" PetscInt_FMT " dimension, send us mail petsc-maint@mcs.anl.gov for code", dim);
249: if (isBAIJ) {
250: dd->w = nc;
251: dd->xs = dd->xs * nc;
252: dd->xe = dd->xe * nc;
253: dd->Xs = dd->Xs * nc;
254: dd->Xe = dd->Xe * nc;
255: }
256: PetscFunctionReturn(PETSC_SUCCESS);
257: }
259: PetscErrorCode DMCreateColoring_DA_2d_MPIAIJ(DM da, ISColoringType ctype, ISColoring *coloring)
260: {
261: PetscInt xs, ys, nx, ny, i, j, ii, gxs, gys, gnx, gny, m, n, M, N, dim, s, k, nc, col;
262: PetscInt ncolors = 0;
263: MPI_Comm comm;
264: DMBoundaryType bx, by;
265: DMDAStencilType st;
266: ISColoringValue *colors;
267: DM_DA *dd = (DM_DA *)da->data;
269: PetscFunctionBegin;
270: /*
271: nc - number of components per grid point
272: col - number of colors needed in one direction for single component problem
274: */
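/* The color of component k at grid point (i,j) is k + nc*(its position within a col-by-col tile). Any two distinct
   grid points whose i and j offsets are each at most 2*s occupy different tile positions, so Jacobian columns that
   can appear in the same row (for a box stencil of width s) never share a color */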
275: PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, &M, &N, NULL, &nc, &s, &bx, &by, NULL, &st));
276: col = 2 * s + 1;
277: PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL));
278: PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL));
279: PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
281: /* special case as taught to us by Paul Hovland */
282: if (st == DMDA_STENCIL_STAR && s == 1) {
283: PetscCall(DMCreateColoring_DA_2d_5pt_MPIAIJ(da, ctype, coloring));
284: } else {
285: if (ctype == IS_COLORING_GLOBAL) {
286: if (!dd->localcoloring) {
287: PetscCall(PetscMalloc1(nc * nx * ny, &colors));
288: ii = 0;
289: for (j = ys; j < ys + ny; j++) {
290: for (i = xs; i < xs + nx; i++) {
291: for (k = 0; k < nc; k++) colors[ii++] = k + nc * ((i % col) + col * (j % col));
292: }
293: }
294: ncolors = nc + nc * (col - 1 + col * (col - 1));
295: PetscCall(ISColoringCreate(comm, ncolors, nc * nx * ny, colors, PETSC_OWN_POINTER, &dd->localcoloring));
296: }
297: *coloring = dd->localcoloring;
298: } else if (ctype == IS_COLORING_LOCAL) {
299: if (!dd->ghostedcoloring) {
300: PetscCall(PetscMalloc1(nc * gnx * gny, &colors));
301: ii = 0;
302: for (j = gys; j < gys + gny; j++) {
303: for (i = gxs; i < gxs + gnx; i++) {
304: for (k = 0; k < nc; k++) {
305: /* the complicated stuff is to handle periodic boundaries */
306: colors[ii++] = k + nc * ((SetInRange(i, m) % col) + col * (SetInRange(j, n) % col));
307: }
308: }
309: }
310: ncolors = nc + nc * (col - 1 + col * (col - 1));
311: PetscCall(ISColoringCreate(comm, ncolors, nc * gnx * gny, colors, PETSC_OWN_POINTER, &dd->ghostedcoloring));
312: /* PetscIntView(ncolors,(PetscInt*)colors,0); */
314: PetscCall(ISColoringSetType(dd->ghostedcoloring, IS_COLORING_LOCAL));
315: }
316: *coloring = dd->ghostedcoloring;
317: } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_ARG_WRONG, "Unknown ISColoringType %d", (int)ctype);
318: }
319: PetscCall(ISColoringReference(*coloring));
320: PetscFunctionReturn(PETSC_SUCCESS);
321: }
323: PetscErrorCode DMCreateColoring_DA_3d_MPIAIJ(DM da, ISColoringType ctype, ISColoring *coloring)
324: {
325: PetscInt xs, ys, nx, ny, i, j, gxs, gys, gnx, gny, m, n, p, dim, s, k, nc, col, zs, gzs, ii, l, nz, gnz, M, N, P;
326: PetscInt ncolors;
327: MPI_Comm comm;
328: DMBoundaryType bx, by, bz;
329: DMDAStencilType st;
330: ISColoringValue *colors;
331: DM_DA *dd = (DM_DA *)da->data;
333: PetscFunctionBegin;
334: /*
335: nc - number of components per grid point
336: col - number of colors needed in one direction for single component problem
337: */
338: PetscCall(DMDAGetInfo(da, &dim, &m, &n, &p, &M, &N, &P, &nc, &s, &bx, &by, &bz, &st));
339: col = 2 * s + 1;
340: PetscCheck(bx != DM_BOUNDARY_PERIODIC || (m % col) == 0, PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "For coloring efficiency ensure number of grid points in X is divisible by 2*stencil_width + 1");
341: PetscCheck(by != DM_BOUNDARY_PERIODIC || (n % col) == 0, PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "For coloring efficiency ensure number of grid points in Y is divisible by 2*stencil_width + 1");
342: PetscCheck(bz != DM_BOUNDARY_PERIODIC || (p % col) == 0, PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "For coloring efficiency ensure number of grid points in Z is divisible by 2*stencil_width + 1");
344: PetscCall(DMDAGetCorners(da, &xs, &ys, &zs, &nx, &ny, &nz));
345: PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, &gzs, &gnx, &gny, &gnz));
346: PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
348: /* create the coloring */
349: if (ctype == IS_COLORING_GLOBAL) {
350: if (!dd->localcoloring) {
351: PetscCall(PetscMalloc1(nc * nx * ny * nz, &colors));
352: ii = 0;
353: for (k = zs; k < zs + nz; k++) {
354: for (j = ys; j < ys + ny; j++) {
355: for (i = xs; i < xs + nx; i++) {
356: for (l = 0; l < nc; l++) colors[ii++] = l + nc * ((i % col) + col * (j % col) + col * col * (k % col));
357: }
358: }
359: }
360: ncolors = nc + nc * (col - 1 + col * (col - 1) + col * col * (col - 1));
361: PetscCall(ISColoringCreate(comm, ncolors, nc * nx * ny * nz, colors, PETSC_OWN_POINTER, &dd->localcoloring));
362: }
363: *coloring = dd->localcoloring;
364: } else if (ctype == IS_COLORING_LOCAL) {
365: if (!dd->ghostedcoloring) {
366: PetscCall(PetscMalloc1(nc * gnx * gny * gnz, &colors));
367: ii = 0;
368: for (k = gzs; k < gzs + gnz; k++) {
369: for (j = gys; j < gys + gny; j++) {
370: for (i = gxs; i < gxs + gnx; i++) {
371: for (l = 0; l < nc; l++) {
372: /* the complicated stuff is to handle periodic boundaries */
373: colors[ii++] = l + nc * ((SetInRange(i, m) % col) + col * (SetInRange(j, n) % col) + col * col * (SetInRange(k, p) % col));
374: }
375: }
376: }
377: }
378: ncolors = nc + nc * (col - 1 + col * (col - 1) + col * col * (col - 1));
379: PetscCall(ISColoringCreate(comm, ncolors, nc * gnx * gny * gnz, colors, PETSC_OWN_POINTER, &dd->ghostedcoloring));
380: PetscCall(ISColoringSetType(dd->ghostedcoloring, IS_COLORING_LOCAL));
381: }
382: *coloring = dd->ghostedcoloring;
383: } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_ARG_WRONG, "Unknown ISColoringType %d", (int)ctype);
384: PetscCall(ISColoringReference(*coloring));
385: PetscFunctionReturn(PETSC_SUCCESS);
386: }
388: PetscErrorCode DMCreateColoring_DA_1d_MPIAIJ(DM da, ISColoringType ctype, ISColoring *coloring)
389: {
390: PetscInt xs, nx, i, i1, gxs, gnx, l, m, M, dim, s, nc, col;
391: PetscInt ncolors;
392: MPI_Comm comm;
393: DMBoundaryType bx;
394: ISColoringValue *colors;
395: DM_DA *dd = (DM_DA *)da->data;
397: PetscFunctionBegin;
398: /*
399: nc - number of components per grid point
400: col - number of colors needed in one direction for single component problem
401: */
402: PetscCall(DMDAGetInfo(da, &dim, &m, NULL, NULL, &M, NULL, NULL, &nc, &s, &bx, NULL, NULL, NULL));
403: col = 2 * s + 1;
404: PetscCheck(bx != DM_BOUNDARY_PERIODIC || !(m % col), PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "IS_COLORING_GLOBAL can only be used for periodic boundary conditions if the number of grid points %" PetscInt_FMT " is divisible by the number of colors %" PetscInt_FMT, m, col);
405: PetscCall(DMDAGetCorners(da, &xs, NULL, NULL, &nx, NULL, NULL));
406: PetscCall(DMDAGetGhostCorners(da, &gxs, NULL, NULL, &gnx, NULL, NULL));
407: PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
409: /* create the coloring */
410: if (ctype == IS_COLORING_GLOBAL) {
411: if (!dd->localcoloring) {
412: PetscCall(PetscMalloc1(nc * nx, &colors));
413: if (dd->ofillcols) {
414: PetscInt tc = 0;
415: for (i = 0; i < nc; i++) tc += (PetscInt)(dd->ofillcols[i] > 0);
416: i1 = 0;
417: for (i = xs; i < xs + nx; i++) {
418: for (l = 0; l < nc; l++) {
419: if (dd->ofillcols[l] && (i % col)) {
420: colors[i1++] = nc - 1 + tc * ((i % col) - 1) + dd->ofillcols[l];
421: } else {
422: colors[i1++] = l;
423: }
424: }
425: }
426: ncolors = nc + 2 * s * tc;
427: } else {
428: i1 = 0;
429: for (i = xs; i < xs + nx; i++) {
430: for (l = 0; l < nc; l++) colors[i1++] = l + nc * (i % col);
431: }
432: ncolors = nc + nc * (col - 1);
433: }
434: PetscCall(ISColoringCreate(comm, ncolors, nc * nx, colors, PETSC_OWN_POINTER, &dd->localcoloring));
435: }
436: *coloring = dd->localcoloring;
437: } else if (ctype == IS_COLORING_LOCAL) {
438: if (!dd->ghostedcoloring) {
439: PetscCall(PetscMalloc1(nc * gnx, &colors));
440: i1 = 0;
441: for (i = gxs; i < gxs + gnx; i++) {
442: for (l = 0; l < nc; l++) {
443: /* the complicated stuff is to handle periodic boundaries */
444: colors[i1++] = l + nc * (SetInRange(i, m) % col);
445: }
446: }
447: ncolors = nc + nc * (col - 1);
448: PetscCall(ISColoringCreate(comm, ncolors, nc * gnx, colors, PETSC_OWN_POINTER, &dd->ghostedcoloring));
449: PetscCall(ISColoringSetType(dd->ghostedcoloring, IS_COLORING_LOCAL));
450: }
451: *coloring = dd->ghostedcoloring;
452: } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_ARG_WRONG, "Unknown ISColoringType %d", (int)ctype);
453: PetscCall(ISColoringReference(*coloring));
454: PetscFunctionReturn(PETSC_SUCCESS);
455: }
457: PetscErrorCode DMCreateColoring_DA_2d_5pt_MPIAIJ(DM da, ISColoringType ctype, ISColoring *coloring)
458: {
459: PetscInt xs, ys, nx, ny, i, j, ii, gxs, gys, gnx, gny, m, n, dim, s, k, nc;
460: PetscInt ncolors;
461: MPI_Comm comm;
462: DMBoundaryType bx, by;
463: ISColoringValue *colors;
464: DM_DA *dd = (DM_DA *)da->data;
466: PetscFunctionBegin;
467: /*
468: nc - number of components per grid point
469: col - number of colors needed in one direction for single component problem
470: */
471: PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, NULL, NULL, NULL, &nc, &s, &bx, &by, NULL, NULL));
472: PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL));
473: PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL));
474: PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
475: /* create the coloring */
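/* Coloring grid point (i,j) by (3*j + i) % 5 gives a valid distance-2 coloring of the 5-point star stencil with
   stencil width 1: any two distinct grid points that both lie in the 5-point stencil of a common point (offsets
   such as (1,0), (0,1), (1,1), (2,0), (0,2)) differ in (3*j + i) modulo 5, so they never share a color */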
476: if (ctype == IS_COLORING_GLOBAL) {
477: if (!dd->localcoloring) {
478: PetscCall(PetscMalloc1(nc * nx * ny, &colors));
479: ii = 0;
480: for (j = ys; j < ys + ny; j++) {
481: for (i = xs; i < xs + nx; i++) {
482: for (k = 0; k < nc; k++) colors[ii++] = k + nc * ((3 * j + i) % 5);
483: }
484: }
485: ncolors = 5 * nc;
486: PetscCall(ISColoringCreate(comm, ncolors, nc * nx * ny, colors, PETSC_OWN_POINTER, &dd->localcoloring));
487: }
488: *coloring = dd->localcoloring;
489: } else if (ctype == IS_COLORING_LOCAL) {
490: if (!dd->ghostedcoloring) {
491: PetscCall(PetscMalloc1(nc * gnx * gny, &colors));
492: ii = 0;
493: for (j = gys; j < gys + gny; j++) {
494: for (i = gxs; i < gxs + gnx; i++) {
495: for (k = 0; k < nc; k++) colors[ii++] = k + nc * ((3 * SetInRange(j, n) + SetInRange(i, m)) % 5);
496: }
497: }
498: ncolors = 5 * nc;
499: PetscCall(ISColoringCreate(comm, ncolors, nc * gnx * gny, colors, PETSC_OWN_POINTER, &dd->ghostedcoloring));
500: PetscCall(ISColoringSetType(dd->ghostedcoloring, IS_COLORING_LOCAL));
501: }
502: *coloring = dd->ghostedcoloring;
503: } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_ARG_WRONG, "Unknown ISColoringType %d", (int)ctype);
504: PetscFunctionReturn(PETSC_SUCCESS);
505: }
507: /* =========================================================================== */
508: extern PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ(DM, Mat);
509: extern PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ_Fill(DM, Mat);
510: extern PetscErrorCode DMCreateMatrix_DA_1d_SeqAIJ_NoPreallocation(DM, Mat);
511: extern PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ(DM, Mat);
512: extern PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ_Fill(DM, Mat);
513: extern PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ(DM, Mat);
514: extern PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ_Fill(DM, Mat);
515: extern PetscErrorCode DMCreateMatrix_DA_2d_MPIBAIJ(DM, Mat);
516: extern PetscErrorCode DMCreateMatrix_DA_3d_MPIBAIJ(DM, Mat);
517: extern PetscErrorCode DMCreateMatrix_DA_2d_MPISBAIJ(DM, Mat);
518: extern PetscErrorCode DMCreateMatrix_DA_3d_MPISBAIJ(DM, Mat);
519: extern PetscErrorCode DMCreateMatrix_DA_2d_MPISELL(DM, Mat);
520: extern PetscErrorCode DMCreateMatrix_DA_3d_MPISELL(DM, Mat);
521: extern PetscErrorCode DMCreateMatrix_DA_IS(DM, Mat);
523: static PetscErrorCode MatView_MPI_DA(Mat A, PetscViewer viewer)
524: {
525: DM da;
526: const char *prefix;
527: Mat AA, Anatural;
528: AO ao;
529: PetscInt rstart, rend, *petsc, i;
530: IS is;
531: MPI_Comm comm;
532: PetscViewerFormat format;
533: PetscBool flag;
535: PetscFunctionBegin;
536: /* Check whether we are just printing info, in which case MatView() already viewed everything we wanted to view */
537: PetscCall(PetscViewerGetFormat(viewer, &format));
538: if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) PetscFunctionReturn(PETSC_SUCCESS);
540: PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
541: PetscCall(MatGetDM(A, &da));
542: PetscCheck(da, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Matrix not generated from a DMDA");
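/* The AO translates between the DMDA's natural (application) ordering and the parallel PETSc ordering; listing this
   process's contiguous natural row indices converted to PETSc indices lets MatCreateSubMatrix() below extract the
   matrix permuted back into the natural ordering for viewing */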
544: PetscCall(DMDAGetAO(da, &ao));
545: PetscCall(MatGetOwnershipRange(A, &rstart, &rend));
546: PetscCall(PetscMalloc1(rend - rstart, &petsc));
547: for (i = rstart; i < rend; i++) petsc[i - rstart] = i;
548: PetscCall(AOApplicationToPetsc(ao, rend - rstart, petsc));
549: PetscCall(ISCreateGeneral(comm, rend - rstart, petsc, PETSC_OWN_POINTER, &is));
551: /* call viewer on natural ordering */
552: PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATMPISELL, &flag));
553: if (flag) {
554: PetscCall(MatConvert(A, MATAIJ, MAT_INITIAL_MATRIX, &AA));
555: A = AA;
556: }
557: PetscCall(MatCreateSubMatrix(A, is, is, MAT_INITIAL_MATRIX, &Anatural));
558: PetscCall(ISDestroy(&is));
559: PetscCall(PetscObjectGetOptionsPrefix((PetscObject)A, &prefix));
560: PetscCall(PetscObjectSetOptionsPrefix((PetscObject)Anatural, prefix));
561: PetscCall(PetscObjectSetName((PetscObject)Anatural, ((PetscObject)A)->name));
562: ((PetscObject)Anatural)->donotPetscObjectPrintClassNamePrefixType = PETSC_TRUE;
563: PetscCall(MatView(Anatural, viewer));
564: ((PetscObject)Anatural)->donotPetscObjectPrintClassNamePrefixType = PETSC_FALSE;
565: PetscCall(MatDestroy(&Anatural));
566: if (flag) PetscCall(MatDestroy(&AA));
567: PetscFunctionReturn(PETSC_SUCCESS);
568: }
570: static PetscErrorCode MatLoad_MPI_DA(Mat A, PetscViewer viewer)
571: {
572: DM da;
573: Mat Anatural, Aapp;
574: AO ao;
575: PetscInt rstart, rend, *app, i, m, n, M, N;
576: IS is;
577: MPI_Comm comm;
579: PetscFunctionBegin;
580: PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
581: PetscCall(MatGetDM(A, &da));
582: PetscCheck(da, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Matrix not generated from a DMDA");
584: /* Load the matrix in natural ordering */
585: PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &Anatural));
586: PetscCall(MatSetType(Anatural, ((PetscObject)A)->type_name));
587: PetscCall(MatGetSize(A, &M, &N));
588: PetscCall(MatGetLocalSize(A, &m, &n));
589: PetscCall(MatSetSizes(Anatural, m, n, M, N));
590: PetscCall(MatLoad(Anatural, viewer));
592: /* Map natural ordering to application ordering and create IS */
593: PetscCall(DMDAGetAO(da, &ao));
594: PetscCall(MatGetOwnershipRange(Anatural, &rstart, &rend));
595: PetscCall(PetscMalloc1(rend - rstart, &app));
596: for (i = rstart; i < rend; i++) app[i - rstart] = i;
597: PetscCall(AOPetscToApplication(ao, rend - rstart, app));
598: PetscCall(ISCreateGeneral(comm, rend - rstart, app, PETSC_OWN_POINTER, &is));
600: /* Do permutation and replace header */
601: PetscCall(MatCreateSubMatrix(Anatural, is, is, MAT_INITIAL_MATRIX, &Aapp));
602: PetscCall(MatHeaderReplace(A, &Aapp));
603: PetscCall(ISDestroy(&is));
604: PetscCall(MatDestroy(&Anatural));
605: PetscFunctionReturn(PETSC_SUCCESS);
606: }
608: PetscErrorCode DMCreateMatrix_DA(DM da, Mat *J)
609: {
610: PetscInt dim, dof, nx, ny, nz, dims[3], starts[3], M, N, P;
611: Mat A;
612: MPI_Comm comm;
613: MatType Atype;
614: MatType mtype;
615: PetscMPIInt size;
616: DM_DA *dd = (DM_DA *)da->data;
617: void (*aij)(void) = NULL, (*baij)(void) = NULL, (*sbaij)(void) = NULL, (*sell)(void) = NULL, (*is)(void) = NULL;
619: PetscFunctionBegin;
620: PetscCall(MatInitializePackage());
621: mtype = da->mattype;
623: /*
624: m
625: ------------------------------------------------------
626: | |
627: | |
628: | ---------------------- |
629: | | | |
630: n | ny | | |
631: | | | |
632: | .--------------------- |
633: | (xs,ys) nx |
634: | . |
635: | (gxs,gys) |
636: | |
637: -----------------------------------------------------
638: */
640: /*
641: nc - number of components per grid point
642: col - number of colors needed in one direction for single component problem
643: */
644: M = dd->M;
645: N = dd->N;
646: P = dd->P;
647: dim = da->dim;
648: dof = dd->w;
649: /* PetscCall(DMDAGetInfo(da,&dim,&M,&N,&P,NULL,NULL,NULL,&dof,NULL,NULL,NULL,NULL,NULL)); */
650: PetscCall(DMDAGetCorners(da, NULL, NULL, NULL, &nx, &ny, &nz));
651: PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
652: PetscCall(MatCreate(comm, &A));
653: PetscCall(MatSetSizes(A, dof * nx * ny * nz, dof * nx * ny * nz, dof * M * N * P, dof * M * N * P));
654: PetscCall(MatSetType(A, mtype));
655: PetscCall(MatSetFromOptions(A));
656: if (dof * nx * ny * nz < da->bind_below) {
657: PetscCall(MatSetBindingPropagates(A, PETSC_TRUE));
658: PetscCall(MatBindToCPU(A, PETSC_TRUE));
659: }
660: PetscCall(MatSetDM(A, da));
661: if (da->structure_only) PetscCall(MatSetOption(A, MAT_STRUCTURE_ONLY, PETSC_TRUE));
662: PetscCall(MatGetType(A, &Atype));
663: /*
664: We do not provide a getmatrix function in the DMDA operations because
665: the basic DMDA does not know about matrices. We think of DMDA as being
666: more low-level than matrices. This is kind of cheating, because sometimes
667: we also think of DMDA as being at a higher level than matrices.
669: We could switch based on Atype (or mtype), but we do not since the
670: specialized setting routines depend only on the particular preallocation
671: details of the matrix, not the type itself.
672: */
673: if (!da->prealloc_skip) { // Flag is likely set when user intends to use MatSetPreallocationCOO()
674: PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatMPIAIJSetPreallocation_C", &aij));
675: if (!aij) PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatSeqAIJSetPreallocation_C", &aij));
676: if (!aij) {
677: PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatMPIBAIJSetPreallocation_C", &baij));
678: if (!baij) PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatSeqBAIJSetPreallocation_C", &baij));
679: if (!baij) {
680: PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatMPISBAIJSetPreallocation_C", &sbaij));
681: if (!sbaij) PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatSeqSBAIJSetPreallocation_C", &sbaij));
682: if (!sbaij) {
683: PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatMPISELLSetPreallocation_C", &sell));
684: if (!sell) PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatSeqSELLSetPreallocation_C", &sell));
685: }
686: if (!sell) PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatISSetPreallocation_C", &is));
687: }
688: }
689: }
690: if (aij) {
691: if (dim == 1) {
692: if (dd->ofill) {
693: PetscCall(DMCreateMatrix_DA_1d_MPIAIJ_Fill(da, A));
694: } else {
695: DMBoundaryType bx;
696: PetscMPIInt size;
697: PetscCall(DMDAGetInfo(da, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, &bx, NULL, NULL, NULL));
698: PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)da), &size));
699: if (size == 1 && bx == DM_BOUNDARY_NONE) {
700: PetscCall(DMCreateMatrix_DA_1d_SeqAIJ_NoPreallocation(da, A));
701: } else {
702: PetscCall(DMCreateMatrix_DA_1d_MPIAIJ(da, A));
703: }
704: }
705: } else if (dim == 2) {
706: if (dd->ofill) {
707: PetscCall(DMCreateMatrix_DA_2d_MPIAIJ_Fill(da, A));
708: } else {
709: PetscCall(DMCreateMatrix_DA_2d_MPIAIJ(da, A));
710: }
711: } else if (dim == 3) {
712: if (dd->ofill) {
713: PetscCall(DMCreateMatrix_DA_3d_MPIAIJ_Fill(da, A));
714: } else {
715: PetscCall(DMCreateMatrix_DA_3d_MPIAIJ(da, A));
716: }
717: }
718: } else if (baij) {
719: if (dim == 2) {
720: PetscCall(DMCreateMatrix_DA_2d_MPIBAIJ(da, A));
721: } else if (dim == 3) {
722: PetscCall(DMCreateMatrix_DA_3d_MPIBAIJ(da, A));
723: } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "Not implemented for %" PetscInt_FMT " dimension and Matrix Type: %s in %" PetscInt_FMT " dimension! Send mail to petsc-maint@mcs.anl.gov for code", dim, Atype, dim);
724: } else if (sbaij) {
725: if (dim == 2) {
726: PetscCall(DMCreateMatrix_DA_2d_MPISBAIJ(da, A));
727: } else if (dim == 3) {
728: PetscCall(DMCreateMatrix_DA_3d_MPISBAIJ(da, A));
729: } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "Not implemented for %" PetscInt_FMT " dimension and Matrix Type: %s in %" PetscInt_FMT " dimension! Send mail to petsc-maint@mcs.anl.gov for code", dim, Atype, dim);
730: } else if (sell) {
731: if (dim == 2) {
732: PetscCall(DMCreateMatrix_DA_2d_MPISELL(da, A));
733: } else if (dim == 3) {
734: PetscCall(DMCreateMatrix_DA_3d_MPISELL(da, A));
735: } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "Not implemented for %" PetscInt_FMT " dimension and Matrix Type: %s in %" PetscInt_FMT " dimension! Send mail to petsc-maint@mcs.anl.gov for code", dim, Atype, dim);
736: } else if (is) {
737: PetscCall(DMCreateMatrix_DA_IS(da, A));
738: } else { // unknown type or da->prealloc_skip so structural information may be needed, but no prealloc
739: ISLocalToGlobalMapping ltog;
741: PetscCall(MatSetBlockSize(A, dof));
742: PetscCall(MatSetUp(A));
743: PetscCall(DMGetLocalToGlobalMapping(da, <og));
744: PetscCall(MatSetLocalToGlobalMapping(A, ltog, ltog));
745: }
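/* Record the ghosted grid sizes and starting indices on the matrix so that MatSetValuesStencil() can be used to
   insert values with (i,j,k,c) grid indices instead of raw row/column numbers */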
746: PetscCall(DMDAGetGhostCorners(da, &starts[0], &starts[1], &starts[2], &dims[0], &dims[1], &dims[2]));
747: PetscCall(MatSetStencil(A, dim, dims, starts, dof));
748: PetscCall(MatSetDM(A, da));
749: PetscCallMPI(MPI_Comm_size(comm, &size));
750: if (size > 1) {
751: /* change viewer to display matrix in natural ordering */
752: PetscCall(MatSetOperation(A, MATOP_VIEW, (void (*)(void))MatView_MPI_DA));
753: PetscCall(MatSetOperation(A, MATOP_LOAD, (void (*)(void))MatLoad_MPI_DA));
754: }
755: *J = A;
756: PetscFunctionReturn(PETSC_SUCCESS);
757: }
759: PETSC_EXTERN PetscErrorCode MatISSetPreallocation_IS(Mat, PetscInt, const PetscInt[], PetscInt, const PetscInt[]);
761: PetscErrorCode DMCreateMatrix_DA_IS(DM dm, Mat J)
762: {
763: DM_DA *da = (DM_DA *)dm->data;
764: Mat lJ, P;
765: ISLocalToGlobalMapping ltog;
766: IS is;
767: PetscBT bt;
768: const PetscInt *e_loc, *idx;
769: PetscInt i, nel, nen, nv, dof, *gidx, n, N;
771: /* The l2g map of the DMDA has all ghosted nodes, and e_loc is a subset of all the local nodes (including the ghosted ones);
772: we need to filter out the local indices that are not represented through the DMDAGetElements() decomposition */
773: PetscFunctionBegin;
774: dof = da->w;
775: PetscCall(MatSetBlockSize(J, dof));
776: PetscCall(DMGetLocalToGlobalMapping(dm, <og));
778: /* flag local elements indices in local DMDA numbering */
779: PetscCall(ISLocalToGlobalMappingGetSize(ltog, &nv));
780: PetscCall(PetscBTCreate(nv / dof, &bt));
781: PetscCall(DMDAGetElements(dm, &nel, &nen, &e_loc)); /* this will throw an error if the stencil type is not DMDA_STENCIL_BOX */
782: for (i = 0; i < nel * nen; i++) PetscCall(PetscBTSet(bt, e_loc[i]));
784: /* filter out (set to -1) the global indices not used by the local elements */
785: PetscCall(PetscMalloc1(nv / dof, &gidx));
786: PetscCall(ISLocalToGlobalMappingGetBlockIndices(ltog, &idx));
787: PetscCall(PetscArraycpy(gidx, idx, nv / dof));
788: PetscCall(ISLocalToGlobalMappingRestoreBlockIndices(ltog, &idx));
789: for (i = 0; i < nv / dof; i++)
790: if (!PetscBTLookup(bt, i)) gidx[i] = -1;
791: PetscCall(PetscBTDestroy(&bt));
792: PetscCall(ISCreateBlock(PetscObjectComm((PetscObject)dm), dof, nv / dof, gidx, PETSC_OWN_POINTER, &is));
793: PetscCall(ISLocalToGlobalMappingCreateIS(is, <og));
794: PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));
795: PetscCall(ISLocalToGlobalMappingDestroy(<og));
796: PetscCall(ISDestroy(&is));
798: /* Preallocation */
799: if (dm->prealloc_skip) {
800: PetscCall(MatSetUp(J));
801: } else {
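/* Build a MATPREALLOCATOR matrix P with the same local-to-global map, insert the element connectivity pattern into
   it, and let MatPreallocatorPreallocate() transfer the resulting nonzero pattern (filled with zeros unless
   prealloc_only is set) to the local matrix lJ of the MATIS */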
802: PetscCall(MatISGetLocalMat(J, &lJ));
803: PetscCall(MatGetLocalToGlobalMapping(lJ, <og, NULL));
804: PetscCall(MatCreate(PetscObjectComm((PetscObject)lJ), &P));
805: PetscCall(MatSetType(P, MATPREALLOCATOR));
806: PetscCall(MatSetLocalToGlobalMapping(P, ltog, ltog));
807: PetscCall(MatGetSize(lJ, &N, NULL));
808: PetscCall(MatGetLocalSize(lJ, &n, NULL));
809: PetscCall(MatSetSizes(P, n, n, N, N));
810: PetscCall(MatSetBlockSize(P, dof));
811: PetscCall(MatSetUp(P));
812: for (i = 0; i < nel; i++) PetscCall(MatSetValuesBlockedLocal(P, nen, e_loc + i * nen, nen, e_loc + i * nen, NULL, INSERT_VALUES));
813: PetscCall(MatPreallocatorPreallocate(P, (PetscBool)!da->prealloc_only, lJ));
814: PetscCall(MatISRestoreLocalMat(J, &lJ));
815: PetscCall(DMDARestoreElements(dm, &nel, &nen, &e_loc));
816: PetscCall(MatDestroy(&P));
818: PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
819: PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
820: }
821: PetscFunctionReturn(PETSC_SUCCESS);
822: }
824: PetscErrorCode DMCreateMatrix_DA_2d_MPISELL(DM da, Mat J)
825: {
826: PetscInt xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny, m, n, dim, s, *cols = NULL, k, nc, *rows = NULL, col, cnt, l, p;
827: PetscInt lstart, lend, pstart, pend, *dnz, *onz;
828: MPI_Comm comm;
829: PetscScalar *values;
830: DMBoundaryType bx, by;
831: ISLocalToGlobalMapping ltog;
832: DMDAStencilType st;
834: PetscFunctionBegin;
835: /*
836: nc - number of components per grid point
837: col - number of colors needed in one direction for single component problem
838: */
839: PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, NULL, NULL, NULL, &nc, &s, &bx, &by, NULL, &st));
840: col = 2 * s + 1;
841: PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL));
842: PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL));
843: PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
845: PetscCall(PetscMalloc2(nc, &rows, col * col * nc * nc, &cols));
846: PetscCall(DMGetLocalToGlobalMapping(da, <og));
848: PetscCall(MatSetBlockSize(J, nc));
849: /* determine the matrix preallocation information */
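/* MatPreallocateBegin() allocates and zeros the dnz/onz arrays (one entry per local row); MatPreallocateSetLocal()
   maps the local (ghosted) row and column indices to global indices and counts, for each row, how many stencil
   columns fall in the diagonal block (owned by this process) versus the off-diagonal block */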
850: MatPreallocateBegin(comm, nc * nx * ny, nc * nx * ny, dnz, onz);
851: for (i = xs; i < xs + nx; i++) {
852: pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
853: pend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
855: for (j = ys; j < ys + ny; j++) {
856: slot = i - gxs + gnx * (j - gys);
858: lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
859: lend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
861: cnt = 0;
862: for (k = 0; k < nc; k++) {
863: for (l = lstart; l < lend + 1; l++) {
864: for (p = pstart; p < pend + 1; p++) {
865: if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star have either l = 0 or p = 0 */
866: cols[cnt++] = k + nc * (slot + gnx * l + p);
867: }
868: }
869: }
870: rows[k] = k + nc * (slot);
871: }
872: PetscCall(MatPreallocateSetLocal(ltog, nc, rows, ltog, cnt, cols, dnz, onz));
873: }
874: }
875: PetscCall(MatSetBlockSize(J, nc));
876: PetscCall(MatSeqSELLSetPreallocation(J, 0, dnz));
877: PetscCall(MatMPISELLSetPreallocation(J, 0, dnz, 0, onz));
878: MatPreallocateEnd(dnz, onz);
880: PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));
882: /*
883: For each node in the grid: we get the neighbors in the local (on-process) ordering
884: that includes the ghost points, then MatSetValuesLocal() maps those indices to the global
885: PETSc ordering.
886: */
887: if (!da->prealloc_only) {
888: PetscCall(PetscCalloc1(col * col * nc * nc, &values));
889: for (i = xs; i < xs + nx; i++) {
890: pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
891: pend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
893: for (j = ys; j < ys + ny; j++) {
894: slot = i - gxs + gnx * (j - gys);
896: lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
897: lend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
899: cnt = 0;
900: for (k = 0; k < nc; k++) {
901: for (l = lstart; l < lend + 1; l++) {
902: for (p = pstart; p < pend + 1; p++) {
903: if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star have either l = 0 or p = 0 */
904: cols[cnt++] = k + nc * (slot + gnx * l + p);
905: }
906: }
907: }
908: rows[k] = k + nc * (slot);
909: }
910: PetscCall(MatSetValuesLocal(J, nc, rows, cnt, cols, values, INSERT_VALUES));
911: }
912: }
913: PetscCall(PetscFree(values));
914: /* do not copy values to GPU since they are all zero and not yet needed there */
915: PetscCall(MatBindToCPU(J, PETSC_TRUE));
916: PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
917: PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
918: PetscCall(MatBindToCPU(J, PETSC_FALSE));
919: PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
920: }
921: PetscCall(PetscFree2(rows, cols));
922: PetscFunctionReturn(PETSC_SUCCESS);
923: }
925: PetscErrorCode DMCreateMatrix_DA_3d_MPISELL(DM da, Mat J)
926: {
927: PetscInt xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
928: PetscInt m, n, dim, s, *cols = NULL, k, nc, *rows = NULL, col, cnt, l, p, *dnz = NULL, *onz = NULL;
929: PetscInt istart, iend, jstart, jend, kstart, kend, zs, nz, gzs, gnz, ii, jj, kk, M, N, P;
930: MPI_Comm comm;
931: PetscScalar *values;
932: DMBoundaryType bx, by, bz;
933: ISLocalToGlobalMapping ltog;
934: DMDAStencilType st;
936: PetscFunctionBegin;
937: /*
938: nc - number of components per grid point
939: col - number of colors needed in one direction for single component problem
940: */
941: PetscCall(DMDAGetInfo(da, &dim, &m, &n, &p, &M, &N, &P, &nc, &s, &bx, &by, &bz, &st));
942: col = 2 * s + 1;
943: PetscCall(DMDAGetCorners(da, &xs, &ys, &zs, &nx, &ny, &nz));
944: PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, &gzs, &gnx, &gny, &gnz));
945: PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
947: PetscCall(PetscMalloc2(nc, &rows, col * col * col * nc * nc, &cols));
948: PetscCall(DMGetLocalToGlobalMapping(da, <og));
950: PetscCall(MatSetBlockSize(J, nc));
951: /* determine the matrix preallocation information */
952: MatPreallocateBegin(comm, nc * nx * ny * nz, nc * nx * ny * nz, dnz, onz);
953: for (i = xs; i < xs + nx; i++) {
954: istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
955: iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
956: for (j = ys; j < ys + ny; j++) {
957: jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
958: jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
959: for (k = zs; k < zs + nz; k++) {
960: kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
961: kend = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));
963: slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);
965: cnt = 0;
966: for (l = 0; l < nc; l++) {
967: for (ii = istart; ii < iend + 1; ii++) {
968: for (jj = jstart; jj < jend + 1; jj++) {
969: for (kk = kstart; kk < kend + 1; kk++) {
970: if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/
971: cols[cnt++] = l + nc * (slot + ii + gnx * jj + gnx * gny * kk);
972: }
973: }
974: }
975: }
976: rows[l] = l + nc * (slot);
977: }
978: PetscCall(MatPreallocateSetLocal(ltog, nc, rows, ltog, cnt, cols, dnz, onz));
979: }
980: }
981: }
982: PetscCall(MatSetBlockSize(J, nc));
983: PetscCall(MatSeqSELLSetPreallocation(J, 0, dnz));
984: PetscCall(MatMPISELLSetPreallocation(J, 0, dnz, 0, onz));
985: MatPreallocateEnd(dnz, onz);
986: PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));
988: /*
989: For each node in the grid: we get the neighbors in the local (on-process) ordering
990: that includes the ghost points, then MatSetValuesLocal() maps those indices to the global
991: PETSc ordering.
992: */
993: if (!da->prealloc_only) {
994: PetscCall(PetscCalloc1(col * col * col * nc * nc * nc, &values));
995: for (i = xs; i < xs + nx; i++) {
996: istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
997: iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
998: for (j = ys; j < ys + ny; j++) {
999: jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
1000: jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
1001: for (k = zs; k < zs + nz; k++) {
1002: kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
1003: kend = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));
1005: slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);
1007: cnt = 0;
1008: for (l = 0; l < nc; l++) {
1009: for (ii = istart; ii < iend + 1; ii++) {
1010: for (jj = jstart; jj < jend + 1; jj++) {
1011: for (kk = kstart; kk < kend + 1; kk++) {
1012: if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/
1013: cols[cnt++] = l + nc * (slot + ii + gnx * jj + gnx * gny * kk);
1014: }
1015: }
1016: }
1017: }
1018: rows[l] = l + nc * (slot);
1019: }
1020: PetscCall(MatSetValuesLocal(J, nc, rows, cnt, cols, values, INSERT_VALUES));
1021: }
1022: }
1023: }
1024: PetscCall(PetscFree(values));
1025: /* do not copy values to GPU since they are all zero and not yet needed there */
1026: PetscCall(MatBindToCPU(J, PETSC_TRUE));
1027: PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
1028: PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
1029: PetscCall(MatBindToCPU(J, PETSC_FALSE));
1030: PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
1031: }
1032: PetscCall(PetscFree2(rows, cols));
1033: PetscFunctionReturn(PETSC_SUCCESS);
1034: }
1036: PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ(DM da, Mat J)
1037: {
1038: PetscInt xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny, m, n, dim, s, *cols = NULL, k, nc, *rows = NULL, col, cnt, l, p, M, N;
1039: PetscInt lstart, lend, pstart, pend, *dnz, *onz;
1040: MPI_Comm comm;
1041: DMBoundaryType bx, by;
1042: ISLocalToGlobalMapping ltog, mltog;
1043: DMDAStencilType st;
1044: PetscBool removedups = PETSC_FALSE, alreadyboundtocpu = PETSC_TRUE;
1046: PetscFunctionBegin;
1047: /*
1048: nc - number of components per grid point
1049: col - number of colors needed in one direction for single component problem
1050: */
1051: PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, &M, &N, NULL, &nc, &s, &bx, &by, NULL, &st));
1052: if (bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_TRUE));
1053: col = 2 * s + 1;
1054: /*
1055: With one process and a periodic domain in a skinny dimension, the code will label nonzero columns multiple times
1056: because "wrapping" around the end of the domain hits an entry already counted from the other direction.
1057: */
1058: if (M == 1 && 2 * s >= m) removedups = PETSC_TRUE;
1059: if (N == 1 && 2 * s >= n) removedups = PETSC_TRUE;
1060: PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL));
1061: PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL));
1062: PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
1064: PetscCall(PetscMalloc2(nc, &rows, col * col * nc * nc, &cols));
1065: PetscCall(DMGetLocalToGlobalMapping(da, <og));
1067: PetscCall(MatSetBlockSize(J, nc));
1068: /* determine the matrix preallocation information */
1069: MatPreallocateBegin(comm, nc * nx * ny, nc * nx * ny, dnz, onz);
1070: for (i = xs; i < xs + nx; i++) {
1071: pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
1072: pend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
1074: for (j = ys; j < ys + ny; j++) {
1075: slot = i - gxs + gnx * (j - gys);
1077: lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
1078: lend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
1080: cnt = 0;
1081: for (k = 0; k < nc; k++) {
1082: for (l = lstart; l < lend + 1; l++) {
1083: for (p = pstart; p < pend + 1; p++) {
1084: if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star have either l = 0 or p = 0 */
1085: cols[cnt++] = k + nc * (slot + gnx * l + p);
1086: }
1087: }
1088: }
1089: rows[k] = k + nc * (slot);
1090: }
1091: if (removedups) PetscCall(MatPreallocateSetLocalRemoveDups(ltog, nc, rows, ltog, cnt, cols, dnz, onz));
1092: else PetscCall(MatPreallocateSetLocal(ltog, nc, rows, ltog, cnt, cols, dnz, onz));
1093: }
1094: }
1095: PetscCall(MatSetBlockSize(J, nc));
1096: PetscCall(MatSeqAIJSetPreallocation(J, 0, dnz));
1097: PetscCall(MatMPIAIJSetPreallocation(J, 0, dnz, 0, onz));
1098: MatPreallocateEnd(dnz, onz);
1099: PetscCall(MatGetLocalToGlobalMapping(J, &mltog, NULL));
1100: if (!mltog) PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));
1102: /*
1103: For each node in the grid: we get the neighbors in the local (on-process) ordering
1104: that includes the ghost points, then MatSetValuesLocal() maps those indices to the global
1105: PETSc ordering.
1106: */
1107: if (!da->prealloc_only) {
1108: for (i = xs; i < xs + nx; i++) {
1109: pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
1110: pend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
1112: for (j = ys; j < ys + ny; j++) {
1113: slot = i - gxs + gnx * (j - gys);
1115: lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
1116: lend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
1118: cnt = 0;
1119: for (l = lstart; l < lend + 1; l++) {
1120: for (p = pstart; p < pend + 1; p++) {
1121: if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star have either l = 0 or p = 0 */
1122: cols[cnt++] = nc * (slot + gnx * l + p);
1123: for (k = 1; k < nc; k++) {
1124: cols[cnt] = 1 + cols[cnt - 1];
1125: cnt++;
1126: }
1127: }
1128: }
1129: }
1130: for (k = 0; k < nc; k++) rows[k] = k + nc * (slot);
1131: PetscCall(MatSetValuesLocal(J, nc, rows, cnt, cols, NULL, INSERT_VALUES));
1132: }
1133: }
1134: /* do not copy values to GPU since they are all zero and not yet needed there */
1135: PetscCall(MatBoundToCPU(J, &alreadyboundtocpu));
1136: PetscCall(MatBindToCPU(J, PETSC_TRUE));
1137: PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
1138: PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
1139: if (!alreadyboundtocpu) PetscCall(MatBindToCPU(J, PETSC_FALSE));
1140: PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
1141: if (bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_FALSE));
1142: }
1143: PetscCall(PetscFree2(rows, cols));
1144: PetscFunctionReturn(PETSC_SUCCESS);
1145: }
1147: PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ_Fill(DM da, Mat J)
1148: {
1149: PetscInt xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
1150: PetscInt m, n, dim, s, *cols, k, nc, row, col, cnt, maxcnt = 0, l, p, M, N;
1151: PetscInt lstart, lend, pstart, pend, *dnz, *onz;
1152: DM_DA *dd = (DM_DA *)da->data;
1153: PetscInt ifill_col, *ofill = dd->ofill, *dfill = dd->dfill;
1154: MPI_Comm comm;
1155: DMBoundaryType bx, by;
1156: ISLocalToGlobalMapping ltog;
1157: DMDAStencilType st;
1158: PetscBool removedups = PETSC_FALSE;
1160: PetscFunctionBegin;
1161: /*
1162: nc - number of components per grid point
1163: col - number of colors needed in one direction for single component problem
1164: */
1165: PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, &M, &N, NULL, &nc, &s, &bx, &by, NULL, &st));
1166: col = 2 * s + 1;
1167: /*
1168: With one process and a periodic domain in a skinny dimension, the code will label nonzero columns multiple times
1169: because "wrapping" around the end of the domain hits an entry already counted from the other direction.
1170: */
1171: if (M == 1 && 2 * s >= m) removedups = PETSC_TRUE;
1172: if (N == 1 && 2 * s >= n) removedups = PETSC_TRUE;
1173: PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL));
1174: PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL));
1175: PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
1177: PetscCall(PetscMalloc1(col * col * nc, &cols));
1178: PetscCall(DMGetLocalToGlobalMapping(da, <og));
1180: PetscCall(MatSetBlockSize(J, nc));
1181: /* determine the matrix preallocation information */
1182: MatPreallocateBegin(comm, nc * nx * ny, nc * nx * ny, dnz, onz);
1183: for (i = xs; i < xs + nx; i++) {
1184: pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
1185: pend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
1187: for (j = ys; j < ys + ny; j++) {
1188: slot = i - gxs + gnx * (j - gys);
1190: lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
1191: lend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
1193: for (k = 0; k < nc; k++) {
1194: cnt = 0;
1195: for (l = lstart; l < lend + 1; l++) {
1196: for (p = pstart; p < pend + 1; p++) {
1197: if (l || p) {
1198: if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star */
1199: for (ifill_col = ofill[k]; ifill_col < ofill[k + 1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc * (slot + gnx * l + p);
1200: }
1201: } else {
1202: if (dfill) {
1203: for (ifill_col = dfill[k]; ifill_col < dfill[k + 1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc * (slot + gnx * l + p);
1204: } else {
1205: for (ifill_col = 0; ifill_col < nc; ifill_col++) cols[cnt++] = ifill_col + nc * (slot + gnx * l + p);
1206: }
1207: }
1208: }
1209: }
1210: row = k + nc * (slot);
1211: maxcnt = PetscMax(maxcnt, cnt);
1212: if (removedups) PetscCall(MatPreallocateSetLocalRemoveDups(ltog, 1, &row, ltog, cnt, cols, dnz, onz));
1213: else PetscCall(MatPreallocateSetLocal(ltog, 1, &row, ltog, cnt, cols, dnz, onz));
1214: }
1215: }
1216: }
1217: PetscCall(MatSeqAIJSetPreallocation(J, 0, dnz));
1218: PetscCall(MatMPIAIJSetPreallocation(J, 0, dnz, 0, onz));
1219: MatPreallocateEnd(dnz, onz);
1220: PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));
1222: /*
1223: For each node in the grid: we get the neighbors in the local (on-process) ordering
1224: that includes the ghost points, then MatSetValuesLocal() maps those indices to the global
1225: PETSc ordering.
1226: */
1227: if (!da->prealloc_only) {
1228: for (i = xs; i < xs + nx; i++) {
1229: pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
1230: pend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
1232: for (j = ys; j < ys + ny; j++) {
1233: slot = i - gxs + gnx * (j - gys);
1235: lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
1236: lend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
1238: for (k = 0; k < nc; k++) {
1239: cnt = 0;
1240: for (l = lstart; l < lend + 1; l++) {
1241: for (p = pstart; p < pend + 1; p++) {
1242: if (l || p) {
1243: if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star */
1244: for (ifill_col = ofill[k]; ifill_col < ofill[k + 1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc * (slot + gnx * l + p);
1245: }
1246: } else {
1247: if (dfill) {
1248: for (ifill_col = dfill[k]; ifill_col < dfill[k + 1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc * (slot + gnx * l + p);
1249: } else {
1250: for (ifill_col = 0; ifill_col < nc; ifill_col++) cols[cnt++] = ifill_col + nc * (slot + gnx * l + p);
1251: }
1252: }
1253: }
1254: }
1255: row = k + nc * (slot);
1256: PetscCall(MatSetValuesLocal(J, 1, &row, cnt, cols, NULL, INSERT_VALUES));
1257: }
1258: }
1259: }
1260: /* do not copy values to GPU since they are all zero and not yet needed there */
1261: PetscCall(MatBindToCPU(J, PETSC_TRUE));
1262: PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
1263: PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
1264: PetscCall(MatBindToCPU(J, PETSC_FALSE));
1265: PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
1266: }
1267: PetscCall(PetscFree(cols));
1268: PetscFunctionReturn(PETSC_SUCCESS);
1269: }
1271: PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ(DM da, Mat J)
1272: {
1273: PetscInt xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
1274: PetscInt m, n, dim, s, *cols = NULL, k, nc, *rows = NULL, col, cnt, l, p, *dnz = NULL, *onz = NULL;
1275: PetscInt istart, iend, jstart, jend, kstart, kend, zs, nz, gzs, gnz, ii, jj, kk, M, N, P;
1276: MPI_Comm comm;
1277: DMBoundaryType bx, by, bz;
1278: ISLocalToGlobalMapping ltog, mltog;
1279: DMDAStencilType st;
1280: PetscBool removedups = PETSC_FALSE;
1282: PetscFunctionBegin;
1283: /*
1284: nc - number of components per grid point
1285: col - number of colors needed in one direction for single component problem
1286: */
1287: PetscCall(DMDAGetInfo(da, &dim, &m, &n, &p, &M, &N, &P, &nc, &s, &bx, &by, &bz, &st));
1288: if (bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE && bz == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_TRUE));
1289: col = 2 * s + 1;
1291: /*
1292: With one process and a periodic domain in a skinny dimension, the code will label nonzero columns multiple times
1293: because "wrapping" around the end of the domain hits an entry already counted from the other direction.
1294: */
1295: if (M == 1 && 2 * s >= m) removedups = PETSC_TRUE;
1296: if (N == 1 && 2 * s >= n) removedups = PETSC_TRUE;
1297: if (P == 1 && 2 * s >= p) removedups = PETSC_TRUE;
1299: PetscCall(DMDAGetCorners(da, &xs, &ys, &zs, &nx, &ny, &nz));
1300: PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, &gzs, &gnx, &gny, &gnz));
1301: PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
1303: PetscCall(PetscMalloc2(nc, &rows, col * col * col * nc * nc, &cols));
1304: PetscCall(DMGetLocalToGlobalMapping(da, <og));
1306: PetscCall(MatSetBlockSize(J, nc));
1307: /* determine the matrix preallocation information */
1308: MatPreallocateBegin(comm, nc * nx * ny * nz, nc * nx * ny * nz, dnz, onz);
1309: for (i = xs; i < xs + nx; i++) {
1310: istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
1311: iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
1312: for (j = ys; j < ys + ny; j++) {
1313: jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
1314: jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
1315: for (k = zs; k < zs + nz; k++) {
1316: kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
1317: kend = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));
1319: slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);
1321: cnt = 0;
1322: for (l = 0; l < nc; l++) {
1323: for (ii = istart; ii < iend + 1; ii++) {
1324: for (jj = jstart; jj < jend + 1; jj++) {
1325: for (kk = kstart; kk < kend + 1; kk++) {
1326: if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/
1327: cols[cnt++] = l + nc * (slot + ii + gnx * jj + gnx * gny * kk);
1328: }
1329: }
1330: }
1331: }
1332: rows[l] = l + nc * (slot);
1333: }
1334: if (removedups) PetscCall(MatPreallocateSetLocalRemoveDups(ltog, nc, rows, ltog, cnt, cols, dnz, onz));
1335: else PetscCall(MatPreallocateSetLocal(ltog, nc, rows, ltog, cnt, cols, dnz, onz));
1336: }
1337: }
1338: }
1339: PetscCall(MatSetBlockSize(J, nc));
1340: PetscCall(MatSeqAIJSetPreallocation(J, 0, dnz));
1341: PetscCall(MatMPIAIJSetPreallocation(J, 0, dnz, 0, onz));
1342: MatPreallocateEnd(dnz, onz);
1343: PetscCall(MatGetLocalToGlobalMapping(J, &mltog, NULL));
1344: if (!mltog) PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));
1346: /*
1347: For each node in the grid: we get the neighbors in the local (on-process) ordering
1348: that includes the ghost points, then MatSetValuesLocal() maps those indices to the global
1349: PETSc ordering.
1350: */
1351: if (!da->prealloc_only) {
1352: for (i = xs; i < xs + nx; i++) {
1353: istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
1354: iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
1355: for (j = ys; j < ys + ny; j++) {
1356: jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
1357: jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
1358: for (k = zs; k < zs + nz; k++) {
1359: kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
1360: kend = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));
1362: slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);
1364: cnt = 0;
1365: for (kk = kstart; kk < kend + 1; kk++) {
1366: for (jj = jstart; jj < jend + 1; jj++) {
1367: for (ii = istart; ii < iend + 1; ii++) {
1368: if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/
1369: cols[cnt++] = nc * (slot + ii + gnx * jj + gnx * gny * kk);
1370: for (l = 1; l < nc; l++) {
1371: cols[cnt] = 1 + cols[cnt - 1];
1372: cnt++;
1373: }
1374: }
1375: }
1376: }
1377: }
1378: rows[0] = nc * (slot);
1379: for (l = 1; l < nc; l++) rows[l] = 1 + rows[l - 1];
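/* Components are interleaved: the nc rows (and columns) at a grid point are the consecutive local
   indices nc*slot, nc*slot + 1, ..., nc*slot + nc - 1. */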
1380: PetscCall(MatSetValuesLocal(J, nc, rows, cnt, cols, NULL, INSERT_VALUES));
1381: }
1382: }
1383: }
1384: /* do not copy values to GPU since they are all zero and not yet needed there */
1385: PetscCall(MatBindToCPU(J, PETSC_TRUE));
1386: PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
1387: PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
1388: if (bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE && bz == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_FALSE));
1389: PetscCall(MatBindToCPU(J, PETSC_FALSE));
1390: PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
1391: }
1392: PetscCall(PetscFree2(rows, cols));
1393: PetscFunctionReturn(PETSC_SUCCESS);
1394: }
1396: PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ_Fill(DM da, Mat J)
1397: {
1398: DM_DA *dd = (DM_DA *)da->data;
1399: PetscInt xs, nx, i, j, gxs, gnx, row, k, l;
1400: PetscInt m, dim, s, *cols = NULL, nc, cnt, maxcnt = 0, *ocols;
1401: PetscInt *ofill = dd->ofill, *dfill = dd->dfill;
1402: DMBoundaryType bx;
1403: ISLocalToGlobalMapping ltog;
1404: PetscMPIInt rank, size;
1406: PetscFunctionBegin;
1407: PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)da), &rank));
1408: PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)da), &size));
1410: /*
1411: nc - number of components per grid point
1412: */
1413: PetscCall(DMDAGetInfo(da, &dim, &m, NULL, NULL, NULL, NULL, NULL, &nc, &s, &bx, NULL, NULL, NULL));
1414: PetscCheck(s <= 1, PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "Matrix creation for 1d not implemented correctly for stencil width larger than 1");
1415: PetscCall(DMDAGetCorners(da, &xs, NULL, NULL, &nx, NULL, NULL));
1416: PetscCall(DMDAGetGhostCorners(da, &gxs, NULL, NULL, &gnx, NULL, NULL));
1418: PetscCall(MatSetBlockSize(J, nc));
1419: PetscCall(PetscCalloc2(nx * nc, &cols, nx * nc, &ocols));
1421: /*
1422: note: should be smaller for the first and last process when the boundary is not periodic;
1423: does not handle dfill
1424: */
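/*
   cols[] accumulates the diagonal-block (same-process) nonzero count and ocols[] the off-diagonal-block
   count for each local row. As a sketch of the arithmetic, assume for illustration nc = 2, s = 1 and the
   dense fill patterns dfill = {1,0, 1,1} and ofill = {0,1, 1,0}, whose sparse (CSR) forms are
      dfill = {3, 4, 6, 0, 0, 1} and ofill = {3, 4, 5, 1, 0}.
   Then an interior row for component 0 gets dfill[1] - dfill[0] + 2*s*(ofill[1] - ofill[0]) = 1 + 2 = 3
   preallocated nonzeros.
*/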
1425: cnt = 0;
1426: /* coupling with process to the left */
1427: for (i = 0; i < s; i++) {
1428: for (j = 0; j < nc; j++) {
1429: ocols[cnt] = ((rank == 0) ? 0 : (s - i) * (ofill[j + 1] - ofill[j]));
1430: cols[cnt] = dfill[j + 1] - dfill[j] + (s + i) * (ofill[j + 1] - ofill[j]);
1431: if (rank == 0 && (dd->bx == DM_BOUNDARY_PERIODIC)) {
1432: if (size > 1) ocols[cnt] += (s - i) * (ofill[j + 1] - ofill[j]);
1433: else cols[cnt] += (s - i) * (ofill[j + 1] - ofill[j]);
1434: }
1435: maxcnt = PetscMax(maxcnt, ocols[cnt] + cols[cnt]);
1436: cnt++;
1437: }
1438: }
1439: for (i = s; i < nx - s; i++) {
1440: for (j = 0; j < nc; j++) {
1441: cols[cnt] = dfill[j + 1] - dfill[j] + 2 * s * (ofill[j + 1] - ofill[j]);
1442: maxcnt = PetscMax(maxcnt, ocols[cnt] + cols[cnt]);
1443: cnt++;
1444: }
1445: }
1446: /* coupling with process to the right */
1447: for (i = nx - s; i < nx; i++) {
1448: for (j = 0; j < nc; j++) {
1449: ocols[cnt] = ((rank == (size - 1)) ? 0 : (i - nx + s + 1) * (ofill[j + 1] - ofill[j]));
1450: cols[cnt] = dfill[j + 1] - dfill[j] + (s + nx - i - 1) * (ofill[j + 1] - ofill[j]);
1451: if ((rank == size - 1) && (dd->bx == DM_BOUNDARY_PERIODIC)) {
1452: if (size > 1) ocols[cnt] += (i - nx + s + 1) * (ofill[j + 1] - ofill[j]);
1453: else cols[cnt] += (i - nx + s + 1) * (ofill[j + 1] - ofill[j]);
1454: }
1455: maxcnt = PetscMax(maxcnt, ocols[cnt] + cols[cnt]);
1456: cnt++;
1457: }
1458: }
1460: PetscCall(MatSeqAIJSetPreallocation(J, 0, cols));
1461: PetscCall(MatMPIAIJSetPreallocation(J, 0, cols, 0, ocols));
1462: PetscCall(PetscFree2(cols, ocols));
1464: PetscCall(DMGetLocalToGlobalMapping(da, <og));
1465: PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));
1467: /*
1468: For each node in the grid: we compute the stencil columns directly in the global PETSc ordering
1469: (which for a 1d DMDA coincides with the natural ordering) and insert each row with
1470: MatSetValues().
1471: */
1472: if (!da->prealloc_only) {
1473: PetscCall(PetscMalloc1(maxcnt, &cols));
1474: row = xs * nc;
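/* Each row is assembled in three pieces: couplings to the left halo through ofill, the diagonal block
   through dfill (or all nc components when dfill is NULL), and couplings to the right halo through
   ofill; wrap-around columns are added on the end processes when the boundary is periodic. */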
1475: /* coupling with process to the left */
1476: for (i = xs; i < xs + s; i++) {
1477: for (j = 0; j < nc; j++) {
1478: cnt = 0;
1479: if (rank) {
1480: for (l = 0; l < s; l++) {
1481: for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i - s + l) * nc + ofill[k];
1482: }
1483: }
1484: if (rank == 0 && (dd->bx == DM_BOUNDARY_PERIODIC)) {
1485: for (l = 0; l < s; l++) {
1486: for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (m + i - s - l) * nc + ofill[k];
1487: }
1488: }
1489: if (dfill) {
1490: for (k = dfill[j]; k < dfill[j + 1]; k++) cols[cnt++] = i * nc + dfill[k];
1491: } else {
1492: for (k = 0; k < nc; k++) cols[cnt++] = i * nc + k;
1493: }
1494: for (l = 0; l < s; l++) {
1495: for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i + s - l) * nc + ofill[k];
1496: }
1497: PetscCall(MatSetValues(J, 1, &row, cnt, cols, NULL, INSERT_VALUES));
1498: row++;
1499: }
1500: }
1501: for (i = xs + s; i < xs + nx - s; i++) {
1502: for (j = 0; j < nc; j++) {
1503: cnt = 0;
1504: for (l = 0; l < s; l++) {
1505: for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i - s + l) * nc + ofill[k];
1506: }
1507: if (dfill) {
1508: for (k = dfill[j]; k < dfill[j + 1]; k++) cols[cnt++] = i * nc + dfill[k];
1509: } else {
1510: for (k = 0; k < nc; k++) cols[cnt++] = i * nc + k;
1511: }
1512: for (l = 0; l < s; l++) {
1513: for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i + s - l) * nc + ofill[k];
1514: }
1515: PetscCall(MatSetValues(J, 1, &row, cnt, cols, NULL, INSERT_VALUES));
1516: row++;
1517: }
1518: }
1519: /* coupling with process to the right */
1520: for (i = xs + nx - s; i < xs + nx; i++) {
1521: for (j = 0; j < nc; j++) {
1522: cnt = 0;
1523: for (l = 0; l < s; l++) {
1524: for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i - s + l) * nc + ofill[k];
1525: }
1526: if (dfill) {
1527: for (k = dfill[j]; k < dfill[j + 1]; k++) cols[cnt++] = i * nc + dfill[k];
1528: } else {
1529: for (k = 0; k < nc; k++) cols[cnt++] = i * nc + k;
1530: }
1531: if (rank < size - 1) {
1532: for (l = 0; l < s; l++) {
1533: for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i + s - l) * nc + ofill[k];
1534: }
1535: }
1536: if ((rank == size - 1) && (dd->bx == DM_BOUNDARY_PERIODIC)) {
1537: for (l = 0; l < s; l++) {
1538: for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i - s - l - m + 2) * nc + ofill[k];
1539: }
1540: }
1541: PetscCall(MatSetValues(J, 1, &row, cnt, cols, NULL, INSERT_VALUES));
1542: row++;
1543: }
1544: }
1545: PetscCall(PetscFree(cols));
1546: /* do not copy values to GPU since they are all zero and not yet needed there */
1547: PetscCall(MatBindToCPU(J, PETSC_TRUE));
1548: PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
1549: PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
1550: PetscCall(MatBindToCPU(J, PETSC_FALSE));
1551: PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
1552: }
1553: PetscFunctionReturn(PETSC_SUCCESS);
1554: }
1556: PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ(DM da, Mat J)
1557: {
1558: PetscInt xs, nx, i, i1, slot, gxs, gnx;
1559: PetscInt m, dim, s, *cols = NULL, nc, *rows = NULL, col, cnt, l;
1560: PetscInt istart, iend;
1561: DMBoundaryType bx;
1562: ISLocalToGlobalMapping ltog, mltog;
1564: PetscFunctionBegin;
1565: /*
1566: nc - number of components per grid point
1567: col - number of colors needed in one direction for single component problem
1568: */
1569: PetscCall(DMDAGetInfo(da, &dim, &m, NULL, NULL, NULL, NULL, NULL, &nc, &s, &bx, NULL, NULL, NULL));
1570: if (bx == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_TRUE));
1571: col = 2 * s + 1;
1573: PetscCall(DMDAGetCorners(da, &xs, NULL, NULL, &nx, NULL, NULL));
1574: PetscCall(DMDAGetGhostCorners(da, &gxs, NULL, NULL, &gnx, NULL, NULL));
1576: PetscCall(MatSetBlockSize(J, nc));
1577: PetscCall(MatSeqAIJSetPreallocation(J, col * nc, NULL));
1578: PetscCall(MatMPIAIJSetPreallocation(J, col * nc, NULL, col * nc, NULL));
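/* For example, with s = 1 and nc = 2 this preallocates col*nc = 3*2 = 6 nonzeros per row in both the
   diagonal and off-diagonal blocks, a simple upper bound for the 1d stencil. */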
1580: PetscCall(DMGetLocalToGlobalMapping(da, <og));
1581: PetscCall(MatGetLocalToGlobalMapping(J, &mltog, NULL));
1582: if (!mltog) PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));
1584: /*
1585: For each node in the grid: we get the neighbors in the local (on-process) ordering
1586: that includes the ghost points, then MatSetValuesLocal() maps those indices to the global
1587: PETSc ordering.
1588: */
1589: if (!da->prealloc_only) {
1590: PetscCall(PetscMalloc2(nc, &rows, col * nc * nc, &cols));
1591: for (i = xs; i < xs + nx; i++) {
1592: istart = PetscMax(-s, gxs - i);
1593: iend = PetscMin(s, gxs + gnx - i - 1);
1594: slot = i - gxs;
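/* istart/iend clamp the stencil to the locally ghosted patch [gxs, gxs + gnx); periodic wrap-around is
   handled by the local-to-global mapping, so no explicit wrapping is needed here. */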
1596: cnt = 0;
1597: for (i1 = istart; i1 < iend + 1; i1++) {
1598: cols[cnt++] = nc * (slot + i1);
1599: for (l = 1; l < nc; l++) {
1600: cols[cnt] = 1 + cols[cnt - 1];
1601: cnt++;
1602: }
1603: }
1604: rows[0] = nc * (slot);
1605: for (l = 1; l < nc; l++) rows[l] = 1 + rows[l - 1];
1606: PetscCall(MatSetValuesLocal(J, nc, rows, cnt, cols, NULL, INSERT_VALUES));
1607: }
1608: /* do not copy values to GPU since they are all zero and not yet needed there */
1609: PetscCall(MatBindToCPU(J, PETSC_TRUE));
1610: PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
1611: PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
1612: if (bx == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_FALSE));
1613: PetscCall(MatBindToCPU(J, PETSC_FALSE));
1614: PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
1615: PetscCall(PetscFree2(rows, cols));
1616: }
1617: PetscFunctionReturn(PETSC_SUCCESS);
1618: }
1620: PetscErrorCode DMCreateMatrix_DA_1d_SeqAIJ_NoPreallocation(DM da, Mat J)
1621: {
1622: PetscInt xs, nx, i, i1, slot, gxs, gnx;
1623: PetscInt m, dim, s, *cols = NULL, nc, *rows = NULL, col, cnt, l;
1624: PetscInt istart, iend;
1625: DMBoundaryType bx;
1626: ISLocalToGlobalMapping ltog, mltog;
1628: PetscFunctionBegin;
1629: /*
1630: nc - number of components per grid point
1631: col - number of colors needed in one direction for single component problem
1632: */
1633: PetscCall(DMDAGetInfo(da, &dim, &m, NULL, NULL, NULL, NULL, NULL, &nc, &s, &bx, NULL, NULL, NULL));
1634: col = 2 * s + 1;
1636: PetscCall(DMDAGetCorners(da, &xs, NULL, NULL, &nx, NULL, NULL));
1637: PetscCall(DMDAGetGhostCorners(da, &gxs, NULL, NULL, &gnx, NULL, NULL));
1639: PetscCall(MatSetBlockSize(J, nc));
1640: PetscCall(MatSeqAIJSetTotalPreallocation(J, nx * nc * col * nc));
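/* A single total-size preallocation: nx*nc rows with at most col*nc entries each, i.e. an upper bound of
   nx*nc*col*nc nonzeros; the loop below then supplies the rows in increasing order with sorted columns. */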
1642: PetscCall(DMGetLocalToGlobalMapping(da, <og));
1643: PetscCall(MatGetLocalToGlobalMapping(J, &mltog, NULL));
1644: if (!mltog) PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));
1646: /*
1647: For each node in the grid: we get the neighbors in the local (on-process) ordering
1648: that includes the ghost points, then MatSetValuesLocal() maps those indices to the global
1649: PETSc ordering.
1650: */
1651: if (!da->prealloc_only) {
1652: PetscCall(PetscMalloc2(nc, &rows, col * nc * nc, &cols));
1653: for (i = xs; i < xs + nx; i++) {
1654: istart = PetscMax(-s, gxs - i);
1655: iend = PetscMin(s, gxs + gnx - i - 1);
1656: slot = i - gxs;
1658: cnt = 0;
1659: for (i1 = istart; i1 < iend + 1; i1++) {
1660: cols[cnt++] = nc * (slot + i1);
1661: for (l = 1; l < nc; l++) {
1662: cols[cnt] = 1 + cols[cnt - 1];
1663: cnt++;
1664: }
1665: }
1666: rows[0] = nc * (slot);
1667: for (l = 1; l < nc; l++) rows[l] = 1 + rows[l - 1];
1668: PetscCall(MatSetValuesLocal(J, nc, rows, cnt, cols, NULL, INSERT_VALUES));
1669: }
1670: /* do not copy values to GPU since they are all zero and not yet needed there */
1671: PetscCall(MatBindToCPU(J, PETSC_TRUE));
1672: PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
1673: PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
1674: if (bx == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_FALSE));
1675: PetscCall(MatBindToCPU(J, PETSC_FALSE));
1676: PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
1677: PetscCall(PetscFree2(rows, cols));
1678: }
1679: PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_FALSE));
1680: PetscFunctionReturn(PETSC_SUCCESS);
1681: }
1683: PetscErrorCode DMCreateMatrix_DA_2d_MPIBAIJ(DM da, Mat J)
1684: {
1685: PetscInt xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
1686: PetscInt m, n, dim, s, *cols, nc, col, cnt, *dnz, *onz;
1687: PetscInt istart, iend, jstart, jend, ii, jj;
1688: MPI_Comm comm;
1689: PetscScalar *values;
1690: DMBoundaryType bx, by;
1691: DMDAStencilType st;
1692: ISLocalToGlobalMapping ltog;
1694: PetscFunctionBegin;
1695: /*
1696: nc - number of components per grid point
1697: col - number of colors needed in one direction for single component problem
1698: */
1699: PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, NULL, NULL, NULL, &nc, &s, &bx, &by, NULL, &st));
1700: col = 2 * s + 1;
1702: PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL));
1703: PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL));
1704: PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
1706: PetscCall(PetscMalloc1(col * col * nc * nc, &cols));
1708: PetscCall(DMGetLocalToGlobalMapping(da, <og));
1710: /* determine the matrix preallocation information */
1711: MatPreallocateBegin(comm, nx * ny, nx * ny, dnz, onz);
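/* For BAIJ the preallocation is per block: each grid point is one block row of size nc, and dnz/onz
   count block columns in the diagonal and off-diagonal parts rather than scalar nonzeros. */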
1712: for (i = xs; i < xs + nx; i++) {
1713: istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
1714: iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
1715: for (j = ys; j < ys + ny; j++) {
1716: jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
1717: jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
1718: slot = i - gxs + gnx * (j - gys);
1720: /* Find block columns in block row */
1721: cnt = 0;
1722: for (ii = istart; ii < iend + 1; ii++) {
1723: for (jj = jstart; jj < jend + 1; jj++) {
1724: if (st == DMDA_STENCIL_BOX || !ii || !jj) { /* BOX or on the STAR */
1725: cols[cnt++] = slot + ii + gnx * jj;
1726: }
1727: }
1728: }
1729: PetscCall(MatPreallocateSetLocalBlock(ltog, 1, &slot, ltog, cnt, cols, dnz, onz));
1730: }
1731: }
1732: PetscCall(MatSeqBAIJSetPreallocation(J, nc, 0, dnz));
1733: PetscCall(MatMPIBAIJSetPreallocation(J, nc, 0, dnz, 0, onz));
1734: MatPreallocateEnd(dnz, onz);
1736: PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));
1738: /*
1739: For each node in the grid: we get the neighbors in the local (on-process) ordering
1740: that includes the ghost points, then MatSetValuesBlockedLocal() maps those indices to the global
1741: PETSc ordering.
1742: */
1743: if (!da->prealloc_only) {
1744: PetscCall(PetscCalloc1(col * col * nc * nc, &values));
1745: for (i = xs; i < xs + nx; i++) {
1746: istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
1747: iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
1748: for (j = ys; j < ys + ny; j++) {
1749: jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
1750: jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
1751: slot = i - gxs + gnx * (j - gys);
1752: cnt = 0;
1753: for (ii = istart; ii < iend + 1; ii++) {
1754: for (jj = jstart; jj < jend + 1; jj++) {
1755: if (st == DMDA_STENCIL_BOX || !ii || !jj) { /* BOX or on the STAR */
1756: cols[cnt++] = slot + ii + gnx * jj;
1757: }
1758: }
1759: }
1760: PetscCall(MatSetValuesBlockedLocal(J, 1, &slot, cnt, cols, values, INSERT_VALUES));
1761: }
1762: }
1763: PetscCall(PetscFree(values));
1764: /* do not copy values to GPU since they are all zero and not yet needed there */
1765: PetscCall(MatBindToCPU(J, PETSC_TRUE));
1766: PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
1767: PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
1768: PetscCall(MatBindToCPU(J, PETSC_FALSE));
1769: PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
1770: }
1771: PetscCall(PetscFree(cols));
1772: PetscFunctionReturn(PETSC_SUCCESS);
1773: }
1775: PetscErrorCode DMCreateMatrix_DA_3d_MPIBAIJ(DM da, Mat J)
1776: {
1777: PetscInt xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
1778: PetscInt m, n, dim, s, *cols, k, nc, col, cnt, p, *dnz, *onz;
1779: PetscInt istart, iend, jstart, jend, kstart, kend, zs, nz, gzs, gnz, ii, jj, kk;
1780: MPI_Comm comm;
1781: PetscScalar *values;
1782: DMBoundaryType bx, by, bz;
1783: DMDAStencilType st;
1784: ISLocalToGlobalMapping ltog;
1786: PetscFunctionBegin;
1787: /*
1788: nc - number of components per grid point
1789: col - number of colors needed in one direction for single component problem
1790: */
1791: PetscCall(DMDAGetInfo(da, &dim, &m, &n, &p, NULL, NULL, NULL, &nc, &s, &bx, &by, &bz, &st));
1792: col = 2 * s + 1;
1794: PetscCall(DMDAGetCorners(da, &xs, &ys, &zs, &nx, &ny, &nz));
1795: PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, &gzs, &gnx, &gny, &gnz));
1796: PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
1798: PetscCall(PetscMalloc1(col * col * col, &cols));
1800: PetscCall(DMGetLocalToGlobalMapping(da, <og));
1802: /* determine the matrix preallocation information */
1803: MatPreallocateBegin(comm, nx * ny * nz, nx * ny * nz, dnz, onz);
1804: for (i = xs; i < xs + nx; i++) {
1805: istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
1806: iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
1807: for (j = ys; j < ys + ny; j++) {
1808: jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
1809: jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
1810: for (k = zs; k < zs + nz; k++) {
1811: kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
1812: kend = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));
1814: slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);
1816: /* Find block columns in block row */
1817: cnt = 0;
1818: for (ii = istart; ii < iend + 1; ii++) {
1819: for (jj = jstart; jj < jend + 1; jj++) {
1820: for (kk = kstart; kk < kend + 1; kk++) {
1821: if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/
1822: cols[cnt++] = slot + ii + gnx * jj + gnx * gny * kk;
1823: }
1824: }
1825: }
1826: }
1827: PetscCall(MatPreallocateSetLocalBlock(ltog, 1, &slot, ltog, cnt, cols, dnz, onz));
1828: }
1829: }
1830: }
1831: PetscCall(MatSeqBAIJSetPreallocation(J, nc, 0, dnz));
1832: PetscCall(MatMPIBAIJSetPreallocation(J, nc, 0, dnz, 0, onz));
1833: MatPreallocateEnd(dnz, onz);
1835: PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));
1837: /*
1838: For each node in the grid: we get the neighbors in the local (on-process) ordering
1839: that includes the ghost points, then MatSetValuesBlockedLocal() maps those indices to the global
1840: PETSc ordering.
1841: */
1842: if (!da->prealloc_only) {
1843: PetscCall(PetscCalloc1(col * col * col * nc * nc, &values));
1844: for (i = xs; i < xs + nx; i++) {
1845: istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
1846: iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
1847: for (j = ys; j < ys + ny; j++) {
1848: jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
1849: jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
1850: for (k = zs; k < zs + nz; k++) {
1851: kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
1852: kend = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));
1854: slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);
1856: cnt = 0;
1857: for (ii = istart; ii < iend + 1; ii++) {
1858: for (jj = jstart; jj < jend + 1; jj++) {
1859: for (kk = kstart; kk < kend + 1; kk++) {
1860: if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/
1861: cols[cnt++] = slot + ii + gnx * jj + gnx * gny * kk;
1862: }
1863: }
1864: }
1865: }
1866: PetscCall(MatSetValuesBlockedLocal(J, 1, &slot, cnt, cols, values, INSERT_VALUES));
1867: }
1868: }
1869: }
1870: PetscCall(PetscFree(values));
1871: /* do not copy values to GPU since they are all zero and not yet needed there */
1872: PetscCall(MatBindToCPU(J, PETSC_TRUE));
1873: PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
1874: PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
1875: PetscCall(MatBindToCPU(J, PETSC_FALSE));
1876: PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
1877: }
1878: PetscCall(PetscFree(cols));
1879: PetscFunctionReturn(PETSC_SUCCESS);
1880: }
1882: /*
1883: This helper is for SBAIJ preallocation: it discards the lower-triangular entries, which are difficult to
1884: identify in the local ordering when the domain is periodic.
1885: */
1886: static PetscErrorCode L2GFilterUpperTriangular(ISLocalToGlobalMapping ltog, PetscInt *row, PetscInt *cnt, PetscInt col[])
1887: {
1888: PetscInt i, n;
1890: PetscFunctionBegin;
1891: PetscCall(ISLocalToGlobalMappingApplyBlock(ltog, 1, row, row));
1892: PetscCall(ISLocalToGlobalMappingApplyBlock(ltog, *cnt, col, col));
1893: for (i = 0, n = 0; i < *cnt; i++) {
1894: if (col[i] >= *row) col[n++] = col[i];
1895: }
1896: *cnt = n;
1897: PetscFunctionReturn(PETSC_SUCCESS);
1898: }
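/*
   For example, if the block row maps to global block row 10 and the candidate block columns map to
   {8, 10, 12}, the filter keeps {10, 12}: only the upper triangle (column >= row) is preallocated
   and inserted for SBAIJ matrices.
*/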
1900: PetscErrorCode DMCreateMatrix_DA_2d_MPISBAIJ(DM da, Mat J)
1901: {
1902: PetscInt xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
1903: PetscInt m, n, dim, s, *cols, nc, col, cnt, *dnz, *onz;
1904: PetscInt istart, iend, jstart, jend, ii, jj;
1905: MPI_Comm comm;
1906: PetscScalar *values;
1907: DMBoundaryType bx, by;
1908: DMDAStencilType st;
1909: ISLocalToGlobalMapping ltog;
1911: PetscFunctionBegin;
1912: /*
1913: nc - number of components per grid point
1914: col - number of colors needed in one direction for single component problem
1915: */
1916: PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, NULL, NULL, NULL, &nc, &s, &bx, &by, NULL, &st));
1917: col = 2 * s + 1;
1919: PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL));
1920: PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL));
1921: PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
1923: PetscCall(PetscMalloc1(col * col * nc * nc, &cols));
1925: PetscCall(DMGetLocalToGlobalMapping(da, <og));
1927: /* determine the matrix preallocation information */
1928: MatPreallocateBegin(comm, nx * ny, nx * ny, dnz, onz);
1929: for (i = xs; i < xs + nx; i++) {
1930: istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
1931: iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
1932: for (j = ys; j < ys + ny; j++) {
1933: jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
1934: jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
1935: slot = i - gxs + gnx * (j - gys);
1937: /* Find block columns in block row */
1938: cnt = 0;
1939: for (ii = istart; ii < iend + 1; ii++) {
1940: for (jj = jstart; jj < jend + 1; jj++) {
1941: if (st == DMDA_STENCIL_BOX || !ii || !jj) cols[cnt++] = slot + ii + gnx * jj;
1942: }
1943: }
1944: PetscCall(L2GFilterUpperTriangular(ltog, &slot, &cnt, cols));
1945: PetscCall(MatPreallocateSymmetricSetBlock(slot, cnt, cols, dnz, onz));
1946: }
1947: }
1948: PetscCall(MatSeqSBAIJSetPreallocation(J, nc, 0, dnz));
1949: PetscCall(MatMPISBAIJSetPreallocation(J, nc, 0, dnz, 0, onz));
1950: MatPreallocateEnd(dnz, onz);
1952: PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));
1954: /*
1955: For each node in the grid: we get the neighbors in the local (on-process) ordering
1956: that includes the ghost points; L2GFilterUpperTriangular() maps those indices to the global
1957: PETSc ordering (dropping the lower triangle) and MatSetValuesBlocked() inserts them.
1958: */
1959: if (!da->prealloc_only) {
1960: PetscCall(PetscCalloc1(col * col * nc * nc, &values));
1961: for (i = xs; i < xs + nx; i++) {
1962: istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
1963: iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
1964: for (j = ys; j < ys + ny; j++) {
1965: jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
1966: jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
1967: slot = i - gxs + gnx * (j - gys);
1969: /* Find block columns in block row */
1970: cnt = 0;
1971: for (ii = istart; ii < iend + 1; ii++) {
1972: for (jj = jstart; jj < jend + 1; jj++) {
1973: if (st == DMDA_STENCIL_BOX || !ii || !jj) cols[cnt++] = slot + ii + gnx * jj;
1974: }
1975: }
1976: PetscCall(L2GFilterUpperTriangular(ltog, &slot, &cnt, cols));
1977: PetscCall(MatSetValuesBlocked(J, 1, &slot, cnt, cols, values, INSERT_VALUES));
1978: }
1979: }
1980: PetscCall(PetscFree(values));
1981: /* do not copy values to GPU since they are all zero and not yet needed there */
1982: PetscCall(MatBindToCPU(J, PETSC_TRUE));
1983: PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
1984: PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
1985: PetscCall(MatBindToCPU(J, PETSC_FALSE));
1986: PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
1987: }
1988: PetscCall(PetscFree(cols));
1989: PetscFunctionReturn(PETSC_SUCCESS);
1990: }
1992: PetscErrorCode DMCreateMatrix_DA_3d_MPISBAIJ(DM da, Mat J)
1993: {
1994: PetscInt xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
1995: PetscInt m, n, dim, s, *cols, k, nc, col, cnt, p, *dnz, *onz;
1996: PetscInt istart, iend, jstart, jend, kstart, kend, zs, nz, gzs, gnz, ii, jj, kk;
1997: MPI_Comm comm;
1998: PetscScalar *values;
1999: DMBoundaryType bx, by, bz;
2000: DMDAStencilType st;
2001: ISLocalToGlobalMapping ltog;
2003: PetscFunctionBegin;
2004: /*
2005: nc - number of components per grid point
2006: col - number of colors needed in one direction for single component problem
2007: */
2008: PetscCall(DMDAGetInfo(da, &dim, &m, &n, &p, NULL, NULL, NULL, &nc, &s, &bx, &by, &bz, &st));
2009: col = 2 * s + 1;
2011: PetscCall(DMDAGetCorners(da, &xs, &ys, &zs, &nx, &ny, &nz));
2012: PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, &gzs, &gnx, &gny, &gnz));
2013: PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
2015: /* work space for the block column indices */
2016: PetscCall(PetscMalloc1(col * col * col, &cols));
2018: PetscCall(DMGetLocalToGlobalMapping(da, <og));
2020: /* determine the matrix preallocation information */
2021: MatPreallocateBegin(comm, nx * ny * nz, nx * ny * nz, dnz, onz);
2022: for (i = xs; i < xs + nx; i++) {
2023: istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
2024: iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
2025: for (j = ys; j < ys + ny; j++) {
2026: jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
2027: jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
2028: for (k = zs; k < zs + nz; k++) {
2029: kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
2030: kend = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));
2032: slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);
2034: /* Find block columns in block row */
2035: cnt = 0;
2036: for (ii = istart; ii < iend + 1; ii++) {
2037: for (jj = jstart; jj < jend + 1; jj++) {
2038: for (kk = kstart; kk < kend + 1; kk++) {
2039: if ((st == DMDA_STENCIL_BOX) || (!ii && !jj) || (!jj && !kk) || (!ii && !kk)) cols[cnt++] = slot + ii + gnx * jj + gnx * gny * kk;
2040: }
2041: }
2042: }
2043: PetscCall(L2GFilterUpperTriangular(ltog, &slot, &cnt, cols));
2044: PetscCall(MatPreallocateSymmetricSetBlock(slot, cnt, cols, dnz, onz));
2045: }
2046: }
2047: }
2048: PetscCall(MatSeqSBAIJSetPreallocation(J, nc, 0, dnz));
2049: PetscCall(MatMPISBAIJSetPreallocation(J, nc, 0, dnz, 0, onz));
2050: MatPreallocateEnd(dnz, onz);
2052: PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));
2054: /*
2055: For each node in the grid: we get the neighbors in the local (on-process) ordering
2056: that includes the ghost points; L2GFilterUpperTriangular() maps those indices to the global
2057: PETSc ordering (dropping the lower triangle) and MatSetValuesBlocked() inserts them.
2058: */
2059: if (!da->prealloc_only) {
2060: PetscCall(PetscCalloc1(col * col * col * nc * nc, &values));
2061: for (i = xs; i < xs + nx; i++) {
2062: istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
2063: iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
2064: for (j = ys; j < ys + ny; j++) {
2065: jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
2066: jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
2067: for (k = zs; k < zs + nz; k++) {
2068: kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
2069: kend = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));
2071: slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);
2073: cnt = 0;
2074: for (ii = istart; ii < iend + 1; ii++) {
2075: for (jj = jstart; jj < jend + 1; jj++) {
2076: for (kk = kstart; kk < kend + 1; kk++) {
2077: if ((st == DMDA_STENCIL_BOX) || (!ii && !jj) || (!jj && !kk) || (!ii && !kk)) cols[cnt++] = slot + ii + gnx * jj + gnx * gny * kk;
2078: }
2079: }
2080: }
2081: PetscCall(L2GFilterUpperTriangular(ltog, &slot, &cnt, cols));
2082: PetscCall(MatSetValuesBlocked(J, 1, &slot, cnt, cols, values, INSERT_VALUES));
2083: }
2084: }
2085: }
2086: PetscCall(PetscFree(values));
2087: /* do not copy values to GPU since they are all zero and not yet needed there */
2088: PetscCall(MatBindToCPU(J, PETSC_TRUE));
2089: PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
2090: PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
2091: PetscCall(MatBindToCPU(J, PETSC_FALSE));
2092: PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
2093: }
2094: PetscCall(PetscFree(cols));
2095: PetscFunctionReturn(PETSC_SUCCESS);
2096: }
2098: PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ_Fill(DM da, Mat J)
2099: {
2100: PetscInt xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
2101: PetscInt m, n, dim, s, *cols, k, nc, row, col, cnt, maxcnt = 0, l, p, *dnz, *onz;
2102: PetscInt istart, iend, jstart, jend, kstart, kend, zs, nz, gzs, gnz, ii, jj, kk, M, N, P;
2103: DM_DA *dd = (DM_DA *)da->data;
2104: PetscInt ifill_col, *dfill = dd->dfill, *ofill = dd->ofill;
2105: MPI_Comm comm;
2106: PetscScalar *values;
2107: DMBoundaryType bx, by, bz;
2108: ISLocalToGlobalMapping ltog;
2109: DMDAStencilType st;
2110: PetscBool removedups = PETSC_FALSE;
2112: PetscFunctionBegin;
2113: /*
2114: nc - number of components per grid point
2115: col - number of colors needed in one direction for single component problem
2116: */
2117: PetscCall(DMDAGetInfo(da, &dim, &m, &n, &p, &M, &N, &P, &nc, &s, &bx, &by, &bz, &st));
2118: col = 2 * s + 1;
2120: /*
2121: With a single process in a periodic, skinny dimension the code would label some nonzero columns multiple times
2122: because the stencil "wraps" around the end of the domain and hits entries already counted from the other direction.
2123: */
2124: if (M == 1 && 2 * s >= m) removedups = PETSC_TRUE;
2125: if (N == 1 && 2 * s >= n) removedups = PETSC_TRUE;
2126: if (P == 1 && 2 * s >= p) removedups = PETSC_TRUE;
2128: PetscCall(DMDAGetCorners(da, &xs, &ys, &zs, &nx, &ny, &nz));
2129: PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, &gzs, &gnx, &gny, &gnz));
2130: PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
2132: PetscCall(PetscMalloc1(col * col * col * nc, &cols));
2133: PetscCall(DMGetLocalToGlobalMapping(da, <og));
2135: /* determine the matrix preallocation information */
2136: MatPreallocateBegin(comm, nc * nx * ny * nz, nc * nx * ny * nz, dnz, onz);
2138: PetscCall(MatSetBlockSize(J, nc));
2139: for (i = xs; i < xs + nx; i++) {
2140: istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
2141: iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
2142: for (j = ys; j < ys + ny; j++) {
2143: jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
2144: jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
2145: for (k = zs; k < zs + nz; k++) {
2146: kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
2147: kend = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));
2149: slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);
2151: for (l = 0; l < nc; l++) {
2152: cnt = 0;
2153: for (ii = istart; ii < iend + 1; ii++) {
2154: for (jj = jstart; jj < jend + 1; jj++) {
2155: for (kk = kstart; kk < kend + 1; kk++) {
2156: if (ii || jj || kk) {
2157: if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/
2158: for (ifill_col = ofill[l]; ifill_col < ofill[l + 1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc * (slot + ii + gnx * jj + gnx * gny * kk);
2159: }
2160: } else {
2161: if (dfill) {
2162: for (ifill_col = dfill[l]; ifill_col < dfill[l + 1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc * (slot + ii + gnx * jj + gnx * gny * kk);
2163: } else {
2164: for (ifill_col = 0; ifill_col < nc; ifill_col++) cols[cnt++] = ifill_col + nc * (slot + ii + gnx * jj + gnx * gny * kk);
2165: }
2166: }
2167: }
2168: }
2169: }
2170: row = l + nc * (slot);
2171: maxcnt = PetscMax(maxcnt, cnt);
2172: if (removedups) PetscCall(MatPreallocateSetLocalRemoveDups(ltog, 1, &row, ltog, cnt, cols, dnz, onz));
2173: else PetscCall(MatPreallocateSetLocal(ltog, 1, &row, ltog, cnt, cols, dnz, onz));
2174: }
2175: }
2176: }
2177: }
2178: PetscCall(MatSeqAIJSetPreallocation(J, 0, dnz));
2179: PetscCall(MatMPIAIJSetPreallocation(J, 0, dnz, 0, onz));
2180: MatPreallocateEnd(dnz, onz);
2181: PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));
2183: /*
2184: For each node in the grid: we get the neighbors in the local (on-process) ordering
2185: that includes the ghost points, then MatSetValuesLocal() maps those indices to the global
2186: PETSc ordering.
2187: */
2188: if (!da->prealloc_only) {
2189: PetscCall(PetscCalloc1(maxcnt, &values));
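/* values is a zeroed buffer of length maxcnt (the longest row encountered during preallocation); it is
   reused for every row so that the nonzero pattern can be inserted without meaningful numerical values. */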
2190: for (i = xs; i < xs + nx; i++) {
2191: istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
2192: iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
2193: for (j = ys; j < ys + ny; j++) {
2194: jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
2195: jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
2196: for (k = zs; k < zs + nz; k++) {
2197: kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
2198: kend = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));
2200: slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);
2202: for (l = 0; l < nc; l++) {
2203: cnt = 0;
2204: for (ii = istart; ii < iend + 1; ii++) {
2205: for (jj = jstart; jj < jend + 1; jj++) {
2206: for (kk = kstart; kk < kend + 1; kk++) {
2207: if (ii || jj || kk) {
2208: if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/
2209: for (ifill_col = ofill[l]; ifill_col < ofill[l + 1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc * (slot + ii + gnx * jj + gnx * gny * kk);
2210: }
2211: } else {
2212: if (dfill) {
2213: for (ifill_col = dfill[l]; ifill_col < dfill[l + 1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc * (slot + ii + gnx * jj + gnx * gny * kk);
2214: } else {
2215: for (ifill_col = 0; ifill_col < nc; ifill_col++) cols[cnt++] = ifill_col + nc * (slot + ii + gnx * jj + gnx * gny * kk);
2216: }
2217: }
2218: }
2219: }
2220: }
2221: row = l + nc * (slot);
2222: PetscCall(MatSetValuesLocal(J, 1, &row, cnt, cols, values, INSERT_VALUES));
2223: }
2224: }
2225: }
2226: }
2227: PetscCall(PetscFree(values));
2228: /* do not copy values to GPU since they are all zero and not yet needed there */
2229: PetscCall(MatBindToCPU(J, PETSC_TRUE));
2230: PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
2231: PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
2232: PetscCall(MatBindToCPU(J, PETSC_FALSE));
2233: PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
2234: }
2235: PetscCall(PetscFree(cols));
2236: PetscFunctionReturn(PETSC_SUCCESS);
2237: }