Actual source code: fdda.c
petsc-3.12.5 2020-03-29
2: #include <petsc/private/dmdaimpl.h>
3: #include <petscmat.h>
5: extern PetscErrorCode DMCreateColoring_DA_1d_MPIAIJ(DM,ISColoringType,ISColoring*);
6: extern PetscErrorCode DMCreateColoring_DA_2d_MPIAIJ(DM,ISColoringType,ISColoring*);
7: extern PetscErrorCode DMCreateColoring_DA_2d_5pt_MPIAIJ(DM,ISColoringType,ISColoring*);
8: extern PetscErrorCode DMCreateColoring_DA_3d_MPIAIJ(DM,ISColoringType,ISColoring*);
10: /*
11:    For a ghost index i that may be negative or greater than the upper bound, this
12:    maps it into the 0:m-1 range using periodicity
13: */
14: #define SetInRange(i,m) ((i < 0) ? m+i : ((i >= m) ? i-m : i))
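/*
   For illustration: with m = 10 the macro maps a ghost index lying within one period of [0,m) as
     SetInRange(-1,10) -> 9   (wraps a ghost below the lower end)
     SetInRange(10,10) -> 0   (wraps a ghost past the upper end)
     SetInRange(4,10)  -> 4   (an index already in range is unchanged)
*/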
16: static PetscErrorCode DMDASetBlockFills_Private(const PetscInt *dfill,PetscInt w,PetscInt **rfill)
17: {
19: PetscInt i,j,nz,*fill;
22: if (!dfill) return(0);
24: /* count the number of nonzeros */
25: nz = 0;
26: for (i=0; i<w; i++) {
27: for (j=0; j<w; j++) {
28: if (dfill[w*i+j]) nz++;
29: }
30: }
31: PetscMalloc1(nz + w + 1,&fill);
32: /* construct modified CSR storage of nonzero structure */
33: /* fill[0 .. w] marks the start of each row of column indices (and the end of the last row),
34: so fill[1] - fill[0] gives the number of nonzeros in the first row, etc. */
35: nz = w + 1;
36: for (i=0; i<w; i++) {
37: fill[i] = nz;
38: for (j=0; j<w; j++) {
39: if (dfill[w*i+j]) {
40: fill[nz] = j;
41: nz++;
42: }
43: }
44: }
45: fill[w] = nz;
47: *rfill = fill;
48: return(0);
49: }
52: static PetscErrorCode DMDASetBlockFillsSparse_Private(const PetscInt *dfillsparse,PetscInt w,PetscInt **rfill)
53: {
55: PetscInt nz;
58: if (!dfillsparse) return(0);
60: /* Determine number of non-zeros */
61: nz = (dfillsparse[w] - w - 1);
63: /* Allocate space for our copy of the given sparse matrix representation. */
64: PetscMalloc1(nz + w + 1,rfill);
65: PetscArraycpy(*rfill,dfillsparse,nz+w+1);
66: return(0);
67: }
70: static PetscErrorCode DMDASetBlockFills_Private2(DM_DA *dd)
71: {
73: PetscInt i,k,cnt = 1;
77: /* ofillcols tracks the columns of ofill that have any nonzero in them; the value in each location is the number of
78: columns to the left with any nonzeros in them plus 1 */
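/*
   Small illustrative example: with w = 3 and off-diagonal coupling only in columns 0 and 2 of
   ofill, the first loop below marks ofillcols = {1,0,1} and the second loop renumbers the marked
   columns from left to right, giving ofillcols = {1,0,2}; columns with no coupling stay 0.
*/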
79: PetscCalloc1(dd->w,&dd->ofillcols);
80: for (i=0; i<dd->w; i++) {
81: for (k=dd->ofill[i]; k<dd->ofill[i+1]; k++) dd->ofillcols[dd->ofill[k]] = 1;
82: }
83: for (i=0; i<dd->w; i++) {
84: if (dd->ofillcols[i]) {
85: dd->ofillcols[i] = cnt++;
86: }
87: }
88: return(0);
89: }
93: /*@
94: DMDASetBlockFills - Sets the fill pattern in each block for a multi-component problem
95: of the matrix returned by DMCreateMatrix().
97: Logically Collective on da
99: Input Parameters:
100: + da - the distributed array
101: . dfill - the fill pattern in the diagonal block (may be NULL, means use dense block)
102: - ofill - the fill pattern in the off-diagonal blocks
105: Level: developer
107: Notes:
108: This only makes sense when you are doing multicomponent problems but using the
109: MPIAIJ matrix format
111: The format for dfill and ofill is a two dimensional dof-by-dof matrix whose entries are 1 where
112: coupling is present and 0 where coupling is missing. For example
113: $ dfill[9] = {1, 0, 0,
114: $ 1, 1, 0,
115: $ 0, 1, 1}
116: means that row 0 is coupled with only itself in the diagonal block, row 1 is coupled with
117: itself and row 0 (in the diagonal block) and row 2 is coupled with itself and row 1 (in the
118: diagonal block).
120: DMDASetGetMatrix() allows you to provide general code for those more complicated nonzero patterns than
121: can be represented in the dfill, ofill format
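   As a sketch of typical use (the ofill values and the variables da and J here are only
   illustrative), a 3-component problem could register the fill patterns before requesting
   the matrix with
$      PetscInt dfill[9] = {1, 0, 0,
$                           1, 1, 0,
$                           0, 1, 1};
$      PetscInt ofill[9] = {1, 0, 0,
$                           0, 1, 0,
$                           0, 0, 1};
$      DMDASetBlockFills(da,dfill,ofill);
$      DMCreateMatrix(da,&J);
   where this ofill states that each component couples only to the same component at
   neighboring grid points.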
123: Contributed by Glenn Hammond
125: .seealso DMCreateMatrix(), DMDASetGetMatrix(), DMSetMatrixPreallocateOnly()
127: @*/
128: PetscErrorCode DMDASetBlockFills(DM da,const PetscInt *dfill,const PetscInt *ofill)
129: {
130: DM_DA *dd = (DM_DA*)da->data;
134: /* save the given dfill and ofill information */
135: DMDASetBlockFills_Private(dfill,dd->w,&dd->dfill);
136: DMDASetBlockFills_Private(ofill,dd->w,&dd->ofill);
138: /* count nonzeros in ofill columns */
139: DMDASetBlockFills_Private2(dd);
141: return(0);
142: }
145: /*@
146: DMDASetBlockFillsSparse - Sets the fill pattern in each block for a multi-component problem
147: of the matrix returned by DMCreateMatrix(), using sparse representations
148: of fill patterns.
150: Logically Collective on da
152: Input Parameters:
153: + da - the distributed array
154: . dfill - the sparse fill pattern in the diagonal block (may be NULL, means use dense block)
155: - ofill - the sparse fill pattern in the off-diagonal blocks
158: Level: developer
160: Notes: This only makes sense when you are doing multicomponent problems but using the
161: MPIAIJ matrix format
163: The format for dfill and ofill is a sparse representation of a
164: dof-by-dof matrix whose entries are 1 where coupling is present and 0 where it is
165: missing. The sparse representation is a one-dimensional
166: array of length nz + dof + 1, where nz is the number of non-zeros in
167: the matrix. The first dof entries in the array give the
168: starting array indices of each row's items in the rest of the array,
169: the dof+1st item contains the value nz + dof + 1 (i.e. the entire length of the array)
170: and the remaining nz items give the column indices of each of
171: the 1s within the logical 2D matrix. Each row's items within
172: the array are the column indices of the 1s within that row
173: of the 2D matrix. PETSc developers may recognize that this is the
174: same format as that computed by the DMDASetBlockFills_Private()
175: function from a dense 2D matrix representation.
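   For example, the dense 3-by-3 block used in the DMDASetBlockFills() notes above,
$      dfill[9] = {1, 0, 0,
$                  1, 1, 0,
$                  0, 1, 1}
   has dof = 3 and nz = 5, so its sparse representation is the length-9 array
$      dfillsparse[9] = {4, 5, 7, 9,  0,  0, 1,  1, 2}
   where the entries {4,5,7} are the starting indices of the three rows, the entry 9 is
   nz + dof + 1 (the total length of the array), and the remaining entries {0}, {0,1}, {1,2}
   are the column indices of the nonzeros in rows 0, 1 and 2 respectively.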
177: DMDASetGetMatrix() allows you to provide general code for those more complicated nonzero patterns than
178: can be represented in the dfill, ofill format
180: Contributed by Philip C. Roth
182: .seealso DMDASetBlockFills(), DMCreateMatrix(), DMDASetGetMatrix(), DMSetMatrixPreallocateOnly()
184: @*/
185: PetscErrorCode DMDASetBlockFillsSparse(DM da,const PetscInt *dfillsparse,const PetscInt *ofillsparse)
186: {
187: DM_DA *dd = (DM_DA*)da->data;
191: /* save the given dfill and ofill information */
192: DMDASetBlockFillsSparse_Private(dfillsparse,dd->w,&dd->dfill);
193: DMDASetBlockFillsSparse_Private(ofillsparse,dd->w,&dd->ofill);
195: /* count nonzeros in ofill columns */
196: DMDASetBlockFills_Private2(dd);
198: return(0);
199: }
202: PetscErrorCode DMCreateColoring_DA(DM da,ISColoringType ctype,ISColoring *coloring)
203: {
204: PetscErrorCode ierr;
205: PetscInt dim,m,n,p,nc;
206: DMBoundaryType bx,by,bz;
207: MPI_Comm comm;
208: PetscMPIInt size;
209: PetscBool isBAIJ;
210: DM_DA *dd = (DM_DA*)da->data;
213: /*
214: m
215: ------------------------------------------------------
216: | |
217: | |
218: | ---------------------- |
219: | | | |
220: n | yn | | |
221: | | | |
222: | .--------------------- |
223: | (xs,ys) xn |
224: | . |
225: | (gxs,gys) |
226: | |
227: -----------------------------------------------------
228: */
230: /*
231: nc - number of components per grid point
232: col - number of colors needed in one direction for single component problem
234: */
235: DMDAGetInfo(da,&dim,0,0,0,&m,&n,&p,&nc,0,&bx,&by,&bz,0);
237: PetscObjectGetComm((PetscObject)da,&comm);
238: MPI_Comm_size(comm,&size);
239: if (ctype == IS_COLORING_LOCAL) {
240: if (size == 1) {
241: ctype = IS_COLORING_GLOBAL;
242: } else if (dim > 1) {
243: if ((m==1 && bx == DM_BOUNDARY_PERIODIC) || (n==1 && by == DM_BOUNDARY_PERIODIC) || (p==1 && bz == DM_BOUNDARY_PERIODIC)) {
244: SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"IS_COLORING_LOCAL cannot be used for periodic boundary condition having both ends of the domain on the same process");
245: }
246: }
247: }
249: /* Tell the DMDA it has 1 degree of freedom per grid point so that the coloring for BAIJ
250: matrices is for the blocks, not the individual matrix elements */
251: PetscStrbeginswith(da->mattype,MATBAIJ,&isBAIJ);
252: if (!isBAIJ) {PetscStrbeginswith(da->mattype,MATMPIBAIJ,&isBAIJ);}
253: if (!isBAIJ) {PetscStrbeginswith(da->mattype,MATSEQBAIJ,&isBAIJ);}
254: if (isBAIJ) {
255: dd->w = 1;
256: dd->xs = dd->xs/nc;
257: dd->xe = dd->xe/nc;
258: dd->Xs = dd->Xs/nc;
259: dd->Xe = dd->Xe/nc;
260: }
262: /*
263: We do not provide a getcoloring function in the DMDA operations because
264: the basic DMDA does not know about matrices. We think of DMDA as being
265: more low-level than matrices.
266: */
267: if (dim == 1) {
268: DMCreateColoring_DA_1d_MPIAIJ(da,ctype,coloring);
269: } else if (dim == 2) {
270: DMCreateColoring_DA_2d_MPIAIJ(da,ctype,coloring);
271: } else if (dim == 3) {
272: DMCreateColoring_DA_3d_MPIAIJ(da,ctype,coloring);
273: } else SETERRQ1(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"Not done for %D dimension, send us mail petsc-maint@mcs.anl.gov for code",dim);
274: if (isBAIJ) {
275: dd->w = nc;
276: dd->xs = dd->xs*nc;
277: dd->xe = dd->xe*nc;
278: dd->Xs = dd->Xs*nc;
279: dd->Xe = dd->Xe*nc;
280: }
281: return(0);
282: }
284: /* ---------------------------------------------------------------------------------*/
286: PetscErrorCode DMCreateColoring_DA_2d_MPIAIJ(DM da,ISColoringType ctype,ISColoring *coloring)
287: {
288: PetscErrorCode ierr;
289: PetscInt xs,ys,nx,ny,i,j,ii,gxs,gys,gnx,gny,m,n,M,N,dim,s,k,nc,col;
290: PetscInt ncolors;
291: MPI_Comm comm;
292: DMBoundaryType bx,by;
293: DMDAStencilType st;
294: ISColoringValue *colors;
295: DM_DA *dd = (DM_DA*)da->data;
298: /*
299: nc - number of components per grid point
300: col - number of colors needed in one direction for single component problem
302: */
303: DMDAGetInfo(da,&dim,&m,&n,0,&M,&N,0,&nc,&s,&bx,&by,0,&st);
304: col = 2*s + 1;
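/*
   The tile-based coloring used below (outside the 5-point special case) gives grid point (i,j),
   component k the color k + nc*((i % col) + col*(j % col)), so colors repeat in col x col tiles.
   Two distinct points whose i and j offsets are both at most 2*s therefore never share a color,
   and nc*col*col colors are used in total; e.g. a box stencil with s = 1 (col = 3) needs 9 colors
   per component.
*/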
305: DMDAGetCorners(da,&xs,&ys,0,&nx,&ny,0);
306: DMDAGetGhostCorners(da,&gxs,&gys,0,&gnx,&gny,0);
307: PetscObjectGetComm((PetscObject)da,&comm);
309: /* special case as taught to us by Paul Hovland */
310: if (st == DMDA_STENCIL_STAR && s == 1) {
311: DMCreateColoring_DA_2d_5pt_MPIAIJ(da,ctype,coloring);
312: } else {
314: if (bx == DM_BOUNDARY_PERIODIC && (m % col)) SETERRQ2(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in X (%d) is divisible\n\
315: by 2*stencil_width + 1 (%d)\n", m, col);
316: if (by == DM_BOUNDARY_PERIODIC && (n % col)) SETERRQ2(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Y (%d) is divisible\n\
317: by 2*stencil_width + 1 (%d)\n", n, col);
318: if (ctype == IS_COLORING_GLOBAL) {
319: if (!dd->localcoloring) {
320: PetscMalloc1(nc*nx*ny,&colors);
321: ii = 0;
322: for (j=ys; j<ys+ny; j++) {
323: for (i=xs; i<xs+nx; i++) {
324: for (k=0; k<nc; k++) {
325: colors[ii++] = k + nc*((i % col) + col*(j % col));
326: }
327: }
328: }
329: ncolors = nc + nc*(col-1 + col*(col-1));
330: ISColoringCreate(comm,ncolors,nc*nx*ny,colors,PETSC_OWN_POINTER,&dd->localcoloring);
331: }
332: *coloring = dd->localcoloring;
333: } else if (ctype == IS_COLORING_LOCAL) {
334: if (!dd->ghostedcoloring) {
335: PetscMalloc1(nc*gnx*gny,&colors);
336: ii = 0;
337: for (j=gys; j<gys+gny; j++) {
338: for (i=gxs; i<gxs+gnx; i++) {
339: for (k=0; k<nc; k++) {
340: /* the complicated stuff is to handle periodic boundaries */
341: colors[ii++] = k + nc*((SetInRange(i,m) % col) + col*(SetInRange(j,n) % col));
342: }
343: }
344: }
345: ncolors = nc + nc*(col - 1 + col*(col-1));
346: ISColoringCreate(comm,ncolors,nc*gnx*gny,colors,PETSC_OWN_POINTER,&dd->ghostedcoloring);
347: /* PetscIntView(ncolors,(PetscInt*)colors,0); */
349: ISColoringSetType(dd->ghostedcoloring,IS_COLORING_LOCAL);
350: }
351: *coloring = dd->ghostedcoloring;
352: } else SETERRQ1(PetscObjectComm((PetscObject)da),PETSC_ERR_ARG_WRONG,"Unknown ISColoringType %d",(int)ctype);
353: }
354: ISColoringReference(*coloring);
355: return(0);
356: }
358: /* ---------------------------------------------------------------------------------*/
360: PetscErrorCode DMCreateColoring_DA_3d_MPIAIJ(DM da,ISColoringType ctype,ISColoring *coloring)
361: {
362: PetscErrorCode ierr;
363: PetscInt xs,ys,nx,ny,i,j,gxs,gys,gnx,gny,m,n,p,dim,s,k,nc,col,zs,gzs,ii,l,nz,gnz,M,N,P;
364: PetscInt ncolors;
365: MPI_Comm comm;
366: DMBoundaryType bx,by,bz;
367: DMDAStencilType st;
368: ISColoringValue *colors;
369: DM_DA *dd = (DM_DA*)da->data;
372: /*
373: nc - number of components per grid point
374: col - number of colors needed in one direction for single component problem
376: */
377: DMDAGetInfo(da,&dim,&m,&n,&p,&M,&N,&P,&nc,&s,&bx,&by,&bz,&st);
378: col = 2*s + 1;
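/*
   Same tiling idea as the 2d case, now with col x col x col tiles: the color of point (i,j,k),
   component l is l + nc*((i % col) + col*(j % col) + col*col*(k % col)), giving nc*col*col*col
   colors in total.
*/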
379: if (bx == DM_BOUNDARY_PERIODIC && (m % col)) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in X is divisible\n\
380: by 2*stencil_width + 1\n");
381: if (by == DM_BOUNDARY_PERIODIC && (n % col)) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Y is divisible\n\
382: by 2*stencil_width + 1\n");
383: if (bz == DM_BOUNDARY_PERIODIC && (p % col)) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Z is divisible\n\
384: by 2*stencil_width + 1\n");
386: DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);
387: DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);
388: PetscObjectGetComm((PetscObject)da,&comm);
390: /* create the coloring */
391: if (ctype == IS_COLORING_GLOBAL) {
392: if (!dd->localcoloring) {
393: PetscMalloc1(nc*nx*ny*nz,&colors);
394: ii = 0;
395: for (k=zs; k<zs+nz; k++) {
396: for (j=ys; j<ys+ny; j++) {
397: for (i=xs; i<xs+nx; i++) {
398: for (l=0; l<nc; l++) {
399: colors[ii++] = l + nc*((i % col) + col*(j % col) + col*col*(k % col));
400: }
401: }
402: }
403: }
404: ncolors = nc + nc*(col-1 + col*(col-1)+ col*col*(col-1));
405: ISColoringCreate(comm,ncolors,nc*nx*ny*nz,colors,PETSC_OWN_POINTER,&dd->localcoloring);
406: }
407: *coloring = dd->localcoloring;
408: } else if (ctype == IS_COLORING_LOCAL) {
409: if (!dd->ghostedcoloring) {
410: PetscMalloc1(nc*gnx*gny*gnz,&colors);
411: ii = 0;
412: for (k=gzs; k<gzs+gnz; k++) {
413: for (j=gys; j<gys+gny; j++) {
414: for (i=gxs; i<gxs+gnx; i++) {
415: for (l=0; l<nc; l++) {
416: /* the complicated stuff is to handle periodic boundaries */
417: colors[ii++] = l + nc*((SetInRange(i,m) % col) + col*(SetInRange(j,n) % col) + col*col*(SetInRange(k,p) % col));
418: }
419: }
420: }
421: }
422: ncolors = nc + nc*(col-1 + col*(col-1)+ col*col*(col-1));
423: ISColoringCreate(comm,ncolors,nc*gnx*gny*gnz,colors,PETSC_OWN_POINTER,&dd->ghostedcoloring);
424: ISColoringSetType(dd->ghostedcoloring,IS_COLORING_LOCAL);
425: }
426: *coloring = dd->ghostedcoloring;
427: } else SETERRQ1(PetscObjectComm((PetscObject)da),PETSC_ERR_ARG_WRONG,"Unknown ISColoringType %d",(int)ctype);
428: ISColoringReference(*coloring);
429: return(0);
430: }
432: /* ---------------------------------------------------------------------------------*/
434: PetscErrorCode DMCreateColoring_DA_1d_MPIAIJ(DM da,ISColoringType ctype,ISColoring *coloring)
435: {
436: PetscErrorCode ierr;
437: PetscInt xs,nx,i,i1,gxs,gnx,l,m,M,dim,s,nc,col;
438: PetscInt ncolors;
439: MPI_Comm comm;
440: DMBoundaryType bx;
441: ISColoringValue *colors;
442: DM_DA *dd = (DM_DA*)da->data;
445: /*
446: nc - number of components per grid point
447: col - number of colors needed in one direction for single component problem
449: */
450: DMDAGetInfo(da,&dim,&m,0,0,&M,0,0,&nc,&s,&bx,0,0,0);
451: col = 2*s + 1;
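/*
   In the default case below (no ofillcols information) the color of point i, component l is
   simply l + nc*(i % col), repeating with period col = 2*s + 1 for nc*col colors in total;
   when DMDASetBlockFills() information is available, a coloring that exploits the block fill
   pattern is built instead.
*/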
453: if (bx == DM_BOUNDARY_PERIODIC && (m % col)) SETERRQ2(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points %d is divisible\n\
454: by 2*stencil_width + 1 %d\n",(int)m,(int)col);
456: DMDAGetCorners(da,&xs,0,0,&nx,0,0);
457: DMDAGetGhostCorners(da,&gxs,0,0,&gnx,0,0);
458: PetscObjectGetComm((PetscObject)da,&comm);
460: /* create the coloring */
461: if (ctype == IS_COLORING_GLOBAL) {
462: if (!dd->localcoloring) {
463: PetscMalloc1(nc*nx,&colors);
464: if (dd->ofillcols) {
465: PetscInt tc = 0;
466: for (i=0; i<nc; i++) tc += (PetscInt) (dd->ofillcols[i] > 0);
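/*
   tc counts the components that have some off-diagonal (inter-point) coupling.  A component with
   no such coupling can reuse its base color l at every grid point, while each coupled component
   needs a distinct extra color at each of the other 2*s positions within a period of col = 2*s + 1
   points; this is where ncolors = nc + 2*s*tc below comes from.
*/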
467: i1 = 0;
468: for (i=xs; i<xs+nx; i++) {
469: for (l=0; l<nc; l++) {
470: if (dd->ofillcols[l] && (i % col)) {
471: colors[i1++] = nc - 1 + tc*((i % col) - 1) + dd->ofillcols[l];
472: } else {
473: colors[i1++] = l;
474: }
475: }
476: }
477: ncolors = nc + 2*s*tc;
478: } else {
479: i1 = 0;
480: for (i=xs; i<xs+nx; i++) {
481: for (l=0; l<nc; l++) {
482: colors[i1++] = l + nc*(i % col);
483: }
484: }
485: ncolors = nc + nc*(col-1);
486: }
487: ISColoringCreate(comm,ncolors,nc*nx,colors,PETSC_OWN_POINTER,&dd->localcoloring);
488: }
489: *coloring = dd->localcoloring;
490: } else if (ctype == IS_COLORING_LOCAL) {
491: if (!dd->ghostedcoloring) {
492: PetscMalloc1(nc*gnx,&colors);
493: i1 = 0;
494: for (i=gxs; i<gxs+gnx; i++) {
495: for (l=0; l<nc; l++) {
496: /* the complicated stuff is to handle periodic boundaries */
497: colors[i1++] = l + nc*(SetInRange(i,m) % col);
498: }
499: }
500: ncolors = nc + nc*(col-1);
501: ISColoringCreate(comm,ncolors,nc*gnx,colors,PETSC_OWN_POINTER,&dd->ghostedcoloring);
502: ISColoringSetType(dd->ghostedcoloring,IS_COLORING_LOCAL);
503: }
504: *coloring = dd->ghostedcoloring;
505: } else SETERRQ1(PetscObjectComm((PetscObject)da),PETSC_ERR_ARG_WRONG,"Unknown ISColoringType %d",(int)ctype);
506: ISColoringReference(*coloring);
507: return(0);
508: }
510: PetscErrorCode DMCreateColoring_DA_2d_5pt_MPIAIJ(DM da,ISColoringType ctype,ISColoring *coloring)
511: {
512: PetscErrorCode ierr;
513: PetscInt xs,ys,nx,ny,i,j,ii,gxs,gys,gnx,gny,m,n,dim,s,k,nc;
514: PetscInt ncolors;
515: MPI_Comm comm;
516: DMBoundaryType bx,by;
517: ISColoringValue *colors;
518: DM_DA *dd = (DM_DA*)da->data;
521: /*
522: nc - number of components per grid point
523: col - number of colors needed in one direction for single component problem
525: */
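/*
   The (3*j + i) % 5 formula below is a 5-coloring tailored to the 5-point star stencil: two grid
   points whose stencils overlap differ by one of the offsets (+-1,0), (0,+-1), (+-2,0), (0,+-2)
   or (+-1,+-1), and for every one of these 3*dj + di is nonzero modulo 5, so such points always
   receive different colors and 5*nc colors suffice.
*/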
526: DMDAGetInfo(da,&dim,&m,&n,0,0,0,0,&nc,&s,&bx,&by,0,0);
527: DMDAGetCorners(da,&xs,&ys,0,&nx,&ny,0);
528: DMDAGetGhostCorners(da,&gxs,&gys,0,&gnx,&gny,0);
529: PetscObjectGetComm((PetscObject)da,&comm);
531: if (bx == DM_BOUNDARY_PERIODIC && (m % 5)) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in X is divisible by 5\n");
532: if (by == DM_BOUNDARY_PERIODIC && (n % 5)) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Y is divisible by 5\n");
534: /* create the coloring */
535: if (ctype == IS_COLORING_GLOBAL) {
536: if (!dd->localcoloring) {
537: PetscMalloc1(nc*nx*ny,&colors);
538: ii = 0;
539: for (j=ys; j<ys+ny; j++) {
540: for (i=xs; i<xs+nx; i++) {
541: for (k=0; k<nc; k++) {
542: colors[ii++] = k + nc*((3*j+i) % 5);
543: }
544: }
545: }
546: ncolors = 5*nc;
547: ISColoringCreate(comm,ncolors,nc*nx*ny,colors,PETSC_OWN_POINTER,&dd->localcoloring);
548: }
549: *coloring = dd->localcoloring;
550: } else if (ctype == IS_COLORING_LOCAL) {
551: if (!dd->ghostedcoloring) {
552: PetscMalloc1(nc*gnx*gny,&colors);
553: ii = 0;
554: for (j=gys; j<gys+gny; j++) {
555: for (i=gxs; i<gxs+gnx; i++) {
556: for (k=0; k<nc; k++) {
557: colors[ii++] = k + nc*((3*SetInRange(j,n) + SetInRange(i,m)) % 5);
558: }
559: }
560: }
561: ncolors = 5*nc;
562: ISColoringCreate(comm,ncolors,nc*gnx*gny,colors,PETSC_OWN_POINTER,&dd->ghostedcoloring);
563: ISColoringSetType(dd->ghostedcoloring,IS_COLORING_LOCAL);
564: }
565: *coloring = dd->ghostedcoloring;
566: } else SETERRQ1(PetscObjectComm((PetscObject)da),PETSC_ERR_ARG_WRONG,"Unknown ISColoringType %d",(int)ctype);
567: return(0);
568: }
570: /* =========================================================================== */
571: extern PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ(DM,Mat,PetscBool);
572: extern PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ_Fill(DM,Mat);
573: extern PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ(DM,Mat,PetscBool);
574: extern PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ_Fill(DM,Mat);
575: extern PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ(DM,Mat,PetscBool);
576: extern PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ_Fill(DM,Mat);
577: extern PetscErrorCode DMCreateMatrix_DA_2d_MPIBAIJ(DM,Mat);
578: extern PetscErrorCode DMCreateMatrix_DA_3d_MPIBAIJ(DM,Mat);
579: extern PetscErrorCode DMCreateMatrix_DA_2d_MPISBAIJ(DM,Mat);
580: extern PetscErrorCode DMCreateMatrix_DA_3d_MPISBAIJ(DM,Mat);
581: extern PetscErrorCode DMCreateMatrix_DA_2d_MPISELL(DM,Mat);
582: extern PetscErrorCode DMCreateMatrix_DA_3d_MPISELL(DM,Mat);
583: extern PetscErrorCode DMCreateMatrix_DA_IS(DM,Mat);
585: /*@C
586: MatSetupDM - Sets the DMDA that is to be used by the HYPRE_StructMatrix PETSc matrix
588: Logically Collective on mat
590: Input Parameters:
591: + mat - the matrix
592: - da - the DMDA
594: Level: intermediate
596: @*/
597: PetscErrorCode MatSetupDM(Mat mat,DM da)
598: {
604: PetscTryMethod(mat,"MatSetupDM_C",(Mat,DM),(mat,da));
605: return(0);
606: }
608: PetscErrorCode MatView_MPI_DA(Mat A,PetscViewer viewer)
609: {
610: DM da;
611: PetscErrorCode ierr;
612: const char *prefix;
613: Mat Anatural;
614: AO ao;
615: PetscInt rstart,rend,*petsc,i;
616: IS is;
617: MPI_Comm comm;
618: PetscViewerFormat format;
621: /* Check whether we are just printing info, in which case MatView() already viewed everything we wanted to view */
622: PetscViewerGetFormat(viewer,&format);
623: if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) return(0);
625: PetscObjectGetComm((PetscObject)A,&comm);
626: MatGetDM(A, &da);
627: if (!da) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix not generated from a DMDA");
629: DMDAGetAO(da,&ao);
630: MatGetOwnershipRange(A,&rstart,&rend);
631: PetscMalloc1(rend-rstart,&petsc);
632: for (i=rstart; i<rend; i++) petsc[i-rstart] = i;
633: AOApplicationToPetsc(ao,rend-rstart,petsc);
634: ISCreateGeneral(comm,rend-rstart,petsc,PETSC_OWN_POINTER,&is);
636: /* call viewer on natural ordering */
637: MatCreateSubMatrix(A,is,is,MAT_INITIAL_MATRIX,&Anatural);
638: ISDestroy(&is);
639: PetscObjectGetOptionsPrefix((PetscObject)A,&prefix);
640: PetscObjectSetOptionsPrefix((PetscObject)Anatural,prefix);
641: PetscObjectSetName((PetscObject)Anatural,((PetscObject)A)->name);
642: ((PetscObject)Anatural)->donotPetscObjectPrintClassNamePrefixType = PETSC_TRUE;
643: MatView(Anatural,viewer);
644: ((PetscObject)Anatural)->donotPetscObjectPrintClassNamePrefixType = PETSC_FALSE;
645: MatDestroy(&Anatural);
646: return(0);
647: }
649: PetscErrorCode MatLoad_MPI_DA(Mat A,PetscViewer viewer)
650: {
651: DM da;
653: Mat Anatural,Aapp;
654: AO ao;
655: PetscInt rstart,rend,*app,i,m,n,M,N;
656: IS is;
657: MPI_Comm comm;
660: PetscObjectGetComm((PetscObject)A,&comm);
661: MatGetDM(A, &da);
662: if (!da) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix not generated from a DMDA");
664: /* Load the matrix in natural ordering */
665: MatCreate(PetscObjectComm((PetscObject)A),&Anatural);
666: MatSetType(Anatural,((PetscObject)A)->type_name);
667: MatGetSize(A,&M,&N);
668: MatGetLocalSize(A,&m,&n);
669: MatSetSizes(Anatural,m,n,M,N);
670: MatLoad(Anatural,viewer);
672: /* Map natural ordering to application ordering and create IS */
673: DMDAGetAO(da,&ao);
674: MatGetOwnershipRange(Anatural,&rstart,&rend);
675: PetscMalloc1(rend-rstart,&app);
676: for (i=rstart; i<rend; i++) app[i-rstart] = i;
677: AOPetscToApplication(ao,rend-rstart,app);
678: ISCreateGeneral(comm,rend-rstart,app,PETSC_OWN_POINTER,&is);
680: /* Do permutation and replace header */
681: MatCreateSubMatrix(Anatural,is,is,MAT_INITIAL_MATRIX,&Aapp);
682: MatHeaderReplace(A,&Aapp);
683: ISDestroy(&is);
684: MatDestroy(&Anatural);
685: return(0);
686: }
688: PetscErrorCode DMCreateMatrix_DA(DM da, Mat *J)
689: {
691: PetscInt dim,dof,nx,ny,nz,dims[3],starts[3],M,N,P;
692: Mat A;
693: MPI_Comm comm;
694: MatType Atype;
695: void (*aij)(void)=NULL,(*baij)(void)=NULL,(*sbaij)(void)=NULL,(*sell)(void)=NULL,(*is)(void)=NULL;
696: MatType mtype;
697: PetscMPIInt size;
698: DM_DA *dd = (DM_DA*)da->data;
701: MatInitializePackage();
702: mtype = da->mattype;
704: /*
705: m
706: ------------------------------------------------------
707: | |
708: | |
709: | ---------------------- |
710: | | | |
711: n | ny | | |
712: | | | |
713: | .--------------------- |
714: | (xs,ys) nx |
715: | . |
716: | (gxs,gys) |
717: | |
718: -----------------------------------------------------
719: */
721: /*
722: nc - number of components per grid point
723: col - number of colors needed in one direction for single component problem
725: */
726: M = dd->M;
727: N = dd->N;
728: P = dd->P;
729: dim = da->dim;
730: dof = dd->w;
731: /* DMDAGetInfo(da,&dim,&M,&N,&P,0,0,0,&dof,0,0,0,0,0); */
732: DMDAGetCorners(da,0,0,0,&nx,&ny,&nz);
733: PetscObjectGetComm((PetscObject)da,&comm);
734: MatCreate(comm,&A);
735: MatSetSizes(A,dof*nx*ny*nz,dof*nx*ny*nz,dof*M*N*P,dof*M*N*P);
736: MatSetType(A,mtype);
737: MatSetFromOptions(A);
738: MatSetDM(A,da);
739: if (da->structure_only) {
740: MatSetOption(A,MAT_STRUCTURE_ONLY,PETSC_TRUE);
741: }
742: MatGetType(A,&Atype);
743: /*
744: We do not provide a getmatrix function in the DMDA operations because
745: the basic DMDA does not know about matrices. We think of DMDA as being
746: more low-level than matrices. This is somewhat of a cheat, because sometimes
747: we do think of DMDA as being higher level than matrices.
749: We could switch based on Atype (or mtype), but we do not since the
750: specialized setting routines depend only on the particular preallocation
751: details of the matrix, not the type itself.
752: */
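/*
   Note (a presumed consequence of querying for the methods rather than switching on the type
   name): any matrix type that composes the same "...SetPreallocation_C" method as one of the
   standard formats, for example a GPU-backed AIJ variant, is routed through the corresponding
   AIJ/BAIJ/SBAIJ/SELL/IS preallocation code below without having to be listed here explicitly.
*/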
753: PetscObjectQueryFunction((PetscObject)A,"MatMPIAIJSetPreallocation_C",&aij);
754: if (!aij) {
755: PetscObjectQueryFunction((PetscObject)A,"MatSeqAIJSetPreallocation_C",&aij);
756: }
757: if (!aij) {
758: PetscObjectQueryFunction((PetscObject)A,"MatMPIBAIJSetPreallocation_C",&baij);
759: if (!baij) {
760: PetscObjectQueryFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C",&baij);
761: }
762: if (!baij) {
763: PetscObjectQueryFunction((PetscObject)A,"MatMPISBAIJSetPreallocation_C",&sbaij);
764: if (!sbaij) {
765: PetscObjectQueryFunction((PetscObject)A,"MatSeqSBAIJSetPreallocation_C",&sbaij);
766: }
767: if (!sbaij) {
768: PetscObjectQueryFunction((PetscObject)A,"MatMPISELLSetPreallocation_C",&sell);
769: if (!sell) {
770: PetscObjectQueryFunction((PetscObject)A,"MatSeqSELLSetPreallocation_C",&sell);
771: }
772: }
773: if (!sell) {
774: PetscObjectQueryFunction((PetscObject)A,"MatISSetPreallocation_C",&is);
775: }
776: }
777: }
778: if (aij) {
779: if (dim == 1) {
780: if (dd->ofill) {
781: DMCreateMatrix_DA_1d_MPIAIJ_Fill(da,A);
782: } else {
783: DMCreateMatrix_DA_1d_MPIAIJ(da,A,PETSC_FALSE);
784: }
785: } else if (dim == 2) {
786: if (dd->ofill) {
787: DMCreateMatrix_DA_2d_MPIAIJ_Fill(da,A);
788: } else {
789: DMCreateMatrix_DA_2d_MPIAIJ(da,A,PETSC_FALSE);
790: }
791: } else if (dim == 3) {
792: if (dd->ofill) {
793: DMCreateMatrix_DA_3d_MPIAIJ_Fill(da,A);
794: } else {
795: DMCreateMatrix_DA_3d_MPIAIJ(da,A,PETSC_FALSE);
796: }
797: }
798: } else if (baij) {
799: if (dim == 2) {
800: DMCreateMatrix_DA_2d_MPIBAIJ(da,A);
801: } else if (dim == 3) {
802: DMCreateMatrix_DA_3d_MPIBAIJ(da,A);
803: } else SETERRQ3(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"Not implemented for %D dimension and Matrix Type: %s in %D dimension! Send mail to petsc-maint@mcs.anl.gov for code",dim,Atype,dim);
804: } else if (sbaij) {
805: if (dim == 2) {
806: DMCreateMatrix_DA_2d_MPISBAIJ(da,A);
807: } else if (dim == 3) {
808: DMCreateMatrix_DA_3d_MPISBAIJ(da,A);
809: } else SETERRQ3(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"Not implemented for %D dimension and Matrix Type: %s in %D dimension! Send mail to petsc-maint@mcs.anl.gov for code",dim,Atype,dim);
810: } else if (sell) {
811: if (dim == 2) {
812: DMCreateMatrix_DA_2d_MPISELL(da,A);
813: } else if (dim == 3) {
814: DMCreateMatrix_DA_3d_MPISELL(da,A);
815: } else SETERRQ3(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"Not implemented for %D dimension and Matrix Type: %s in %D dimension! Send mail to petsc-maint@mcs.anl.gov for code",dim,Atype,dim);
816: } else if (is) {
817: DMCreateMatrix_DA_IS(da,A);
818: } else {
819: ISLocalToGlobalMapping ltog;
821: MatSetBlockSize(A,dof);
822: MatSetUp(A);
823: DMGetLocalToGlobalMapping(da,<og);
824: MatSetLocalToGlobalMapping(A,ltog,ltog);
825: }
826: DMDAGetGhostCorners(da,&starts[0],&starts[1],&starts[2],&dims[0],&dims[1],&dims[2]);
827: MatSetStencil(A,dim,dims,starts,dof);
828: MatSetDM(A,da);
829: MPI_Comm_size(comm,&size);
830: if (size > 1) {
831: /* change viewer to display matrix in natural ordering */
832: MatSetOperation(A, MATOP_VIEW, (void (*)(void))MatView_MPI_DA);
833: MatSetOperation(A, MATOP_LOAD, (void (*)(void))MatLoad_MPI_DA);
834: }
835: *J = A;
836: return(0);
837: }
839: /* ---------------------------------------------------------------------------------*/
840: PETSC_EXTERN PetscErrorCode MatISSetPreallocation_IS(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);
842: PetscErrorCode DMCreateMatrix_DA_IS(DM dm,Mat J)
843: {
844: DM_DA *da = (DM_DA*)dm->data;
845: Mat lJ;
846: ISLocalToGlobalMapping ltog;
847: IS is_loc_filt, is_glob;
848: const PetscInt *e_loc,*idx;
849: PetscInt nel,nen,nv,dof,dim,*gidx,nb;
850: PetscBool flg;
851: PetscErrorCode ierr;
853: /* The l2g map of the DMDA has all the ghosted nodes, and e_loc is a subset of all the local nodes (including the ghosted ones).
854: We need to filter the local indices down to those represented through the DMDAGetElements() decomposition,
855: because the size of the local matrices in MATIS is the local size of the l2g map */
857: dof = da->w;
858: dim = dm->dim;
860: MatSetBlockSize(J,dof);
862: /* get local element indices in the local DMDA numbering */
863: DMDAGetElements(dm,&nel,&nen,&e_loc); /* this will throw an error if the stencil type is not DMDA_STENCIL_BOX */
864: ISCreateBlock(PetscObjectComm((PetscObject)dm),dof,nel*nen,e_loc,PETSC_COPY_VALUES,&is_loc_filt);
865: DMDARestoreElements(dm,&nel,&nen,&e_loc);
867: /* obtain a consistent local ordering for MATIS */
868: ISSortRemoveDups(is_loc_filt);
869: ISBlockGetLocalSize(is_loc_filt,&nb);
870: DMGetLocalToGlobalMapping(dm,<og);
871: ISLocalToGlobalMappingGetSize(ltog,&nv);
872: PetscMalloc1(PetscMax(nb,nv/dof),&gidx);
873: ISBlockGetIndices(is_loc_filt,&idx);
874: ISLocalToGlobalMappingApplyBlock(ltog,nb,idx,gidx);
875: ISBlockRestoreIndices(is_loc_filt,&idx);
876: ISCreateBlock(PetscObjectComm((PetscObject)dm),dof,nb,gidx,PETSC_USE_POINTER,&is_glob);
877: ISLocalToGlobalMappingCreateIS(is_glob,<og);
878: ISDestroy(&is_glob);
879: MatSetLocalToGlobalMapping(J,ltog,ltog);
880: ISLocalToGlobalMappingDestroy(<og);
882: /* We also attach an l2g map to the local matrices so that MatSetValueLocal() works */
883: MatISGetLocalMat(J,&lJ);
884: ISLocalToGlobalMappingCreateIS(is_loc_filt,<og);
885: ISDestroy(&is_loc_filt);
886: ISCreateStride(PetscObjectComm((PetscObject)lJ),nv/dof,0,1,&is_glob);
887: ISGetIndices(is_glob,&idx);
888: ISGlobalToLocalMappingApplyBlock(ltog,IS_GTOLM_MASK,nv/dof,idx,&nb,gidx);
889: ISRestoreIndices(is_glob,&idx);
890: ISDestroy(&is_glob);
891: ISLocalToGlobalMappingDestroy(<og);
892: ISCreateBlock(PETSC_COMM_SELF,dof,nb,gidx,PETSC_USE_POINTER,&is_loc_filt);
893: ISLocalToGlobalMappingCreateIS(is_loc_filt,<og);
894: ISDestroy(&is_loc_filt);
895: MatSetLocalToGlobalMapping(lJ,ltog,ltog);
896: ISLocalToGlobalMappingDestroy(<og);
897: PetscFree(gidx);
899: /* Preallocation (not exact): we reuse the preallocation routines of the assembled version */
900: flg = dm->prealloc_only;
901: dm->prealloc_only = PETSC_TRUE;
902: switch (dim) {
903: case 1:
904: PetscObjectComposeFunction((PetscObject)J,"MatMPIAIJSetPreallocation_C",MatISSetPreallocation_IS);
905: DMCreateMatrix_DA_1d_MPIAIJ(dm,J,PETSC_TRUE);
906: PetscObjectComposeFunction((PetscObject)J,"MatMPIAIJSetPreallocation_C",NULL);
907: break;
908: case 2:
909: PetscObjectComposeFunction((PetscObject)J,"MatMPIAIJSetPreallocation_C",MatISSetPreallocation_IS);
910: DMCreateMatrix_DA_2d_MPIAIJ(dm,J,PETSC_TRUE);
911: PetscObjectComposeFunction((PetscObject)J,"MatMPIAIJSetPreallocation_C",NULL);
912: break;
913: case 3:
914: PetscObjectComposeFunction((PetscObject)J,"MatMPIAIJSetPreallocation_C",MatISSetPreallocation_IS);
915: DMCreateMatrix_DA_3d_MPIAIJ(dm,J,PETSC_TRUE);
916: PetscObjectComposeFunction((PetscObject)J,"MatMPIAIJSetPreallocation_C",NULL);
917: break;
918: default:
919: SETERRQ1(PetscObjectComm((PetscObject)dm),PETSC_ERR_SUP,"Unhandled dimension %d",dim);
920: break;
921: }
922: dm->prealloc_only = flg;
923: return(0);
924: }
926: PetscErrorCode DMCreateMatrix_DA_2d_MPISELL(DM da,Mat J)
927: {
928: PetscErrorCode ierr;
929: PetscInt xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny,m,n,dim,s,*cols = NULL,k,nc,*rows = NULL,col,cnt,l,p;
930: PetscInt lstart,lend,pstart,pend,*dnz,*onz;
931: MPI_Comm comm;
932: PetscScalar *values;
933: DMBoundaryType bx,by;
934: ISLocalToGlobalMapping ltog;
935: DMDAStencilType st;
938: /*
939: nc - number of components per grid point
940: col - number of colors needed in one direction for single component problem
942: */
943: DMDAGetInfo(da,&dim,&m,&n,0,0,0,0,&nc,&s,&bx,&by,0,&st);
944: col = 2*s + 1;
945: DMDAGetCorners(da,&xs,&ys,0,&nx,&ny,0);
946: DMDAGetGhostCorners(da,&gxs,&gys,0,&gnx,&gny,0);
947: PetscObjectGetComm((PetscObject)da,&comm);
949: PetscMalloc2(nc,&rows,col*col*nc*nc,&cols);
950: DMGetLocalToGlobalMapping(da,<og);
952: MatSetBlockSize(J,nc);
953: /* determine the matrix preallocation information */
954: MatPreallocateInitialize(comm,nc*nx*ny,nc*nx*ny,dnz,onz);
955: for (i=xs; i<xs+nx; i++) {
957: pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
958: pend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1));
960: for (j=ys; j<ys+ny; j++) {
961: slot = i - gxs + gnx*(j - gys);
963: lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
964: lend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,n-j-1));
966: cnt = 0;
967: for (k=0; k<nc; k++) {
968: for (l=lstart; l<lend+1; l++) {
969: for (p=pstart; p<pend+1; p++) {
970: if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star have either l = 0 or p = 0 */
971: cols[cnt++] = k + nc*(slot + gnx*l + p);
972: }
973: }
974: }
975: rows[k] = k + nc*(slot);
976: }
977: MatPreallocateSetLocal(ltog,nc,rows,ltog,cnt,cols,dnz,onz);
978: }
979: }
980: MatSetBlockSize(J,nc);
981: MatSeqSELLSetPreallocation(J,0,dnz);
982: MatMPISELLSetPreallocation(J,0,dnz,0,onz);
983: MatPreallocateFinalize(dnz,onz);
985: MatSetLocalToGlobalMapping(J,ltog,ltog);
987: /*
988: For each node in the grid we get its neighbors in the local (on-process) ordering that
989: includes the ghost points, then MatSetValuesLocal() maps those indices to the global
990: PETSc ordering.
991: */
992: if (!da->prealloc_only) {
993: PetscCalloc1(col*col*nc*nc,&values);
994: for (i=xs; i<xs+nx; i++) {
996: pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
997: pend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1));
999: for (j=ys; j<ys+ny; j++) {
1000: slot = i - gxs + gnx*(j - gys);
1002: lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1003: lend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,n-j-1));
1005: cnt = 0;
1006: for (k=0; k<nc; k++) {
1007: for (l=lstart; l<lend+1; l++) {
1008: for (p=pstart; p<pend+1; p++) {
1009: if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star have either l = 0 or p = 0 */
1010: cols[cnt++] = k + nc*(slot + gnx*l + p);
1011: }
1012: }
1013: }
1014: rows[k] = k + nc*(slot);
1015: }
1016: MatSetValuesLocal(J,nc,rows,cnt,cols,values,INSERT_VALUES);
1017: }
1018: }
1019: PetscFree(values);
1020: /* do not copy values to GPU since they are all zero and not yet needed there */
1021: MatPinToCPU(J,PETSC_TRUE);
1022: MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);
1023: MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);
1024: MatPinToCPU(J,PETSC_FALSE);
1025: MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);
1026: }
1027: PetscFree2(rows,cols);
1028: return(0);
1029: }
1031: PetscErrorCode DMCreateMatrix_DA_3d_MPISELL(DM da,Mat J)
1032: {
1033: PetscErrorCode ierr;
1034: PetscInt xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
1035: PetscInt m,n,dim,s,*cols = NULL,k,nc,*rows = NULL,col,cnt,l,p,*dnz = NULL,*onz = NULL;
1036: PetscInt istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk,M,N,P;
1037: MPI_Comm comm;
1038: PetscScalar *values;
1039: DMBoundaryType bx,by,bz;
1040: ISLocalToGlobalMapping ltog;
1041: DMDAStencilType st;
1044: /*
1045: nc - number of components per grid point
1046: col - number of colors needed in one direction for single component problem
1048: */
1049: DMDAGetInfo(da,&dim,&m,&n,&p,&M,&N,&P,&nc,&s,&bx,&by,&bz,&st);
1050: col = 2*s + 1;
1051: DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);
1052: DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);
1053: PetscObjectGetComm((PetscObject)da,&comm);
1055: PetscMalloc2(nc,&rows,col*col*col*nc*nc,&cols);
1056: DMGetLocalToGlobalMapping(da,<og);
1058: MatSetBlockSize(J,nc);
1059: /* determine the matrix preallocation information */
1060: MatPreallocateInitialize(comm,nc*nx*ny*nz,nc*nx*ny*nz,dnz,onz);
1061: for (i=xs; i<xs+nx; i++) {
1062: istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1063: iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1));
1064: for (j=ys; j<ys+ny; j++) {
1065: jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1066: jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,n-j-1));
1067: for (k=zs; k<zs+nz; k++) {
1068: kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1069: kend = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,p-k-1));
1071: slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1073: cnt = 0;
1074: for (l=0; l<nc; l++) {
1075: for (ii=istart; ii<iend+1; ii++) {
1076: for (jj=jstart; jj<jend+1; jj++) {
1077: for (kk=kstart; kk<kend+1; kk++) {
1078: if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
1079: cols[cnt++] = l + nc*(slot + ii + gnx*jj + gnx*gny*kk);
1080: }
1081: }
1082: }
1083: }
1084: rows[l] = l + nc*(slot);
1085: }
1086: MatPreallocateSetLocal(ltog,nc,rows,ltog,cnt,cols,dnz,onz);
1087: }
1088: }
1089: }
1090: MatSetBlockSize(J,nc);
1091: MatSeqSELLSetPreallocation(J,0,dnz);
1092: MatMPISELLSetPreallocation(J,0,dnz,0,onz);
1093: MatPreallocateFinalize(dnz,onz);
1094: MatSetLocalToGlobalMapping(J,ltog,ltog);
1096: /*
1097: For each node in the grid we get its neighbors in the local (on-process) ordering that
1098: includes the ghost points, then MatSetValuesLocal() maps those indices to the global
1099: PETSc ordering.
1100: */
1101: if (!da->prealloc_only) {
1102: PetscCalloc1(col*col*col*nc*nc*nc,&values);
1103: for (i=xs; i<xs+nx; i++) {
1104: istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1105: iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1));
1106: for (j=ys; j<ys+ny; j++) {
1107: jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1108: jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,n-j-1));
1109: for (k=zs; k<zs+nz; k++) {
1110: kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1111: kend = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,p-k-1));
1113: slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1115: cnt = 0;
1116: for (l=0; l<nc; l++) {
1117: for (ii=istart; ii<iend+1; ii++) {
1118: for (jj=jstart; jj<jend+1; jj++) {
1119: for (kk=kstart; kk<kend+1; kk++) {
1120: if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
1121: cols[cnt++] = l + nc*(slot + ii + gnx*jj + gnx*gny*kk);
1122: }
1123: }
1124: }
1125: }
1126: rows[l] = l + nc*(slot);
1127: }
1128: MatSetValuesLocal(J,nc,rows,cnt,cols,values,INSERT_VALUES);
1129: }
1130: }
1131: }
1132: PetscFree(values);
1133: /* do not copy values to GPU since they are all zero and not yet needed there */
1134: MatPinToCPU(J,PETSC_TRUE);
1135: MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);
1136: MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);
1137: MatPinToCPU(J,PETSC_FALSE);
1138: MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);
1139: }
1140: PetscFree2(rows,cols);
1141: return(0);
1142: }
1144: PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ(DM da,Mat J,PetscBool isIS)
1145: {
1146: PetscErrorCode ierr;
1147: PetscInt xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny,m,n,dim,s,*cols = NULL,k,nc,*rows = NULL,col,cnt,l,p,M,N;
1148: PetscInt lstart,lend,pstart,pend,*dnz,*onz;
1149: MPI_Comm comm;
1150: DMBoundaryType bx,by;
1151: ISLocalToGlobalMapping ltog,mltog;
1152: DMDAStencilType st;
1153: PetscBool removedups = PETSC_FALSE;
1156: /*
1157: nc - number of components per grid point
1158: col - number of colors needed in one direction for single component problem
1160: */
1161: DMDAGetInfo(da,&dim,&m,&n,&M,&N,0,0,&nc,&s,&bx,&by,0,&st);
1162: if (!isIS && bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE) {
1163: MatSetOption(J,MAT_SORTED_FULL,PETSC_TRUE);
1164: }
1165: col = 2*s + 1;
1166: /*
1167: With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times
1168: because of "wrapping" around the end of the domain hitting an entry already counted in the other direction.
1169: */
1170: if (M == 1 && 2*s >= m) removedups = PETSC_TRUE;
1171: if (N == 1 && 2*s >= n) removedups = PETSC_TRUE;
1172: DMDAGetCorners(da,&xs,&ys,0,&nx,&ny,0);
1173: DMDAGetGhostCorners(da,&gxs,&gys,0,&gnx,&gny,0);
1174: PetscObjectGetComm((PetscObject)da,&comm);
1176: PetscMalloc2(nc,&rows,col*col*nc*nc,&cols);
1177: DMGetLocalToGlobalMapping(da,<og);
1179: MatSetBlockSize(J,nc);
1180: /* determine the matrix preallocation information */
1181: MatPreallocateInitialize(comm,nc*nx*ny,nc*nx*ny,dnz,onz);
1182: for (i=xs; i<xs+nx; i++) {
1184: pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1185: pend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1));
1187: for (j=ys; j<ys+ny; j++) {
1188: slot = i - gxs + gnx*(j - gys);
1190: lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1191: lend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,n-j-1));
1193: cnt = 0;
1194: for (k=0; k<nc; k++) {
1195: for (l=lstart; l<lend+1; l++) {
1196: for (p=pstart; p<pend+1; p++) {
1197: if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star have either l = 0 or p = 0 */
1198: cols[cnt++] = k + nc*(slot + gnx*l + p);
1199: }
1200: }
1201: }
1202: rows[k] = k + nc*(slot);
1203: }
1204: if (removedups) {
1205: MatPreallocateSetLocalRemoveDups(ltog,nc,rows,ltog,cnt,cols,dnz,onz);
1206: } else {
1207: MatPreallocateSetLocal(ltog,nc,rows,ltog,cnt,cols,dnz,onz);
1208: }
1209: }
1210: }
1211: MatSetBlockSize(J,nc);
1212: MatSeqAIJSetPreallocation(J,0,dnz);
1213: MatMPIAIJSetPreallocation(J,0,dnz,0,onz);
1214: MatPreallocateFinalize(dnz,onz);
1215: MatGetLocalToGlobalMapping(J,&mltog,NULL);
1216: if (!mltog) {
1217: MatSetLocalToGlobalMapping(J,ltog,ltog);
1218: }
1220: /*
1221: For each node in the grid we get its neighbors in the local (on-process) ordering that
1222: includes the ghost points, then MatSetValuesLocal() maps those indices to the global
1223: PETSc ordering.
1224: */
1225: if (!da->prealloc_only) {
1226: for (i=xs; i<xs+nx; i++) {
1228: pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1229: pend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1));
1231: for (j=ys; j<ys+ny; j++) {
1232: slot = i - gxs + gnx*(j - gys);
1234: lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1235: lend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,n-j-1));
1237: cnt = 0;
1238: for (l=lstart; l<lend+1; l++) {
1239: for (p=pstart; p<pend+1; p++) {
1240: if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star have either l = 0 or p = 0 */
1241: cols[cnt++] = nc*(slot + gnx*l + p);
1242: for (k=1; k<nc; k++) {
1243: cols[cnt] = 1 + cols[cnt-1];cnt++;
1244: }
1245: }
1246: }
1247: }
1248: for (k=0; k<nc; k++) rows[k] = k + nc*(slot);
1249: MatSetValuesLocal(J,nc,rows,cnt,cols,NULL,INSERT_VALUES);
1250: }
1251: }
1252: /* do not copy values to GPU since they are all zero and not yet needed there */
1253: MatPinToCPU(J,PETSC_TRUE);
1254: MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);
1255: MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);
1256: MatPinToCPU(J,PETSC_FALSE);
1257: MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);
1258: if (bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE) {
1259: MatSetOption(J,MAT_SORTED_FULL,PETSC_FALSE);
1260: }
1261: }
1262: PetscFree2(rows,cols);
1263: return(0);
1264: }
1266: PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ_Fill(DM da,Mat J)
1267: {
1268: PetscErrorCode ierr;
1269: PetscInt xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
1270: PetscInt m,n,dim,s,*cols,k,nc,row,col,cnt,maxcnt = 0,l,p,M,N;
1271: PetscInt lstart,lend,pstart,pend,*dnz,*onz;
1272: DM_DA *dd = (DM_DA*)da->data;
1273: PetscInt ifill_col,*ofill = dd->ofill, *dfill = dd->dfill;
1274: MPI_Comm comm;
1275: DMBoundaryType bx,by;
1276: ISLocalToGlobalMapping ltog;
1277: DMDAStencilType st;
1278: PetscBool removedups = PETSC_FALSE;
1281: /*
1282: nc - number of components per grid point
1283: col - number of colors needed in one direction for single component problem
1285: */
1286: DMDAGetInfo(da,&dim,&m,&n,&M,&N,0,0,&nc,&s,&bx,&by,0,&st);
1287: col = 2*s + 1;
1288: /*
1289: With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times
1290: because of "wrapping" around the end of the domain hitting an entry already counted in the other direction.
1291: */
1292: if (M == 1 && 2*s >= m) removedups = PETSC_TRUE;
1293: if (N == 1 && 2*s >= n) removedups = PETSC_TRUE;
1294: DMDAGetCorners(da,&xs,&ys,0,&nx,&ny,0);
1295: DMDAGetGhostCorners(da,&gxs,&gys,0,&gnx,&gny,0);
1296: PetscObjectGetComm((PetscObject)da,&comm);
1298: PetscMalloc1(col*col*nc,&cols);
1299: DMGetLocalToGlobalMapping(da,<og);
1301: MatSetBlockSize(J,nc);
1302: /* determine the matrix preallocation information */
1303: MatPreallocateInitialize(comm,nc*nx*ny,nc*nx*ny,dnz,onz);
1304: for (i=xs; i<xs+nx; i++) {
1306: pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1307: pend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1));
1309: for (j=ys; j<ys+ny; j++) {
1310: slot = i - gxs + gnx*(j - gys);
1312: lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1313: lend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,n-j-1));
1315: for (k=0; k<nc; k++) {
1316: cnt = 0;
1317: for (l=lstart; l<lend+1; l++) {
1318: for (p=pstart; p<pend+1; p++) {
1319: if (l || p) {
1320: if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star */
1321: for (ifill_col=ofill[k]; ifill_col<ofill[k+1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc*(slot + gnx*l + p);
1322: }
1323: } else {
1324: if (dfill) {
1325: for (ifill_col=dfill[k]; ifill_col<dfill[k+1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc*(slot + gnx*l + p);
1326: } else {
1327: for (ifill_col=0; ifill_col<nc; ifill_col++) cols[cnt++] = ifill_col + nc*(slot + gnx*l + p);
1328: }
1329: }
1330: }
1331: }
1332: row = k + nc*(slot);
1333: maxcnt = PetscMax(maxcnt,cnt);
1334: if (removedups) {
1335: MatPreallocateSetLocalRemoveDups(ltog,1,&row,ltog,cnt,cols,dnz,onz);
1336: } else {
1337: MatPreallocateSetLocal(ltog,1,&row,ltog,cnt,cols,dnz,onz);
1338: }
1339: }
1340: }
1341: }
1342: MatSeqAIJSetPreallocation(J,0,dnz);
1343: MatMPIAIJSetPreallocation(J,0,dnz,0,onz);
1344: MatPreallocateFinalize(dnz,onz);
1345: MatSetLocalToGlobalMapping(J,ltog,ltog);
1347: /*
1348: For each node in the grid we get its neighbors in the local (on-process) ordering that
1349: includes the ghost points, then MatSetValuesLocal() maps those indices to the global
1350: PETSc ordering.
1351: */
1352: if (!da->prealloc_only) {
1353: for (i=xs; i<xs+nx; i++) {
1355: pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1356: pend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1));
1358: for (j=ys; j<ys+ny; j++) {
1359: slot = i - gxs + gnx*(j - gys);
1361: lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1362: lend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,n-j-1));
1364: for (k=0; k<nc; k++) {
1365: cnt = 0;
1366: for (l=lstart; l<lend+1; l++) {
1367: for (p=pstart; p<pend+1; p++) {
1368: if (l || p) {
1369: if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star */
1370: for (ifill_col=ofill[k]; ifill_col<ofill[k+1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc*(slot + gnx*l + p);
1371: }
1372: } else {
1373: if (dfill) {
1374: for (ifill_col=dfill[k]; ifill_col<dfill[k+1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc*(slot + gnx*l + p);
1375: } else {
1376: for (ifill_col=0; ifill_col<nc; ifill_col++) cols[cnt++] = ifill_col + nc*(slot + gnx*l + p);
1377: }
1378: }
1379: }
1380: }
1381: row = k + nc*(slot);
1382: MatSetValuesLocal(J,1,&row,cnt,cols,NULL,INSERT_VALUES);
1383: }
1384: }
1385: }
1386: /* do not copy values to GPU since they are all zero and not yet needed there */
1387: MatPinToCPU(J,PETSC_TRUE);
1388: MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);
1389: MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);
1390: MatPinToCPU(J,PETSC_FALSE);
1391: MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);
1392: }
1393: PetscFree(cols);
1394: return(0);
1395: }
1397: /* ---------------------------------------------------------------------------------*/
1399: PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ(DM da,Mat J,PetscBool isIS)
1400: {
1401: PetscErrorCode ierr;
1402: PetscInt xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
1403: PetscInt m,n,dim,s,*cols = NULL,k,nc,*rows = NULL,col,cnt,l,p,*dnz = NULL,*onz = NULL;
1404: PetscInt istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk,M,N,P;
1405: MPI_Comm comm;
1406: DMBoundaryType bx,by,bz;
1407: ISLocalToGlobalMapping ltog,mltog;
1408: DMDAStencilType st;
1409: PetscBool removedups = PETSC_FALSE;
1412: /*
1413: nc - number of components per grid point
1414: col - number of colors needed in one direction for single component problem
1416: */
1417: DMDAGetInfo(da,&dim,&m,&n,&p,&M,&N,&P,&nc,&s,&bx,&by,&bz,&st);
1418: if (!isIS && bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE && bz == DM_BOUNDARY_NONE) {
1419: MatSetOption(J,MAT_SORTED_FULL,PETSC_TRUE);
1420: }
1421: col = 2*s + 1;
1423: /*
1424: With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times
1425: because of "wrapping" around the end of the domain hitting an entry already counted in the other direction.
1426: */
1427: if (M == 1 && 2*s >= m) removedups = PETSC_TRUE;
1428: if (N == 1 && 2*s >= n) removedups = PETSC_TRUE;
1429: if (P == 1 && 2*s >= p) removedups = PETSC_TRUE;
1431: DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);
1432: DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);
1433: PetscObjectGetComm((PetscObject)da,&comm);
1435: PetscMalloc2(nc,&rows,col*col*col*nc*nc,&cols);
1436: DMGetLocalToGlobalMapping(da,<og);
1438: MatSetBlockSize(J,nc);
1439: /* determine the matrix preallocation information */
1440: MatPreallocateInitialize(comm,nc*nx*ny*nz,nc*nx*ny*nz,dnz,onz);
1441: for (i=xs; i<xs+nx; i++) {
1442: istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1443: iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1));
1444: for (j=ys; j<ys+ny; j++) {
1445: jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1446: jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,n-j-1));
1447: for (k=zs; k<zs+nz; k++) {
1448: kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1449: kend = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,p-k-1));
1451: slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1453: cnt = 0;
1454: for (l=0; l<nc; l++) {
1455: for (ii=istart; ii<iend+1; ii++) {
1456: for (jj=jstart; jj<jend+1; jj++) {
1457: for (kk=kstart; kk<kend+1; kk++) {
1458: if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
1459: cols[cnt++] = l + nc*(slot + ii + gnx*jj + gnx*gny*kk);
1460: }
1461: }
1462: }
1463: }
1464: rows[l] = l + nc*(slot);
1465: }
1466: if (removedups) {
1467: MatPreallocateSetLocalRemoveDups(ltog,nc,rows,ltog,cnt,cols,dnz,onz);
1468: } else {
1469: MatPreallocateSetLocal(ltog,nc,rows,ltog,cnt,cols,dnz,onz);
1470: }
1471: }
1472: }
1473: }
1474: MatSetBlockSize(J,nc);
1475: MatSeqAIJSetPreallocation(J,0,dnz);
1476: MatMPIAIJSetPreallocation(J,0,dnz,0,onz);
1477: MatPreallocateFinalize(dnz,onz);
1478: MatGetLocalToGlobalMapping(J,&mltog,NULL);
1479: if (!mltog) {
1480: MatSetLocalToGlobalMapping(J,ltog,ltog);
1481: }
1483: /*
1484: For each node in the grid we get its neighbors in the local (on-process) ordering that
1485: includes the ghost points, then MatSetValuesLocal() maps those indices to the global
1486: PETSc ordering.
1487: */
1488: if (!da->prealloc_only) {
1489: for (i=xs; i<xs+nx; i++) {
1490: istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1491: iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1));
1492: for (j=ys; j<ys+ny; j++) {
1493: jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1494: jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,n-j-1));
1495: for (k=zs; k<zs+nz; k++) {
1496: kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1497: kend = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,p-k-1));
1499: slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1501: cnt = 0;
1502: for (kk=kstart; kk<kend+1; kk++) {
1503: for (jj=jstart; jj<jend+1; jj++) {
1504: for (ii=istart; ii<iend+1; ii++) {
1505: if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
1506: cols[cnt++] = nc*(slot + ii + gnx*jj + gnx*gny*kk);
1507: for (l=1; l<nc; l++) {
1508: cols[cnt] = 1 + cols[cnt-1];cnt++;
1509: }
1510: }
1511: }
1512: }
1513: }
1514: rows[0] = nc*(slot); for (l=1; l<nc; l++) rows[l] = 1 + rows[l-1];
1515: MatSetValuesLocal(J,nc,rows,cnt,cols,NULL,INSERT_VALUES);
1516: }
1517: }
1518: }
1519: /* do not copy values to GPU since they are all zero and not yet needed there */
1520: MatPinToCPU(J,PETSC_TRUE);
1521: MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);
1522: MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);
1523: if (!isIS && bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE && bz == DM_BOUNDARY_NONE) {
1524: MatSetOption(J,MAT_SORTED_FULL,PETSC_FALSE);
1525: }
1526: MatPinToCPU(J,PETSC_FALSE);
1527: MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);
1528: }
1529: PetscFree2(rows,cols);
1530: return(0);
1531: }
1533: /* ---------------------------------------------------------------------------------*/
1535: PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ_Fill(DM da,Mat J)
1536: {
1537: PetscErrorCode ierr;
1538: DM_DA *dd = (DM_DA*)da->data;
1539: PetscInt xs,nx,i,j,gxs,gnx,row,k,l;
1540: PetscInt m,dim,s,*cols = NULL,nc,cnt,maxcnt = 0,*ocols;
1541: PetscInt *ofill = dd->ofill,*dfill = dd->dfill;
1542: DMBoundaryType bx;
1543: ISLocalToGlobalMapping ltog;
1544: PetscMPIInt rank,size;
1547: MPI_Comm_rank(PetscObjectComm((PetscObject)da),&rank);
1548: MPI_Comm_size(PetscObjectComm((PetscObject)da),&size);
1550: /*
1551: nc - number of components per grid point
1553: */
1554: DMDAGetInfo(da,&dim,&m,0,0,0,0,0,&nc,&s,&bx,0,0,0);
1555: if (s > 1) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"Matrix creation for 1d not implemented correctly for stencil width larger than 1");
1556: DMDAGetCorners(da,&xs,0,0,&nx,0,0);
1557: DMDAGetGhostCorners(da,&gxs,0,0,&gnx,0,0);
1559: MatSetBlockSize(J,nc);
1560: PetscCalloc2(nx*nc,&cols,nx*nc,&ocols);
1562: /*
1563: note: should be smaller for the first and last process when there is no periodic boundary;
1564: does not handle dfill
1565: */
1566: cnt = 0;
1567: /* coupling with process to the left */
1568: for (i=0; i<s; i++) {
1569: for (j=0; j<nc; j++) {
1570: ocols[cnt] = ((!rank) ? 0 : (s - i)*(ofill[j+1] - ofill[j]));
1571: cols[cnt] = dfill[j+1] - dfill[j] + (s + i)*(ofill[j+1] - ofill[j]);
1572: if (!rank && (dd->bx == DM_BOUNDARY_PERIODIC)) {
1573: if (size > 1) ocols[cnt] += (s - i)*(ofill[j+1] - ofill[j]);
1574: else cols[cnt] += (s - i)*(ofill[j+1] - ofill[j]);
1575: }
1576: maxcnt = PetscMax(maxcnt,ocols[cnt]+cols[cnt]);
1577: cnt++;
1578: }
1579: }
1580: for (i=s; i<nx-s; i++) {
1581: for (j=0; j<nc; j++) {
1582: cols[cnt] = dfill[j+1] - dfill[j] + 2*s*(ofill[j+1] - ofill[j]);
1583: maxcnt = PetscMax(maxcnt,ocols[cnt]+cols[cnt]);
1584: cnt++;
1585: }
1586: }
1587: /* coupling with process to the right */
1588: for (i=nx-s; i<nx; i++) {
1589: for (j=0; j<nc; j++) {
1590: ocols[cnt] = ((rank == (size-1)) ? 0 : (i - nx + s + 1)*(ofill[j+1] - ofill[j]));
1591: cols[cnt] = dfill[j+1] - dfill[j] + (s + nx - i - 1)*(ofill[j+1] - ofill[j]);
1592: if ((rank == size-1) && (dd->bx == DM_BOUNDARY_PERIODIC)) {
1593: if (size > 1) ocols[cnt] += (i - nx + s + 1)*(ofill[j+1] - ofill[j]);
1594: else cols[cnt] += (i - nx + s + 1)*(ofill[j+1] - ofill[j]);
1595: }
1596: maxcnt = PetscMax(maxcnt,ocols[cnt]+cols[cnt]);
1597: cnt++;
1598: }
1599: }
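  /*
     [Editor's note, not part of the original source] A worked instance of the interior-row count
     computed above, with hypothetical fill rows: if s = 1, component j couples only to itself in
     the diagonal block (dfill[j+1] - dfill[j] = 1) and to two components in each off-diagonal
     block (ofill[j+1] - ofill[j] = 2), then cols[cnt] = 1 + 2*1*2 = 5 nonzeros are preallocated
     for that row.
  */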
1601: MatSeqAIJSetPreallocation(J,0,cols);
1602: MatMPIAIJSetPreallocation(J,0,cols,0,ocols);
1603: PetscFree2(cols,ocols);
1605:   DMGetLocalToGlobalMapping(da,&ltog);
1606: MatSetLocalToGlobalMapping(J,ltog,ltog);
1608:   /*
1609:        For each row we compute the global column indices of the stencil neighbors directly, using
1610:        ofill (and dfill for the diagonal block) to select the coupled components, and insert them
1611:        with MatSetValues(). (A usage sketch follows this routine.)
1612:   */
1613: if (!da->prealloc_only) {
1614: PetscMalloc1(maxcnt,&cols);
1615: row = xs*nc;
1616: /* coupling with process to the left */
1617: for (i=xs; i<xs+s; i++) {
1618: for (j=0; j<nc; j++) {
1619: cnt = 0;
1620: if (rank) {
1621: for (l=0; l<s; l++) {
1622: for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i - s + l)*nc + ofill[k];
1623: }
1624: }
1625: if (!rank && (dd->bx == DM_BOUNDARY_PERIODIC)) {
1626: for (l=0; l<s; l++) {
1627: for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (m + i - s - l)*nc + ofill[k];
1628: }
1629: }
1630: if (dfill) {
1631: for (k=dfill[j]; k<dfill[j+1]; k++) {
1632: cols[cnt++] = i*nc + dfill[k];
1633: }
1634: } else {
1635: for (k=0; k<nc; k++) {
1636: cols[cnt++] = i*nc + k;
1637: }
1638: }
1639: for (l=0; l<s; l++) {
1640: for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i + s - l)*nc + ofill[k];
1641: }
1642: MatSetValues(J,1,&row,cnt,cols,NULL,INSERT_VALUES);
1643: row++;
1644: }
1645: }
1646: for (i=xs+s; i<xs+nx-s; i++) {
1647: for (j=0; j<nc; j++) {
1648: cnt = 0;
1649: for (l=0; l<s; l++) {
1650: for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i - s + l)*nc + ofill[k];
1651: }
1652: if (dfill) {
1653: for (k=dfill[j]; k<dfill[j+1]; k++) {
1654: cols[cnt++] = i*nc + dfill[k];
1655: }
1656: } else {
1657: for (k=0; k<nc; k++) {
1658: cols[cnt++] = i*nc + k;
1659: }
1660: }
1661: for (l=0; l<s; l++) {
1662: for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i + s - l)*nc + ofill[k];
1663: }
1664: MatSetValues(J,1,&row,cnt,cols,NULL,INSERT_VALUES);
1665: row++;
1666: }
1667: }
1668: /* coupling with process to the right */
1669: for (i=xs+nx-s; i<xs+nx; i++) {
1670: for (j=0; j<nc; j++) {
1671: cnt = 0;
1672: for (l=0; l<s; l++) {
1673: for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i - s + l)*nc + ofill[k];
1674: }
1675: if (dfill) {
1676: for (k=dfill[j]; k<dfill[j+1]; k++) {
1677: cols[cnt++] = i*nc + dfill[k];
1678: }
1679: } else {
1680: for (k=0; k<nc; k++) {
1681: cols[cnt++] = i*nc + k;
1682: }
1683: }
1684: if (rank < size-1) {
1685: for (l=0; l<s; l++) {
1686: for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i + s - l)*nc + ofill[k];
1687: }
1688: }
1689: if ((rank == size-1) && (dd->bx == DM_BOUNDARY_PERIODIC)) {
1690: for (l=0; l<s; l++) {
1691: for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i - s - l - m + 2)*nc + ofill[k];
1692: }
1693: }
1694: MatSetValues(J,1,&row,cnt,cols,NULL,INSERT_VALUES);
1695: row++;
1696: }
1697: }
1698: PetscFree(cols);
1699: /* do not copy values to GPU since they are all zero and not yet needed there */
1700: MatPinToCPU(J,PETSC_TRUE);
1701: MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);
1702: MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);
1703: MatPinToCPU(J,PETSC_FALSE);
1704: MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);
1705: }
1706: return(0);
1707: }
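/*
   [Editor's sketch, not part of the original source] A minimal, hedged example of how this
   fill-based path is typically reached from user code; dfill and ofill stand for user-supplied
   fill-pattern arrays, and error checking is omitted:

       DM  da;
       Mat J;
       DMDACreate1d(PETSC_COMM_WORLD,DM_BOUNDARY_NONE,128,3,1,NULL,&da);
       DMSetFromOptions(da);
       DMSetUp(da);
       DMDASetBlockFills(da,dfill,ofill);   <-- must precede DMCreateMatrix()
       DMCreateMatrix(da,&J);               <-- with an AIJ matrix type and ofill set, this
                                                is expected to dispatch to the routine above
*/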
1709: /* ---------------------------------------------------------------------------------*/
1711: PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ(DM da,Mat J,PetscBool isIS)
1712: {
1713: PetscErrorCode ierr;
1714: PetscInt xs,nx,i,i1,slot,gxs,gnx;
1715: PetscInt m,dim,s,*cols = NULL,nc,*rows = NULL,col,cnt,l;
1716: PetscInt istart,iend;
1717: DMBoundaryType bx;
1718: ISLocalToGlobalMapping ltog,mltog;
1721: /*
1722: nc - number of components per grid point
1723: col - number of colors needed in one direction for single component problem
1725: */
1726: DMDAGetInfo(da,&dim,&m,0,0,0,0,0,&nc,&s,&bx,0,0,0);
1727: if (!isIS && bx == DM_BOUNDARY_NONE) {
1728: MatSetOption(J,MAT_SORTED_FULL,PETSC_TRUE);
1729: }
1730: col = 2*s + 1;
1732: DMDAGetCorners(da,&xs,0,0,&nx,0,0);
1733: DMDAGetGhostCorners(da,&gxs,0,0,&gnx,0,0);
1735: MatSetBlockSize(J,nc);
1736: MatSeqAIJSetPreallocation(J,col*nc,0);
1737: MatMPIAIJSetPreallocation(J,col*nc,0,col*nc,0);
1739:   DMGetLocalToGlobalMapping(da,&ltog);
1740: MatGetLocalToGlobalMapping(J,&mltog,NULL);
1741: if (!mltog) {
1742: MatSetLocalToGlobalMapping(J,ltog,ltog);
1743: }
1745:   /*
1746:        For each node in the grid we get the neighbors in the local (on-process) ordering that
1747:        includes the ghost points; MatSetValuesLocal() then maps those indices to the global
1748:        PETSc ordering. (An indexing sketch follows this routine.)
1749:   */
1750: if (!da->prealloc_only) {
1751: PetscMalloc2(nc,&rows,col*nc*nc,&cols);
1752: for (i=xs; i<xs+nx; i++) {
1753: istart = PetscMax(-s,gxs - i);
1754: iend = PetscMin(s,gxs + gnx - i - 1);
1755: slot = i - gxs;
1757: cnt = 0;
1758: for (i1=istart; i1<iend+1; i1++) {
1759: cols[cnt++] = nc*(slot + i1);
1760: for (l=1; l<nc; l++) {
1761: cols[cnt] = 1 + cols[cnt-1];cnt++;
1762: }
1763: }
1764: rows[0] = nc*(slot); for (l=1; l<nc; l++) rows[l] = 1 + rows[l-1];
1765: MatSetValuesLocal(J,nc,rows,cnt,cols,NULL,INSERT_VALUES);
1766: }
1767: /* do not copy values to GPU since they are all zero and not yet needed there */
1768: MatPinToCPU(J,PETSC_TRUE);
1769: MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);
1770: MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);
1771: if (!isIS && bx == DM_BOUNDARY_NONE) {
1772: MatSetOption(J,MAT_SORTED_FULL,PETSC_FALSE);
1773: }
1774: MatPinToCPU(J,PETSC_FALSE);
1775: MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);
1776: PetscFree2(rows,cols);
1777: }
1778: return(0);
1779: }
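/*
   [Editor's note, not part of the original source] A small indexing illustration for the loop
   above, with hypothetical numbers: if gxs = 10, nc = 2 and the grid node is i = 12, then
   slot = i - gxs = 2, the node's rows in the ghosted local ordering are {nc*slot, nc*slot + 1}
   = {4, 5}, and the columns for each neighbor offset i1 are laid out the same way.
   MatSetValuesLocal() translates these local indices through the DMDA's
   ISLocalToGlobalMapping into the global PETSc ordering.
*/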
1781: PetscErrorCode DMCreateMatrix_DA_2d_MPIBAIJ(DM da,Mat J)
1782: {
1783: PetscErrorCode ierr;
1784: PetscInt xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
1785: PetscInt m,n,dim,s,*cols,nc,col,cnt,*dnz,*onz;
1786: PetscInt istart,iend,jstart,jend,ii,jj;
1787: MPI_Comm comm;
1788: PetscScalar *values;
1789: DMBoundaryType bx,by;
1790: DMDAStencilType st;
1791: ISLocalToGlobalMapping ltog;
1794: /*
1795: nc - number of components per grid point
1796: col - number of colors needed in one direction for single component problem
1797: */
1798: DMDAGetInfo(da,&dim,&m,&n,0,0,0,0,&nc,&s,&bx,&by,0,&st);
1799: col = 2*s + 1;
1801: DMDAGetCorners(da,&xs,&ys,0,&nx,&ny,0);
1802: DMDAGetGhostCorners(da,&gxs,&gys,0,&gnx,&gny,0);
1803: PetscObjectGetComm((PetscObject)da,&comm);
1805: PetscMalloc1(col*col*nc*nc,&cols);
1807:   DMGetLocalToGlobalMapping(da,&ltog);
1809: /* determine the matrix preallocation information */
1810: MatPreallocateInitialize(comm,nx*ny,nx*ny,dnz,onz);
1811: for (i=xs; i<xs+nx; i++) {
1812: istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1813: iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1));
1814: for (j=ys; j<ys+ny; j++) {
1815: jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1816: jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,n-j-1));
1817: slot = i - gxs + gnx*(j - gys);
1819: /* Find block columns in block row */
1820: cnt = 0;
1821: for (ii=istart; ii<iend+1; ii++) {
1822: for (jj=jstart; jj<jend+1; jj++) {
1823: if (st == DMDA_STENCIL_BOX || !ii || !jj) { /* BOX or on the STAR */
1824: cols[cnt++] = slot + ii + gnx*jj;
1825: }
1826: }
1827: }
1828: MatPreallocateSetLocalBlock(ltog,1,&slot,ltog,cnt,cols,dnz,onz);
1829: }
1830: }
1831: MatSeqBAIJSetPreallocation(J,nc,0,dnz);
1832: MatMPIBAIJSetPreallocation(J,nc,0,dnz,0,onz);
1833: MatPreallocateFinalize(dnz,onz);
1835: MatSetLocalToGlobalMapping(J,ltog,ltog);
1837:   /*
1838:        For each node in the grid we get the neighboring block columns in the local (on-process)
1839:        ordering that includes the ghost points; MatSetValuesBlockedLocal() then maps those block
1840:        indices to the global PETSc ordering. (A usage note follows this routine.)
1841:   */
1842: if (!da->prealloc_only) {
1843: PetscCalloc1(col*col*nc*nc,&values);
1844: for (i=xs; i<xs+nx; i++) {
1845: istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1846: iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1));
1847: for (j=ys; j<ys+ny; j++) {
1848: jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1849: jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,n-j-1));
1850: slot = i - gxs + gnx*(j - gys);
1851: cnt = 0;
1852: for (ii=istart; ii<iend+1; ii++) {
1853: for (jj=jstart; jj<jend+1; jj++) {
1854: if (st == DMDA_STENCIL_BOX || !ii || !jj) { /* BOX or on the STAR */
1855: cols[cnt++] = slot + ii + gnx*jj;
1856: }
1857: }
1858: }
1859: MatSetValuesBlockedLocal(J,1,&slot,cnt,cols,values,INSERT_VALUES);
1860: }
1861: }
1862: PetscFree(values);
1863: /* do not copy values to GPU since they are all zero and not yet needed there */
1864: MatPinToCPU(J,PETSC_TRUE);
1865: MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);
1866: MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);
1867: MatPinToCPU(J,PETSC_FALSE);
1868: MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);
1869: }
1870: PetscFree(cols);
1871: return(0);
1872: }
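/*
   [Editor's sketch, not part of the original source] The blocked routines are normally selected
   through the matrix type on the DM rather than called directly; a hedged example, with error
   checking omitted:

       DMSetMatType(da,MATBAIJ);      <-- or MATSBAIJ for the symmetric routines further below
       DMCreateMatrix(da,&J);

   Each grid node then corresponds to one nc x nc block of the resulting matrix.
*/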
1874: PetscErrorCode DMCreateMatrix_DA_3d_MPIBAIJ(DM da,Mat J)
1875: {
1876: PetscErrorCode ierr;
1877: PetscInt xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
1878: PetscInt m,n,dim,s,*cols,k,nc,col,cnt,p,*dnz,*onz;
1879: PetscInt istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk;
1880: MPI_Comm comm;
1881: PetscScalar *values;
1882: DMBoundaryType bx,by,bz;
1883: DMDAStencilType st;
1884: ISLocalToGlobalMapping ltog;
1887: /*
1888: nc - number of components per grid point
1889: col - number of colors needed in one direction for single component problem
1891: */
1892: DMDAGetInfo(da,&dim,&m,&n,&p,0,0,0,&nc,&s,&bx,&by,&bz,&st);
1893: col = 2*s + 1;
1895: DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);
1896: DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);
1897: PetscObjectGetComm((PetscObject)da,&comm);
1899: PetscMalloc1(col*col*col,&cols);
1901:   DMGetLocalToGlobalMapping(da,&ltog);
1903: /* determine the matrix preallocation information */
1904: MatPreallocateInitialize(comm,nx*ny*nz,nx*ny*nz,dnz,onz);
1905: for (i=xs; i<xs+nx; i++) {
1906: istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1907: iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1));
1908: for (j=ys; j<ys+ny; j++) {
1909: jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1910: jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,n-j-1));
1911: for (k=zs; k<zs+nz; k++) {
1912: kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1913: kend = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,p-k-1));
1915: slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1917: /* Find block columns in block row */
1918: cnt = 0;
1919: for (ii=istart; ii<iend+1; ii++) {
1920: for (jj=jstart; jj<jend+1; jj++) {
1921: for (kk=kstart; kk<kend+1; kk++) {
1922: if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
1923: cols[cnt++] = slot + ii + gnx*jj + gnx*gny*kk;
1924: }
1925: }
1926: }
1927: }
1928: MatPreallocateSetLocalBlock(ltog,1,&slot,ltog,cnt,cols,dnz,onz);
1929: }
1930: }
1931: }
1932: MatSeqBAIJSetPreallocation(J,nc,0,dnz);
1933: MatMPIBAIJSetPreallocation(J,nc,0,dnz,0,onz);
1934: MatPreallocateFinalize(dnz,onz);
1936: MatSetLocalToGlobalMapping(J,ltog,ltog);
1938:   /*
1939:        For each node in the grid we get the neighboring block columns in the local (on-process)
1940:        ordering that includes the ghost points; MatSetValuesBlockedLocal() then maps those block
1941:        indices to the global PETSc ordering.
1942:   */
1943: if (!da->prealloc_only) {
1944: PetscCalloc1(col*col*col*nc*nc,&values);
1945: for (i=xs; i<xs+nx; i++) {
1946: istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1947: iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1));
1948: for (j=ys; j<ys+ny; j++) {
1949: jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1950: jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,n-j-1));
1951: for (k=zs; k<zs+nz; k++) {
1952: kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1953: kend = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,p-k-1));
1955: slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1957: cnt = 0;
1958: for (ii=istart; ii<iend+1; ii++) {
1959: for (jj=jstart; jj<jend+1; jj++) {
1960: for (kk=kstart; kk<kend+1; kk++) {
1961: if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
1962: cols[cnt++] = slot + ii + gnx*jj + gnx*gny*kk;
1963: }
1964: }
1965: }
1966: }
1967: MatSetValuesBlockedLocal(J,1,&slot,cnt,cols,values,INSERT_VALUES);
1968: }
1969: }
1970: }
1971: PetscFree(values);
1972: /* do not copy values to GPU since they are all zero and not yet needed there */
1973: MatPinToCPU(J,PETSC_TRUE);
1974: MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);
1975: MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);
1976: MatPinToCPU(J,PETSC_FALSE);
1977: MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);
1978: }
1979: PetscFree(cols);
1980: return(0);
1981: }
1983: /*
1984:   This helper is for SBAIJ preallocation: it discards the lower-triangular entries, which are
1985:   difficult to identify in the local ordering with a periodic domain. (A worked example follows the helper.)
1986: */
1987: static PetscErrorCode L2GFilterUpperTriangular(ISLocalToGlobalMapping ltog,PetscInt *row,PetscInt *cnt,PetscInt col[])
1988: {
1990: PetscInt i,n;
1993: ISLocalToGlobalMappingApplyBlock(ltog,1,row,row);
1994: ISLocalToGlobalMappingApplyBlock(ltog,*cnt,col,col);
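  /* row and col[] now hold global block indices; the loop below keeps only columns on or above
     the block diagonal, i.e. the upper-triangular part that SBAIJ stores */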
1995: for (i=0,n=0; i<*cnt; i++) {
1996: if (col[i] >= *row) col[n++] = col[i];
1997: }
1998: *cnt = n;
1999: return(0);
2000: }
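/*
   [Editor's note, not part of the original source] A worked example with hypothetical indices:
   if the mapped global block row is 7 and the mapped candidate block columns are {5, 7, 9, 12},
   the filter keeps {7, 9, 12} and sets *cnt = 3, so only upper-triangular blocks are
   preallocated and inserted, matching what MPISBAIJ stores.
*/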
2002: PetscErrorCode DMCreateMatrix_DA_2d_MPISBAIJ(DM da,Mat J)
2003: {
2004: PetscErrorCode ierr;
2005: PetscInt xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
2006: PetscInt m,n,dim,s,*cols,nc,col,cnt,*dnz,*onz;
2007: PetscInt istart,iend,jstart,jend,ii,jj;
2008: MPI_Comm comm;
2009: PetscScalar *values;
2010: DMBoundaryType bx,by;
2011: DMDAStencilType st;
2012: ISLocalToGlobalMapping ltog;
2015: /*
2016: nc - number of components per grid point
2017: col - number of colors needed in one direction for single component problem
2018: */
2019: DMDAGetInfo(da,&dim,&m,&n,0,0,0,0,&nc,&s,&bx,&by,0,&st);
2020: col = 2*s + 1;
2022: DMDAGetCorners(da,&xs,&ys,0,&nx,&ny,0);
2023: DMDAGetGhostCorners(da,&gxs,&gys,0,&gnx,&gny,0);
2024: PetscObjectGetComm((PetscObject)da,&comm);
2026: PetscMalloc1(col*col*nc*nc,&cols);
2028:   DMGetLocalToGlobalMapping(da,&ltog);
2030: /* determine the matrix preallocation information */
2031: MatPreallocateInitialize(comm,nx*ny,nx*ny,dnz,onz);
2032: for (i=xs; i<xs+nx; i++) {
2033: istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
2034: iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1));
2035: for (j=ys; j<ys+ny; j++) {
2036: jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
2037: jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,n-j-1));
2038: slot = i - gxs + gnx*(j - gys);
2040: /* Find block columns in block row */
2041: cnt = 0;
2042: for (ii=istart; ii<iend+1; ii++) {
2043: for (jj=jstart; jj<jend+1; jj++) {
2044: if (st == DMDA_STENCIL_BOX || !ii || !jj) {
2045: cols[cnt++] = slot + ii + gnx*jj;
2046: }
2047: }
2048: }
2049: L2GFilterUpperTriangular(ltog,&slot,&cnt,cols);
2050: MatPreallocateSymmetricSetBlock(slot,cnt,cols,dnz,onz);
2051: }
2052: }
2053: MatSeqSBAIJSetPreallocation(J,nc,0,dnz);
2054: MatMPISBAIJSetPreallocation(J,nc,0,dnz,0,onz);
2055: MatPreallocateFinalize(dnz,onz);
2057: MatSetLocalToGlobalMapping(J,ltog,ltog);
2059:   /*
2060:        For each node in the grid we get the neighboring block columns in the local (on-process)
2061:        ordering that includes the ghost points; L2GFilterUpperTriangular() then maps them to the
2062:        global PETSc ordering and drops the lower-triangular entries before MatSetValuesBlocked().
2063:   */
2064: if (!da->prealloc_only) {
2065: PetscCalloc1(col*col*nc*nc,&values);
2066: for (i=xs; i<xs+nx; i++) {
2067: istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
2068: iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1));
2069: for (j=ys; j<ys+ny; j++) {
2070: jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
2071: jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,n-j-1));
2072: slot = i - gxs + gnx*(j - gys);
2074: /* Find block columns in block row */
2075: cnt = 0;
2076: for (ii=istart; ii<iend+1; ii++) {
2077: for (jj=jstart; jj<jend+1; jj++) {
2078: if (st == DMDA_STENCIL_BOX || !ii || !jj) {
2079: cols[cnt++] = slot + ii + gnx*jj;
2080: }
2081: }
2082: }
2083: L2GFilterUpperTriangular(ltog,&slot,&cnt,cols);
2084: MatSetValuesBlocked(J,1,&slot,cnt,cols,values,INSERT_VALUES);
2085: }
2086: }
2087: PetscFree(values);
2088: /* do not copy values to GPU since they are all zero and not yet needed there */
2089: MatPinToCPU(J,PETSC_TRUE);
2090: MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);
2091: MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);
2092: MatPinToCPU(J,PETSC_FALSE);
2093: MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);
2094: }
2095: PetscFree(cols);
2096: return(0);
2097: }
2099: PetscErrorCode DMCreateMatrix_DA_3d_MPISBAIJ(DM da,Mat J)
2100: {
2101: PetscErrorCode ierr;
2102: PetscInt xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
2103: PetscInt m,n,dim,s,*cols,k,nc,col,cnt,p,*dnz,*onz;
2104: PetscInt istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk;
2105: MPI_Comm comm;
2106: PetscScalar *values;
2107: DMBoundaryType bx,by,bz;
2108: DMDAStencilType st;
2109: ISLocalToGlobalMapping ltog;
2112: /*
2113: nc - number of components per grid point
2114: col - number of colors needed in one direction for single component problem
2115: */
2116: DMDAGetInfo(da,&dim,&m,&n,&p,0,0,0,&nc,&s,&bx,&by,&bz,&st);
2117: col = 2*s + 1;
2119: DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);
2120: DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);
2121: PetscObjectGetComm((PetscObject)da,&comm);
2123: /* create the matrix */
2124: PetscMalloc1(col*col*col,&cols);
2126:   DMGetLocalToGlobalMapping(da,&ltog);
2128: /* determine the matrix preallocation information */
2129: MatPreallocateInitialize(comm,nx*ny*nz,nx*ny*nz,dnz,onz);
2130: for (i=xs; i<xs+nx; i++) {
2131: istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
2132: iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1));
2133: for (j=ys; j<ys+ny; j++) {
2134: jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
2135: jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,n-j-1));
2136: for (k=zs; k<zs+nz; k++) {
2137: kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
2138: kend = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,p-k-1));
2140: slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
2142: /* Find block columns in block row */
2143: cnt = 0;
2144: for (ii=istart; ii<iend+1; ii++) {
2145: for (jj=jstart; jj<jend+1; jj++) {
2146: for (kk=kstart; kk<kend+1; kk++) {
2147: if ((st == DMDA_STENCIL_BOX) || (!ii && !jj) || (!jj && !kk) || (!ii && !kk)) {
2148: cols[cnt++] = slot + ii + gnx*jj + gnx*gny*kk;
2149: }
2150: }
2151: }
2152: }
2153: L2GFilterUpperTriangular(ltog,&slot,&cnt,cols);
2154: MatPreallocateSymmetricSetBlock(slot,cnt,cols,dnz,onz);
2155: }
2156: }
2157: }
2158: MatSeqSBAIJSetPreallocation(J,nc,0,dnz);
2159: MatMPISBAIJSetPreallocation(J,nc,0,dnz,0,onz);
2160: MatPreallocateFinalize(dnz,onz);
2162: MatSetLocalToGlobalMapping(J,ltog,ltog);
2164:   /*
2165:        For each node in the grid we get the neighboring block columns in the local (on-process)
2166:        ordering that includes the ghost points; L2GFilterUpperTriangular() then maps them to the
2167:        global PETSc ordering and drops the lower-triangular entries before MatSetValuesBlocked().
2168:   */
2169: if (!da->prealloc_only) {
2170: PetscCalloc1(col*col*col*nc*nc,&values);
2171: for (i=xs; i<xs+nx; i++) {
2172: istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
2173: iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1));
2174: for (j=ys; j<ys+ny; j++) {
2175: jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
2176: jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,n-j-1));
2177: for (k=zs; k<zs+nz; k++) {
2178: kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
2179: kend = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,p-k-1));
2181: slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
2183: cnt = 0;
2184: for (ii=istart; ii<iend+1; ii++) {
2185: for (jj=jstart; jj<jend+1; jj++) {
2186: for (kk=kstart; kk<kend+1; kk++) {
2187: if ((st == DMDA_STENCIL_BOX) || (!ii && !jj) || (!jj && !kk) || (!ii && !kk)) {
2188: cols[cnt++] = slot + ii + gnx*jj + gnx*gny*kk;
2189: }
2190: }
2191: }
2192: }
2193: L2GFilterUpperTriangular(ltog,&slot,&cnt,cols);
2194: MatSetValuesBlocked(J,1,&slot,cnt,cols,values,INSERT_VALUES);
2195: }
2196: }
2197: }
2198: PetscFree(values);
2199: /* do not copy values to GPU since they are all zero and not yet needed there */
2200: MatPinToCPU(J,PETSC_TRUE);
2201: MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);
2202: MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);
2203: MatPinToCPU(J,PETSC_FALSE);
2204: MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);
2205: }
2206: PetscFree(cols);
2207: return(0);
2208: }
2210: /* ---------------------------------------------------------------------------------*/
2212: PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ_Fill(DM da,Mat J)
2213: {
2214: PetscErrorCode ierr;
2215: PetscInt xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
2216: PetscInt m,n,dim,s,*cols,k,nc,row,col,cnt, maxcnt = 0,l,p,*dnz,*onz;
2217: PetscInt istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk,M,N,P;
2218: DM_DA *dd = (DM_DA*)da->data;
2219: PetscInt ifill_col,*dfill = dd->dfill,*ofill = dd->ofill;
2220: MPI_Comm comm;
2221: PetscScalar *values;
2222: DMBoundaryType bx,by,bz;
2223: ISLocalToGlobalMapping ltog;
2224: DMDAStencilType st;
2225: PetscBool removedups = PETSC_FALSE;
2228: /*
2229: nc - number of components per grid point
2230: col - number of colors needed in one direction for single component problem
2232: */
2233: DMDAGetInfo(da,&dim,&m,&n,&p,&M,&N,&P,&nc,&s,&bx,&by,&bz,&st);
2234: col = 2*s + 1;
2235: if (bx == DM_BOUNDARY_PERIODIC && (m % col)) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in X is divisible\n\
2236: by 2*stencil_width + 1\n");
2237: if (by == DM_BOUNDARY_PERIODIC && (n % col)) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Y is divisible\n\
2238: by 2*stencil_width + 1\n");
2239: if (bz == DM_BOUNDARY_PERIODIC && (p % col)) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Z is divisible\n\
2240: by 2*stencil_width + 1\n");
2242: /*
2243: With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times
2244: because of "wrapping" around the end of the domain hitting an entry already counted in the other direction.
2245: */
2246: if (M == 1 && 2*s >= m) removedups = PETSC_TRUE;
2247: if (N == 1 && 2*s >= n) removedups = PETSC_TRUE;
2248: if (P == 1 && 2*s >= p) removedups = PETSC_TRUE;
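  /*
     [Editor's note, not part of the original source] A hypothetical instance of the wrapping case:
     with one process in x (M == 1), bx periodic, m = 3 and s = 2, the stencil offsets -2..2 around
     a point i reach, modulo m, the columns {i+1, i-1, i, i+1, i-1}; the same global column is
     generated twice and must be counted only once, hence the MatPreallocateSetLocalRemoveDups()
     branch below.
  */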
2250: DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);
2251: DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);
2252: PetscObjectGetComm((PetscObject)da,&comm);
2254: PetscMalloc1(col*col*col*nc,&cols);
2255:   DMGetLocalToGlobalMapping(da,&ltog);
2257: /* determine the matrix preallocation information */
2258: MatPreallocateInitialize(comm,nc*nx*ny*nz,nc*nx*ny*nz,dnz,onz);
2260: MatSetBlockSize(J,nc);
2261: for (i=xs; i<xs+nx; i++) {
2262: istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
2263: iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1));
2264: for (j=ys; j<ys+ny; j++) {
2265: jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
2266: jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,n-j-1));
2267: for (k=zs; k<zs+nz; k++) {
2268: kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
2269: kend = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,p-k-1));
2271: slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
2273: for (l=0; l<nc; l++) {
2274: cnt = 0;
2275: for (ii=istart; ii<iend+1; ii++) {
2276: for (jj=jstart; jj<jend+1; jj++) {
2277: for (kk=kstart; kk<kend+1; kk++) {
2278: if (ii || jj || kk) {
2279: if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
2280: for (ifill_col=ofill[l]; ifill_col<ofill[l+1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc*(slot + ii + gnx*jj + gnx*gny*kk);
2281: }
2282: } else {
2283: if (dfill) {
2284: for (ifill_col=dfill[l]; ifill_col<dfill[l+1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc*(slot + ii + gnx*jj + gnx*gny*kk);
2285: } else {
2286: for (ifill_col=0; ifill_col<nc; ifill_col++) cols[cnt++] = ifill_col + nc*(slot + ii + gnx*jj + gnx*gny*kk);
2287: }
2288: }
2289: }
2290: }
2291: }
2292: row = l + nc*(slot);
2293: maxcnt = PetscMax(maxcnt,cnt);
2294: if (removedups) {
2295: MatPreallocateSetLocalRemoveDups(ltog,1,&row,ltog,cnt,cols,dnz,onz);
2296: } else {
2297: MatPreallocateSetLocal(ltog,1,&row,ltog,cnt,cols,dnz,onz);
2298: }
2299: }
2300: }
2301: }
2302: }
2303: MatSeqAIJSetPreallocation(J,0,dnz);
2304: MatMPIAIJSetPreallocation(J,0,dnz,0,onz);
2305: MatPreallocateFinalize(dnz,onz);
2306: MatSetLocalToGlobalMapping(J,ltog,ltog);
2308:   /*
2309:        For each node in the grid we get the neighbors in the local (on-process) ordering that
2310:        includes the ghost points; MatSetValuesLocal() then maps those indices to the global
2311:        PETSc ordering. (A note on how the fill patterns select columns follows this routine.)
2312:   */
2313: if (!da->prealloc_only) {
2314: PetscCalloc1(maxcnt,&values);
2315: for (i=xs; i<xs+nx; i++) {
2316: istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
2317: iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1));
2318: for (j=ys; j<ys+ny; j++) {
2319: jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
2320: jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,n-j-1));
2321: for (k=zs; k<zs+nz; k++) {
2322: kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
2323: kend = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,p-k-1));
2325: slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
2327: for (l=0; l<nc; l++) {
2328: cnt = 0;
2329: for (ii=istart; ii<iend+1; ii++) {
2330: for (jj=jstart; jj<jend+1; jj++) {
2331: for (kk=kstart; kk<kend+1; kk++) {
2332: if (ii || jj || kk) {
2333: if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
2334: for (ifill_col=ofill[l]; ifill_col<ofill[l+1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc*(slot + ii + gnx*jj + gnx*gny*kk);
2335: }
2336: } else {
2337: if (dfill) {
2338: for (ifill_col=dfill[l]; ifill_col<dfill[l+1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc*(slot + ii + gnx*jj + gnx*gny*kk);
2339: } else {
2340: for (ifill_col=0; ifill_col<nc; ifill_col++) cols[cnt++] = ifill_col + nc*(slot + ii + gnx*jj + gnx*gny*kk);
2341: }
2342: }
2343: }
2344: }
2345: }
2346: row = l + nc*(slot);
2347: MatSetValuesLocal(J,1,&row,cnt,cols,values,INSERT_VALUES);
2348: }
2349: }
2350: }
2351: }
2352: PetscFree(values);
2353: /* do not copy values to GPU since they are all zero and not yet needed there */
2354: MatPinToCPU(J,PETSC_TRUE);
2355: MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);
2356: MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);
2357: MatPinToCPU(J,PETSC_FALSE);
2358: MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);
2359: }
2360: PetscFree(cols);
2361: return(0);
2362: }
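/*
   [Editor's note, not part of the original source] How the fill patterns select columns in the
   routine above, with hypothetical numbers: for row component l at ghosted local slot "slot",
   an off-diagonal stencil neighbor at slot nslot (shorthand for slot + ii + gnx*jj + gnx*gny*kk)
   contributes the columns nc*nslot + ofill[ifill_col] for ifill_col in [ofill[l], ofill[l+1]).
   With nc = 3, nslot = 42 and components {0, 2} listed for row l in ofill, that neighbor
   contributes only the local columns 126 and 128 instead of all three.
*/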