Actual source code: stag2d.c

petsc-3.12.5 2020-03-29
Report Typos and Errors
  1: /* Functions specific to the 2-dimensional implementation of DMStag */
  2:  #include <petsc/private/dmstagimpl.h>

  4: /*@C
  5:   DMStagCreate2d - Create an object to manage data living on the faces, edges, and vertices of a parallelized regular 2D grid.

  7:   Collective

  9:   Input Parameters:
 10: + comm - MPI communicator
 11: . bndx,bndy - boundary type: DM_BOUNDARY_NONE, DM_BOUNDARY_PERIODIC, or DM_BOUNDARY_GHOSTED
 12: . M,N - global number of grid points in x,y directions
 13: . m,n - number of ranks in the x,y directions (may be PETSC_DECIDE)
 14: . dof0 - number of degrees of freedom per vertex/point/node/0-cell
 15: . dof1 - number of degrees of freedom per edge/1-cell
 16: . dof2 - number of degrees of freedom per element/2-cell
 17: . stencilType - ghost/halo region type: DMSTAG_STENCIL_NONE, DMSTAG_STENCIL_BOX, or DMSTAG_STENCIL_STAR
 18: . stencilWidth - width, in elements, of halo/ghost region
 19: - lx,ly - arrays of local x,y element counts, of length equal to m,n, summing to M,N

 21:   Output Parameter:
 22: . dm - the new DMStag object

 24:   Options Database Keys:
 25: + -dm_view - calls DMViewFromOptions() a the conclusion of DMSetUp()
 26: . -stag_grid_x <nx> - number of elements in the x direction
 27: . -stag_grid_y <ny> - number of elements in the y direction
 28: . -stag_ranks_x <rx> - number of ranks in the x direction
 29: . -stag_ranks_y <ry> - number of ranks in the y direction
 30: . -stag_ghost_stencil_width - width of ghost region, in elements
 31: . -stag_boundary_type_x <none,ghosted,periodic> - DMBoundaryType value
 32: - -stag_boundary_type_y <none,ghosted,periodic> - DMBoundaryType value

 34:   Notes:
 35:   You must call DMSetUp() after this call, before using the DM.
 36:   If you wish to use the options database (see the keys above) to change values in the DMStag, you must call
 37:   DMSetFromOptions() after this function but before DMSetUp().

 39:   Level: beginner

 41: .seealso: DMSTAG, DMStagCreate1d(), DMStagCreate3d(), DMDestroy(), DMView(), DMCreateGlobalVector(), DMCreateLocalVector(), DMLocalToGlobalBegin(), DMDACreate2d()
 42: @*/
 43: PETSC_EXTERN PetscErrorCode DMStagCreate2d(MPI_Comm comm, DMBoundaryType bndx ,DMBoundaryType bndy, PetscInt M,PetscInt N, PetscInt m,PetscInt n, PetscInt dof0,PetscInt dof1,PetscInt dof2,DMStagStencilType stencilType,PetscInt stencilWidth,const PetscInt lx[],const PetscInt ly[],DM* dm)
 44: {
 46:   DM_Stag        *stag;

 49:   DMCreate(comm,dm);
 50:   DMSetDimension(*dm,2); /* Must precede DMSetType */
 51:   DMSetType(*dm,DMSTAG);
 52:   stag = (DM_Stag*)(*dm)->data;

 54:   /* Global sizes and flags (derived quantities set in DMSetUp_Stag) */
 55:   stag->boundaryType[0] = bndx;
 56:   stag->boundaryType[1] = bndy;
 57:   stag->N[0]            = M;
 58:   stag->N[1]            = N;
 59:   stag->nRanks[0]       = m; /* Adjusted later in DMSetUp_Stag */
 60:   stag->nRanks[1]       = n; /* Adjusted later in DMSetUp_Stag */
 61:   stag->stencilType     = stencilType;
 62:   stag->stencilWidth    = stencilWidth;
 63:   DMStagSetDOF(*dm,dof0,dof1,dof2,0);
 64:   DMStagSetOwnershipRanges(*dm,lx,ly,NULL);

 66:   return(0);
 67: }

 69: PETSC_INTERN PetscErrorCode DMStagSetUniformCoordinatesExplicit_2d(DM dm,PetscReal xmin,PetscReal xmax,PetscReal ymin,PetscReal ymax)
 70: {
 72:   DM_Stag        *stagCoord;
 73:   DM             dmCoord;
 74:   Vec            coordLocal,coord;
 75:   PetscReal      h[2],min[2];
 76:   PetscScalar    ***arr;
 77:   PetscInt       ind[2],start[2],n[2],nExtra[2],s,c;
 78:   PetscInt       idownleft,idown,ileft,ielement;

 81:   DMGetCoordinateDM(dm, &dmCoord);
 82:   stagCoord = (DM_Stag*) dmCoord->data;
 83:   for (s=0; s<3; ++s) {
 84:     if (stagCoord->dof[s] !=0 && stagCoord->dof[s] != 2) SETERRQ2(PetscObjectComm((PetscObject)dm),PETSC_ERR_PLIB,"Coordinate DM in 2 dimensions must have 0 or 2 dof on each stratum, but stratum %d has %d dof",s,stagCoord->dof[s]);
 85:   }
 86:   DMGetLocalVector(dmCoord,&coordLocal);

 88:   DMStagVecGetArrayDOF(dmCoord,coordLocal,&arr);
 89:   if (stagCoord->dof[0]) {
 90:     DMStagGetLocationSlot(dmCoord,DMSTAG_DOWN_LEFT,0,&idownleft);
 91:   }
 92:   if (stagCoord->dof[1]) {
 93:     DMStagGetLocationSlot(dmCoord,DMSTAG_DOWN     ,0,&idown    );
 94:     DMStagGetLocationSlot(dmCoord,DMSTAG_LEFT     ,0,&ileft    );
 95:   }
 96:   if (stagCoord->dof[2]) {
 97:     DMStagGetLocationSlot(dmCoord,DMSTAG_ELEMENT  ,0,&ielement );
 98:   }
 99:   DMStagGetCorners(dmCoord,&start[0],&start[1],NULL,&n[0],&n[1],NULL,&nExtra[0],&nExtra[1],NULL);

101:   min[0] = xmin; min[1]= ymin;
102:   h[0] = (xmax-xmin)/stagCoord->N[0];
103:   h[1] = (ymax-ymin)/stagCoord->N[1];

105:   for(ind[1]=start[1]; ind[1]<start[1] + n[1] + nExtra[1]; ++ind[1]) {
106:     for(ind[0]=start[0]; ind[0]<start[0] + n[0] + nExtra[0]; ++ind[0]) {
107:       if (stagCoord->dof[0]) {
108:         const PetscReal offs[2] = {0.0,0.0};
109:         for (c=0; c<2; ++c) {
110:           arr[ind[1]][ind[0]][idownleft + c] = min[c] + ((PetscReal)ind[c] + offs[c]) * h[c];
111:         }
112:       }
113:       if (stagCoord->dof[1]) {
114:         const PetscReal offs[2] = {0.5,0.0};
115:         for (c=0; c<2; ++c) {
116:           arr[ind[1]][ind[0]][idown + c] = min[c] + ((PetscReal)ind[c] + offs[c]) * h[c];
117:         }
118:       }
119:       if (stagCoord->dof[1]) {
120:         const PetscReal offs[2] = {0.0,0.5};
121:         for (c=0; c<2; ++c) {
122:           arr[ind[1]][ind[0]][ileft + c] = min[c] + ((PetscReal)ind[c] + offs[c]) * h[c];
123:         }
124:       }
125:       if (stagCoord->dof[2]) {
126:         const PetscReal offs[2] = {0.5,0.5};
127:         for (c=0; c<2; ++c) {
128:           arr[ind[1]][ind[0]][ielement + c] = min[c] + ((PetscReal)ind[c] + offs[c]) * h[c];
129:         }
130:       }
131:     }
132:   }
133:   DMStagVecRestoreArrayDOF(dmCoord,coordLocal,&arr);
134:   DMCreateGlobalVector(dmCoord,&coord);
135:   DMLocalToGlobalBegin(dmCoord,coordLocal,INSERT_VALUES,coord);
136:   DMLocalToGlobalEnd(dmCoord,coordLocal,INSERT_VALUES,coord);
137:   DMSetCoordinates(dm,coord);
138:   PetscLogObjectParent((PetscObject)dm,(PetscObject)coord);
139:   DMRestoreLocalVector(dmCoord,&coordLocal);
140:   VecDestroy(&coord);
141:   return(0);
142: }

144: /* Helper functions used in DMSetUp_Stag() */
145: static PetscErrorCode DMStagSetUpBuildRankGrid_2d(DM);
146: static PetscErrorCode DMStagSetUpBuildNeighbors_2d(DM);
147: static PetscErrorCode DMStagSetUpBuildGlobalOffsets_2d(DM,PetscInt**);
148: static PetscErrorCode DMStagComputeLocationOffsets_2d(DM);

150: PETSC_INTERN PetscErrorCode DMSetUp_Stag_2d(DM dm)
151: {
152:   PetscErrorCode  ierr;
153:   DM_Stag * const stag = (DM_Stag*)dm->data;
154:   PetscMPIInt     size,rank;
155:   PetscInt        i,j,d,entriesPerElementRowGhost,entriesPerCorner,entriesPerEdge,entriesPerElementRow;
156:   MPI_Comm        comm;
157:   PetscInt        *globalOffsets;
158:   PetscBool       star,dummyStart[2],dummyEnd[2];
159:   const PetscInt  dim = 2;

162:   PetscObjectGetComm((PetscObject)dm,&comm);
163:   MPI_Comm_size(comm,&size);
164:   MPI_Comm_rank(comm,&rank);

166:   /* Rank grid sizes (populates stag->nRanks) */
167:   DMStagSetUpBuildRankGrid_2d(dm);

169:   /* Determine location of rank in grid (these get extra boundary points on the last element)
170:      Order is x-fast, as usual */
171:     stag->rank[0] = rank % stag->nRanks[0];
172:     stag->rank[1] = rank / stag->nRanks[0];
173:     for(i=0; i<dim; ++i) {
174:       stag->firstRank[i] = PetscNot(stag->rank[i]);
175:       stag->lastRank[i]  = (PetscBool)(stag->rank[i] == stag->nRanks[i]-1);
176:     }

178:   /* Determine Locally owned region

180:    Divide equally, giving lower ranks in each dimension and extra element if needbe.

182:    Note that this uses O(P) storage. If this ever becomes an issue, this could
183:    be refactored to not keep this data around.  */
184:   for (i=0; i<dim; ++i) {
185:     if (!stag->l[i]) {
186:       const PetscInt Ni = stag->N[i], nRanksi = stag->nRanks[i];
187:       PetscMalloc1(stag->nRanks[i],&stag->l[i]);
188:       for (j=0; j<stag->nRanks[i]; ++j){
189:         stag->l[i][j] = Ni/nRanksi + ((Ni % nRanksi) > j);
190:       }
191:     }
192:   }

194:   /* Retrieve local size in stag->n */
195:   for (i=0; i<dim; ++i) stag->n[i] = stag->l[i][stag->rank[i]];
196: #if defined(PETSC_USE_DEBUG)
197:   for (i=0; i<dim; ++i) {
198:     PetscInt Ncheck,j;
199:     Ncheck = 0;
200:     for (j=0; j<stag->nRanks[i]; ++j) Ncheck += stag->l[i][j];
201:     if (Ncheck != stag->N[i]) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local sizes in dimension %d don't add up. %d != %d\n",i,Ncheck,stag->N[i]);
202:   }
203: #endif

205:   /* Compute starting elements */
206:   for (i=0; i<dim; ++i) {
207:     stag->start[i] = 0;
208:     for(j=0;j<stag->rank[i];++j){
209:       stag->start[i] += stag->l[i][j];
210:     }
211:   }

213:   /* Determine ranks of neighbors, using DMDA's convention

215:      n6 n7 n8
216:      n3    n5
217:      n0 n1 n2                                               */
218:   DMStagSetUpBuildNeighbors_2d(dm);

220:     /* Determine whether the ghost region includes dummies or not. This is currently
221:        equivalent to having a non-periodic boundary. If not, then
222:        ghostOffset{Start,End}[d] elements correspond to elements on the neighbor.
223:        If true, then
224:        - at the start, there are ghostOffsetStart[d] ghost elements
225:        - at the end, there is a layer of extra "physical" points inside a layer of
226:          ghostOffsetEnd[d] ghost elements
227:        Note that this computation should be updated if any boundary types besides
228:        NONE, GHOSTED, and PERIODIC are supported.  */
229:     for (d=0; d<2; ++d) dummyStart[d] = (PetscBool)(stag->firstRank[d] && stag->boundaryType[d] != DM_BOUNDARY_PERIODIC);
230:     for (d=0; d<2; ++d) dummyEnd[d]   = (PetscBool)(stag->lastRank[d]  && stag->boundaryType[d] != DM_BOUNDARY_PERIODIC);

232:   /* Define useful sizes */
233:   stag->entriesPerElement = stag->dof[0] + 2*stag->dof[1] + stag->dof[2];
234:   entriesPerEdge          = stag->dof[0] + stag->dof[1];
235:   entriesPerCorner        = stag->dof[0];
236:   entriesPerElementRow    = stag->n[0]*stag->entriesPerElement + (dummyEnd[0] ? entriesPerEdge : 0);
237:   stag->entries           = stag->n[1]*entriesPerElementRow +  (dummyEnd[1] ? stag->n[0]*entriesPerEdge : 0 ) + (dummyEnd[0] && dummyEnd[1] ? entriesPerCorner: 0);

239:   /* Compute offsets for each rank into global vectors
240:      This again requires O(P) storage, which could be replaced with some global
241:      communication.  */
242:   DMStagSetUpBuildGlobalOffsets_2d(dm,&globalOffsets);

244:   for (d=0; d<dim; ++d) if (stag->boundaryType[d] != DM_BOUNDARY_NONE && stag->boundaryType[d] != DM_BOUNDARY_PERIODIC && stag->boundaryType[d] != DM_BOUNDARY_GHOSTED) SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"Unsupported boundary type");

246:   /* Define ghosted/local sizes */
247:   for (d=0; d<dim; ++d) {
248:     switch (stag->boundaryType[d]) {
249:       case DM_BOUNDARY_NONE:
250:         /* Note: for a elements-only DMStag, the extra elements on the edges aren't necessary but we include them anyway */
251:         switch (stag->stencilType) {
252:           case DMSTAG_STENCIL_NONE : /* only the extra one on the right/top edges */
253:             stag->nGhost[d] = stag->n[d];
254:             stag->startGhost[d] = stag->start[d];
255:             if (stag->lastRank[d]) stag->nGhost[d] += 1;
256:             break;
257:           case DMSTAG_STENCIL_STAR : /* allocate the corners but don't use them */
258:           case DMSTAG_STENCIL_BOX :
259:             stag->nGhost[d] = stag->n[d];
260:             stag->startGhost[d] = stag->start[d];
261:             if (!stag->firstRank[d]) {
262:               stag->nGhost[d]     += stag->stencilWidth; /* add interior ghost elements */
263:               stag->startGhost[d] -= stag->stencilWidth;
264:             }
265:             if (!stag->lastRank[d]) {
266:               stag->nGhost[d] += stag->stencilWidth; /* add interior ghost elements */
267:             } else {
268:               stag->nGhost[d] += 1; /* one element on the boundary to complete blocking */
269:             }
270:             break;
271:           default :
272:             SETERRQ1(PETSC_COMM_WORLD,PETSC_ERR_SUP,"Unrecognized ghost stencil type %d",stag->stencilType);
273:         }
274:         break;
275:       case DM_BOUNDARY_GHOSTED:
276:         switch (stag->stencilType) {
277:           case DMSTAG_STENCIL_NONE :
278:             stag->startGhost[d] = stag->start[d];
279:             stag->nGhost[d]     = stag->n[d] + (stag->lastRank[d] ? 1 : 0);
280:             break;
281:           case DMSTAG_STENCIL_STAR :
282:           case DMSTAG_STENCIL_BOX :
283:             stag->startGhost[d] = stag->start[d] - stag->stencilWidth; /* This value may be negative */
284:             stag->nGhost[d]     = stag->n[d] + 2*stag->stencilWidth + (stag->lastRank[d] && stag->stencilWidth == 0 ? 1 : 0);
285:             break;
286:           default :
287:             SETERRQ1(PETSC_COMM_WORLD,PETSC_ERR_SUP,"Unrecognized ghost stencil type %d",stag->stencilType);
288:         }
289:         break;
290:       case DM_BOUNDARY_PERIODIC:
291:         switch (stag->stencilType) {
292:           case DMSTAG_STENCIL_NONE : /* only the extra one on the right/top edges */
293:             stag->nGhost[d] = stag->n[d];
294:             stag->startGhost[d] = stag->start[d];
295:             break;
296:           case DMSTAG_STENCIL_STAR :
297:           case DMSTAG_STENCIL_BOX :
298:             stag->nGhost[d] = stag->n[d] + 2*stag->stencilWidth;
299:             stag->startGhost[d] = stag->start[d] - stag->stencilWidth;
300:             break;
301:           default :
302:             SETERRQ1(PETSC_COMM_WORLD,PETSC_ERR_SUP,"Unrecognized ghost stencil type %d",stag->stencilType);
303:         }
304:         break;
305:       default: SETERRQ1(PETSC_COMM_WORLD,PETSC_ERR_SUP,"Unsupported boundary type in dimension %D",d);
306:     }
307:   }
308:   stag->entriesGhost = stag->nGhost[0]*stag->nGhost[1]*stag->entriesPerElement;
309:   entriesPerElementRowGhost = stag->nGhost[0]*stag->entriesPerElement;

311:   /* Create global-->local VecScatter and local->global ISLocalToGlobalMapping

313:      We iterate over all local points twice. First, we iterate over each neighbor, populating
314:      1. idxLocal[] : the subset of points, in local numbering ("S" from 0 on all points including ghosts), which correspond to global points. That is, the set of all non-dummy points in the ghosted representation
315:      2. idxGlobal[]: the corresponding global points, in global numbering (Nested "S"s - ranks then non-ghost points in each rank)

317:      Next, we iterate over all points in the local ordering, populating
318:      3. idxGlobalAll[] : entry i is the global point corresponding to local point i, or -1 if local point i is a dummy.

320:      Note further here that the local/ghosted vectors:
321:      - Are always an integral number of elements-worth of points, in all directions.
322:      - Contain three flavors of points:
323:      1. Points which "live here" in the global representation
324:      2. Ghost points which correspond to points on other ranks in the global representation
325:      3. Ghost points, which we call "dummy points," which do not correspond to any point in the global representation

327:      Dummy ghost points arise in at least three ways:
328:      1. As padding for the right, top, and front physical boundaries, to complete partial elements
329:      2. As unused space in the "corners" on interior ranks when using a star stencil
330:      3. As additional work space on all physical boundaries, when DM_BOUNDARY_GHOSTED is used

332:      Note that, because of the boundary dummies,
333:      with a stencil width of zero, on 1 rank, local and global vectors
334:      are still different!

336:      We assume that the size on each rank is greater than or equal to the
337:      stencil width.
338:      */

340:   if (stag->n[0] < stag->stencilWidth || stag->n[1] < stag->stencilWidth) {
341:     SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_SUP,"DMStag 2d setup does not support local sizes (%D x %D) smaller than the elementwise stencil width (%D)",stag->n[0],stag->n[1],stag->stencilWidth);
342:   }

344:   /* Check stencil type */
345:   if (stag->stencilType != DMSTAG_STENCIL_NONE && stag->stencilType != DMSTAG_STENCIL_BOX && stag->stencilType != DMSTAG_STENCIL_STAR) SETERRQ1(PetscObjectComm((PetscObject)dm),PETSC_ERR_SUP,"Unsupported stencil type %s",DMStagStencilTypes[stag->stencilType]);
346:   if (stag->stencilType == DMSTAG_STENCIL_NONE && stag->stencilWidth != 0) SETERRQ(PetscObjectComm((PetscObject)dm),PETSC_ERR_SUP,"DMStag 2d setup requries stencil width 0 with stencil type none");
347:   star = (PetscBool)(stag->stencilType == DMSTAG_STENCIL_STAR);

349:   {
350:     PetscInt *idxLocal,*idxGlobal,*idxGlobalAll;
351:     PetscInt  count,countAll,entriesToTransferTotal,i,j,d,ghostOffsetStart[2],ghostOffsetEnd[2];
352:     IS        isLocal,isGlobal;
353:     PetscInt  jghost,ighost;
354:     PetscInt  nNeighbors[9][2];
355:     PetscBool nextToDummyEnd[2];

357:     /* Compute numbers of elements on each neighbor */
358:     for (i=0; i<9; ++i) {
359:       const PetscInt neighborRank = stag->neighbors[i];
360:       if (neighborRank >= 0) { /* note we copy the values for our own rank (neighbor 4) */
361:         nNeighbors[i][0] =  stag->l[0][neighborRank % stag->nRanks[0]];
362:         nNeighbors[i][1] =  stag->l[1][neighborRank / stag->nRanks[0]];
363:       } /* else leave uninitialized - error if accessed */
364:     }

366:     /* These offsets should always be non-negative, and describe how many
367:        ghost elements exist at each boundary. These are not always equal to the stencil width,
368:        because we may have different numbers of ghost elements at the boundaries. In particular,
369:        we always have at least one ghost (dummy) element at the right/top/front. */
370:     for (d=0; d<2; ++d) ghostOffsetStart[d] = stag->start[d] - stag->startGhost[d];
371:     for (d=0; d<2; ++d) ghostOffsetEnd[d]   = stag->startGhost[d]+stag->nGhost[d] - (stag->start[d]+stag->n[d]);

373:     /* Compute whether the next rank has an extra point (only used in x direction) */
374:     for (d=0;d<2;++d) nextToDummyEnd[d] = (PetscBool)(stag->boundaryType[d] != DM_BOUNDARY_PERIODIC && stag->rank[d] == stag->nRanks[d]-2);

376:     /* Compute the number of local entries which correspond to any global entry */
377:     {
378:       PetscInt nNonDummyGhost[2];
379:       for (d=0; d<2; ++d) nNonDummyGhost[d] = stag->nGhost[d] - (dummyStart[d] ? ghostOffsetStart[d] : 0) - (dummyEnd[d] ? ghostOffsetEnd[d] : 0);
380:       if (star) {
381:         entriesToTransferTotal = (nNonDummyGhost[0] * stag->n[1] + stag->n[0] * nNonDummyGhost[1] - stag->n[0] * stag->n[1]) * stag->entriesPerElement
382:           + (dummyEnd[0]                ? nNonDummyGhost[1] * entriesPerEdge   : 0)
383:           + (dummyEnd[1]                ? nNonDummyGhost[0] * entriesPerEdge   : 0)
384:           + (dummyEnd[0] && dummyEnd[1] ?                     entriesPerCorner : 0);
385:       } else {
386:         entriesToTransferTotal = nNonDummyGhost[0] * nNonDummyGhost[1] * stag->entriesPerElement
387:           + (dummyEnd[0]                ? nNonDummyGhost[1] * entriesPerEdge   : 0)
388:           + (dummyEnd[1]                ? nNonDummyGhost[0] * entriesPerEdge   : 0)
389:           + (dummyEnd[0] && dummyEnd[1] ?                     entriesPerCorner : 0);
390:       }
391:     }

393:     /* Allocate arrays to populate */
394:     PetscMalloc1(entriesToTransferTotal,&idxLocal);
395:     PetscMalloc1(entriesToTransferTotal,&idxGlobal);

397:     /* Counts into idxLocal/idxGlobal */
398:     count = 0;

400:     /* Here and below, we work with (i,j) describing element numbers within a neighboring rank's global ordering,
401:        to be offset by that rank's global offset,
402:        and (ighost,jghost) referring to element numbers within this ranks local (ghosted) ordering */

404:     /* Neighbor 0 (down left) */
405:     if (!star && !dummyStart[0] && !dummyStart[1]) {
406:       const PetscInt         neighbor             = 0;
407:       const PetscInt         globalOffset         = globalOffsets[stag->neighbors[neighbor]];
408:       const PetscInt * const nNeighbor            = nNeighbors[neighbor];
409:       const PetscInt entriesPerElementRowNeighbor = stag->entriesPerElement * nNeighbor[0];
410:       for (jghost = 0; jghost<ghostOffsetStart[1]; ++jghost) {
411:         const PetscInt j = nNeighbor[1] - ghostOffsetStart[1] + jghost;
412:         for (ighost = 0; ighost<ghostOffsetStart[0]; ++ighost) {
413:           const PetscInt i = nNeighbor[0] - ghostOffsetStart[0] + ighost;
414:           for (d=0; d<stag->entriesPerElement; ++d,++count) {
415:             idxGlobal[count] = globalOffset + j     *entriesPerElementRowNeighbor + i     *stag->entriesPerElement + d;
416:             idxLocal[count]  =                jghost*entriesPerElementRowGhost    + ighost*stag->entriesPerElement + d;
417:           }
418:         }
419:       }
420:     }

422:     /* Neighbor 1 (down) */
423:     if (!dummyStart[1]) {
424:       /* We may be a ghosted boundary in x, in which case the neighbor is also */
425:       const PetscInt         neighbor                     = 1;
426:       const PetscInt         globalOffset                 = globalOffsets[stag->neighbors[neighbor]];
427:       const PetscInt * const nNeighbor                    = nNeighbors[neighbor];
428:       const PetscInt         entriesPerElementRowNeighbor = entriesPerElementRow; /* same as here */
429:       for (jghost = 0; jghost<ghostOffsetStart[1]; ++jghost) {
430:         const PetscInt j = nNeighbor[1] - ghostOffsetStart[1] + jghost;
431:         for (ighost = ghostOffsetStart[0]; ighost<stag->nGhost[0]-ghostOffsetEnd[0]; ++ighost) {
432:           const PetscInt i = ighost - ghostOffsetStart[0];
433:           for (d=0; d<stag->entriesPerElement; ++d, ++count) {
434:             idxGlobal[count] = globalOffset+ j     *entriesPerElementRowNeighbor + i     *stag->entriesPerElement + d;
435:             idxLocal[count]  =               jghost*entriesPerElementRowGhost    + ighost*stag->entriesPerElement + d;
436:           }
437:         }
438:         if (dummyEnd[0]) {
439:           const PetscInt ighost = stag->nGhost[0]-ghostOffsetEnd[0];
440:           const PetscInt i = stag->n[0];
441:           for (d=0; d<stag->dof[0]; ++d, ++count) { /* Vertex */
442:             idxGlobal[count] = globalOffset + j     *entriesPerElementRowNeighbor + i     *stag->entriesPerElement + d;
443:             idxLocal[count]  =                jghost*entriesPerElementRowGhost    + ighost*stag->entriesPerElement + d;
444:           }
445:           for (d=0; d<stag->dof[1]; ++d,++count) { /* Edge */
446:             idxGlobal[count] = globalOffset + j     *entriesPerElementRowNeighbor + i     *stag->entriesPerElement + stag->dof[0]                + d;
447:             idxLocal[count]  =                jghost*entriesPerElementRowGhost    + ighost*stag->entriesPerElement + stag->dof[0] + stag->dof[1] + d;
448:           }
449:         }
450:       }
451:     }

453:     /* Neighbor 2 (down right) */
454:     if (!star && !dummyEnd[0] && !dummyStart[1]) {
455:       const PetscInt         neighbor             = 2;
456:       const PetscInt         globalOffset         = globalOffsets[stag->neighbors[neighbor]];
457:       const PetscInt * const nNeighbor            = nNeighbors[neighbor];
458:       const PetscInt entriesPerElementRowNeighbor = nNeighbor[0]*stag->entriesPerElement + (nextToDummyEnd[0] ? entriesPerEdge : 0);
459:       for (jghost = 0; jghost<ghostOffsetStart[1]; ++jghost) {
460:         const PetscInt j = nNeighbor[1] - ghostOffsetStart[1] + jghost;
461:         for (i=0; i<ghostOffsetEnd[0]; ++i) {
462:           const PetscInt ighost = stag->nGhost[0] - ghostOffsetEnd[0] + i;
463:           for (d=0; d<stag->entriesPerElement; ++d, ++count) {
464:             idxGlobal[count] = globalOffset + j     *entriesPerElementRowNeighbor + i     *stag->entriesPerElement + d;
465:             idxLocal[count]  =                jghost*entriesPerElementRowGhost    + ighost*stag->entriesPerElement + d;
466:           }
467:         }
468:       }
469:     }

471:     /* Neighbor 3 (left) */
472:     if (!dummyStart[0]) {
473:       /* Our neighbor is never a ghosted boundary in x, but we may be
474:          Here, we may be a ghosted boundary in y and thus so will our neighbor be */
475:       const PetscInt         neighbor             = 3;
476:       const PetscInt         globalOffset         = globalOffsets[stag->neighbors[neighbor]];
477:       const PetscInt * const nNeighbor            = nNeighbors[neighbor];
478:       const PetscInt entriesPerElementRowNeighbor = nNeighbor[0]*stag->entriesPerElement;
479:       for (jghost = ghostOffsetStart[1]; jghost<stag->nGhost[1] - ghostOffsetEnd[1]; ++jghost) {
480:         const PetscInt j = jghost-ghostOffsetStart[1];
481:         for (ighost = 0; ighost<ghostOffsetStart[0]; ++ighost) {
482:           const PetscInt i = nNeighbor[0] - ghostOffsetStart[0] + ighost;
483:           for (d=0; d<stag->entriesPerElement; ++d, ++count) {
484:             idxGlobal[count] = globalOffset + j     *entriesPerElementRowNeighbor + i     *stag->entriesPerElement + d;
485:             idxLocal[count]  =                jghost*entriesPerElementRowGhost    + ighost*stag->entriesPerElement + d;
486:           }
487:         }
488:       }
489:       if (dummyEnd[1]) {
490:         const PetscInt jghost = stag->nGhost[1]-ghostOffsetEnd[1];
491:         const PetscInt j = stag->n[1];
492:         for (ighost = 0; ighost<ghostOffsetStart[0]; ++ighost) {
493:           const PetscInt i = nNeighbor[0] - ghostOffsetStart[0] + ighost;
494:           for (d=0; d<entriesPerEdge; ++d, ++count) { /* only vertices and horizontal edge (which are the first dof) */
495:             idxGlobal[count] = globalOffset + j     *entriesPerElementRowNeighbor + i     *entriesPerEdge          + d; /* i moves by edge here */
496:             idxLocal[count]  =                jghost*entriesPerElementRowGhost    + ighost*stag->entriesPerElement + d;
497:           }
498:         }
499:       }
500:     }

502:     /* Interior/Resident-here-in-global elements ("Neighbor 4" - same rank)
503:        *including* entries from boundary dummy elements */
504:     {
505:       const PetscInt neighbor     = 4;
506:       const PetscInt globalOffset = globalOffsets[stag->neighbors[neighbor]];
507:       for (j=0; j<stag->n[1]; ++j) {
508:         const PetscInt jghost = j + ghostOffsetStart[1];
509:         for (i=0; i<stag->n[0]; ++i) {
510:           const PetscInt ighost = i + ghostOffsetStart[0];
511:           for (d=0; d<stag->entriesPerElement; ++d, ++count) {
512:             idxGlobal[count] = globalOffset + j     *entriesPerElementRow       + i     *stag->entriesPerElement + d;
513:             idxLocal[count]  =                jghost*entriesPerElementRowGhost  + ighost*stag->entriesPerElement + d;
514:           }
515:         }
516:         if (dummyEnd[0]) {
517:           const PetscInt ighost = i + ghostOffsetStart[0];
518:           i = stag->n[0];
519:           for (d=0; d<stag->dof[0]; ++d, ++count) { /* vertex first */
520:             idxGlobal[count] = globalOffset + j     *entriesPerElementRow      + i     *stag->entriesPerElement + d;
521:             idxLocal[count]  =                jghost*entriesPerElementRowGhost + ighost*stag->entriesPerElement + d;
522:           }
523:           for (d=0; d<stag->dof[1]; ++d, ++count) { /* then left ege (skipping bottom edge) */
524:             idxGlobal[count] = globalOffset + j     *entriesPerElementRow       + i     *stag->entriesPerElement + stag->dof[0]                + d;
525:             idxLocal[count]  =                jghost*entriesPerElementRowGhost  + ighost*stag->entriesPerElement + stag->dof[0] + stag->dof[1] + d;
526:           }
527:         }
528:       }
529:       if (dummyEnd[1]) {
530:         const PetscInt jghost = j + ghostOffsetStart[1];
531:         j = stag->n[1];
532:         for (i=0; i<stag->n[0]; ++i) {
533:           const PetscInt ighost = i + ghostOffsetStart[0];
534:           for (d=0; d<entriesPerEdge; ++d, ++count) { /* vertex and bottom edge (which are the first entries) */
535:             idxGlobal[count] = globalOffset + j     *entriesPerElementRow +      i*entriesPerEdge               + d; /* note i increment by entriesPerEdge */
536:             idxLocal[count]  =                jghost*entriesPerElementRowGhost + ighost*stag->entriesPerElement + d;
537:           }
538:         }
539:         if (dummyEnd[0]) {
540:           const PetscInt ighost = i + ghostOffsetStart[0];
541:           i = stag->n[0];
542:           for (d=0; d<entriesPerCorner; ++d, ++count) { /* vertex only */
543:             idxGlobal[count] = globalOffset + j     *entriesPerElementRow       + i     *entriesPerEdge          + d; /* note i increment by entriesPerEdge */
544:             idxLocal[count]  =                jghost*entriesPerElementRowGhost  + ighost*stag->entriesPerElement + d;
545:           }
546:         }
547:       }
548:     }

550:     /* Neighbor 5 (right) */
551:     if (!dummyEnd[0]) {
552:       /* We can never be right boundary, but we may be a top boundary, along with the right neighbor */
553:       const PetscInt         neighbor             = 5;
554:       const PetscInt         globalOffset         = globalOffsets[stag->neighbors[neighbor]];
555:       const PetscInt * const nNeighbor            = nNeighbors[neighbor];
556:       const PetscInt entriesPerElementRowNeighbor = nNeighbor[0]*stag->entriesPerElement + (nextToDummyEnd[0] ? entriesPerEdge : 0);
557:       for (jghost = ghostOffsetStart[1];jghost<stag->nGhost[1]-ghostOffsetEnd[1]; ++jghost) {
558:         const PetscInt j = jghost-ghostOffsetStart[1];
559:         for (i=0; i<ghostOffsetEnd[0]; ++i) {
560:           const PetscInt ighost = stag->nGhost[0] - ghostOffsetEnd[0] + i;
561:           for (d=0; d<stag->entriesPerElement; ++d, ++count) {
562:             idxGlobal[count] = globalOffset+ j     *entriesPerElementRowNeighbor + i     *stag->entriesPerElement + d;
563:             idxLocal[count]  =               jghost*entriesPerElementRowGhost    + ighost*stag->entriesPerElement + d;
564:           }
565:         }
566:       }
567:       if (dummyEnd[1]) {
568:         const PetscInt jghost = stag->nGhost[1]-ghostOffsetEnd[1];
569:         const PetscInt j = nNeighbor[1];
570:         for (i=0; i<ghostOffsetEnd[0]; ++i) {
571:           const PetscInt ighost = stag->nGhost[0] - ghostOffsetEnd[0] + i;
572:           for (d=0; d<entriesPerEdge; ++d, ++count) { /* only vertices and horizontal edge (which are the first dof) */
573:             idxGlobal[count] = globalOffset+ j     *entriesPerElementRowNeighbor    + i     *entriesPerEdge    + d; /* Note i increment by entriesPerEdge */
574:             idxLocal[count]  =               jghost*entriesPerElementRowGhost + ighost*stag->entriesPerElement + d;
575:           }
576:         }
577:       }
578:     }

580:     /* Neighbor 6 (up left) */
581:     if (!star && !dummyStart[0] && !dummyEnd[1]) {
582:       /* We can never be a top boundary, but our neighbor may be
583:        We may be a right boundary, but our neighbor cannot be */
584:       const PetscInt         neighbor             = 6;
585:       const PetscInt         globalOffset         = globalOffsets[stag->neighbors[neighbor]];
586:       const PetscInt * const nNeighbor            = nNeighbors[neighbor];
587:       const PetscInt entriesPerElementRowNeighbor = nNeighbor[0]*stag->entriesPerElement;
588:       for (j=0; j<ghostOffsetEnd[1]; ++j) {
589:         const PetscInt jghost = stag->nGhost[1] - ghostOffsetEnd[1] + j;
590:         for (ighost = 0; ighost<ghostOffsetStart[0]; ++ighost) {
591:           const PetscInt i = nNeighbor[0] - ghostOffsetStart[0] + ighost;
592:           for (d=0; d<stag->entriesPerElement; ++d, ++count) {
593:             idxGlobal[count] = globalOffset+ j     *entriesPerElementRowNeighbor + i     *stag->entriesPerElement + d;
594:             idxLocal[count]  =               jghost*entriesPerElementRowGhost    + ighost*stag->entriesPerElement + d;
595:           }
596:         }
597:       }
598:     }

600:     /* Neighbor 7 (up) */
601:     if (!dummyEnd[1]) {
602:       /* We cannot be the last rank in y, though our neighbor may be
603:        We may be the last rank in x, in which case our neighbor is also */
604:       const PetscInt         neighbor             = 7;
605:       const PetscInt         globalOffset         = globalOffsets[stag->neighbors[neighbor]];
606:       const PetscInt * const nNeighbor            = nNeighbors[neighbor];
607:       const PetscInt entriesPerElementRowNeighbor = entriesPerElementRow; /* same as here */
608:       for (j=0; j<ghostOffsetEnd[1]; ++j) {
609:         const PetscInt jghost = stag->nGhost[1] - ghostOffsetEnd[1] + j;
610:         for (ighost = ghostOffsetStart[0]; ighost<stag->nGhost[0]-ghostOffsetEnd[0]; ++ighost) {
611:           const PetscInt i = ighost - ghostOffsetStart[0];
612:           for (d=0; d<stag->entriesPerElement; ++d, ++count) {
613:             idxGlobal[count] = globalOffset + j     *entriesPerElementRowNeighbor + i     *stag->entriesPerElement + d;
614:             idxLocal[count]  =                jghost*entriesPerElementRowGhost    + ighost*stag->entriesPerElement + d;
615:           }
616:         }
617:         if (dummyEnd[0]) {
618:           const PetscInt ighost = stag->nGhost[0]-ghostOffsetEnd[0];
619:           const PetscInt i = nNeighbor[0];
620:           for (d=0; d<stag->dof[0]; ++d, ++count) { /* Vertex */
621:             idxGlobal[count] = globalOffset+ j     *entriesPerElementRowNeighbor + i     *stag->entriesPerElement + d;
622:             idxLocal[count]  =               jghost*entriesPerElementRowGhost    + ighost*stag->entriesPerElement + d;
623:           }
624:           for (d=0; d<stag->dof[1]; ++d, ++count) { /* Edge */
625:             idxGlobal[count] = globalOffset+ j     *entriesPerElementRowNeighbor + i     *stag->entriesPerElement + stag->dof[0]                + d;
626:             idxLocal[count]  =               jghost*entriesPerElementRowGhost    + ighost*stag->entriesPerElement + stag->dof[0] + stag->dof[1] + d;
627:           }
628:         }
629:       }
630:     }

632:     /* Neighbor 8 (up right) */
633:     if (!star && !dummyEnd[0] && !dummyEnd[1]) {
634:       /* We can never be a ghosted boundary
635:          Our neighbor may be a top boundary, a right boundary, or both */
636:       const PetscInt         neighbor             = 8;
637:       const PetscInt         globalOffset         = globalOffsets[stag->neighbors[neighbor]];
638:       const PetscInt * const nNeighbor            = nNeighbors[neighbor];
639:       const PetscInt entriesPerElementRowNeighbor = nNeighbor[0]*stag->entriesPerElement + (nextToDummyEnd[0] ? entriesPerEdge : 0);
640:       for (j=0; j<ghostOffsetEnd[1]; ++j) {
641:         const PetscInt jghost = stag->nGhost[1] - ghostOffsetEnd[1] + j;
642:         for (i=0; i<ghostOffsetEnd[0]; ++i) {
643:           const PetscInt ighost = stag->nGhost[0] - ghostOffsetEnd[0] + i;
644:           for (d=0; d<stag->entriesPerElement; ++d, ++count) {
645:             idxGlobal[count] = globalOffset + j     *entriesPerElementRowNeighbor + i     *stag->entriesPerElement + d;
646:             idxLocal[count]  =                jghost*entriesPerElementRowGhost    + ighost*stag->entriesPerElement + d;
647:           }
648:         }
649:       }
650:     }

652: #if defined(PETSC_USE_DEBUG)
653:     if (count != entriesToTransferTotal) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Number of entries computed in gtol (%d) is not as expected (%d)",count,entriesToTransferTotal);
654: #endif

656:     /* Create Local and Global ISs (transferring pointer ownership) */
657:     ISCreateGeneral(PetscObjectComm((PetscObject)dm),entriesToTransferTotal,idxLocal,PETSC_OWN_POINTER,&isLocal);
658:     ISCreateGeneral(PetscObjectComm((PetscObject)dm),entriesToTransferTotal,idxGlobal,PETSC_OWN_POINTER,&isGlobal);

660:     /* Create stag->gtol. The order is computed as PETSc ordering, and doesn't include dummy entries */
661:     {
662:       Vec local,global;
663:       VecCreateMPIWithArray(PetscObjectComm((PetscObject)dm),1,stag->entries,PETSC_DECIDE,NULL,&global);
664:       VecCreateSeqWithArray(PETSC_COMM_SELF,stag->entriesPerElement,stag->entriesGhost,NULL,&local);
665:       VecScatterCreate(global,isGlobal,local,isLocal,&stag->gtol);
666:       VecDestroy(&global);
667:       VecDestroy(&local);
668:     }

670:     /* Destroy ISs */
671:     ISDestroy(&isLocal);
672:     ISDestroy(&isGlobal);

674:     /* Next, we iterate over the local entries  again, in local order, recording the global entry to which each maps,
675:        or -1 if there is none */
676:     PetscMalloc1(stag->entriesGhost,&idxGlobalAll);

678:     countAll = 0;

680:     /* Loop over rows 1/3 : down */
681:     if (!dummyStart[1]) {
682:       for (jghost=0; jghost<ghostOffsetStart[1]; ++jghost) {

684:         /* Loop over columns 1/3 : down left */
685:         if (!star && !dummyStart[0]) {
686:           const PetscInt         neighbor     = 0;
687:           const PetscInt         globalOffset = globalOffsets[stag->neighbors[neighbor]];
688:           const PetscInt * const nNeighbor    = nNeighbors[neighbor];
689:           const PetscInt         j            = nNeighbor[1] - ghostOffsetStart[1] + jghost; /* Note: this is actually the same value for the whole row of ranks below, so recomputing it for the next two ranks is redundant, and one could even get rid of jghost entirely if desired */
690:           const PetscInt         eprNeighbor  = nNeighbor[0] * stag->entriesPerElement;
691:           for (i=nNeighbor[0]-ghostOffsetStart[0]; i<nNeighbor[0]; ++i) {
692:             for (d=0; d<stag->entriesPerElement; ++d, ++countAll) {
693:               idxGlobalAll[countAll] = globalOffset + j*eprNeighbor + i*stag->entriesPerElement + d;
694:             }
695:           }
696:         } else {
697:           /* Down Left dummies */
698:           for (ighost=0; ighost<ghostOffsetStart[0]; ++ighost) {
699:             for (d=0; d<stag->entriesPerElement; ++d, ++countAll) {
700:               idxGlobalAll[countAll] = -1;
701:             }
702:           }
703:         }

705:         /* Loop over columns 2/3 : down middle */
706:         {
707:           const PetscInt         neighbor     = 1;
708:           const PetscInt         globalOffset = globalOffsets[stag->neighbors[neighbor]];
709:           const PetscInt * const nNeighbor    = nNeighbors[neighbor];
710:           const PetscInt         j            = nNeighbor[1] - ghostOffsetStart[1] + jghost;
711:           const PetscInt         eprNeighbor  = entriesPerElementRow; /* same as here */
712:           for (i=0; i<nNeighbor[0]; ++i) {
713:             for (d=0; d<stag->entriesPerElement; ++d, ++countAll) {
714:               idxGlobalAll[countAll] = globalOffset + j*eprNeighbor + i*stag->entriesPerElement + d;
715:             }
716:           }
717:         }

719:         /* Loop over columns 3/3 : down right */
720:         if (!star && !dummyEnd[0]) {
721:           const PetscInt         neighbor     = 2;
722:           const PetscInt         globalOffset = globalOffsets[stag->neighbors[neighbor]];
723:           const PetscInt * const nNeighbor    = nNeighbors[neighbor];
724:           const PetscInt         j            = nNeighbor[1] - ghostOffsetStart[1] + jghost;
725:           const PetscInt         eprNeighbor  = nNeighbor[0]*stag->entriesPerElement + (nextToDummyEnd[0] ? entriesPerEdge : 0);
726:           for (i=0; i<ghostOffsetEnd[0]; ++i) {
727:             for (d=0; d<stag->entriesPerElement; ++d, ++countAll) {
728:               idxGlobalAll[countAll] = globalOffset + j*eprNeighbor + i*stag->entriesPerElement + d;
729:             }
730:           }
731:         } else if (dummyEnd[0]) {
732:           /* Down right partial dummy elements, living on the *down* rank */
733:           const PetscInt         neighbor     = 1;
734:           const PetscInt         globalOffset = globalOffsets[stag->neighbors[neighbor]];
735:           const PetscInt * const nNeighbor    = nNeighbors[neighbor];
736:           const PetscInt         j            = nNeighbor[1] - ghostOffsetStart[1] + jghost;
737:           const PetscInt         eprNeighbor  = entriesPerElementRow; /* same as here */
738:           PetscInt dGlobal;
739:           i = nNeighbor[0];
740:           for (d=0,dGlobal=0; d<stag->dof[0]; ++d, ++dGlobal, ++countAll) {
741:             idxGlobalAll[countAll] = globalOffset + j*eprNeighbor + i*stag->entriesPerElement + dGlobal;
742:           }
743:           for (; d<stag->dof[0] + stag->dof[1]; ++d, ++countAll) {
744:             idxGlobalAll[countAll] = -1; /* dummy down edge point */
745:           }
746:           for (; d<stag->dof[0] + 2*stag->dof[1]; ++d, ++dGlobal, ++countAll) {
747:             idxGlobalAll[countAll] = globalOffset + j*eprNeighbor + i*stag->entriesPerElement + dGlobal;
748:           }
749:           for (; d<stag->entriesPerElement; ++d, ++countAll) {
750:             idxGlobalAll[countAll] = -1; /* dummy element point */
751:           }
752:           ++i;
753:           for (; i<nNeighbor[0]+ghostOffsetEnd[0]; ++i) {
754:             for (d=0; d<stag->entriesPerElement; ++d, ++countAll) {
755:               idxGlobalAll[countAll] = -1;
756:             }
757:           }
758:         } else {
759:           /* Down Right dummies */
760:           for (ighost=0; ighost<ghostOffsetEnd[0]; ++ighost) {
761:             for (d=0; d<stag->entriesPerElement; ++d, ++countAll) {
762:               idxGlobalAll[countAll] = -1;
763:             }
764:           }
765:         }
766:       }
767:     } else {
768:       /* Down dummies row */
769:       for (jghost=0; jghost<ghostOffsetStart[1]; ++jghost) {
770:         for (ighost=0; ighost<stag->nGhost[0]; ++ighost) {
771:           for (d=0; d<stag->entriesPerElement; ++d, ++countAll) {
772:             idxGlobalAll[countAll] = -1;
773:           }
774:         }
775:       }
776:     }

778:     /* Loop over rows 2/3 : center */
779:     for (j=0; j<stag->n[1]; ++j) {
780:       /* Loop over columns 1/3 : left */
781:       if (!dummyStart[0]) {
782:         const PetscInt         neighbor     = 3;
783:         const PetscInt         globalOffset = globalOffsets[stag->neighbors[neighbor]];
784:         const PetscInt * const nNeighbor    = nNeighbors[neighbor];
785:         const PetscInt         eprNeighbor  = nNeighbor[0] * stag->entriesPerElement;
786:         for (i=nNeighbor[0]-ghostOffsetStart[0]; i<nNeighbor[0]; ++i) {
787:           for (d=0; d<stag->entriesPerElement; ++d, ++countAll) {
788:             idxGlobalAll[countAll] = globalOffset + j*eprNeighbor + i*stag->entriesPerElement + d;
789:           }
790:         }
791:       } else {
792:         /* (Middle) Left dummies */
793:         for (ighost=0; ighost < ghostOffsetStart[0]; ++ighost) {
794:           for (d=0; d<stag->entriesPerElement; ++d, ++countAll) {
795:             idxGlobalAll[countAll] = -1;
796:           }
797:         }
798:       }

800:       /* Loop over columns 2/3 : here (the "neighbor" is ourselves, here) */
801:       {
802:         const PetscInt neighbor     = 4;
803:         const PetscInt globalOffset = globalOffsets[stag->neighbors[neighbor]];
804:         const PetscInt eprNeighbor  = entriesPerElementRow; /* same as here (obviously) */
805:         for (i=0; i<stag->n[0]; ++i) {
806:           for (d=0; d<stag->entriesPerElement; ++d, ++countAll) {
807:             idxGlobalAll[countAll] = globalOffset + j*eprNeighbor + i*stag->entriesPerElement + d;
808:           }
809:         }
810:       }

812:       /* Loop over columns 3/3 : right */
813:       if (!dummyEnd[0]) {
814:           const PetscInt         neighbor     = 5;
815:           const PetscInt         globalOffset = globalOffsets[stag->neighbors[neighbor]];
816:           const PetscInt * const nNeighbor    = nNeighbors[neighbor];
817:           const PetscInt         eprNeighbor  = nNeighbor[0]*stag->entriesPerElement + (nextToDummyEnd[0] ? entriesPerEdge : 0);
818:         for (i=0; i<ghostOffsetEnd[0]; ++i) {
819:           for (d=0; d<stag->entriesPerElement; ++d, ++countAll) {
820:             idxGlobalAll[countAll] = globalOffset + j*eprNeighbor + i*stag->entriesPerElement + d;
821:           }
822:         }
823:       } else {
824:         /* -1's for right layer of partial dummies, living on *this* rank */
825:         const PetscInt         neighbor     = 4;
826:         const PetscInt         globalOffset = globalOffsets[stag->neighbors[neighbor]];
827:         const PetscInt * const nNeighbor    = nNeighbors[neighbor];
828:         const PetscInt         eprNeighbor  = entriesPerElementRow; /* same as here (obviously) */
829:         PetscInt dGlobal;
830:         i = nNeighbor[0];
831:         for (d=0,dGlobal=0; d<stag->dof[0]; ++d, ++dGlobal, ++countAll) {
832:           idxGlobalAll[countAll] = globalOffset + j*eprNeighbor + i*stag->entriesPerElement + dGlobal;
833:         }
834:         for (; d<stag->dof[0] + stag->dof[1]; ++d, ++countAll) {
835:           idxGlobalAll[countAll] = -1; /* dummy down edge point */
836:         }
837:         for (; d<stag->dof[0] + 2*stag->dof[1]; ++d, ++dGlobal, ++countAll) {
838:           idxGlobalAll[countAll] = globalOffset + j*eprNeighbor + i*stag->entriesPerElement + dGlobal;
839:         }
840:         for (; d<stag->entriesPerElement; ++d, ++countAll) {
841:           idxGlobalAll[countAll] = -1; /* dummy element point */
842:         }
843:         ++i;
844:         for (; i<nNeighbor[0]+ghostOffsetEnd[0]; ++i) {
845:           for (d=0; d<stag->entriesPerElement; ++d, ++countAll) {
846:             idxGlobalAll[countAll] = -1;
847:           }
848:         }
849:       }
850:     }

852:     /* Loop over rows 3/3 : up */
853:     if (!dummyEnd[1]) {
854:       for (j=0; j<ghostOffsetEnd[1]; ++j) {

856:         /* Loop over columns 1/3 : up left */
857:         if (!star && !dummyStart[0]) {
858:           const PetscInt         neighbor     = 6;
859:           const PetscInt         globalOffset = globalOffsets[stag->neighbors[neighbor]];
860:           const PetscInt * const nNeighbor    = nNeighbors[neighbor];
861:           const PetscInt         eprNeighbor  = nNeighbor[0] * stag->entriesPerElement;
862:           for (i=nNeighbor[0]-ghostOffsetStart[0]; i<nNeighbor[0]; ++i) {
863:             for (d=0; d<stag->entriesPerElement; ++d, ++countAll) {
864:               idxGlobalAll[countAll] = globalOffset + j*eprNeighbor + i*stag->entriesPerElement + d;
865:             }
866:           }
867:         } else {
868:           /* Up Left dummies */
869:           for (ighost=0; ighost<ghostOffsetStart[0]; ++ighost) {
870:             for (d=0; d<stag->entriesPerElement; ++d, ++countAll) {
871:               idxGlobalAll[countAll] = -1;
872:             }
873:           }
874:         }

876:         /* Loop over columns 2/3 : up */
877:         {
878:           const PetscInt         neighbor     = 7;
879:           const PetscInt         globalOffset = globalOffsets[stag->neighbors[neighbor]];
880:           const PetscInt * const nNeighbor    = nNeighbors[neighbor];
881:           const PetscInt         eprNeighbor  = entriesPerElementRow; /* Same as here */
882:           for (i=0; i<nNeighbor[0]; ++i) {
883:             for (d=0; d<stag->entriesPerElement; ++d, ++countAll) {
884:               idxGlobalAll[countAll] = globalOffset + j*eprNeighbor + i*stag->entriesPerElement + d;
885:             }
886:           }
887:         }

889:         /* Loop over columns 3/3 : up right */
890:         if (!star && !dummyEnd[0]) {
891:           const PetscInt         neighbor     = 8;
892:           const PetscInt         globalOffset = globalOffsets[stag->neighbors[neighbor]];
893:           const PetscInt * const nNeighbor    = nNeighbors[neighbor];
894:           const PetscInt         eprNeighbor  = nNeighbor[0]*stag->entriesPerElement + (nextToDummyEnd[0] ? entriesPerEdge : 0);
895:           for (i=0; i<ghostOffsetEnd[0]; ++i) {
896:             for (d=0; d<stag->entriesPerElement; ++d, ++countAll) {
897:               idxGlobalAll[countAll] = globalOffset + j*eprNeighbor + i*stag->entriesPerElement + d;
898:             }
899:           }
900:         } else if (dummyEnd[0]) {
901:           /* -1's for right layer of partial dummies, living on rank above */
902:           const PetscInt         neighbor     = 7;
903:           const PetscInt         globalOffset = globalOffsets[stag->neighbors[neighbor]];
904:           const PetscInt * const nNeighbor    = nNeighbors[neighbor];
905:           const PetscInt         eprNeighbor  = entriesPerElementRow; /* Same as here */
906:           PetscInt dGlobal;
907:           i = nNeighbor[0];
908:           for (d=0,dGlobal=0; d<stag->dof[0]; ++d, ++dGlobal, ++countAll) {
909:             idxGlobalAll[countAll] = globalOffset + j*eprNeighbor + i*stag->entriesPerElement + dGlobal;
910:           }
911:           for (; d<stag->dof[0] + stag->dof[1]; ++d, ++countAll) {
912:             idxGlobalAll[countAll] = -1; /* dummy down edge point */
913:           }
914:           for (; d<stag->dof[0] + 2*stag->dof[1]; ++d, ++dGlobal, ++countAll) {
915:             idxGlobalAll[countAll] = globalOffset + j*eprNeighbor + i*stag->entriesPerElement + dGlobal;
916:           }
917:           for (; d<stag->entriesPerElement; ++d, ++countAll) {
918:             idxGlobalAll[countAll] = -1; /* dummy element point */
919:           }
920:           ++i;
921:           for (; i<nNeighbor[0]+ghostOffsetEnd[0]; ++i) {
922:             for (d=0; d<stag->entriesPerElement; ++d, ++countAll) {
923:               idxGlobalAll[countAll] = -1;
924:             }
925:           }
926:         } else {
927:           /* Up Right dummies */
928:           for (ighost=0; ighost<ghostOffsetEnd[0]; ++ighost) {
929:             for (d=0; d<stag->entriesPerElement; ++d, ++countAll) {
930:               idxGlobalAll[countAll] = -1;
931:             }
932:           }
933:         }
934:       }
935:     } else {
936:       j = stag->n[1];
937:       /* Top layer of partial dummies */

939:       /* up left partial dummies layer : Loop over columns 1/3 : living on *left* neighbor */
940:       if (!dummyStart[0]) {
941:         const PetscInt         neighbor     = 3;
942:         const PetscInt         globalOffset = globalOffsets[stag->neighbors[neighbor]];
943:         const PetscInt * const nNeighbor    = nNeighbors[neighbor];
944:         const PetscInt         eprNeighbor  = nNeighbor[0] * stag->entriesPerElement;
945:         for (i=nNeighbor[0]-ghostOffsetStart[0]; i<nNeighbor[0]; ++i) {
946:           for (d=0; d<stag->dof[0] + stag->dof[1]; ++d, ++countAll) {
947:             idxGlobalAll[countAll] = globalOffset + j*eprNeighbor + i*entriesPerEdge + d; /* Note entriesPerEdge here */
948:           }
949:           for (; d<stag->entriesPerElement; ++d, ++countAll) {
950:             idxGlobalAll[countAll] = -1; /* dummy left edge and element points */
951:           }
952:         }
953:       } else {
954:         for (ighost=0; ighost<ghostOffsetStart[0]; ++ighost) {
955:           for (d=0; d<stag->entriesPerElement; ++d, ++countAll) {
956:             idxGlobalAll[countAll] = -1;
957:           }
958:         }
959:       }

961:       /* up partial dummies layer : Loop over columns 2/3 : living on *this* rank */
962:       {
963:         const PetscInt         neighbor     = 4;
964:         const PetscInt         globalOffset = globalOffsets[stag->neighbors[neighbor]];
965:         const PetscInt         eprNeighbor  = entriesPerElementRow; /* same as here (obviously) */
966:         for (i=0; i<stag->n[0]; ++i) {
967:           for (d=0; d<stag->dof[0] + stag->dof[1]; ++d, ++countAll) {
968:             idxGlobalAll[countAll] = globalOffset + j*eprNeighbor + i*entriesPerEdge + d; /* Note entriesPerEdge here */
969:           }
970:           for (; d<stag->entriesPerElement; ++d, ++countAll) {
971:             idxGlobalAll[countAll] = -1; /* dummy left edge and element points */
972:           }
973:         }
974:       }

976:       if (!dummyEnd[0]) {
977:         /* up right partial dummies layer : Loop over columns 3/3 :  living on *right* neighbor */
978:         const PetscInt         neighbor     = 5;
979:         const PetscInt         globalOffset = globalOffsets[stag->neighbors[neighbor]];
980:         const PetscInt * const nNeighbor    = nNeighbors[neighbor];
981:         const PetscInt         eprNeighbor  = nNeighbor[0]*stag->entriesPerElement + (nextToDummyEnd[0] ? entriesPerEdge : 0);
982:         for (i = 0; i<ghostOffsetEnd[0]; ++i) {
983:           for (d=0; d<stag->dof[0] + stag->dof[1]; ++d, ++countAll) {
984:             idxGlobalAll[countAll] = globalOffset + j*eprNeighbor + i*entriesPerEdge + d; /* Note entriesPerEdge here */
985:           }
986:           for (; d<stag->entriesPerElement; ++d, ++countAll) {
987:             idxGlobalAll[countAll] = -1; /* dummy left edge and element points */
988:           }
989:         }
990:       } else {
991:         /* Top partial dummies layer : Loop over columns 3/3 : right, living *here* */
992:         const PetscInt         neighbor     = 4;
993:         const PetscInt         globalOffset = globalOffsets[stag->neighbors[neighbor]];
994:         const PetscInt         eprNeighbor  = entriesPerElementRow; /* same as here (obviously) */
995:         i = stag->n[0];
996:         for (d=0; d<stag->dof[0]; ++d, ++countAll) { /* Note just the vertex here */
997:           idxGlobalAll[countAll] = globalOffset + j*eprNeighbor + i*entriesPerEdge + d; /* Note entriesPerEdge here */
998:         }
999:         for (; d<stag->entriesPerElement; ++d, ++countAll) {
1000:           idxGlobalAll[countAll] = -1; /* dummy bottom edge, left edge and element points */
1001:         }
1002:         ++i;
1003:         for (; i<stag->n[0] + ghostOffsetEnd[0]; ++i) {
1004:           for (d=0; d<stag->entriesPerElement; ++d, ++countAll) {
1005:             idxGlobalAll[countAll] = -1;
1006:           }
1007:         }
1008:       }
1009:       ++j;
1010:       /* Additional top dummy layers */
1011:       for (; j<stag->n[1]+ghostOffsetEnd[1]; ++j) {
1012:         for (ighost=0; ighost<stag->nGhost[0]; ++ighost){
1013:           for (d=0; d<stag->entriesPerElement; ++d, ++countAll) {
1014:             idxGlobalAll[countAll] = -1;
1015:           }
1016:         }
1017:       }
1018:     }

1020:     /* Create local-to-global map (in local ordering, includes maps to -1 for dummy points) */
1021:     ISLocalToGlobalMappingCreate(comm,1,stag->entriesGhost,idxGlobalAll,PETSC_OWN_POINTER,&dm->ltogmap);
1022:     PetscLogObjectParent((PetscObject)dm,(PetscObject)dm->ltogmap);
1023:   }

1025:   /* In special cases, create a dedicated injective local-to-global map */
1026:   if ((stag->boundaryType[0] == DM_BOUNDARY_PERIODIC && stag->nRanks[0] == 1) ||
1027:       (stag->boundaryType[1] == DM_BOUNDARY_PERIODIC && stag->nRanks[1] == 1)) {
1028:     DMStagPopulateLocalToGlobalInjective(dm);
1029:   }

1031:   /* Free global offsets */
1032:   PetscFree(globalOffsets);

1034:   /* Precompute location offsets */
1035:   DMStagComputeLocationOffsets_2d(dm);

1037:   /* View from Options */
1038:   DMViewFromOptions(dm,NULL,"-dm_view");

1040:   return(0);
1041: }

1043: /* adapted from da2.c */
1044: static PetscErrorCode DMStagSetUpBuildRankGrid_2d(DM dm)
1045: {
1046:   PetscErrorCode  ierr;
1047:   DM_Stag * const stag = (DM_Stag*)dm->data;
1048:   PetscInt        m,n;
1049:   PetscMPIInt     rank,size;
1050:   const PetscInt  M = stag->N[0];
1051:   const PetscInt  N = stag->N[1];

1054:   MPI_Comm_size(PetscObjectComm((PetscObject)dm),&size);
1055:   MPI_Comm_rank(PetscObjectComm((PetscObject)dm),&rank);
1056:   m = stag->nRanks[0];
1057:   n = stag->nRanks[1];
1058:   if (m != PETSC_DECIDE) {
1059:     if (m < 1) SETERRQ1(PetscObjectComm((PetscObject)dm),PETSC_ERR_ARG_OUTOFRANGE,"Non-positive number of ranks in X direction: %D",m);
1060:     else if (m > size) SETERRQ2(PetscObjectComm((PetscObject)dm),PETSC_ERR_ARG_OUTOFRANGE,"Too many ranks in X direction: %D %d",m,size);
1061:   }
1062:   if (n != PETSC_DECIDE) {
1063:     if (n < 1) SETERRQ1(PetscObjectComm((PetscObject)dm),PETSC_ERR_ARG_OUTOFRANGE,"Non-positive number of ranks in Y direction: %D",n);
1064:     else if (n > size) SETERRQ2(PetscObjectComm((PetscObject)dm),PETSC_ERR_ARG_OUTOFRANGE,"Too many ranks in Y direction: %D %d",n,size);
1065:   }
1066:   if (m == PETSC_DECIDE || n == PETSC_DECIDE) {
1067:     if (n != PETSC_DECIDE) {
1068:       m = size/n;
1069:     } else if (m != PETSC_DECIDE) {
1070:       n = size/m;
1071:     } else {
1072:       /* try for squarish distribution */
1073:       m = (PetscInt)(0.5 + PetscSqrtReal(((PetscReal)M)*((PetscReal)size)/((PetscReal)N)));
1074:       if (!m) m = 1;
1075:       while (m > 0) {
1076:         n = size/m;
1077:         if (m*n == size) break;
1078:         m--;
1079:       }
1080:       if (M > N && m < n) {PetscInt _m = m; m = n; n = _m;}
1081:     }
1082:     if (m*n != size) SETERRQ(PetscObjectComm((PetscObject)dm),PETSC_ERR_PLIB,"Unable to create partition, check the size of the communicator and input m and n ");
1083:   } else if (m*n != size) SETERRQ2(PetscObjectComm((PetscObject)dm),PETSC_ERR_ARG_OUTOFRANGE,"Given Bad partition. Product of sizes (%D) does not equal communicator size (%d)",m*n,size);
1084:   if (M < m) SETERRQ2(PetscObjectComm((PetscObject)dm),PETSC_ERR_ARG_OUTOFRANGE,"Partition in x direction is too fine! %D %D",M,m);
1085:   if (N < n) SETERRQ2(PetscObjectComm((PetscObject)dm),PETSC_ERR_ARG_OUTOFRANGE,"Partition in y direction is too fine! %D %D",N,n);
1086:   stag->nRanks[0] = m;
1087:   stag->nRanks[1] = n;
1088:   return(0);
1089: }

1091: static PetscErrorCode DMStagSetUpBuildNeighbors_2d(DM dm)
1092: {
1093:   PetscErrorCode  ierr;
1094:   DM_Stag * const stag = (DM_Stag*)dm->data;
1095:   PetscInt        d,i;
1096:   PetscBool       per[2],first[2],last[2];
1097:   PetscInt        neighborRank[9][2],r[2],n[2];
1098:   const PetscInt  dim = 2;

1101:   for (d=0; d<dim; ++d) if (stag->boundaryType[d] != DM_BOUNDARY_NONE && stag->boundaryType[d] != DM_BOUNDARY_PERIODIC && stag->boundaryType[d] != DM_BOUNDARY_GHOSTED) SETERRQ1(PETSC_COMM_WORLD,PETSC_ERR_SUP,"Neighbor determination not implemented for %s",DMBoundaryTypes[stag->boundaryType[d]]);

1103:   /* Assemble some convenience variables */
1104:   for (d=0; d<dim; ++d) {
1105:     per[d]   = (PetscBool)(stag->boundaryType[d] == DM_BOUNDARY_PERIODIC);
1106:     first[d] = stag->firstRank[d];
1107:     last[d]  = stag->lastRank[d];
1108:     r[d]     = stag->rank[d];
1109:     n[d]     = stag->nRanks[d];
1110:   }

1112:   /* First, compute the position in the rank grid for all neighbors */
1113:   neighborRank[0][0]  = first[0] ? (per[0] ?  n[0]-1 : -1) : r[0] - 1; /* left  down */
1114:   neighborRank[0][1]  = first[1] ? (per[1] ?  n[1]-1 : -1) : r[1] - 1;

1116:   neighborRank[1][0] =                                      r[0]    ; /*       down */
1117:   neighborRank[1][1] = first[1] ? (per[1] ?  n[1]-1 : -1) : r[1] - 1;

1119:   neighborRank[2][0] = last[0]  ? (per[0] ?  0      : -1) : r[0] + 1; /* right down */
1120:   neighborRank[2][1] = first[1] ? (per[1] ?  n[1]-1 : -1) : r[1] - 1;

1122:   neighborRank[3][0] = first[0] ? (per[0] ?  n[0]-1 : -1) : r[0] - 1; /* left       */
1123:   neighborRank[3][1] =                                      r[1]    ;

1125:   neighborRank[4][0] =                                      r[0]    ; /*            */
1126:   neighborRank[4][1] =                                      r[1]    ;

1128:   neighborRank[5][0] = last[0]  ? (per[0] ?  0      : -1) : r[0] + 1; /* right      */
1129:   neighborRank[5][1] =                                      r[1]    ;

1131:   neighborRank[6][0] = first[0] ? (per[0] ?  n[0]-1 : -1) : r[0] - 1; /* left  up   */
1132:   neighborRank[6][1] = last[1]  ? (per[1] ?  0      : -1) : r[1] + 1;

1134:   neighborRank[7][0] =                                      r[0]    ; /*       up   */
1135:   neighborRank[7][1] = last[1]  ? (per[1] ?  0      : -1) : r[1] + 1;

1137:   neighborRank[8][0] = last[0]  ? (per[0] ?  0      : -1) : r[0] + 1; /* right up   */
1138:   neighborRank[8][1] = last[1]  ? (per[1] ?  0      : -1) : r[1] + 1;

1140:   /* Then, compute the rank of each in the linear ordering */
1141:   PetscMalloc1(9,&stag->neighbors);
1142:   for (i=0; i<9; ++i){
1143:     if  (neighborRank[i][0] >= 0 && neighborRank[i][1] >=0) {
1144:       stag->neighbors[i] = neighborRank[i][0] + n[0]*neighborRank[i][1];
1145:     } else {
1146:       stag->neighbors[i] = -1;
1147:     }
1148:   }

1150:   return(0);
1151: }

1153: static PetscErrorCode DMStagSetUpBuildGlobalOffsets_2d(DM dm,PetscInt **pGlobalOffsets)
1154: {
1155:   PetscErrorCode        ierr;
1156:   const DM_Stag * const stag = (DM_Stag*)dm->data;
1157:   PetscInt              *globalOffsets;
1158:   PetscInt              i,j,d,entriesPerEdge,count;
1159:   PetscMPIInt           size;
1160:   PetscBool             extra[2];

1163:   MPI_Comm_size(PetscObjectComm((PetscObject)dm),&size);
1164:   for (d=0; d<2; ++d) extra[d] = (PetscBool)(stag->boundaryType[d] != DM_BOUNDARY_PERIODIC); /* Extra points in global rep */
1165:   entriesPerEdge = stag->dof[0] + stag->dof[1];
1166:   PetscMalloc1(size,pGlobalOffsets);
1167:   globalOffsets = *pGlobalOffsets;
1168:   globalOffsets[0] = 0;
1169:   count = 1; /* note the count is offset by 1 here. We add the size of the previous rank */
1170:   for (j=0; j<stag->nRanks[1]-1; ++j) {
1171:     const PetscInt nnj = stag->l[1][j];
1172:     for (i=0; i<stag->nRanks[0]-1; ++i) {
1173:       const PetscInt nni = stag->l[0][i];
1174:       globalOffsets[count] = globalOffsets[count-1] + nnj*nni*stag->entriesPerElement; /* No right/top/front boundaries */
1175:       ++count;
1176:     }
1177:     {
1178:       /* i = stag->nRanks[0]-1; */
1179:       const PetscInt nni = stag->l[0][i];
1180:       globalOffsets[count] = globalOffsets[count-1] + nnj*nni*stag->entriesPerElement
1181:                              + (extra[0] ? nnj*entriesPerEdge : 0); /* Extra edges on the right */
1182:       ++count;
1183:     }
1184:   }
1185:   {
1186:     /* j = stag->nRanks[1]-1; */
1187:     const PetscInt nnj = stag->l[1][j];
1188:     for (i=0; i<stag->nRanks[0]-1; ++i) {
1189:       const PetscInt nni = stag->l[0][i];
1190:       globalOffsets[count] = globalOffsets[count-1] + nni*nnj*stag->entriesPerElement
1191:                              + (extra[1] ? nni*entriesPerEdge : 0); /* Extra edges on the top */
1192:       ++count;
1193:     }
1194:     /* Don't need to compute entries in last element */
1195:   }
1196:   return(0);
1197: }

1199: static PetscErrorCode DMStagComputeLocationOffsets_2d(DM dm)
1200: {
1201:   PetscErrorCode  ierr;
1202:   DM_Stag * const stag = (DM_Stag*)dm->data;
1203:   const PetscInt  epe = stag->entriesPerElement;
1204:   const PetscInt  epr = stag->nGhost[0]*epe;

1207:   PetscMalloc1(DMSTAG_NUMBER_LOCATIONS,&stag->locationOffsets);
1208:   stag->locationOffsets[DMSTAG_DOWN_LEFT]  = 0;
1209:   stag->locationOffsets[DMSTAG_DOWN]       = stag->locationOffsets[DMSTAG_DOWN_LEFT]  + stag->dof[0];
1210:   stag->locationOffsets[DMSTAG_DOWN_RIGHT] = stag->locationOffsets[DMSTAG_DOWN_LEFT]  + epe;
1211:   stag->locationOffsets[DMSTAG_LEFT]       = stag->locationOffsets[DMSTAG_DOWN]       + stag->dof[1];
1212:   stag->locationOffsets[DMSTAG_ELEMENT]    = stag->locationOffsets[DMSTAG_LEFT]       + stag->dof[1];
1213:   stag->locationOffsets[DMSTAG_RIGHT]      = stag->locationOffsets[DMSTAG_LEFT]       + epe;
1214:   stag->locationOffsets[DMSTAG_UP_LEFT]    = stag->locationOffsets[DMSTAG_DOWN_LEFT]  + epr;
1215:   stag->locationOffsets[DMSTAG_UP]         = stag->locationOffsets[DMSTAG_DOWN]       + epr;
1216:   stag->locationOffsets[DMSTAG_UP_RIGHT]   = stag->locationOffsets[DMSTAG_UP_LEFT]    + epe;
1217:   return(0);
1218: }

1220: PETSC_INTERN PetscErrorCode DMStagPopulateLocalToGlobalInjective_2d(DM dm)
1221: {
1222:   PetscErrorCode  ierr;
1223:   DM_Stag * const stag = (DM_Stag*)dm->data;
1224:   PetscInt        *idxLocal,*idxGlobal,*globalOffsetsRecomputed;
1225:   const PetscInt  *globalOffsets;
1226:   PetscInt        i,j,d,count,entriesPerCorner,entriesPerEdge,entriesPerElementRowGhost,entriesPerElementRow,ghostOffsetStart[2];
1227:   IS              isLocal,isGlobal;
1228:   PetscBool       dummyEnd[2];

1231:   DMStagSetUpBuildGlobalOffsets_2d(dm,&globalOffsetsRecomputed); /* note that we don't actually use all of these. An available optimization is to pass them, when available */
1232:   globalOffsets = globalOffsetsRecomputed;
1233:   PetscMalloc1(stag->entries,&idxLocal);
1234:   PetscMalloc1(stag->entries,&idxGlobal);
1235:   for (d=0; d<2; ++d) dummyEnd[d]   = (PetscBool)(stag->lastRank[d] && stag->boundaryType[d] != DM_BOUNDARY_PERIODIC);
1236:   entriesPerCorner          = stag->dof[0];
1237:   entriesPerEdge            = stag->dof[0] + stag->dof[1];
1238:   entriesPerElementRow      = stag->n[0]     *stag->entriesPerElement + (dummyEnd[0] ? entriesPerEdge : 0);
1239:   entriesPerElementRowGhost = stag->nGhost[0]*stag->entriesPerElement;
1240:   count = 0;
1241:   for (d=0; d<2; ++d) ghostOffsetStart[d] = stag->start[d] - stag->startGhost[d];
1242:   {
1243:     const PetscInt neighbor     = 4;
1244:     const PetscInt globalOffset = globalOffsets[stag->neighbors[neighbor]];
1245:     for (j=0; j<stag->n[1]; ++j) {
1246:       const PetscInt jghost = j + ghostOffsetStart[1];
1247:       for (i=0; i<stag->n[0]; ++i) {
1248:         const PetscInt ighost = i + ghostOffsetStart[0];
1249:         for (d=0; d<stag->entriesPerElement; ++d, ++count) {
1250:           idxGlobal[count] = globalOffset + j     *entriesPerElementRow       + i     *stag->entriesPerElement + d;
1251:           idxLocal[count]  =                jghost*entriesPerElementRowGhost  + ighost*stag->entriesPerElement + d;
1252:         }
1253:       }
1254:       if (dummyEnd[0]) {
1255:         const PetscInt ighost = i + ghostOffsetStart[0];
1256:         i = stag->n[0];
1257:         for (d=0; d<stag->dof[0]; ++d, ++count) { /* vertex first */
1258:           idxGlobal[count] = globalOffset + j     *entriesPerElementRow      + i     *stag->entriesPerElement + d;
1259:           idxLocal[count]  =                jghost*entriesPerElementRowGhost + ighost*stag->entriesPerElement + d;
1260:         }
1261:         for (d=0; d<stag->dof[1]; ++d, ++count) { /* then left ege (skipping bottom edge) */
1262:           idxGlobal[count] = globalOffset + j     *entriesPerElementRow       + i     *stag->entriesPerElement + stag->dof[0]                + d;
1263:           idxLocal[count]  =                jghost*entriesPerElementRowGhost  + ighost*stag->entriesPerElement + stag->dof[0] + stag->dof[1] + d;
1264:         }
1265:       }
1266:     }
1267:     if (dummyEnd[1]) {
1268:       const PetscInt jghost = j + ghostOffsetStart[1];
1269:       j = stag->n[1];
1270:       for (i=0; i<stag->n[0]; ++i) {
1271:         const PetscInt ighost = i + ghostOffsetStart[0];
1272:         for (d=0; d<entriesPerEdge; ++d, ++count) { /* vertex and bottom edge (which are the first entries) */
1273:           idxGlobal[count] = globalOffset + j     *entriesPerElementRow +      i*entriesPerEdge               + d; /* note i increment by entriesPerEdge */
1274:           idxLocal[count]  =                jghost*entriesPerElementRowGhost + ighost*stag->entriesPerElement + d;
1275:         }
1276:       }
1277:       if (dummyEnd[0]) {
1278:         const PetscInt ighost = i + ghostOffsetStart[0];
1279:         i = stag->n[0];
1280:         for (d=0; d<entriesPerCorner; ++d, ++count) { /* vertex only */
1281:           idxGlobal[count] = globalOffset + j     *entriesPerElementRow       + i     *entriesPerEdge          + d; /* note i increment by entriesPerEdge */
1282:           idxLocal[count]  =                jghost*entriesPerElementRowGhost  + ighost*stag->entriesPerElement + d;
1283:         }
1284:       }
1285:     }
1286:   }
1287:   ISCreateGeneral(PetscObjectComm((PetscObject)dm),stag->entries,idxLocal,PETSC_OWN_POINTER,&isLocal);
1288:   ISCreateGeneral(PetscObjectComm((PetscObject)dm),stag->entries,idxGlobal,PETSC_OWN_POINTER,&isGlobal);
1289:   {
1290:     Vec local,global;
1291:     VecCreateMPIWithArray(PetscObjectComm((PetscObject)dm),1,stag->entries,PETSC_DECIDE,NULL,&global);
1292:     VecCreateSeqWithArray(PETSC_COMM_SELF,stag->entriesPerElement,stag->entriesGhost,NULL,&local);
1293:     VecScatterCreate(local,isLocal,global,isGlobal,&stag->ltog_injective);
1294:     VecDestroy(&global);
1295:     VecDestroy(&local);
1296:   }
1297:   ISDestroy(&isLocal);
1298:   ISDestroy(&isGlobal);
1299:   if (globalOffsetsRecomputed) {
1300:     PetscFree(globalOffsetsRecomputed);
1301:   }
1302:   return(0);
1303: }