Actual source code: vpscat.h

petsc-3.13.6 2020-09-29

/*
     Defines the methods VecScatterBegin/End_1,2,......
     This is included by vpscat.c with different values for BS

     This is a terrible way of doing "templates" in C.
*/
#define PETSCMAP1_a(a,b)  a ## _ ## b
#define PETSCMAP1_b(a,b)  PETSCMAP1_a(a,b)
#define PETSCMAP1(a)      PETSCMAP1_b(a,BS)
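/*
   For example, when the including .c file defines BS as 2, PETSCMAP1(VecScatterBegin)
   expands (through PETSCMAP1_b, which lets BS expand before the token paste in
   PETSCMAP1_a) to VecScatterBegin_2; each inclusion of this header therefore generates
   one set of routines specialized for that block size.
*/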

PetscErrorCode PETSCMAP1(VecScatterBegin)(VecScatter ctx,Vec xin,Vec yin,InsertMode addv,ScatterMode mode)
{
  VecScatter_MPI_General *to,*from;
  PetscScalar            *xv,*yv,*svalues;
  MPI_Request            *rwaits,*swaits;
  PetscErrorCode         ierr;
  PetscInt               i,*indices,*sstarts,nrecvs,nsends,bs;
#if defined(PETSC_HAVE_CUDA)
  PetscBool              is_cudatype = PETSC_FALSE;
#endif

  if (mode & SCATTER_REVERSE) {
    to     = (VecScatter_MPI_General*)ctx->fromdata;
    from   = (VecScatter_MPI_General*)ctx->todata;
    rwaits = from->rev_requests;
    swaits = to->rev_requests;
  } else {
    to     = (VecScatter_MPI_General*)ctx->todata;
    from   = (VecScatter_MPI_General*)ctx->fromdata;
    rwaits = from->requests;
    swaits = to->requests;
  }
  bs      = to->bs;
  svalues = to->values;
  nrecvs  = from->n;
  nsends  = to->n;
  indices = to->indices;
  sstarts = to->starts;
#if defined(PETSC_HAVE_CUDA)
  PetscObjectTypeCompareAny((PetscObject)xin,&is_cudatype,VECSEQCUDA,VECMPICUDA,VECCUDA,"");
  if (is_cudatype) {
    VecCUDAAllocateCheckHost(xin);
    if (xin->offloadmask == PETSC_OFFLOAD_GPU) {
      if (xin->spptr && ctx->spptr) {
        VecCUDACopyFromGPUSome_Public(xin,(PetscCUDAIndices)ctx->spptr,mode);
      } else {
        VecCUDACopyFromGPU(xin);
      }
    }
    xv = *((PetscScalar**)xin->data);
  } else
#endif
  {
    VecGetArrayRead(xin,(const PetscScalar**)&xv);
  }

  if (xin != yin) {VecGetArray(yin,&yv);}
  else yv = xv;

  if (!(mode & SCATTER_LOCAL)) {
    /* post receives since they were not previously posted */
    MPI_Startall_irecv(from->starts[nrecvs]*bs,MPIU_SCALAR,nrecvs,rwaits);

    if (to->sharedspace) {
      /* Pack the send data into my shared memory buffer --- this is the normal forward scatter */
      PETSCMAP1(Pack)(to->sharedcnt,to->sharedspaceindices,xv,to->sharedspace,bs);
    } else {
      /* Pack the send data into the receivers' shared memory buffers --- this is the normal backward scatter */
      for (i=0; i<to->msize; i++) {
        if (to->sharedspacesoffset && to->sharedspacesoffset[i] > -1) {
          PETSCMAP1(Pack)(to->sharedspacestarts[i+1] - to->sharedspacestarts[i],to->sharedspaceindices + to->sharedspacestarts[i],xv,&to->sharedspaces[i][bs*to->sharedspacesoffset[i]],bs);
        }
      }
    }
    /* this version packs and sends one at a time */
    for (i=0; i<nsends; i++) {
      PETSCMAP1(Pack)(sstarts[i+1]-sstarts[i],indices + sstarts[i],xv,svalues + bs*sstarts[i],bs);
      MPI_Start_isend((sstarts[i+1]-sstarts[i])*bs,MPIU_SCALAR,swaits+i);
    }
  }

  /* take care of local scatters */
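  /* Two cases below: when the local part has an optimized memcpy plan and the mode is
     INSERT_VALUES, the plan's contiguous runs are copied with PetscArraycpy; otherwise the
     indexed PETSCMAP1(Scatter) kernel is used (skipping identical locations when x and y alias). */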
  if (to->local.n) {
    if (to->local.memcpy_plan.optimized[0] && addv == INSERT_VALUES) {
      /* do copy when it is not a self-to-self copy */
      if (!(yv == xv && to->local.memcpy_plan.same_copy_starts)) {
        for (i=to->local.memcpy_plan.copy_offsets[0]; i<to->local.memcpy_plan.copy_offsets[1]; i++) {
          /* Do we need to handle overlapping ranges? Overlap here sounds more like a bug than a
             requirement, so we leave it alone and let PetscMemcpy's debug checking detect it. */
          PetscArraycpy(yv + from->local.memcpy_plan.copy_starts[i],xv + to->local.memcpy_plan.copy_starts[i],to->local.memcpy_plan.copy_lengths[i]);
        }
      }
    } else {
      if (xv == yv && addv == INSERT_VALUES && to->local.nonmatching_computed) {
        /* only copy entries that do not share identical memory locations */
        PETSCMAP1(Scatter)(to->local.n_nonmatching,to->local.slots_nonmatching,xv,from->local.slots_nonmatching,yv,addv,bs);
      } else {
        PETSCMAP1(Scatter)(to->local.n,to->local.vslots,xv,from->local.vslots,yv,addv,bs);
      }
    }
  }
  VecRestoreArrayRead(xin,(const PetscScalar**)&xv);
  if (xin != yin) {VecRestoreArray(yin,&yv);}
  return(0);
}

/* --------------------------------------------------------------------------------------*/

PetscErrorCode PETSCMAP1(VecScatterEnd)(VecScatter ctx,Vec xin,Vec yin,InsertMode addv,ScatterMode mode)
{
  VecScatter_MPI_General *to,*from;
  PetscScalar            *rvalues,*yv;
  PetscErrorCode         ierr;
  PetscInt               nrecvs,nsends,*indices,count,*rstarts,bs;
  PetscMPIInt            imdex;
  MPI_Request            *rwaits,*swaits;
  MPI_Status             xrstatus,*sstatus;
  PetscMPIInt            i;

  if (mode & SCATTER_LOCAL) return(0);
  VecGetArray(yin,&yv);

  to      = (VecScatter_MPI_General*)ctx->todata;
  from    = (VecScatter_MPI_General*)ctx->fromdata;
  rwaits  = from->requests;
  swaits  = to->requests;
  sstatus = to->sstatus;    /* sstatus and rstatus are always stored in to */
  if (mode & SCATTER_REVERSE) {
    to     = (VecScatter_MPI_General*)ctx->fromdata;
    from   = (VecScatter_MPI_General*)ctx->todata;
    rwaits = from->rev_requests;
    swaits = to->rev_requests;
  }
  bs      = from->bs;
  rvalues = from->values;
  nrecvs  = from->n;
  nsends  = to->n;
  indices = from->indices;
  rstarts = from->starts;
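  /* The two MPI_Barrier calls below appear to order the shared-memory traffic: the first waits
     until on-node partners have finished packing into the shared buffers during VecScatterBegin
     before we unpack them, and the second keeps any rank from starting the next pack before its
     partners have finished reading. */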


  MPI_Barrier(PetscObjectComm((PetscObject)ctx));

  /* unpack one at a time */
  count = nrecvs;
  while (count) {
    MPI_Waitany(nrecvs,rwaits,&imdex,&xrstatus);
    /* unpack receives into our local space */
    PETSCMAP1(UnPack)(rstarts[imdex+1] - rstarts[imdex],rvalues + bs*rstarts[imdex],indices + rstarts[imdex],yv,addv,bs);
    count--;
  }
  /* handle processes that share the same shared memory communicator */
  if (from->sharedspace) {
    /* unpack the data from my shared memory buffer --- this is the normal backward scatter */
    PETSCMAP1(UnPack)(from->sharedcnt,from->sharedspace,from->sharedspaceindices,yv,addv,bs);
  } else {
    /* unpack the data from each of my sending partners' shared memory buffers --- this is the normal forward scatter */
    for (i=0; i<from->msize; i++) {
      if (from->sharedspacesoffset && from->sharedspacesoffset[i] > -1) {
        PETSCMAP1(UnPack)(from->sharedspacestarts[i+1] - from->sharedspacestarts[i],&from->sharedspaces[i][bs*from->sharedspacesoffset[i]],from->sharedspaceindices + from->sharedspacestarts[i],yv,addv,bs);
      }
    }
  }
  MPI_Barrier(PetscObjectComm((PetscObject)ctx));

  /* wait on sends */
  if (nsends) {MPI_Waitall(nsends,swaits,sstatus);}
  VecRestoreArray(yin,&yv);
  return(0);
}

/* -------------------------------------------------------- */
 #include <../src/vec/vec/impls/node/vecnodeimpl.h>
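/*
   The *MPI3Node variants below assume the vectors use the Vec_Node layout from vecnodeimpl.h:
   each on-node process's array lives in an MPI-3 shared-memory window (vnode->win,
   vnode->winarray), so intra-node partners are reached through direct loads and stores while
   off-node communication still goes through MPI messages.
*/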

PetscErrorCode PETSCMAP1(VecScatterBeginMPI3Node)(VecScatter ctx,Vec xin,Vec yin,InsertMode addv,ScatterMode mode)
{
  VecScatter_MPI_General *to,*from;
  PetscScalar            *xv,*yv,*svalues;
  MPI_Request            *rwaits,*swaits;
  PetscErrorCode         ierr;
  PetscInt               i,*indices,*sstarts,nrecvs,nsends,bs;

  if (mode & SCATTER_REVERSE) {
    to     = (VecScatter_MPI_General*)ctx->fromdata;
    from   = (VecScatter_MPI_General*)ctx->todata;
    rwaits = from->rev_requests;
    swaits = to->rev_requests;
  } else {
    to     = (VecScatter_MPI_General*)ctx->todata;
    from   = (VecScatter_MPI_General*)ctx->fromdata;
    rwaits = from->requests;
    swaits = to->requests;
  }
  bs      = to->bs;
  svalues = to->values;
  nrecvs  = from->n;
  nsends  = to->n;
  indices = to->indices;
  sstarts = to->starts;

  VecGetArrayRead(xin,(const PetscScalar**)&xv);

  if (xin != yin) {VecGetArray(yin,&yv);}
  else yv = xv;

  if (!(mode & SCATTER_LOCAL)) {
    /* post receives since they were not previously posted */
    MPI_Startall_irecv(from->starts[nrecvs]*bs,MPIU_SCALAR,nrecvs,rwaits);

    /* this version packs and sends one at a time */
    for (i=0; i<nsends; i++) {
      PETSCMAP1(Pack)(sstarts[i+1]-sstarts[i],indices + sstarts[i],xv,svalues + bs*sstarts[i],bs);
      MPI_Start_isend((sstarts[i+1]-sstarts[i])*bs,MPIU_SCALAR,swaits+i);
    }
  }

  /* take care of local scatters */
  if (to->local.n) {
    if (to->local.memcpy_plan.optimized[0] && addv == INSERT_VALUES) {
      /* do copy when it is not a self-to-self copy */
      if (!(yv == xv && to->local.memcpy_plan.same_copy_starts)) {
        for (i=to->local.memcpy_plan.copy_offsets[0]; i<to->local.memcpy_plan.copy_offsets[1]; i++) {
          /* Do we need to handle overlapping ranges? Overlap here sounds more like a bug than a
             requirement, so we leave it alone and let PetscMemcpy's debug checking detect it. */
          PetscArraycpy(yv + from->local.memcpy_plan.copy_starts[i],xv + to->local.memcpy_plan.copy_starts[i],to->local.memcpy_plan.copy_lengths[i]);
        }
      }
    } else {
      if (xv == yv && addv == INSERT_VALUES && to->local.nonmatching_computed) {
        /* only copy entries that do not share identical memory locations */
        PETSCMAP1(Scatter)(to->local.n_nonmatching,to->local.slots_nonmatching,xv,from->local.slots_nonmatching,yv,addv,bs);
      } else {
        PETSCMAP1(Scatter)(to->local.n,to->local.vslots,xv,from->local.vslots,yv,addv,bs);
      }
    }
  }
  VecRestoreArrayRead(xin,(const PetscScalar**)&xv);
  if (xin != yin) {VecRestoreArray(yin,&yv);}
  return(0);
}

#define PETSC_MEMSHARE_SAFE
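/* With PETSC_MEMSHARE_SAFE defined (as above), VecScatterEndMPI3Node adds an MPI_Barrier on the
   node's shared-memory communicator before the shared windows are read or written; see the
   #if defined(PETSC_MEMSHARE_SAFE) block below. */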
PetscErrorCode PETSCMAP1(VecScatterEndMPI3Node)(VecScatter ctx,Vec xin,Vec yin,InsertMode addv,ScatterMode mode)
{
  VecScatter_MPI_General *to,*from;
  PetscScalar            *rvalues,*yv;
  const PetscScalar      *xv;
  PetscErrorCode         ierr;
  PetscInt               nrecvs,nsends,*indices,count,*rstarts,bs;
  PetscMPIInt            imdex;
  MPI_Request            *rwaits,*swaits;
  MPI_Status             xrstatus,*sstatus;
  Vec_Node               *vnode;
  PetscInt               cnt,*idx,*idy;
  MPI_Comm               comm,mscomm,veccomm;
  PetscShmComm           scomm;
  PetscMPIInt            i,xsize;
  PetscInt               k,k1;
  PetscScalar            *sharedspace;

  if (mode & SCATTER_LOCAL) return(0);

  PetscObjectGetComm((PetscObject)ctx,&comm);
  PetscShmCommGet(comm,&scomm);
  PetscShmCommGetMpiShmComm(scomm,&mscomm);

  VecGetArray(yin,&yv);

  to      = (VecScatter_MPI_General*)ctx->todata;
  from    = (VecScatter_MPI_General*)ctx->fromdata;
  rwaits  = from->requests;
  swaits  = to->requests;
  sstatus = to->sstatus;    /* sstatus and rstatus are always stored in to */
  if (mode & SCATTER_REVERSE) {
    to     = (VecScatter_MPI_General*)ctx->fromdata;
    from   = (VecScatter_MPI_General*)ctx->todata;
    rwaits = from->rev_requests;
    swaits = to->rev_requests;
  }
  bs      = from->bs;
  rvalues = from->values;
  nrecvs  = from->n;
  nsends  = to->n;
  indices = from->indices;
  rstarts = from->starts;


  /* unpack one at a time */
  count = nrecvs;
  while (count) {
    MPI_Waitany(nrecvs,rwaits,&imdex,&xrstatus);
    /* unpack receives into our local space */
    PETSCMAP1(UnPack)(rstarts[imdex+1] - rstarts[imdex],rvalues + bs*rstarts[imdex],indices + rstarts[imdex],yv,addv,bs);
    count--;
  }

  /* handle processes that share the same shared memory communicator */
#if defined(PETSC_MEMSHARE_SAFE)
  MPI_Barrier(mscomm);
#endif

  /* check if xin is sequential */
  PetscObjectGetComm((PetscObject)xin,&veccomm);
  MPI_Comm_size(veccomm,&xsize);

  if (xsize == 1 || from->sharedspace) { /* 'from->sharedspace' indicates this core's shared memory will be written */
    /* StoP: read sequential local xvalues, then write to shared yvalues */
    PetscInt notdone = to->notdone;
    vnode = (Vec_Node*)yin->data;
    if (!vnode->win) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_NULL,"vector y must have type VECNODE with shared memory");
    if (ctx->is_duplicate) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Duplicate index is not supported");
    VecGetArrayRead(xin,&xv);
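    /* Element [-1] of each shared array holds an object state counter.  The loop below spins
       until partner i's counter matches ours (sharedspace[-1] == yv[-1]), i.e. until that
       partner has entered the same scatter, before writing into its shared space; a partner
       that is ahead of us is flagged as an error.  The PtoS branch below uses the same
       handshake with the roles of x and y swapped. */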

    i = 0;
    while (notdone) {
      while (i < to->msize) {
        if (to->sharedspacesoffset && to->sharedspacesoffset[i] > -1) {
          cnt = to->sharedspacestarts[i+1] - to->sharedspacestarts[i];
          idx = to->sharedspaceindices + to->sharedspacestarts[i];
          idy = idx + to->sharedcnt;

          sharedspace = vnode->winarray[i];

          if (sharedspace[-1] != yv[-1]) {
            if (PetscRealPart(sharedspace[-1] - yv[-1]) > 0.0) {
              PetscMPIInt msrank;
              MPI_Comm_rank(mscomm,&msrank);
              SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"[%d] statecnt %g > [%d] my_statecnt %g",i,PetscRealPart(sharedspace[-1]),msrank,PetscRealPart(yv[-1]));
            }
            /* the i-th core has not reached the current object statecnt yet, wait ... */
            continue;
          }

          if (addv == ADD_VALUES) {
            for (k=0; k<cnt; k++) {
              for (k1=0; k1<bs; k1++) sharedspace[idy[k]+k1] += xv[idx[k]+k1];
            }
          } else if (addv == INSERT_VALUES) {
            for (k=0; k<cnt; k++) {
              for (k1=0; k1<bs; k1++) sharedspace[idy[k]+k1] = xv[idx[k]+k1];
            }
          } else SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %D", addv);
          notdone--;
        }
        i++;
      }
    }
    VecRestoreArrayRead(xin,&xv);
  } else {
    /* PtoS: read shared xvalues, then write to sequential local yvalues */
    PetscInt notdone = from->notdone;

    vnode = (Vec_Node*)xin->data;
    if (!vnode->win && notdone) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_NULL,"vector x must have type VECNODE with shared memory");
    VecGetArrayRead(xin,&xv);

    i = 0;
    while (notdone) {
      while (i < from->msize) {
        if (from->sharedspacesoffset && from->sharedspacesoffset[i] > -1) {
          cnt = from->sharedspacestarts[i+1] - from->sharedspacestarts[i];
          idy = from->sharedspaceindices + from->sharedspacestarts[i]; /* recv local y indices */
          idx = idy + from->sharedcnt;

          sharedspace = vnode->winarray[i];

          if (sharedspace[-1] != xv[-1]) {
            if (PetscRealPart(sharedspace[-1] - xv[-1]) > 0.0) {
              PetscMPIInt msrank;
              MPI_Comm_rank(mscomm,&msrank);
              SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"[%d] statecnt %g > [%d] my_statecnt %g",i,PetscRealPart(sharedspace[-1]),msrank,PetscRealPart(xv[-1]));
            }
            /* the i-th core has not reached the current object statecnt yet, wait ... */
            continue;
          }

          if (addv == ADD_VALUES) {
            for (k=0; k<cnt; k++) {
              for (k1=0; k1<bs; k1++) yv[idy[k]+k1] += sharedspace[idx[k]+k1]; /* read x shared values */
            }
          } else if (addv == INSERT_VALUES) {
            for (k=0; k<cnt; k++) {
              for (k1=0; k1<bs; k1++) yv[idy[k]+k1] = sharedspace[idx[k]+k1]; /* read x shared values */
            }
          } else SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %D", addv);

          notdone--;
        }
        i++;
      }
    }
    VecRestoreArrayRead(xin,&xv);
  }
  /* output y is shared across the node; make sure all partners are done before returning -- this barrier costs some performance */
  MPI_Barrier(mscomm);

  /* wait on sends */
  if (nsends) {MPI_Waitall(nsends,swaits,sstatus);}
  VecRestoreArray(yin,&yv);
  return(0);
}

#undef PETSCMAP1_a
#undef PETSCMAP1_b
#undef PETSCMAP1
#undef BS