Actual source code: vpscat.h

petsc-3.9.4 2018-09-11

/*
     Defines the methods VecScatterBegin/End_1, _2, ...
     This is included by vpscat.c with different values for BS

     This is a terrible way of doing "templates" in C.
*/
#define PETSCMAP1_a(a,b)  a ## _ ## b
#define PETSCMAP1_b(a,b)  PETSCMAP1_a(a,b)
#define PETSCMAP1(a)      PETSCMAP1_b(a,BS)
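
The two-level expansion above is what lets the including translation unit pick the suffix: PETSCMAP1_b expands its arguments first, so BS is replaced by its current value before PETSCMAP1_a performs the ## paste, and PETSCMAP1(VecScatterBegin) becomes VecScatterBegin_1, VecScatterBegin_2, ... according to the value of BS at the point of inclusion. The standalone sketch below illustrates the idiom; Work() and main() are purely illustrative and not part of PETSc, and vpscat.c is assumed to do the equivalent #define BS / #include / #undef BS sequence once per supported block size.

    #include <stdio.h>

    #define PETSCMAP1_a(a,b)  a ## _ ## b
    #define PETSCMAP1_b(a,b)  PETSCMAP1_a(a,b)
    #define PETSCMAP1(a)      PETSCMAP1_b(a,BS)

    /* Instantiate the "template" for block size 2: PETSCMAP1(Work) -> Work_2.
       Pasting directly inside PETSCMAP1_a would instead produce Work_BS. */
    #define BS 2
    static int PETSCMAP1(Work)(int n) { return BS*n; }
    #undef BS

    /* Instantiate it again for block size 4: PETSCMAP1(Work) -> Work_4. */
    #define BS 4
    static int PETSCMAP1(Work)(int n) { return BS*n; }
    #undef BS

    int main(void)
    {
      printf("%d %d\n",Work_2(3),Work_4(3)); /* prints "6 12" */
      return 0;
    }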

PetscErrorCode PETSCMAP1(VecScatterBegin)(VecScatter ctx,Vec xin,Vec yin,InsertMode addv,ScatterMode mode)
{
  VecScatter_MPI_General *to,*from;
  PetscScalar            *xv,*yv,*svalues;
  MPI_Request            *rwaits,*swaits;
  PetscErrorCode         ierr;
  PetscInt               i,*indices,*sstarts,nrecvs,nsends,bs;
#if defined(PETSC_HAVE_VIENNACL)
  PetscBool              is_viennacltype = PETSC_FALSE;
#endif

  if (mode & SCATTER_REVERSE) {
    to     = (VecScatter_MPI_General*)ctx->fromdata;
    from   = (VecScatter_MPI_General*)ctx->todata;
    rwaits = from->rev_requests;
    swaits = to->rev_requests;
  } else {
    to     = (VecScatter_MPI_General*)ctx->todata;
    from   = (VecScatter_MPI_General*)ctx->fromdata;
    rwaits = from->requests;
    swaits = to->requests;
  }
  bs      = to->bs;
  svalues = to->values;
  nrecvs  = from->n;
  nsends  = to->n;
  indices = to->indices;
  sstarts = to->starts;
#if defined(PETSC_HAVE_VIENNACL)
  PetscObjectTypeCompareAny((PetscObject)xin,&is_viennacltype,VECSEQVIENNACL,VECMPIVIENNACL,VECVIENNACL,"");
  if (is_viennacltype) {
    VecGetArrayRead(xin,(const PetscScalar**)&xv);
  } else
#endif
#if defined(PETSC_HAVE_VECCUDA)
  {
    VecCUDAAllocateCheckHost(xin);
    if (xin->valid_GPU_array == PETSC_OFFLOAD_GPU) {
      if (xin->spptr && ctx->spptr) {
        VecCUDACopyFromGPUSome_Public(xin,(PetscCUDAIndices)ctx->spptr);
      } else {
        VecCUDACopyFromGPU(xin);
      }
    }
    xv = *((PetscScalar**)xin->data);
  }
#else
  {
    VecGetArrayRead(xin,(const PetscScalar**)&xv);
  }
#endif

  if (xin != yin) {VecGetArray(yin,&yv);}
  else yv = xv;

  if (!(mode & SCATTER_LOCAL)) {
    if (!from->use_readyreceiver && !to->sendfirst && !to->use_alltoallv && !to->use_window) {
      /* post receives since they were not previously posted */
      if (nrecvs) {MPI_Startall_irecv(from->starts[nrecvs]*bs,nrecvs,rwaits);}
    }

#if defined(PETSC_HAVE_MPI_ALLTOALLW)  && !defined(PETSC_USE_64BIT_INDICES)
    if (to->use_alltoallw && addv == INSERT_VALUES) {
      MPI_Alltoallw(xv,to->wcounts,to->wdispls,to->types,yv,from->wcounts,from->wdispls,from->types,PetscObjectComm((PetscObject)ctx));
    } else
#endif
    if (ctx->packtogether || to->use_alltoallv || to->use_window) {
      /* this version packs all the messages together and sends them, e.g. when -vecscatter_packtogether is used */
      PETSCMAP1(Pack)(sstarts[nsends],indices,xv,svalues,bs);
      if (to->use_alltoallv) {
        MPI_Alltoallv(to->values,to->counts,to->displs,MPIU_SCALAR,from->values,from->counts,from->displs,MPIU_SCALAR,PetscObjectComm((PetscObject)ctx));
      } else if (to->use_window) {
        PetscInt cnt;

        MPI_Win_fence(0,from->window);
        for (i=0; i<nsends; i++) {
          cnt  = bs*(to->starts[i+1]-to->starts[i]);
          MPI_Put(to->values+bs*to->starts[i],cnt,MPIU_SCALAR,to->procs[i],bs*to->winstarts[i],cnt,MPIU_SCALAR,from->window);
        }
      } else if (nsends) {
        MPI_Startall_isend(to->starts[to->n]*bs,nsends,swaits);
      }
    } else {
      if (to->sharedspace) {
        /* Pack the send data into my shared memory buffer --- this is the normal forward scatter */
        PETSCMAP1(Pack)(to->sharedcnt,to->sharedspaceindices,xv,to->sharedspace,bs);
      } else {
        /* Pack the send data into the receivers' shared memory buffers --- this is the normal backward scatter */
        for (i=0; i<to->msize; i++) {
          if (to->sharedspacesoffset && to->sharedspacesoffset[i] > -1) {
            PETSCMAP1(Pack)(to->sharedspacestarts[i+1] - to->sharedspacestarts[i],to->sharedspaceindices + to->sharedspacestarts[i],xv,&to->sharedspaces[i][bs*to->sharedspacesoffset[i]],bs);
          }
        }
      }
      /* this version packs and sends one at a time */
      for (i=0; i<nsends; i++) {
        PETSCMAP1(Pack)(sstarts[i+1]-sstarts[i],indices + sstarts[i],xv,svalues + bs*sstarts[i],bs);
        MPI_Start_isend((sstarts[i+1]-sstarts[i])*bs,swaits+i);
      }
    }

    if (!from->use_readyreceiver && to->sendfirst && !to->use_alltoallv && !to->use_window) {
      /* post receives since they were not previously posted */
      if (nrecvs) {MPI_Startall_irecv(from->starts[nrecvs]*bs,nrecvs,rwaits);}
    }
  }

  /* take care of local scatters */
  if (to->local.n) {
    if (to->local.is_copy && addv == INSERT_VALUES) {
      if (yv != xv || from->local.copy_start !=  to->local.copy_start) {
        PetscMemcpy(yv + from->local.copy_start,xv + to->local.copy_start,to->local.copy_length);
      }
    } else {
      if (xv == yv && addv == INSERT_VALUES && to->local.nonmatching_computed) {
        /* only copy entries that do not share identical memory locations */
        PETSCMAP1(Scatter)(to->local.n_nonmatching,to->local.slots_nonmatching,xv,from->local.slots_nonmatching,yv,addv,bs);
      } else {
        PETSCMAP1(Scatter)(to->local.n,to->local.vslots,xv,from->local.vslots,yv,addv,bs);
      }
    }
  }
  VecRestoreArrayRead(xin,(const PetscScalar**)&xv);
  if (xin != yin) {VecRestoreArray(yin,&yv);}
  return(0);
}
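
Throughout VecScatterBegin above, PETSCMAP1(Pack)(count,indices,xv,buffer,bs) gathers count blocks of bs consecutive scalars from xv into a contiguous send or shared-memory buffer; the bs*sstarts[i] offsets into svalues show that the destination is filled block after block. The routine below is only a generic sketch of that contract, under the assumption that each indices[i] already addresses the first scalar of its block; the name PackSketch is hypothetical, and the real Pack_1, Pack_2, ... are hand-specialized per BS elsewhere in PETSc.

    /* Sketch of the packing contract: gather n blocks of bs consecutive
       scalars, the i-th block starting at x[indices[i]], into the contiguous
       buffer y. PetscInt/PetscScalar are the usual PETSc typedefs. */
    static void PackSketch(PetscInt n,const PetscInt *indices,const PetscScalar *x,PetscScalar *y,PetscInt bs)
    {
      PetscInt i,j;
      for (i=0; i<n; i++) {
        for (j=0; j<bs; j++) y[j] = x[indices[i]+j];
        y += bs;
      }
    }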

/* --------------------------------------------------------------------------------------*/

PetscErrorCode PETSCMAP1(VecScatterEnd)(VecScatter ctx,Vec xin,Vec yin,InsertMode addv,ScatterMode mode)
{
  VecScatter_MPI_General *to,*from;
  PetscScalar            *rvalues,*yv;
  PetscErrorCode         ierr;
  PetscInt               nrecvs,nsends,*indices,count,*rstarts,bs;
  PetscMPIInt            imdex;
  MPI_Request            *rwaits,*swaits;
  MPI_Status             xrstatus,*rstatus,*sstatus;

  if (mode & SCATTER_LOCAL) return(0);
  VecGetArray(yin,&yv);

  to      = (VecScatter_MPI_General*)ctx->todata;
  from    = (VecScatter_MPI_General*)ctx->fromdata;
  rwaits  = from->requests;
  swaits  = to->requests;
  sstatus = to->sstatus;    /* sstatus and rstatus are always stored in to */
  rstatus = to->rstatus;
  if (mode & SCATTER_REVERSE) {
    to     = (VecScatter_MPI_General*)ctx->fromdata;
    from   = (VecScatter_MPI_General*)ctx->todata;
    rwaits = from->rev_requests;
    swaits = to->rev_requests;
  }
  bs      = from->bs;
  rvalues = from->values;
  nrecvs  = from->n;
  nsends  = to->n;
  indices = from->indices;
  rstarts = from->starts;

  if (ctx->packtogether || (to->use_alltoallw && (addv != INSERT_VALUES)) || (to->use_alltoallv && !to->use_alltoallw) || to->use_window) {
    if (to->use_window) {MPI_Win_fence(0,from->window);}
    else if (nrecvs && !to->use_alltoallv) {MPI_Waitall(nrecvs,rwaits,rstatus);}
    PETSCMAP1(UnPack)(from->starts[from->n],from->values,indices,yv,addv,bs);
  } else if (!to->use_alltoallw) {
    PetscMPIInt i;
    MPI_Barrier(PetscObjectComm((PetscObject)ctx));

    /* unpack one at a time */
    count = nrecvs;
    while (count) {
      if (ctx->reproduce) {
        imdex = count - 1;
        MPI_Wait(rwaits+imdex,&xrstatus);
      } else {
        MPI_Waitany(nrecvs,rwaits,&imdex,&xrstatus);
      }
      /* unpack receives into our local space */
      PETSCMAP1(UnPack)(rstarts[imdex+1] - rstarts[imdex],rvalues + bs*rstarts[imdex],indices + rstarts[imdex],yv,addv,bs);
      count--;
    }
    /* handle processes that share the same shared memory communicator */
    if (from->sharedspace) {
      /* unpack the data from my shared memory buffer --- this is the normal backward scatter */
      PETSCMAP1(UnPack)(from->sharedcnt,from->sharedspace,from->sharedspaceindices,yv,addv,bs);
    } else {
      /* unpack the data from each of my sending partners' shared memory buffers --- this is the normal forward scatter */
      for (i=0; i<from->msize; i++) {
        if (from->sharedspacesoffset && from->sharedspacesoffset[i] > -1) {
          PETSCMAP1(UnPack)(from->sharedspacestarts[i+1] - from->sharedspacestarts[i],&from->sharedspaces[i][bs*from->sharedspacesoffset[i]],from->sharedspaceindices + from->sharedspacestarts[i],yv,addv,bs);
        }
      }
    }
    MPI_Barrier(PetscObjectComm((PetscObject)ctx));
  }
  if (from->use_readyreceiver) {
    if (nrecvs) {MPI_Startall_irecv(from->starts[nrecvs]*bs,nrecvs,rwaits);}
    MPI_Barrier(PetscObjectComm((PetscObject)ctx));
  }

  /* wait on sends */
  if (nsends  && !to->use_alltoallv  && !to->use_window) {MPI_Waitall(nsends,swaits,sstatus);}
  VecRestoreArray(yin,&yv);
  return(0);
}
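
VecScatterEnd reverses that packing on the receive side: PETSCMAP1(UnPack)(count,buffer,indices,yv,addv,bs) scatters count blocks of bs scalars from a contiguous receive or shared-memory buffer into y, honoring the InsertMode. Again a sketch only, with a hypothetical name and reduced mode handling (the real UnPack_BS routines also support further modes such as MAX_VALUES):

    /* Sketch of the unpacking contract: the i-th block of bs scalars in the
       buffer x is written into y starting at y[indices[i]], either
       overwriting (INSERT_VALUES) or accumulating (ADD_VALUES). */
    static PetscErrorCode UnPackSketch(PetscInt n,const PetscScalar *x,const PetscInt *indices,PetscScalar *y,InsertMode addv,PetscInt bs)
    {
      PetscInt i,j;
      for (i=0; i<n; i++) {
        if (addv == INSERT_VALUES) {
          for (j=0; j<bs; j++) y[indices[i]+j] = x[j];
        } else if (addv == ADD_VALUES) {
          for (j=0; j<bs; j++) y[indices[i]+j] += x[j];
        } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot handle insert mode %D",addv);
        x += bs;
      }
      return 0;
    }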

/* -------------------------------------------------------- */
 #include <../src/vec/vec/impls/node/vecnodeimpl.h>

PetscErrorCode PETSCMAP1(VecScatterBeginMPI3Node)(VecScatter ctx,Vec xin,Vec yin,InsertMode addv,ScatterMode mode)
{
  VecScatter_MPI_General *to,*from;
  PetscScalar            *xv,*yv,*svalues;
  MPI_Request            *rwaits,*swaits;
  PetscErrorCode         ierr;
  PetscInt               i,*indices,*sstarts,nrecvs,nsends,bs;

  if (mode & SCATTER_REVERSE) {
    to     = (VecScatter_MPI_General*)ctx->fromdata;
    from   = (VecScatter_MPI_General*)ctx->todata;
    rwaits = from->rev_requests;
    swaits = to->rev_requests;
  } else {
    to     = (VecScatter_MPI_General*)ctx->todata;
    from   = (VecScatter_MPI_General*)ctx->fromdata;
    rwaits = from->requests;
    swaits = to->requests;
  }
  bs      = to->bs;
  svalues = to->values;
  nrecvs  = from->n;
  nsends  = to->n;
  indices = to->indices;
  sstarts = to->starts;

  VecGetArrayRead(xin,(const PetscScalar**)&xv);

  if (xin != yin) {VecGetArray(yin,&yv);}
  else yv = xv;

  if (!(mode & SCATTER_LOCAL)) {
    if (!from->use_readyreceiver && !to->sendfirst && !to->use_alltoallv && !to->use_window) {
      /* post receives since they were not previously posted */
      if (nrecvs) {MPI_Startall_irecv(from->starts[nrecvs]*bs,nrecvs,rwaits);}
    }

#if defined(PETSC_HAVE_MPI_ALLTOALLW)  && !defined(PETSC_USE_64BIT_INDICES)
    if (to->use_alltoallw && addv == INSERT_VALUES) {
      MPI_Alltoallw(xv,to->wcounts,to->wdispls,to->types,yv,from->wcounts,from->wdispls,from->types,PetscObjectComm((PetscObject)ctx));
    } else
#endif
    if (ctx->packtogether || to->use_alltoallv || to->use_window) {
      /* this version packs all the messages together and sends them, e.g. when -vecscatter_packtogether is used */
      PETSCMAP1(Pack)(sstarts[nsends],indices,xv,svalues,bs);
      if (to->use_alltoallv) {
        MPI_Alltoallv(to->values,to->counts,to->displs,MPIU_SCALAR,from->values,from->counts,from->displs,MPIU_SCALAR,PetscObjectComm((PetscObject)ctx));
      } else if (to->use_window) {
        PetscInt cnt;

        MPI_Win_fence(0,from->window);
        for (i=0; i<nsends; i++) {
          cnt  = bs*(to->starts[i+1]-to->starts[i]);
          MPI_Put(to->values+bs*to->starts[i],cnt,MPIU_SCALAR,to->procs[i],bs*to->winstarts[i],cnt,MPIU_SCALAR,from->window);
        }
      } else if (nsends) {
        MPI_Startall_isend(to->starts[to->n]*bs,nsends,swaits);
      }
    } else {
      /* this version packs and sends one at a time */
      for (i=0; i<nsends; i++) {
        PETSCMAP1(Pack)(sstarts[i+1]-sstarts[i],indices + sstarts[i],xv,svalues + bs*sstarts[i],bs);
        MPI_Start_isend((sstarts[i+1]-sstarts[i])*bs,swaits+i);
      }
    }

    if (!from->use_readyreceiver && to->sendfirst && !to->use_alltoallv && !to->use_window) {
      /* post receives since they were not previously posted */
      if (nrecvs) {MPI_Startall_irecv(from->starts[nrecvs]*bs,nrecvs,rwaits);}
    }
  }

  /* take care of local scatters */
  if (to->local.n) {
    if (to->local.is_copy && addv == INSERT_VALUES) {
      if (yv != xv || from->local.copy_start !=  to->local.copy_start) {
        PetscMemcpy(yv + from->local.copy_start,xv + to->local.copy_start,to->local.copy_length);
      }
    } else {
      if (xv == yv && addv == INSERT_VALUES && to->local.nonmatching_computed) {
        /* only copy entries that do not share identical memory locations */
        PETSCMAP1(Scatter)(to->local.n_nonmatching,to->local.slots_nonmatching,xv,from->local.slots_nonmatching,yv,addv,bs);
      } else {
        PETSCMAP1(Scatter)(to->local.n,to->local.vslots,xv,from->local.vslots,yv,addv,bs);
      }
    }
  }
  VecRestoreArrayRead(xin,(const PetscScalar**)&xv);
  if (xin != yin) {VecRestoreArray(yin,&yv);}
  return(0);
}

#define PETSC_MEMSHARE_SAFE
PetscErrorCode PETSCMAP1(VecScatterEndMPI3Node)(VecScatter ctx,Vec xin,Vec yin,InsertMode addv,ScatterMode mode)
{
  VecScatter_MPI_General *to,*from;
  PetscScalar            *rvalues,*yv;
  const PetscScalar      *xv;
  PetscErrorCode         ierr;
  PetscInt               nrecvs,nsends,*indices,count,*rstarts,bs;
  PetscMPIInt            imdex;
  MPI_Request            *rwaits,*swaits;
  MPI_Status             xrstatus,*rstatus,*sstatus;
  Vec_Node               *vnode;
  PetscInt               cnt,*idx,*idy;
  MPI_Comm               comm,mscomm,veccomm;
  PetscCommShared        scomm;

  if (mode & SCATTER_LOCAL) return(0);

  PetscObjectGetComm((PetscObject)ctx,&comm);
  PetscCommSharedGet(comm,&scomm);
  PetscCommSharedGetComm(scomm,&mscomm);

  VecGetArray(yin,&yv);

  to      = (VecScatter_MPI_General*)ctx->todata;
  from    = (VecScatter_MPI_General*)ctx->fromdata;
  rwaits  = from->requests;
  swaits  = to->requests;
  sstatus = to->sstatus;    /* sstatus and rstatus are always stored in to */
  rstatus = to->rstatus;
  if (mode & SCATTER_REVERSE) {
    to     = (VecScatter_MPI_General*)ctx->fromdata;
    from   = (VecScatter_MPI_General*)ctx->todata;
    rwaits = from->rev_requests;
    swaits = to->rev_requests;
  }
  bs      = from->bs;
  rvalues = from->values;
  nrecvs  = from->n;
  nsends  = to->n;
  indices = from->indices;
  rstarts = from->starts;

  if (ctx->packtogether || (to->use_alltoallw && (addv != INSERT_VALUES)) || (to->use_alltoallv && !to->use_alltoallw) || to->use_window) {
    if (to->use_window) {MPI_Win_fence(0,from->window);}
    else if (nrecvs && !to->use_alltoallv) {MPI_Waitall(nrecvs,rwaits,rstatus);}
    PETSCMAP1(UnPack)(from->starts[from->n],from->values,indices,yv,addv,bs);
  } else if (!to->use_alltoallw) {
    PetscMPIInt i,xsize;
    PetscInt    k,k1;
    PetscScalar *sharedspace;

    /* unpack one at a time */
    count = nrecvs;
    while (count) {
      if (ctx->reproduce) {
        imdex = count - 1;
        MPI_Wait(rwaits+imdex,&xrstatus);
      } else {
        MPI_Waitany(nrecvs,rwaits,&imdex,&xrstatus);
      }
      /* unpack receives into our local space */
      PETSCMAP1(UnPack)(rstarts[imdex+1] - rstarts[imdex],rvalues + bs*rstarts[imdex],indices + rstarts[imdex],yv,addv,bs);
      count--;
    }

    /* handle processes that share the same shared memory communicator */
#if defined(PETSC_MEMSHARE_SAFE)
    MPI_Barrier(mscomm);
#endif

    /* check if xin is sequential */
    PetscObjectGetComm((PetscObject)xin,&veccomm);
    MPI_Comm_size(veccomm,&xsize);

    if (xsize == 1 || from->sharedspace) { /* 'from->sharedspace' indicates this core's shared memory will be written */
      /* StoP: read sequential local xvalues, then write to shared yvalues */
      PetscInt notdone = to->notdone;
      vnode = (Vec_Node*)yin->data;
      if (!vnode->win) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_NULL,"vector y must have type VECNODE with shared memory");
      if (ctx->is_duplicate) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Duplicate index is not supported");
      VecGetArrayRead(xin,&xv);

      i = 0;
      while (notdone) {
        while (i < to->msize) {
          if (to->sharedspacesoffset && to->sharedspacesoffset[i] > -1) {
            cnt = to->sharedspacestarts[i+1] - to->sharedspacestarts[i];
            idx = to->sharedspaceindices + to->sharedspacestarts[i];
            idy = idx + to->sharedcnt;

            sharedspace = vnode->winarray[i];

            if (sharedspace[-1] != yv[-1]) {
              if (PetscRealPart(sharedspace[-1] - yv[-1]) > 0.0) {
                PetscMPIInt msrank;
                MPI_Comm_rank(mscomm,&msrank);
                SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"[%d] statecnt %g > [%d] my_statecnt %g",i,PetscRealPart(sharedspace[-1]),msrank,PetscRealPart(yv[-1]));
              }
              /* the i-th core has not reached the current object statecnt yet, wait ... */
              continue;
            }

            if (addv == ADD_VALUES) {
              for (k= 0; k<cnt; k++) {
                for (k1=0; k1<bs; k1++) sharedspace[idy[k]+k1] += xv[idx[k]+k1];
              }
            } else if (addv == INSERT_VALUES) {
              for (k= 0; k<cnt; k++) {
                for (k1=0; k1<bs; k1++) sharedspace[idy[k]+k1] = xv[idx[k]+k1];
              }
            } else SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %D", addv);
            notdone--;
          }
          i++;
        }
      }
      VecRestoreArrayRead(xin,&xv);
    } else {
      /* PtoS: read shared xvalues, then write to sequential local yvalues */
      PetscInt notdone = from->notdone;

      vnode = (Vec_Node*)xin->data;
      if (!vnode->win && notdone) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_NULL,"vector x must have type VECNODE with shared memory");
      VecGetArrayRead(xin,&xv);

      i = 0;
      while (notdone) {
        while (i < from->msize) {
          if (from->sharedspacesoffset && from->sharedspacesoffset[i] > -1) {
            cnt = from->sharedspacestarts[i+1] - from->sharedspacestarts[i];
            idy = from->sharedspaceindices + from->sharedspacestarts[i]; /* recv local y indices */
            idx = idy + from->sharedcnt;

            sharedspace = vnode->winarray[i];

            if (sharedspace[-1] != xv[-1]) {
              if (PetscRealPart(sharedspace[-1] - xv[-1]) > 0.0) {
                PetscMPIInt msrank;
                MPI_Comm_rank(mscomm,&msrank);
                SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"[%d] statecnt %g > [%d] my_statecnt %g",i,PetscRealPart(sharedspace[-1]),msrank,PetscRealPart(xv[-1]));
              }
              /* the i-th core has not reached the current object statecnt yet, wait ... */
              continue;
            }

            if (addv==ADD_VALUES) {
              for (k=0; k<cnt; k++) {
                for (k1=0; k1<bs; k1++) yv[idy[k]+k1] += sharedspace[idx[k]+k1]; /* read x shared values */
              }
            } else if (addv==INSERT_VALUES){
              for (k=0; k<cnt; k++) {
                for (k1=0; k1<bs; k1++) yv[idy[k]+k1] = sharedspace[idx[k]+k1]; /* read x shared values */
              }
            } else SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %D", addv);

            notdone--;
          }
          i++;
        }
      }
      VecRestoreArrayRead(xin,&xv);
    }

    /* the output y is parallel; ensure all updates to it are complete, even though this barrier costs some performance */
    MPI_Barrier(mscomm);
  }
  if (from->use_readyreceiver) {
    if (nrecvs) {MPI_Startall_irecv(from->starts[nrecvs]*bs,nrecvs,rwaits);}
    MPI_Barrier(comm);
  }

  /* wait on sends */
  if (nsends  && !to->use_alltoallv  && !to->use_window) {MPI_Waitall(nsends,swaits,sstatus);}
  VecRestoreArray(yin,&yv);
  return(0);
}
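
The sharedspace[-1], yv[-1], and xv[-1] comparisons in VecScatterEndMPI3Node are a handshake on a state counter that the VECNODE vectors appear to keep in the scalar slot immediately before the data of each shared array (treat that layout as an assumption here): a rank only reads or writes a neighbor's shared buffer once the neighbor's counter has caught up to its own, and a counter that is already ahead is flagged as an error. The standalone sketch below only illustrates the negative-index layout trick; the helper names are hypothetical.

    #include <stdio.h>
    #include <stdlib.h>

    /* Allocate n data entries plus one hidden leading slot and return a
       pointer to the data, so that buf[-1] addresses the hidden state
       counter. Only the pointer arithmetic mirrors the code above. */
    static double *AllocWithStatecnt(size_t n)
    {
      double *raw = (double*)calloc(n+1,sizeof(double));
      return raw ? raw+1 : NULL;
    }

    static void FreeWithStatecnt(double *buf)
    {
      if (buf) free(buf-1); /* free from the true start of the allocation */
    }

    int main(void)
    {
      double *buf = AllocWithStatecnt(4);
      if (!buf) return 1;
      buf[-1] += 1.0;  /* advance the state counter */
      buf[0]   = 3.14; /* ordinary data entries start at index 0 */
      printf("statecnt %g data %g\n",buf[-1],buf[0]);
      FreeWithStatecnt(buf);
      return 0;
    }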

#undef PETSCMAP1_a
#undef PETSCMAP1_b
#undef PETSCMAP1
#undef BS
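
For context, none of the routines generated from this header are called directly by user code; the BS-specialized VecScatterBegin_*/VecScatterEnd_* pairs are installed behind the generic VecScatter interface. A minimal sketch of that public usage follows (demo_scatter is not part of PETSc, and the usual ierr/CHKERRQ error checking is omitted for brevity):

    #include <petscvec.h>

    /* Forward scatter from x to y as selected by the index sets ix and iy;
       the Begin/End split lets communication overlap with local work. */
    PetscErrorCode demo_scatter(Vec x,Vec y,IS ix,IS iy)
    {
      VecScatter ctx;

      VecScatterCreate(x,ix,y,iy,&ctx);
      VecScatterBegin(ctx,x,y,INSERT_VALUES,SCATTER_FORWARD);
      /* ... independent local computation could go here ... */
      VecScatterEnd(ctx,x,y,INSERT_VALUES,SCATTER_FORWARD);
      VecScatterDestroy(&ctx);
      return 0;
    }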