Actual source code: vscat.c

petsc-3.9.4 2018-09-11
Report Typos and Errors

  2: /*
  3:      Code for creating scatters between vectors. This file
  4:   includes the code for scattering between sequential vectors and
  5:   some special cases for parallel scatters.
  6: */

  8:  #include <petsc/private/isimpl.h>
  9:  #include <petsc/private/vecimpl.h>

 11: #if defined(PETSC_HAVE_VECCUDA)
 12: PETSC_INTERN PetscErrorCode VecScatterCUDAIndicesCreate_PtoP(PetscInt,PetscInt*,PetscInt,PetscInt*,PetscCUDAIndices*);
 13: PETSC_INTERN PetscErrorCode VecScatterCUDAIndicesCreate_StoS(PetscInt,PetscInt,PetscInt,PetscInt,PetscInt,PetscInt*,PetscInt*,PetscCUDAIndices*);
 14: PETSC_INTERN PetscErrorCode VecScatterCUDAIndicesDestroy(PetscCUDAIndices*);
 15: PETSC_INTERN PetscErrorCode VecScatterCUDA_StoS(Vec,Vec,PetscCUDAIndices,InsertMode,ScatterMode);
 16: #endif

 18: #if defined(PETSC_USE_DEBUG)
 19: /*
 20:      Checks if any indices are less than zero and generates an error
 21: */
 22: PetscErrorCode VecScatterCheckIndices_Private(PetscInt nmax,PetscInt n,const PetscInt *idx)
 23: {
 24:   PetscInt i;

 27:   for (i=0; i<n; i++) {
 28:     if (idx[i] < 0)     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative index %D at %D location",idx[i],i);
 29:     if (idx[i] >= nmax) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index %D at %D location greater than max %D",idx[i],i,nmax);
 30:   }
 31:   return(0);
 32: }
 33: #endif

 35: /*
 36:       This is special scatter code for when the entire parallel vector is copied to each processor.

 38:    This code was written by Cameron Cooper, Occidental College, Fall 1995,
 39:    will working at ANL as a SERS student.
 40: */
 41: PetscErrorCode VecScatterBegin_MPI_ToAll(VecScatter ctx,Vec x,Vec y,InsertMode addv,ScatterMode mode)
 42: {
 44:   PetscInt       yy_n,xx_n;
 45:   PetscScalar    *xv,*yv;

 48:   VecGetLocalSize(y,&yy_n);
 49:   VecGetLocalSize(x,&xx_n);
 50:   VecGetArrayPair(x,y,&xv,&yv);

 52:   if (mode & SCATTER_REVERSE) {
 53:     PetscScalar          *xvt,*xvt2;
 54:     VecScatter_MPI_ToAll *scat = (VecScatter_MPI_ToAll*)ctx->todata;
 55:     PetscInt             i;
 56:     PetscMPIInt          *disply = scat->displx;

 58:     if (addv == INSERT_VALUES) {
 59:       PetscInt rstart,rend;
 60:       /*
 61:          copy the correct part of the local vector into the local storage of
 62:          the MPI one.  Note: this operation only makes sense if all the local
 63:          vectors have the same values
 64:       */
 65:       VecGetOwnershipRange(y,&rstart,&rend);
 66:       PetscMemcpy(yv,xv+rstart,yy_n*sizeof(PetscScalar));
 67:     } else {
 68:       MPI_Comm    comm;
 69:       PetscMPIInt rank;
 70:       PetscObjectGetComm((PetscObject)y,&comm);
 71:       MPI_Comm_rank(comm,&rank);
 72:       if (scat->work1) xvt = scat->work1;
 73:       else {
 74:         PetscMalloc1(xx_n,&xvt);
 75:         scat->work1 = xvt;
 76:       }
 77:       if (!rank) { /* I am the zeroth processor, values are accumulated here */
 78:         if   (scat->work2) xvt2 = scat->work2;
 79:         else {
 80:           PetscMalloc1(xx_n,&xvt2);
 81:           scat->work2 = xvt2;
 82:         }
 83:         MPI_Gatherv(yv,yy_n,MPIU_SCALAR,xvt2,scat->count,disply,MPIU_SCALAR,0,PetscObjectComm((PetscObject)ctx));
 84:         MPI_Reduce(xv,xvt,xx_n,MPIU_SCALAR,MPIU_SUM,0,PetscObjectComm((PetscObject)ctx));
 85:         if (addv == ADD_VALUES) {
 86:           for (i=0; i<xx_n; i++) xvt[i] += xvt2[i];
 87: #if !defined(PETSC_USE_COMPLEX)
 88:         } else if (addv == MAX_VALUES) {
 89:           for (i=0; i<xx_n; i++) xvt[i] = PetscMax(xvt[i],xvt2[i]);
 90: #endif
 91:         } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_UNKNOWN_TYPE,"Wrong insert option");
 92:         MPI_Scatterv(xvt,scat->count,disply,MPIU_SCALAR,yv,yy_n,MPIU_SCALAR,0,PetscObjectComm((PetscObject)ctx));
 93:       } else {
 94:         MPI_Gatherv(yv,yy_n,MPIU_SCALAR,0, 0,0,MPIU_SCALAR,0,PetscObjectComm((PetscObject)ctx));
 95:         MPI_Reduce(xv,xvt,xx_n,MPIU_SCALAR,MPIU_SUM,0,PetscObjectComm((PetscObject)ctx));
 96:         MPI_Scatterv(0,scat->count,disply,MPIU_SCALAR,yv,yy_n,MPIU_SCALAR,0,PetscObjectComm((PetscObject)ctx));
 97:       }
 98:     }
 99:   } else {
100:     PetscScalar          *yvt;
101:     VecScatter_MPI_ToAll *scat = (VecScatter_MPI_ToAll*)ctx->todata;
102:     PetscInt             i;
103:     PetscMPIInt          *displx = scat->displx;

105:     if (addv == INSERT_VALUES) {
106:       MPI_Allgatherv(xv,xx_n,MPIU_SCALAR,yv,scat->count,displx,MPIU_SCALAR,PetscObjectComm((PetscObject)ctx));
107:     } else {
108:       if (scat->work1) yvt = scat->work1;
109:       else {
110:         PetscMalloc1(yy_n,&yvt);
111:         scat->work1 = yvt;
112:       }
113:       MPI_Allgatherv(xv,xx_n,MPIU_SCALAR,yvt,scat->count,displx,MPIU_SCALAR,PetscObjectComm((PetscObject)ctx));
114:       if (addv == ADD_VALUES) {
115:         for (i=0; i<yy_n; i++) yv[i] += yvt[i];
116: #if !defined(PETSC_USE_COMPLEX)
117:       } else if (addv == MAX_VALUES) {
118:         for (i=0; i<yy_n; i++) yv[i] = PetscMax(yv[i],yvt[i]);
119: #endif
120:       } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_UNKNOWN_TYPE,"Wrong insert option");
121:     }
122:   }
123:   VecRestoreArrayPair(x,y,&xv,&yv);
124:   return(0);
125: }

127: PetscErrorCode VecScatterView_MPI_ToAll(VecScatter in,PetscViewer viewer)
128: {
130:   PetscBool      isascii;

133:   PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&isascii);
134:   if (isascii) {
135:     PetscViewerASCIIPrintf(viewer,"Entire parallel vector is copied to each process\n");
136:   }
137:   return(0);
138: }

140: /*
141:       This is special scatter code for when the entire parallel vector is  copied to processor 0.

143: */
144: PetscErrorCode VecScatterBegin_MPI_ToOne(VecScatter ctx,Vec x,Vec y,InsertMode addv,ScatterMode mode)
145: {
146:   PetscErrorCode    ierr;
147:   PetscMPIInt       rank;
148:   PetscInt          yy_n,xx_n;
149:   PetscScalar       *yv;
150:   const PetscScalar *xv;
151:   MPI_Comm          comm;

154:   VecGetLocalSize(y,&yy_n);
155:   VecGetLocalSize(x,&xx_n);
156:   VecGetArrayRead(x,&xv);
157:   VecGetArray(y,&yv);

159:   PetscObjectGetComm((PetscObject)x,&comm);
160:   MPI_Comm_rank(comm,&rank);

162:   /* --------  Reverse scatter; spread from processor 0 to other processors */
163:   if (mode & SCATTER_REVERSE) {
164:     PetscScalar          *yvt;
165:     VecScatter_MPI_ToAll *scat = (VecScatter_MPI_ToAll*)ctx->todata;
166:     PetscInt             i;
167:     PetscMPIInt          *disply = scat->displx;

169:     if (addv == INSERT_VALUES) {
170:       MPI_Scatterv((PetscScalar*)xv,scat->count,disply,MPIU_SCALAR,yv,yy_n,MPIU_SCALAR,0,PetscObjectComm((PetscObject)ctx));
171:     } else {
172:       if (scat->work2) yvt = scat->work2;
173:       else {
174:         PetscInt xx_nt;
175:         MPI_Allreduce(&xx_n,&xx_nt,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)y));
176:         PetscMalloc1(xx_nt,&yvt);
177:         scat->work2 = yvt;
178:       }
179:       MPI_Scatterv((PetscScalar*)xv,scat->count,disply,MPIU_SCALAR,yvt,yy_n,MPIU_SCALAR,0,PetscObjectComm((PetscObject)ctx));
180:       if (addv == ADD_VALUES) {
181:         for (i=0; i<yy_n; i++) yv[i] += yvt[i];
182: #if !defined(PETSC_USE_COMPLEX)
183:       } else if (addv == MAX_VALUES) {
184:         for (i=0; i<yy_n; i++) yv[i] = PetscMax(yv[i],yvt[i]);
185: #endif
186:       } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_UNKNOWN_TYPE,"Wrong insert option");
187:     }
188:   /* ---------  Forward scatter; gather all values onto processor 0 */
189:   } else {
190:     PetscScalar          *yvt  = 0;
191:     VecScatter_MPI_ToAll *scat = (VecScatter_MPI_ToAll*)ctx->todata;
192:     PetscInt             i;
193:     PetscMPIInt          *displx = scat->displx;

195:     if (addv == INSERT_VALUES) {
196:       MPI_Gatherv((PetscScalar*)xv,xx_n,MPIU_SCALAR,yv,scat->count,displx,MPIU_SCALAR,0,PetscObjectComm((PetscObject)ctx));
197:     } else {
198:       if (!rank) {
199:         if (scat->work1) yvt = scat->work1;
200:         else {
201:           PetscMalloc1(yy_n,&yvt);
202:           scat->work1 = yvt;
203:         }
204:       }
205:       MPI_Gatherv((PetscScalar*)xv,xx_n,MPIU_SCALAR,yvt,scat->count,displx,MPIU_SCALAR,0,PetscObjectComm((PetscObject)ctx));
206:       if (!rank) {
207:         if (addv == ADD_VALUES) {
208:           for (i=0; i<yy_n; i++) yv[i] += yvt[i];
209: #if !defined(PETSC_USE_COMPLEX)
210:         } else if (addv == MAX_VALUES) {
211:           for (i=0; i<yy_n; i++) yv[i] = PetscMax(yv[i],yvt[i]);
212: #endif
213:         }  else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_UNKNOWN_TYPE,"Wrong insert option");
214:       }
215:     }
216:   }
217:   VecRestoreArrayRead(x,&xv);
218:   VecRestoreArray(y,&yv);
219:   return(0);
220: }

222: /*
223:        The follow to are used for both VecScatterBegin_MPI_ToAll() and VecScatterBegin_MPI_ToOne()
224: */
225: PetscErrorCode VecScatterDestroy_MPI_ToAll(VecScatter ctx)
226: {
227:   VecScatter_MPI_ToAll *scat = (VecScatter_MPI_ToAll*)ctx->todata;
228:   PetscErrorCode       ierr;

231:   PetscFree(scat->work1);
232:   PetscFree(scat->work2);
233:   PetscFree3(ctx->todata,scat->count,scat->displx);
234:   return(0);
235: }

237: PetscErrorCode VecScatterDestroy_SGToSG(VecScatter ctx)
238: {

242:   PetscFree2(((VecScatter_Seq_General*)ctx->todata)->vslots,((VecScatter_Seq_General*)ctx->fromdata)->vslots);
243:   PetscFree2(ctx->todata,ctx->fromdata);
244:   return(0);
245: }

247: PetscErrorCode VecScatterDestroy_SGToSS(VecScatter ctx)
248: {

252:   PetscFree(((VecScatter_Seq_General*)ctx->fromdata)->vslots);
253:   PetscFree2(ctx->todata,ctx->fromdata);
254:   return(0);
255: }

257: PetscErrorCode VecScatterDestroy_SSToSG(VecScatter ctx)
258: {

262:   PetscFree(((VecScatter_Seq_General*)ctx->todata)->vslots);
263:   PetscFree2(ctx->todata,ctx->fromdata);
264:   return(0);
265: }

267: PetscErrorCode VecScatterDestroy_SSToSS(VecScatter ctx)
268: {

272:   PetscFree2(ctx->todata,ctx->fromdata);
273:   return(0);
274: }

276: /* -------------------------------------------------------------------------*/
277: PetscErrorCode VecScatterCopy_MPI_ToAll(VecScatter in,VecScatter out)
278: {
279:   VecScatter_MPI_ToAll *in_to = (VecScatter_MPI_ToAll*)in->todata,*sto;
280:   PetscErrorCode       ierr;
281:   PetscMPIInt          size,*count,*displx;
282:   PetscInt             i;

285:   out->ops->begin   = in->ops->begin;
286:   out->ops->end     = in->ops->end;
287:   out->ops->copy    = in->ops->copy;
288:   out->ops->destroy = in->ops->destroy;
289:   out->ops->view    = in->ops->view;

291:   MPI_Comm_size(PetscObjectComm((PetscObject)out),&size);
292:   PetscMalloc3(1,&sto,size,&count,size,&displx);
293:   sto->format = in_to->format;
294:   sto->count  = count;
295:   sto->displx = displx;
296:   for (i=0; i<size; i++) {
297:     sto->count[i]  = in_to->count[i];
298:     sto->displx[i] = in_to->displx[i];
299:   }
300:   sto->work1    = 0;
301:   sto->work2    = 0;
302:   out->todata   = (void*)sto;
303:   out->fromdata = (void*)0;
304:   return(0);
305: }

307: /* --------------------------------------------------------------------------------------*/
308: /*
309:         Scatter: sequential general to sequential general
310: */
311: PetscErrorCode VecScatterBegin_SGToSG(VecScatter ctx,Vec x,Vec y,InsertMode addv,ScatterMode mode)
312: {
313:   VecScatter_Seq_General *gen_to   = (VecScatter_Seq_General*)ctx->todata;
314:   VecScatter_Seq_General *gen_from = (VecScatter_Seq_General*)ctx->fromdata;
315:   PetscErrorCode         ierr;
316:   PetscInt               i,n = gen_from->n,*fslots,*tslots;
317:   PetscScalar            *xv,*yv;
318: #if defined(PETSC_HAVE_VECCUDA)
319:   PetscBool              is_veccuda;
320: #endif

323: #if defined(PETSC_HAVE_VECCUDA)
324:   PetscObjectTypeCompareAny((PetscObject)x,&is_veccuda,VECSEQCUDA,VECMPICUDA,VECCUDA,"");
325:   if (is_veccuda && x->valid_GPU_array == PETSC_OFFLOAD_GPU) {
326:     /* create the scatter indices if not done already */
327:     if (!ctx->spptr) {
328:       PetscInt tofirst = 0,tostep = 0,fromfirst = 0,fromstep = 0;
329:       fslots = gen_from->vslots;
330:       tslots = gen_to->vslots;
331:       VecScatterCUDAIndicesCreate_StoS(n,tofirst,fromfirst,tostep,fromstep,tslots,fslots,(PetscCUDAIndices*)&(ctx->spptr));
332:     }
333:     /* next do the scatter */
334:     VecScatterCUDA_StoS(x,y,(PetscCUDAIndices)ctx->spptr,addv,mode);
335:     return(0);
336:   }
337: #endif

339:   VecGetArrayPair(x,y,&xv,&yv);
340:   if (mode & SCATTER_REVERSE) {
341:     gen_to   = (VecScatter_Seq_General*)ctx->fromdata;
342:     gen_from = (VecScatter_Seq_General*)ctx->todata;
343:   }
344:   fslots = gen_from->vslots;
345:   tslots = gen_to->vslots;

347:   if (addv == INSERT_VALUES) {
348:     for (i=0; i<n; i++) yv[tslots[i]]  = xv[fslots[i]];
349:   } else if (addv == ADD_VALUES) {
350:     for (i=0; i<n; i++) yv[tslots[i]] += xv[fslots[i]];
351: #if !defined(PETSC_USE_COMPLEX)
352:   } else if (addv == MAX_VALUES) {
353:     for (i=0; i<n; i++) yv[tslots[i]] = PetscMax(yv[tslots[i]],xv[fslots[i]]);
354: #endif
355:   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_UNKNOWN_TYPE,"Wrong insert option");
356:   VecRestoreArrayPair(x,y,&xv,&yv);
357:   return(0);
358: }

360: /*
361:     Scatter: sequential general to sequential stride 1
362: */
363: PetscErrorCode VecScatterBegin_SGToSS_Stride1(VecScatter ctx,Vec x,Vec y,InsertMode addv,ScatterMode mode)
364: {
365:   VecScatter_Seq_Stride  *gen_to   = (VecScatter_Seq_Stride*)ctx->todata;
366:   VecScatter_Seq_General *gen_from = (VecScatter_Seq_General*)ctx->fromdata;
367:   PetscInt               i,n = gen_from->n,*fslots = gen_from->vslots;
368:   PetscErrorCode         ierr;
369:   PetscInt               first = gen_to->first;
370:   PetscScalar            *xv,*yv;
371: #if defined(PETSC_HAVE_VECCUDA)
372:   PetscBool              is_veccuda;
373: #endif

376: #if defined(PETSC_HAVE_VECCUDA)
377:   PetscObjectTypeCompareAny((PetscObject)x,&is_veccuda,VECSEQCUDA,VECMPICUDA,VECCUDA,"");
378:   if (is_veccuda && x->valid_GPU_array == PETSC_OFFLOAD_GPU) {
379:     /* create the scatter indices if not done already */
380:     if (!ctx->spptr) {
381:       PetscInt tofirst = first,tostep = 1,fromfirst = 0,fromstep = 0;
382:       PetscInt *tslots = 0;
383:       VecScatterCUDAIndicesCreate_StoS(n,tofirst,fromfirst,tostep,fromstep,tslots,fslots,(PetscCUDAIndices*)&(ctx->spptr));
384:     }
385:     /* next do the scatter */
386:     VecScatterCUDA_StoS(x,y,(PetscCUDAIndices)ctx->spptr,addv,mode);
387:     return(0);
388:   }
389: #endif

391:   VecGetArrayPair(x,y,&xv,&yv);
392:   if (mode & SCATTER_REVERSE) {
393:     xv += first;
394:     if (addv == INSERT_VALUES) {
395:       for (i=0; i<n; i++) yv[fslots[i]] = xv[i];
396:     } else if (addv == ADD_VALUES) {
397:       for (i=0; i<n; i++) yv[fslots[i]] += xv[i];
398: #if !defined(PETSC_USE_COMPLEX)
399:     } else if (addv == MAX_VALUES) {
400:       for (i=0; i<n; i++) yv[fslots[i]] = PetscMax(yv[fslots[i]],xv[i]);
401: #endif
402:     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_UNKNOWN_TYPE,"Wrong insert option");
403:   } else {
404:     yv += first;
405:     if (addv == INSERT_VALUES) {
406:       for (i=0; i<n; i++) yv[i] = xv[fslots[i]];
407:     } else if (addv == ADD_VALUES) {
408:       for (i=0; i<n; i++) yv[i] += xv[fslots[i]];
409: #if !defined(PETSC_USE_COMPLEX)
410:     } else if (addv == MAX_VALUES) {
411:       for (i=0; i<n; i++) yv[i] = PetscMax(yv[i],xv[fslots[i]]);
412: #endif
413:     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_UNKNOWN_TYPE,"Wrong insert option");
414:   }
415:   VecRestoreArrayPair(x,y,&xv,&yv);
416:   return(0);
417: }

419: /*
420:    Scatter: sequential general to sequential stride
421: */
422: PetscErrorCode VecScatterBegin_SGToSS(VecScatter ctx,Vec x,Vec y,InsertMode addv,ScatterMode mode)
423: {
424:   VecScatter_Seq_Stride  *gen_to   = (VecScatter_Seq_Stride*)ctx->todata;
425:   VecScatter_Seq_General *gen_from = (VecScatter_Seq_General*)ctx->fromdata;
426:   PetscInt               i,n = gen_from->n,*fslots = gen_from->vslots;
427:   PetscErrorCode         ierr;
428:   PetscInt               first = gen_to->first,step = gen_to->step;
429:   PetscScalar            *xv,*yv;
430: #if defined(PETSC_HAVE_VECCUDA)
431:   PetscBool              is_veccuda;
432: #endif

435: #if defined(PETSC_HAVE_VECCUDA)
436:   PetscObjectTypeCompareAny((PetscObject)x,&is_veccuda,VECSEQCUDA,VECMPICUDA,VECCUDA,"");
437:   if (is_veccuda && x->valid_GPU_array == PETSC_OFFLOAD_GPU) {
438:     /* create the scatter indices if not done already */
439:     if (!ctx->spptr) {
440:       PetscInt tofirst = first,tostep = step,fromfirst = 0,fromstep = 0;
441:       PetscInt * tslots = 0;
442:       VecScatterCUDAIndicesCreate_StoS(n,tofirst,fromfirst,tostep,fromstep,tslots,fslots,(PetscCUDAIndices*)&(ctx->spptr));
443:     }
444:     /* next do the scatter */
445:     VecScatterCUDA_StoS(x,y,(PetscCUDAIndices)ctx->spptr,addv,mode);
446:     return(0);
447:   }
448: #endif

450:   VecGetArrayPair(x,y,&xv,&yv);
451:   if (mode & SCATTER_REVERSE) {
452:     if (addv == INSERT_VALUES) {
453:       for (i=0; i<n; i++) yv[fslots[i]] = xv[first + i*step];
454:     } else if (addv == ADD_VALUES) {
455:       for (i=0; i<n; i++) yv[fslots[i]] += xv[first + i*step];
456: #if !defined(PETSC_USE_COMPLEX)
457:     } else if (addv == MAX_VALUES) {
458:       for (i=0; i<n; i++) yv[fslots[i]] = PetscMax(yv[fslots[i]],xv[first + i*step]);
459: #endif
460:     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_UNKNOWN_TYPE,"Wrong insert option");
461:   } else {
462:     if (addv == INSERT_VALUES) {
463:       for (i=0; i<n; i++) yv[first + i*step] = xv[fslots[i]];
464:     } else if (addv == ADD_VALUES) {
465:       for (i=0; i<n; i++) yv[first + i*step] += xv[fslots[i]];
466: #if !defined(PETSC_USE_COMPLEX)
467:     } else if (addv == MAX_VALUES) {
468:       for (i=0; i<n; i++) yv[first + i*step] = PetscMax(yv[first + i*step],xv[fslots[i]]);
469: #endif
470:     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_UNKNOWN_TYPE,"Wrong insert option");
471:   }
472:   VecRestoreArrayPair(x,y,&xv,&yv);
473:   return(0);
474: }

476: /*
477:     Scatter: sequential stride 1 to sequential general
478: */
479: PetscErrorCode VecScatterBegin_SSToSG_Stride1(VecScatter ctx,Vec x,Vec y,InsertMode addv,ScatterMode mode)
480: {
481:   VecScatter_Seq_Stride  *gen_from = (VecScatter_Seq_Stride*)ctx->fromdata;
482:   VecScatter_Seq_General *gen_to   = (VecScatter_Seq_General*)ctx->todata;
483:   PetscInt               i,n = gen_from->n,*tslots = gen_to->vslots;
484:   PetscErrorCode         ierr;
485:   PetscInt               first = gen_from->first;
486:   PetscScalar            *xv,*yv;
487: #if defined(PETSC_HAVE_VECCUDA)
488:   PetscBool              is_veccuda;
489: #endif

492: #if defined(PETSC_HAVE_VECCUDA)
493:   PetscObjectTypeCompareAny((PetscObject)x,&is_veccuda,VECSEQCUDA,VECMPICUDA,VECCUDA,"");
494:   if (is_veccuda && x->valid_GPU_array == PETSC_OFFLOAD_GPU) {
495:     /* create the scatter indices if not done already */
496:     if (!ctx->spptr) {
497:       PetscInt tofirst = 0,tostep = 0,fromfirst = first,fromstep = 1;
498:       PetscInt *fslots = 0;
499:       VecScatterCUDAIndicesCreate_StoS(n,tofirst,fromfirst,tostep,fromstep,tslots,fslots,(PetscCUDAIndices*)&(ctx->spptr));
500:     }
501:     /* next do the scatter */
502:     VecScatterCUDA_StoS(x,y,(PetscCUDAIndices)ctx->spptr,addv,mode);
503:     return(0);
504:   }
505: #endif

507:   VecGetArrayPair(x,y,&xv,&yv);
508:   if (mode & SCATTER_REVERSE) {
509:     yv += first;
510:     if (addv == INSERT_VALUES) {
511:       for (i=0; i<n; i++) yv[i] = xv[tslots[i]];
512:     } else  if (addv == ADD_VALUES) {
513:       for (i=0; i<n; i++) yv[i] += xv[tslots[i]];
514: #if !defined(PETSC_USE_COMPLEX)
515:     } else  if (addv == MAX_VALUES) {
516:       for (i=0; i<n; i++) yv[i] = PetscMax(yv[i],xv[tslots[i]]);
517: #endif
518:     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_UNKNOWN_TYPE,"Wrong insert option");
519:   } else {
520:     xv += first;
521:     if (addv == INSERT_VALUES) {
522:       for (i=0; i<n; i++) yv[tslots[i]] = xv[i];
523:     } else  if (addv == ADD_VALUES) {
524:       for (i=0; i<n; i++) yv[tslots[i]] += xv[i];
525: #if !defined(PETSC_USE_COMPLEX)
526:     } else  if (addv == MAX_VALUES) {
527:       for (i=0; i<n; i++) yv[tslots[i]] = PetscMax(yv[tslots[i]],xv[i]);
528: #endif
529:     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_UNKNOWN_TYPE,"Wrong insert option");
530:   }
531:   VecRestoreArrayPair(x,y,&xv,&yv);
532:   return(0);
533: }

535: /*
536:    Scatter: sequential stride to sequential general
537: */
538: PetscErrorCode VecScatterBegin_SSToSG(VecScatter ctx,Vec x,Vec y,InsertMode addv,ScatterMode mode)
539: {
540:   VecScatter_Seq_Stride  *gen_from = (VecScatter_Seq_Stride*)ctx->fromdata;
541:   VecScatter_Seq_General *gen_to   = (VecScatter_Seq_General*)ctx->todata;
542:   PetscInt               i,n = gen_from->n,*tslots = gen_to->vslots;
543:   PetscErrorCode         ierr;
544:   PetscInt               first = gen_from->first,step = gen_from->step;
545:   PetscScalar            *xv,*yv;
546: #if defined(PETSC_HAVE_VECCUDA)
547:   PetscBool              is_veccuda;
548: #endif

551: #if defined(PETSC_HAVE_VECCUDA)
552:   PetscObjectTypeCompareAny((PetscObject)x,&is_veccuda,VECSEQCUDA,VECMPICUDA,VECCUDA,"");
553:   if (is_veccuda && x->valid_GPU_array == PETSC_OFFLOAD_GPU) {
554:     /* create the scatter indices if not done already */
555:     if (!ctx->spptr) {
556:       PetscInt tofirst = 0,tostep = 0,fromfirst = first,fromstep = step;
557:       PetscInt *fslots = 0;
558:       VecScatterCUDAIndicesCreate_StoS(n,tofirst,fromfirst,tostep,fromstep,tslots,fslots,(PetscCUDAIndices*)&(ctx->spptr));
559:     }
560:     /* next do the scatter */
561:     VecScatterCUDA_StoS(x,y,(PetscCUDAIndices)ctx->spptr,addv,mode);
562:     return(0);
563:   }
564: #endif

566:   VecGetArrayPair(x,y,&xv,&yv);
567:   if (mode & SCATTER_REVERSE) {
568:     if (addv == INSERT_VALUES) {
569:       for (i=0; i<n; i++) yv[first + i*step] = xv[tslots[i]];
570:     } else if (addv == ADD_VALUES) {
571:       for (i=0; i<n; i++) yv[first + i*step] += xv[tslots[i]];
572: #if !defined(PETSC_USE_COMPLEX)
573:     } else if (addv == MAX_VALUES) {
574:       for (i=0; i<n; i++) yv[first + i*step] = PetscMax(yv[first + i*step],xv[tslots[i]]);
575: #endif
576:     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_UNKNOWN_TYPE,"Wrong insert option");
577:   } else {
578:     if (addv == INSERT_VALUES) {
579:       for (i=0; i<n; i++) yv[tslots[i]] = xv[first + i*step];
580:     } else if (addv == ADD_VALUES) {
581:       for (i=0; i<n; i++) yv[tslots[i]] += xv[first + i*step];
582: #if !defined(PETSC_USE_COMPLEX)
583:     } else if (addv == MAX_VALUES) {
584:       for (i=0; i<n; i++) yv[tslots[i]] = PetscMax(yv[tslots[i]],xv[first + i*step]);
585: #endif
586:     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_UNKNOWN_TYPE,"Wrong insert option");
587:   }
588:   VecRestoreArrayPair(x,y,&xv,&yv);
589:   return(0);
590: }

592: PetscErrorCode VecScatterView_SSToSG(VecScatter in,PetscViewer viewer)
593: {
594:   PetscErrorCode         ierr;
595:   VecScatter_Seq_Stride  *in_from = (VecScatter_Seq_Stride*)in->fromdata;
596:   VecScatter_Seq_General *in_to   = (VecScatter_Seq_General*)in->todata;
597:   PetscInt               i;
598:   PetscBool              isascii;

601:   PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&isascii);
602:   if (isascii) {
603:     PetscViewerASCIIPrintf(viewer,"Sequential stride to general scatter\n");
604:     for (i=0; i<in_to->n; i++) {
605:       PetscViewerASCIIPrintf(viewer,"%D to %D\n",in_from->first + in_from->step*i,in_to->vslots[i]);
606:     }
607:   }
608:   return(0);
609: }
610: /*
611:      Scatter: sequential stride to sequential stride
612: */
613: PetscErrorCode VecScatterBegin_SSToSS(VecScatter ctx,Vec x,Vec y,InsertMode addv,ScatterMode mode)
614: {
615:   VecScatter_Seq_Stride *gen_to   = (VecScatter_Seq_Stride*)ctx->todata;
616:   VecScatter_Seq_Stride *gen_from = (VecScatter_Seq_Stride*)ctx->fromdata;
617:   PetscInt              i,n = gen_from->n,to_first = gen_to->first,to_step = gen_to->step;
618:   PetscErrorCode        ierr;
619:   PetscInt              from_first = gen_from->first,from_step = gen_from->step;
620:   PetscScalar           *xv,*yv;
621: #if defined(PETSC_HAVE_VECCUDA)
622:   PetscBool              is_veccuda;
623: #endif

626: #if defined(PETSC_HAVE_VECCUDA)
627:   PetscObjectTypeCompareAny((PetscObject)x,&is_veccuda,VECSEQCUDA,VECMPICUDA,VECCUDA,"");
628:   if (is_veccuda && x->valid_GPU_array == PETSC_OFFLOAD_GPU) {
629:     /* create the scatter indices if not done already */
630:     if (!ctx->spptr) {
631:       PetscInt *tslots = 0,*fslots = 0;
632:       VecScatterCUDAIndicesCreate_StoS(n,to_first,from_first,to_step,from_step,tslots,fslots,(PetscCUDAIndices*)&(ctx->spptr));
633:     }
634:     /* next do the scatter */
635:     VecScatterCUDA_StoS(x,y,(PetscCUDAIndices)ctx->spptr,addv,mode);
636:     return(0);
637:   }
638: #endif

640:   VecGetArrayPair(x,y,&xv,&yv);
641:   if (mode & SCATTER_REVERSE) {
642:     from_first = gen_to->first;
643:     to_first   = gen_from->first;
644:     from_step  = gen_to->step;
645:     to_step    = gen_from->step;
646:   }

648:   if (addv == INSERT_VALUES) {
649:     if (to_step == 1 && from_step == 1) {
650:       PetscMemcpy(yv+to_first,xv+from_first,n*sizeof(PetscScalar));
651:     } else  {
652:       for (i=0; i<n; i++) yv[to_first + i*to_step] = xv[from_first+i*from_step];
653:     }
654:   } else if (addv == ADD_VALUES) {
655:     if (to_step == 1 && from_step == 1) {
656:       yv += to_first; xv += from_first;
657:       for (i=0; i<n; i++) yv[i] += xv[i];
658:     } else {
659:       for (i=0; i<n; i++) yv[to_first + i*to_step] += xv[from_first+i*from_step];
660:     }
661: #if !defined(PETSC_USE_COMPLEX)
662:   } else if (addv == MAX_VALUES) {
663:     if (to_step == 1 && from_step == 1) {
664:       yv += to_first; xv += from_first;
665:       for (i=0; i<n; i++) yv[i] = PetscMax(yv[i],xv[i]);
666:     } else {
667:       for (i=0; i<n; i++) yv[to_first + i*to_step] = PetscMax(yv[to_first + i*to_step],xv[from_first+i*from_step]);
668:     }
669: #endif
670:   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_UNKNOWN_TYPE,"Wrong insert option");
671:   VecRestoreArrayPair(x,y,&xv,&yv);
672:   return(0);
673: }

675: /* --------------------------------------------------------------------------*/


678: PetscErrorCode VecScatterCopy_SGToSG(VecScatter in,VecScatter out)
679: {
680:   PetscErrorCode         ierr;
681:   VecScatter_Seq_General *in_to   = (VecScatter_Seq_General*)in->todata,*out_to = NULL;
682:   VecScatter_Seq_General *in_from = (VecScatter_Seq_General*)in->fromdata,*out_from = NULL;

685:   out->ops->begin   = in->ops->begin;
686:   out->ops->end     = in->ops->end;
687:   out->ops->copy    = in->ops->copy;
688:   out->ops->destroy = in->ops->destroy;
689:   out->ops->view    = in->ops->view;

691:   PetscMalloc2(1,&out_to,1,&out_from);
692:   PetscMalloc2(in_to->n,&out_to->vslots,in_from->n,&out_from->vslots);
693:   out_to->n                    = in_to->n;
694:   out_to->format               = in_to->format;
695:   out_to->nonmatching_computed = PETSC_FALSE;
696:   out_to->slots_nonmatching    = 0;
697:   out_to->is_copy              = PETSC_FALSE;
698:   PetscMemcpy(out_to->vslots,in_to->vslots,(out_to->n)*sizeof(PetscInt));

700:   out_from->n                    = in_from->n;
701:   out_from->format               = in_from->format;
702:   out_from->nonmatching_computed = PETSC_FALSE;
703:   out_from->slots_nonmatching    = 0;
704:   out_from->is_copy              = PETSC_FALSE;
705:   PetscMemcpy(out_from->vslots,in_from->vslots,(out_from->n)*sizeof(PetscInt));

707:   out->todata   = (void*)out_to;
708:   out->fromdata = (void*)out_from;
709:   return(0);
710: }

712: PetscErrorCode VecScatterView_SGToSG(VecScatter in,PetscViewer viewer)
713: {
714:   PetscErrorCode         ierr;
715:   VecScatter_Seq_General *in_to   = (VecScatter_Seq_General*)in->todata;
716:   VecScatter_Seq_General *in_from = (VecScatter_Seq_General*)in->fromdata;
717:   PetscInt               i;
718:   PetscBool              isascii;

721:   PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&isascii);
722:   if (isascii) {
723:     PetscViewerASCIIPrintf(viewer,"Sequential general scatter\n");
724:     for (i=0; i<in_to->n; i++) {
725:       PetscViewerASCIIPrintf(viewer,"%D to %D\n",in_from->vslots[i],in_to->vslots[i]);
726:     }
727:   }
728:   return(0);
729: }


732: PetscErrorCode VecScatterCopy_SGToSS(VecScatter in,VecScatter out)
733: {
734:   PetscErrorCode         ierr;
735:   VecScatter_Seq_Stride  *in_to   = (VecScatter_Seq_Stride*)in->todata,*out_to = NULL;
736:   VecScatter_Seq_General *in_from = (VecScatter_Seq_General*)in->fromdata,*out_from = NULL;

739:   out->ops->begin   = in->ops->begin;
740:   out->ops->end     = in->ops->end;
741:   out->ops->copy    = in->ops->copy;
742:   out->ops->destroy = in->ops->destroy;
743:   out->ops->view    = in->ops->view;

745:   PetscMalloc2(1,&out_to,1,&out_from);
746:   PetscMalloc1(in_from->n,&out_from->vslots);
747:   out_to->n      = in_to->n;
748:   out_to->format = in_to->format;
749:   out_to->first  = in_to->first;
750:   out_to->step   = in_to->step;
751:   out_to->format = in_to->format;

753:   out_from->n                    = in_from->n;
754:   out_from->format               = in_from->format;
755:   out_from->nonmatching_computed = PETSC_FALSE;
756:   out_from->slots_nonmatching    = 0;
757:   out_from->is_copy              = PETSC_FALSE;
758:   PetscMemcpy(out_from->vslots,in_from->vslots,(out_from->n)*sizeof(PetscInt));

760:   out->todata   = (void*)out_to;
761:   out->fromdata = (void*)out_from;
762:   return(0);
763: }

765: PetscErrorCode VecScatterView_SGToSS(VecScatter in,PetscViewer viewer)
766: {
767:   PetscErrorCode         ierr;
768:   VecScatter_Seq_Stride  *in_to   = (VecScatter_Seq_Stride*)in->todata;
769:   VecScatter_Seq_General *in_from = (VecScatter_Seq_General*)in->fromdata;
770:   PetscInt               i;
771:   PetscBool              isascii;

774:   PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&isascii);
775:   if (isascii) {
776:     PetscViewerASCIIPrintf(viewer,"Sequential general scatter to stride\n");
777:     for (i=0; i<in_to->n; i++) {
778:       PetscViewerASCIIPrintf(viewer,"%D to %D\n",in_from->vslots[i],in_to->first + in_to->step*i);
779:     }
780:   }
781:   return(0);
782: }

784: /* --------------------------------------------------------------------------*/
785: /*
786:     Scatter: parallel to sequential vector, sequential strides for both.
787: */
788: PetscErrorCode VecScatterCopy_SSToSS(VecScatter in,VecScatter out)
789: {
790:   VecScatter_Seq_Stride *in_to   = (VecScatter_Seq_Stride*)in->todata,*out_to = NULL;
791:   VecScatter_Seq_Stride *in_from = (VecScatter_Seq_Stride*)in->fromdata,*out_from = NULL;
792:   PetscErrorCode        ierr;

795:   out->ops->begin   = in->ops->begin;
796:   out->ops->end     = in->ops->end;
797:   out->ops->copy    = in->ops->copy;
798:   out->ops->destroy = in->ops->destroy;
799:   out->ops->view    = in->ops->view;

801:   PetscMalloc2(1,&out_to,1,&out_from);
802:   out_to->n       = in_to->n;
803:   out_to->format  = in_to->format;
804:   out_to->first   = in_to->first;
805:   out_to->step    = in_to->step;
806:   out_to->format  = in_to->format;
807:   out_from->n     = in_from->n;
808:   out_from->format= in_from->format;
809:   out_from->first = in_from->first;
810:   out_from->step  = in_from->step;
811:   out_from->format= in_from->format;
812:   out->todata     = (void*)out_to;
813:   out->fromdata   = (void*)out_from;
814:   return(0);
815: }

817: PetscErrorCode VecScatterView_SSToSS(VecScatter in,PetscViewer viewer)
818: {
819:   VecScatter_Seq_Stride *in_to   = (VecScatter_Seq_Stride*)in->todata;
820:   VecScatter_Seq_Stride *in_from = (VecScatter_Seq_Stride*)in->fromdata;
821:   PetscErrorCode        ierr;
822:   PetscBool             isascii;

825:   PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&isascii);
826:   if (isascii) {
827:     PetscViewerASCIIPrintf(viewer,"Sequential stride count %D start %D step to start %D stride %D\n",in_to->n,in_to->first,in_to->step,in_from->first,in_from->step);
828:   }
829:   return(0);
830: }


833: #if defined(PETSC_HAVE_MPI_WIN_CREATE_FEATURE)
834: extern PetscErrorCode VecScatterCreateLocal_PtoS_MPI3(PetscInt,const PetscInt*,PetscInt,const PetscInt*,Vec,Vec,PetscInt,VecScatter);
835: extern PetscErrorCode VecScatterCreateLocal_PtoP_MPI3(PetscInt,const PetscInt*,PetscInt,const PetscInt*,Vec,Vec,PetscInt,VecScatter);
836: extern PetscErrorCode VecScatterCreateLocal_StoP_MPI3(PetscInt,const PetscInt*,PetscInt,const PetscInt*,Vec,Vec,PetscInt,VecScatter);
837: #endif

839: extern PetscErrorCode VecScatterCreateLocal_PtoS_MPI1(PetscInt,const PetscInt*,PetscInt,const PetscInt*,Vec,Vec,PetscInt,VecScatter);
840: extern PetscErrorCode VecScatterCreateLocal_PtoP_MPI1(PetscInt,const PetscInt*,PetscInt,const PetscInt*,Vec,Vec,PetscInt,VecScatter);
841: extern PetscErrorCode VecScatterCreateLocal_StoP_MPI1(PetscInt,const PetscInt*,PetscInt,const PetscInt*,Vec,Vec,PetscInt,VecScatter);

843: static PetscErrorCode GetInputISType_private(VecScatter,PetscInt,PetscInt,PetscInt*,IS*,PetscInt*,IS*);

845: /* =======================================================================*/
846: #define VEC_SEQ_ID 0
847: #define VEC_MPI_ID 1
848: #define IS_GENERAL_ID 0
849: #define IS_STRIDE_ID  1
850: #define IS_BLOCK_ID   2

852: /*
853:    Blocksizes we have optimized scatters for
854: */

856: #define VecScatterOptimizedBS(mbs) (2 <= mbs)


859: PetscErrorCode  VecScatterCreateEmpty(MPI_Comm comm,VecScatter *newctx)
860: {
861:   VecScatter     ctx;

865:   PetscHeaderCreate(ctx,VEC_SCATTER_CLASSID,"VecScatter","VecScatter","Vec",comm,VecScatterDestroy,VecScatterView);
866:   ctx->inuse               = PETSC_FALSE;
867:   ctx->beginandendtogether = PETSC_FALSE;
868:   PetscOptionsGetBool(NULL,NULL,"-vecscatter_merge",&ctx->beginandendtogether,NULL);
869:   if (ctx->beginandendtogether) {
870:     PetscInfo(ctx,"Using combined (merged) vector scatter begin and end\n");
871:   }
872:   ctx->packtogether = PETSC_FALSE;
873:   PetscOptionsGetBool(NULL,NULL,"-vecscatter_packtogether",&ctx->packtogether,NULL);
874:   if (ctx->packtogether) {
875:     PetscInfo(ctx,"Pack all messages before sending\n");
876:   }
877:   *newctx = ctx;
878:   return(0);
879: }

881: /* -------------------------------------------------- */
882: PetscErrorCode VecScatterCreate_Seq(VecScatter ctx)
883: {
884:   PetscErrorCode    ierr;
885:   PetscInt          ix_type=-1,iy_type=-1;
886:   IS                tix=NULL,tiy=NULL,ix=ctx->from_is,iy=ctx->to_is;
887:   Vec               xin=ctx->from_v;

890:   PetscObjectChangeTypeName((PetscObject)ctx,VECSCATTERSEQ);
891:   GetInputISType_private(ctx,VEC_SEQ_ID,VEC_SEQ_ID,&ix_type,&tix,&iy_type,&tiy);
892:   if (tix) ix = tix;
893:   if (tiy) iy = tiy;

895:   if (ix_type == IS_GENERAL_ID && iy_type == IS_GENERAL_ID) {
896:     PetscInt               nx,ny;
897:     const PetscInt         *idx,*idy;
898:     VecScatter_Seq_General *to = NULL,*from = NULL;

900:     ISGetLocalSize(ix,&nx);
901:     ISGetLocalSize(iy,&ny);
902:     if (nx != ny) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local scatter sizes don't match");
903:     ISGetIndices(ix,&idx);
904:     ISGetIndices(iy,&idy);
905:     PetscMalloc2(1,&to,1,&from);
906:     PetscMalloc2(nx,&to->vslots,nx,&from->vslots);
907:     to->n = nx;
908: #if defined(PETSC_USE_DEBUG)
909:     VecScatterCheckIndices_Private(ctx->to_n,ny,idy);
910: #endif
911:     PetscMemcpy(to->vslots,idy,nx*sizeof(PetscInt));
912:     from->n = nx;
913: #if defined(PETSC_USE_DEBUG)
914:     VecScatterCheckIndices_Private(ctx->from_n,nx,idx);
915: #endif
916:      PetscMemcpy(from->vslots,idx,nx*sizeof(PetscInt));
917:     to->format          = VEC_SCATTER_SEQ_GENERAL;
918:     from->format        = VEC_SCATTER_SEQ_GENERAL;
919:     ctx->todata       = (void*)to;
920:     ctx->fromdata     = (void*)from;
921:     ctx->ops->begin   = VecScatterBegin_SGToSG;
922:     ctx->ops->end     = 0;
923:     ctx->ops->destroy = VecScatterDestroy_SGToSG;
924:     ctx->ops->copy    = VecScatterCopy_SGToSG;
925:     ctx->ops->view    = VecScatterView_SGToSG;
926:     PetscInfo(xin,"Special case: sequential vector general scatter\n");
927:     goto functionend;
928:   } else if (ix_type == IS_STRIDE_ID &&  iy_type == IS_STRIDE_ID) {
929:     PetscInt              nx,ny,to_first,to_step,from_first,from_step;
930:     VecScatter_Seq_Stride *from8 = NULL,*to8 = NULL;

932:     ISGetLocalSize(ix,&nx);
933:     ISGetLocalSize(iy,&ny);
934:     if (nx != ny) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local scatter sizes don't match");
935:     ISStrideGetInfo(iy,&to_first,&to_step);
936:     ISStrideGetInfo(ix,&from_first,&from_step);
937:     PetscMalloc2(1,&to8,1,&from8);
938:     to8->n            = nx;
939:     to8->first        = to_first;
940:     to8->step         = to_step;
941:     from8->n          = nx;
942:     from8->first      = from_first;
943:     from8->step       = from_step;
944:     to8->format         = VEC_SCATTER_SEQ_STRIDE;
945:     from8->format       = VEC_SCATTER_SEQ_STRIDE;
946:     ctx->todata       = (void*)to8;
947:     ctx->fromdata     = (void*)from8;
948:     ctx->ops->begin   = VecScatterBegin_SSToSS;
949:     ctx->ops->end     = 0;
950:     ctx->ops->destroy = VecScatterDestroy_SSToSS;
951:     ctx->ops->copy    = VecScatterCopy_SSToSS;
952:     ctx->ops->view    = VecScatterView_SSToSS;
953:     PetscInfo(xin,"Special case: sequential vector stride to stride\n");
954:     goto functionend;
955:   } else if (ix_type == IS_GENERAL_ID && iy_type == IS_STRIDE_ID) {
956:     PetscInt               nx,ny,first,step;
957:     const PetscInt         *idx;
958:     VecScatter_Seq_General *from9 = NULL;
959:     VecScatter_Seq_Stride  *to9   = NULL;

961:     ISGetLocalSize(ix,&nx);
962:     ISGetIndices(ix,&idx);
963:     ISGetLocalSize(iy,&ny);
964:     ISStrideGetInfo(iy,&first,&step);
965:     if (nx != ny) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local scatter sizes don't match");
966:     PetscMalloc2(1,&to9,1,&from9);
967:     PetscMalloc1(nx,&from9->vslots);
968:     to9->n     = nx;
969:     to9->first = first;
970:     to9->step  = step;
971:     from9->n   = nx;
972: #if defined(PETSC_USE_DEBUG)
973:     VecScatterCheckIndices_Private(ctx->from_n,nx,idx);
974: #endif
975:     PetscMemcpy(from9->vslots,idx,nx*sizeof(PetscInt));
976:     ctx->todata    = (void*)to9; ctx->fromdata = (void*)from9;
977:     if (step == 1)  ctx->ops->begin = VecScatterBegin_SGToSS_Stride1;
978:     else            ctx->ops->begin = VecScatterBegin_SGToSS;
979:     ctx->ops->destroy   = VecScatterDestroy_SGToSS;
980:     ctx->ops->end       = 0;
981:     ctx->ops->copy      = VecScatterCopy_SGToSS;
982:     ctx->ops->view      = VecScatterView_SGToSS;
983:     to9->format      = VEC_SCATTER_SEQ_STRIDE;
984:     from9->format    = VEC_SCATTER_SEQ_GENERAL;
985:     PetscInfo(xin,"Special case: sequential vector general to stride\n");
986:     goto functionend;
987:   } else if (ix_type == IS_STRIDE_ID && iy_type == IS_GENERAL_ID) {
988:     PetscInt               nx,ny,first,step;
989:     const PetscInt         *idy;
990:     VecScatter_Seq_General *to10 = NULL;
991:     VecScatter_Seq_Stride  *from10 = NULL;

993:     ISGetLocalSize(ix,&nx);
994:     ISGetIndices(iy,&idy);
995:     ISGetLocalSize(iy,&ny);
996:     ISStrideGetInfo(ix,&first,&step);
997:     if (nx != ny) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local scatter sizes don't match");
998:     PetscMalloc2(1,&to10,1,&from10);
999:     PetscMalloc1(nx,&to10->vslots);
1000:     from10->n     = nx;
1001:     from10->first = first;
1002:     from10->step  = step;
1003:     to10->n       = nx;
1004: #if defined(PETSC_USE_DEBUG)
1005:     VecScatterCheckIndices_Private(ctx->to_n,ny,idy);
1006: #endif
1007:     PetscMemcpy(to10->vslots,idy,nx*sizeof(PetscInt));
1008:     ctx->todata   = (void*)to10;
1009:     ctx->fromdata = (void*)from10;
1010:     if (step == 1) ctx->ops->begin = VecScatterBegin_SSToSG_Stride1;
1011:     else           ctx->ops->begin = VecScatterBegin_SSToSG;
1012:     ctx->ops->destroy = VecScatterDestroy_SSToSG;
1013:     ctx->ops->end     = 0;
1014:     ctx->ops->copy    = 0;
1015:     ctx->ops->view    = VecScatterView_SSToSG;
1016:     to10->format   = VEC_SCATTER_SEQ_GENERAL;
1017:     from10->format = VEC_SCATTER_SEQ_STRIDE;
1018:     PetscInfo(xin,"Special case: sequential vector stride to general\n");
1019:     goto functionend;
1020:   } else {
1021:     PetscInt               nx,ny;
1022:     const PetscInt         *idx,*idy;
1023:     VecScatter_Seq_General *to11 = NULL,*from11 = NULL;
1024:     PetscBool              idnx,idny;

1026:     ISGetLocalSize(ix,&nx);
1027:     ISGetLocalSize(iy,&ny);
1028:     if (nx != ny) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local scatter sizes don't match, in %D out %D",nx,ny);

1030:     ISIdentity(ix,&idnx);
1031:     ISIdentity(iy,&idny);
1032:     if (idnx && idny) {
1033:       VecScatter_Seq_Stride *to13 = NULL,*from13 = NULL;
1034:       PetscMalloc2(1,&to13,1,&from13);
1035:       to13->n       = nx;
1036:       to13->first   = 0;
1037:       to13->step    = 1;
1038:       from13->n     = nx;
1039:       from13->first = 0;
1040:       from13->step  = 1;
1041:       to13->format    = VEC_SCATTER_SEQ_STRIDE;
1042:       from13->format  = VEC_SCATTER_SEQ_STRIDE;
1043:       ctx->todata   = (void*)to13;
1044:       ctx->fromdata = (void*)from13;
1045:       ctx->ops->begin    = VecScatterBegin_SSToSS;
1046:       ctx->ops->end      = 0;
1047:       ctx->ops->destroy  = VecScatterDestroy_SSToSS;
1048:       ctx->ops->copy     = VecScatterCopy_SSToSS;
1049:       ctx->ops->view     = VecScatterView_SSToSS;
1050:       PetscInfo(xin,"Special case: sequential copy\n");
1051:       goto functionend;
1052:     }

1054:     ISGetIndices(iy,&idy);
1055:     ISGetIndices(ix,&idx);
1056:     PetscMalloc2(1,&to11,1,&from11);
1057:     PetscMalloc2(nx,&to11->vslots,nx,&from11->vslots);
1058:     to11->n = nx;
1059: #if defined(PETSC_USE_DEBUG)
1060:     VecScatterCheckIndices_Private(ctx->to_n,ny,idy);
1061: #endif
1062:     PetscMemcpy(to11->vslots,idy,nx*sizeof(PetscInt));
1063:     from11->n = nx;
1064: #if defined(PETSC_USE_DEBUG)
1065:     VecScatterCheckIndices_Private(ctx->from_n,nx,idx);
1066: #endif
1067:     PetscMemcpy(from11->vslots,idx,nx*sizeof(PetscInt));
1068:     to11->format        = VEC_SCATTER_SEQ_GENERAL;
1069:     from11->format      = VEC_SCATTER_SEQ_GENERAL;
1070:     ctx->todata       = (void*)to11;
1071:     ctx->fromdata     = (void*)from11;
1072:     ctx->ops->begin   = VecScatterBegin_SGToSG;
1073:     ctx->ops->end     = 0;
1074:     ctx->ops->destroy = VecScatterDestroy_SGToSG;
1075:     ctx->ops->copy    = VecScatterCopy_SGToSG;
1076:     ctx->ops->view    = VecScatterView_SGToSG;
1077:     ISRestoreIndices(ix,&idx);
1078:     ISRestoreIndices(iy,&idy);
1079:     PetscInfo(xin,"Sequential vector scatter with block indices\n");
1080:     goto functionend;
1081:   }
1082:   functionend:
1083:   ISDestroy(&tix);
1084:   ISDestroy(&tiy);
1085:   VecScatterViewFromOptions(ctx,NULL,"-vecscatter_view");
1086:   return(0);
1087: }

1089: static PetscErrorCode VecScatterCreate_PtoS(VecScatter ctx)
1090: {
1091:   PetscErrorCode    ierr;
1092:   PetscMPIInt       size;
1093:   PetscInt          ix_type=-1,iy_type=-1,*range;
1094:   MPI_Comm          comm;
1095:   IS                tix=NULL,tiy=NULL,ix=ctx->from_is,iy=ctx->to_is;
1096:   Vec               xin=ctx->from_v,yin=ctx->to_v;
1097:   VecScatterType    type;
1098:   PetscBool         vec_mpi1_flg;
1099:   PetscBool         totalv,ixblock,iyblock,iystride,islocal,cando;

1102:   PetscObjectGetComm((PetscObject)ctx,&comm);
1103:   GetInputISType_private(ctx,VEC_MPI_ID,VEC_SEQ_ID,&ix_type,&tix,&iy_type,&tiy);
1104:   if (tix) ix = tix;
1105:   if (tiy) iy = tiy;

1107:   VecScatterGetType(ctx,&type);
1108:   PetscStrcmp(type,"mpi1",&vec_mpi1_flg);

1110:   islocal = PETSC_FALSE;
1111:   /* special case extracting (subset of) local portion */
1112:   if (ix_type == IS_STRIDE_ID && iy_type == IS_STRIDE_ID) {
1113:     /* Case (2-a) */
1114:     PetscInt              nx,ny,to_first,to_step,from_first,from_step;
1115:     PetscInt              start,end,min,max;
1116:     VecScatter_Seq_Stride *from12 = NULL,*to12 = NULL;

1118:     VecGetOwnershipRange(xin,&start,&end);
1119:     ISGetLocalSize(ix,&nx);
1120:     ISStrideGetInfo(ix,&from_first,&from_step);
1121:     ISGetLocalSize(iy,&ny);
1122:     ISStrideGetInfo(iy,&to_first,&to_step);
1123:     if (nx != ny) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local scatter sizes don't match");
1124:     ISGetMinMax(ix,&min,&max);
1125:     if (min >= start && max < end) islocal = PETSC_TRUE;
1126:     else islocal = PETSC_FALSE;
1127:     /* cannot use MPIU_Allreduce() since this call matches with the MPI_Allreduce() in the else statement below */
1128:     MPI_Allreduce(&islocal,&cando,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)xin));
1129:     if (cando) {
1130:       PetscMalloc2(1,&to12,1,&from12);
1131:       to12->n            = nx;
1132:       to12->first        = to_first;
1133:       to12->step         = to_step;
1134:       from12->n          = nx;
1135:       from12->first      = from_first-start;
1136:       from12->step       = from_step;
1137:       to12->format         = VEC_SCATTER_SEQ_STRIDE;
1138:       from12->format       = VEC_SCATTER_SEQ_STRIDE;
1139:       ctx->todata        = (void*)to12;
1140:       ctx->fromdata      = (void*)from12;
1141:       ctx->ops->begin    = VecScatterBegin_SSToSS;
1142:       ctx->ops->end      = 0;
1143:       ctx->ops->destroy  = VecScatterDestroy_SSToSS;
1144:       ctx->ops->copy     = VecScatterCopy_SSToSS;
1145:       ctx->ops->view     = VecScatterView_SSToSS;
1146:       PetscInfo(xin,"Special case: processors only getting local values\n");
1147:       goto functionend;
1148:     }
1149:   } else {
1150:     MPI_Allreduce(&islocal,&cando,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)xin));
1151:   }

1153:   /* test for special case of all processors getting entire vector */
1154:   /* contains check that PetscMPIInt can handle the sizes needed */
1155:   totalv = PETSC_FALSE;
1156:   if (ix_type == IS_STRIDE_ID && iy_type == IS_STRIDE_ID) {
1157:     /* Case (2-b) */
1158:     PetscInt             i,nx,ny,to_first,to_step,from_first,from_step,N;
1159:     PetscMPIInt          *count = NULL,*displx;
1160:     VecScatter_MPI_ToAll *sto   = NULL;

1162:     ISGetLocalSize(ix,&nx);
1163:     ISStrideGetInfo(ix,&from_first,&from_step);
1164:     ISGetLocalSize(iy,&ny);
1165:     ISStrideGetInfo(iy,&to_first,&to_step);
1166:     if (nx != ny) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local scatter sizes don't match");
1167:     VecGetSize(xin,&N);
1168:     if (nx != N) totalv = PETSC_FALSE;
1169:     else if (from_first == 0 && from_step == 1 && from_first == to_first && from_step == to_step) totalv = PETSC_TRUE;
1170:     else totalv = PETSC_FALSE;
1171:     /* cannot use MPIU_Allreduce() since this call matches with the MPI_Allreduce() in the else statement below */
1172:     MPI_Allreduce(&totalv,&cando,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)xin));

1174: #if defined(PETSC_USE_64BIT_INDICES)
1175:     if (cando && (yin->map->N < PETSC_MPI_INT_MAX)) {
1176: #else
1177:     if (cando) {
1178: #endif
1179:       MPI_Comm_size(PetscObjectComm((PetscObject)ctx),&size);
1180:       PetscMalloc3(1,&sto,size,&count,size,&displx);
1181:       range = xin->map->range;
1182:       for (i=0; i<size; i++) {
1183:         PetscMPIIntCast(range[i+1] - range[i],count+i);
1184:         PetscMPIIntCast(range[i],displx+i);
1185:       }
1186:       sto->count        = count;
1187:       sto->displx       = displx;
1188:       sto->work1        = 0;
1189:       sto->work2        = 0;
1190:       sto->format         = VEC_SCATTER_MPI_TOALL;
1191:       ctx->todata       = (void*)sto;
1192:       ctx->fromdata     = 0;
1193:       ctx->ops->begin   = VecScatterBegin_MPI_ToAll;
1194:       ctx->ops->end     = 0;
1195:       ctx->ops->destroy = VecScatterDestroy_MPI_ToAll;
1196:       ctx->ops->copy    = VecScatterCopy_MPI_ToAll;
1197:       ctx->ops->view    = VecScatterView_MPI_ToAll;
1198:       PetscInfo(xin,"Special case: all processors get entire parallel vector\n");
1199:       goto functionend;
1200:     }
1201:   } else {
1202:     MPI_Allreduce(&totalv,&cando,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)xin));
1203:   }

1205:   /* test for special case of processor 0 getting entire vector */
1206:   /* contains check that PetscMPIInt can handle the sizes needed */
1207:   totalv = PETSC_FALSE;
1208:   if (ix_type == IS_STRIDE_ID && iy_type == IS_STRIDE_ID) {
1209:     /* Case (2-c) */
1210:     PetscInt             i,nx,ny,to_first,to_step,from_first,from_step,N;
1211:     PetscMPIInt          rank,*count = NULL,*displx;
1212:     VecScatter_MPI_ToAll *sto = NULL;

1214:     PetscObjectGetComm((PetscObject)xin,&comm);
1215:     MPI_Comm_rank(comm,&rank);
1216:     ISGetLocalSize(ix,&nx);
1217:     ISStrideGetInfo(ix,&from_first,&from_step);
1218:     ISGetLocalSize(iy,&ny);
1219:     ISStrideGetInfo(iy,&to_first,&to_step);
1220:     if (nx != ny) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local scatter sizes don't match");
1221:     if (!rank) {
1222:       VecGetSize(xin,&N);
1223:       if (nx != N) totalv = PETSC_FALSE;
1224:       else if (from_first == 0        && from_step == 1 &&
1225:                from_first == to_first && from_step == to_step) totalv = PETSC_TRUE;
1226:       else totalv = PETSC_FALSE;
1227:     } else {
1228:       if (!nx) totalv = PETSC_TRUE;
1229:       else     totalv = PETSC_FALSE;
1230:     }
1231:     /* cannot use MPIU_Allreduce() since this call matches with the MPI_Allreduce() in the else statement below */
1232:     MPI_Allreduce(&totalv,&cando,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)xin));

1234: #if defined(PETSC_USE_64BIT_INDICES)
1235:     if (cando && (yin->map->N < PETSC_MPI_INT_MAX)) {
1236: #else
1237:     if (cando) {
1238: #endif
1239:       MPI_Comm_size(PetscObjectComm((PetscObject)ctx),&size);
1240:       PetscMalloc3(1,&sto,size,&count,size,&displx);
1241:       range = xin->map->range;
1242:       for (i=0; i<size; i++) {
1243:         PetscMPIIntCast(range[i+1] - range[i],count+i);
1244:         PetscMPIIntCast(range[i],displx+i);
1245:       }
1246:       sto->count        = count;
1247:       sto->displx       = displx;
1248:       sto->work1        = 0;
1249:       sto->work2        = 0;
1250:       sto->format         = VEC_SCATTER_MPI_TOONE;
1251:       ctx->todata       = (void*)sto;
1252:       ctx->fromdata     = 0;
1253:       ctx->ops->begin   = VecScatterBegin_MPI_ToOne;
1254:       ctx->ops->end     = 0;
1255:       ctx->ops->destroy = VecScatterDestroy_MPI_ToAll;
1256:       ctx->ops->copy    = VecScatterCopy_MPI_ToAll;
1257:       ctx->ops->view    = VecScatterView_MPI_ToAll;
1258:       PetscInfo(xin,"Special case: processor zero gets entire parallel vector, rest get none\n");
1259:       goto functionend;
1260:     }
1261:   } else {
1262:     MPI_Allreduce(&totalv,&cando,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)xin));
1263:   }

1265:   /* Case 2-d */
1266:   PetscObjectTypeCompare((PetscObject)ix,ISBLOCK,&ixblock);
1267:   PetscObjectTypeCompare((PetscObject)iy,ISBLOCK,&iyblock);
1268:   PetscObjectTypeCompare((PetscObject)iy,ISSTRIDE,&iystride);
1269:   if (ixblock) {
1270:     /* special case block to block */
1271:     if (iyblock) {
1272:       PetscInt       nx,ny,bsx,bsy;
1273:       const PetscInt *idx,*idy;
1274:       ISGetBlockSize(iy,&bsy);
1275:       ISGetBlockSize(ix,&bsx);
1276:       if (bsx == bsy && VecScatterOptimizedBS(bsx)) {
1277:         ISBlockGetLocalSize(ix,&nx);
1278:         ISBlockGetIndices(ix,&idx);
1279:         ISBlockGetLocalSize(iy,&ny);
1280:         ISBlockGetIndices(iy,&idy);
1281:         if (nx != ny) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local scatter sizes don't match");
1282: #if defined(PETSC_HAVE_MPI_WIN_CREATE_FEATURE)
1283:         if (vec_mpi1_flg) {
1284:           VecScatterCreateLocal_PtoS_MPI1(nx,idx,ny,idy,xin,yin,bsx,ctx);
1285:         } else {
1286:           VecScatterCreateLocal_PtoS_MPI3(nx,idx,ny,idy,xin,yin,bsx,ctx);
1287:         }
1288: #else
1289:         VecScatterCreateLocal_PtoS_MPI1(nx,idx,ny,idy,xin,yin,bsx,ctx);
1290: #endif
1291:         ISBlockRestoreIndices(ix,&idx);
1292:         ISBlockRestoreIndices(iy,&idy);
1293:         PetscInfo(xin,"Special case: blocked indices\n");
1294:         goto functionend;
1295:       }
1296:       /* special case block to stride */
1297:     } else if (iystride) {
1298:       /* Case (2-e) */
1299:       PetscInt ystart,ystride,ysize,bsx;
1300:       ISStrideGetInfo(iy,&ystart,&ystride);
1301:       ISGetLocalSize(iy,&ysize);
1302:       ISGetBlockSize(ix,&bsx);
1303:       /* see if stride index set is equivalent to block index set */
1304:       if (VecScatterOptimizedBS(bsx) && ((ystart % bsx) == 0) && (ystride == 1) && ((ysize % bsx) == 0)) {
1305:         PetscInt       nx,il,*idy;
1306:         const PetscInt *idx;
1307:         ISBlockGetLocalSize(ix,&nx);
1308:         ISBlockGetIndices(ix,&idx);
1309:         if (ysize != bsx*nx) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local scatter sizes don't match");
1310:         PetscMalloc1(nx,&idy);
1311:         if (nx) {
1312:           idy[0] = ystart/bsx;
1313:           for (il=1; il<nx; il++) idy[il] = idy[il-1] + 1;
1314:         }
1315: #if defined(PETSC_HAVE_MPI_WIN_CREATE_FEATURE)
1316:         if (vec_mpi1_flg) {
1317:           VecScatterCreateLocal_PtoS_MPI1(nx,idx,nx,idy,xin,yin,bsx,ctx);
1318:         } else {
1319:           VecScatterCreateLocal_PtoS_MPI3(nx,idx,nx,idy,xin,yin,bsx,ctx);
1320:         }
1321: #else
1322:         VecScatterCreateLocal_PtoS_MPI1(nx,idx,nx,idy,xin,yin,bsx,ctx);
1323: #endif
1324:         PetscFree(idy);
1325:         ISBlockRestoreIndices(ix,&idx);
1326:         PetscInfo(xin,"Special case: blocked indices to stride\n");
1327:         goto functionend;
1328:       }
1329:     }
1330:   }

1332:   /* left over general case (2-f) */
1333:   {
1334:     PetscInt       nx,ny;
1335:     const PetscInt *idx,*idy;
1336:     ISGetLocalSize(ix,&nx);
1337:     ISGetIndices(ix,&idx);
1338:     ISGetLocalSize(iy,&ny);
1339:     ISGetIndices(iy,&idy);
1340:     if (nx != ny) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local scatter sizes don't match (%D %D)",nx,ny);
1341: #if defined(PETSC_HAVE_MPI_WIN_CREATE_FEATURE)
1342:     if (vec_mpi1_flg) {
1343:       VecScatterCreateLocal_PtoS_MPI1(nx,idx,ny,idy,xin,yin,1,ctx);
1344:     } else {
1345:       VecScatterCreateLocal_PtoS_MPI3(nx,idx,ny,idy,xin,yin,1,ctx);
1346:     }
1347: #else
1348:     VecScatterCreateLocal_PtoS_MPI1(nx,idx,ny,idy,xin,yin,1,ctx);
1349: #endif
1350:     ISRestoreIndices(ix,&idx);
1351:     ISRestoreIndices(iy,&idy);
1352:     PetscInfo(xin,"General case: MPI to Seq\n");
1353:     goto functionend;
1354:   }
1355:   functionend:
1356:   ISDestroy(&tix);
1357:   ISDestroy(&tiy);
1358:   VecScatterViewFromOptions(ctx,NULL,"-vecscatter_view");
1359:   return(0);
1360: }

1362: static PetscErrorCode VecScatterCreate_StoP(VecScatter ctx)
1363: {
1364:   PetscErrorCode    ierr;
1365:   PetscInt          ix_type=-1,iy_type=-1;
1366:   IS                tix=NULL,tiy=NULL,ix=ctx->from_is,iy=ctx->to_is;
1367:   Vec               xin=ctx->from_v,yin=ctx->to_v;
1368:   VecScatterType    type;
1369:   PetscBool         vscat_mpi1;
1370:   PetscBool         islocal,cando;

1373:   GetInputISType_private(ctx,VEC_MPI_ID,VEC_SEQ_ID,&ix_type,&tix,&iy_type,&tiy);
1374:   if (tix) ix = tix;
1375:   if (tiy) iy = tiy;

1377:   VecScatterGetType(ctx,&type);
1378:   PetscStrcmp(type,"mpi1",&vscat_mpi1);

1380:   /* special case local copy portion */
1381:   islocal = PETSC_FALSE;
1382:   if (ix_type == IS_STRIDE_ID && iy_type == IS_STRIDE_ID) {
1383:     PetscInt              nx,ny,to_first,to_step,from_step,start,end,from_first,min,max;
1384:     VecScatter_Seq_Stride *from = NULL,*to = NULL;

1386:     VecGetOwnershipRange(yin,&start,&end);
1387:     ISGetLocalSize(ix,&nx);
1388:     ISStrideGetInfo(ix,&from_first,&from_step);
1389:     ISGetLocalSize(iy,&ny);
1390:     ISStrideGetInfo(iy,&to_first,&to_step);
1391:     if (nx != ny) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local scatter sizes don't match");
1392:     ISGetMinMax(iy,&min,&max);
1393:     if (min >= start && max < end) islocal = PETSC_TRUE;
1394:     else islocal = PETSC_FALSE;
1395:     /* cannot use MPIU_Allreduce() since this call matches with the MPI_Allreduce() in the else statement below */
1396:     MPI_Allreduce(&islocal,&cando,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)yin));
1397:     if (cando) {
1398:       PetscMalloc2(1,&to,1,&from);
1399:       to->n             = nx;
1400:       to->first         = to_first-start;
1401:       to->step          = to_step;
1402:       from->n           = nx;
1403:       from->first       = from_first;
1404:       from->step        = from_step;
1405:       to->format          = VEC_SCATTER_SEQ_STRIDE;
1406:       from->format        = VEC_SCATTER_SEQ_STRIDE;
1407:       ctx->todata       = (void*)to;
1408:       ctx->fromdata     = (void*)from;
1409:       ctx->ops->begin   = VecScatterBegin_SSToSS;
1410:       ctx->ops->end     = 0;
1411:       ctx->ops->destroy = VecScatterDestroy_SSToSS;
1412:       ctx->ops->copy    = VecScatterCopy_SSToSS;
1413:       ctx->ops->view    = VecScatterView_SSToSS;
1414:       PetscInfo(xin,"Special case: sequential stride to MPI stride\n");
1415:       goto functionend;
1416:     }
1417:   } else {
1418:     MPI_Allreduce(&islocal,&cando,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)yin));
1419:   }
1420:   /* special case block to stride */
1421:   if (ix_type == IS_BLOCK_ID && iy_type == IS_STRIDE_ID) {
1422:     PetscInt ystart,ystride,ysize,bsx;
1423:     ISStrideGetInfo(iy,&ystart,&ystride);
1424:     ISGetLocalSize(iy,&ysize);
1425:     ISGetBlockSize(ix,&bsx);
1426:     /* see if stride index set is equivalent to block index set */
1427:     if (VecScatterOptimizedBS(bsx) && ((ystart % bsx) == 0) && (ystride == 1) && ((ysize % bsx) == 0)) {
1428:       PetscInt       nx,il,*idy;
1429:       const PetscInt *idx;
1430:       ISBlockGetLocalSize(ix,&nx);
1431:       ISBlockGetIndices(ix,&idx);
1432:       if (ysize != bsx*nx) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local scatter sizes don't match");
1433:       PetscMalloc1(nx,&idy);
1434:       if (nx) {
1435:         idy[0] = ystart/bsx;
1436:         for (il=1; il<nx; il++) idy[il] = idy[il-1] + 1;
1437:       }
1438:       if (vscat_mpi1) {
1439:         VecScatterCreateLocal_StoP_MPI1(nx,idx,nx,idy,xin,yin,bsx,ctx);
1440:       }
1441: #if defined(PETSC_HAVE_MPI_WIN_CREATE_FEATURE)
1442:       else {
1443:         VecScatterCreateLocal_StoP_MPI3(nx,idx,nx,idy,xin,yin,bsx,ctx);
1444:       }
1445: #endif
1446:       PetscFree(idy);
1447:       ISBlockRestoreIndices(ix,&idx);
1448:       PetscInfo(xin,"Special case: Blocked indices to stride\n");
1449:       goto functionend;
1450:     }
1451:   }

1453:   /* general case */
1454:   {
1455:     PetscInt       nx,ny;
1456:     const PetscInt *idx,*idy;
1457:     ISGetLocalSize(ix,&nx);
1458:     ISGetIndices(ix,&idx);
1459:     ISGetLocalSize(iy,&ny);
1460:     ISGetIndices(iy,&idy);
1461:     if (nx != ny) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local scatter sizes don't match");
1462:     if (vscat_mpi1) {
1463:       VecScatterCreateLocal_StoP_MPI1(nx,idx,ny,idy,xin,yin,1,ctx);
1464:     }
1465: #if defined(PETSC_HAVE_MPI_WIN_CREATE_FEATURE)
1466:     else {
1467:       VecScatterCreateLocal_StoP_MPI3(nx,idx,ny,idy,xin,yin,1,ctx);
1468:     }
1469: #endif
1470:     ISRestoreIndices(ix,&idx);
1471:     ISRestoreIndices(iy,&idy);
1472:     PetscInfo(xin,"General case: Seq to MPI\n");
1473:     goto functionend;
1474:   }
1475:   functionend:
1476:   ISDestroy(&tix);
1477:   ISDestroy(&tiy);
1478:   VecScatterViewFromOptions(ctx,NULL,"-vecscatter_view");
1479:   return(0);
1480: }

1482: static PetscErrorCode VecScatterCreate_PtoP(VecScatter ctx)
1483: {
1484:   PetscErrorCode    ierr;
1485:   PetscInt          ix_type=-1,iy_type=-1;
1486:   IS                tix=NULL,tiy=NULL,ix=ctx->from_is,iy=ctx->to_is;
1487:   Vec               xin=ctx->from_v,yin=ctx->to_v;
1488:   VecScatterType    type;
1489:   PetscBool         vscat_mpi1;
1490:   PetscInt          nx,ny;
1491:   const PetscInt    *idx,*idy;

1494:   GetInputISType_private(ctx,VEC_MPI_ID,VEC_MPI_ID,&ix_type,&tix,&iy_type,&tiy);
1495:   if (tix) ix = tix;
1496:   if (tiy) iy = tiy;

1498:   VecScatterGetType(ctx,&type);
1499:   PetscStrcmp(type,"mpi1",&vscat_mpi1);

1501:   /* no special cases for now */
1502:   ISGetLocalSize(ix,&nx);
1503:   ISGetIndices(ix,&idx);
1504:   ISGetLocalSize(iy,&ny);
1505:   ISGetIndices(iy,&idy);
1506:   if (nx != ny) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local scatter sizes don't match");
1507:   if (vscat_mpi1) {
1508:     VecScatterCreateLocal_PtoP_MPI1(nx,idx,ny,idy,xin,yin,1,ctx);
1509:   }
1510: #if defined(PETSC_HAVE_MPI_WIN_CREATE_FEATURE)
1511:   else {
1512:     VecScatterCreateLocal_PtoP_MPI3(nx,idx,ny,idy,xin,yin,1,ctx);
1513:   }
1514: #endif
1515:   ISRestoreIndices(ix,&idx);
1516:   ISRestoreIndices(iy,&idy);
1517:   PetscInfo(xin,"General case: MPI to MPI\n");

1519:   ISDestroy(&tix);
1520:   ISDestroy(&tiy);
1521:   VecScatterViewFromOptions(ctx,NULL,"-vecscatter_view");
1522:   return(0);
1523: }

1525: static PetscErrorCode VecScatterGetInputVecType_private(VecScatter ctx,PetscInt *xin_type1,PetscInt *yin_type1)
1526: {
1527:   PetscErrorCode    ierr;
1528:   MPI_Comm          comm,ycomm;
1529:   PetscMPIInt       size;
1530:   Vec               xin = ctx->from_v,yin = ctx->to_v;
1531:   PetscInt          xin_type,yin_type;

1534:   /*
1535:       Determine if the vectors are "parallel", ie. it shares a comm with other processors, or
1536:       sequential (it does not share a comm). The difference is that parallel vectors treat the
1537:       index set as providing indices in the global parallel numbering of the vector, with
1538:       sequential vectors treat the index set as providing indices in the local sequential
1539:       numbering
1540:   */
1541:   PetscObjectGetComm((PetscObject)xin,&comm);
1542:   MPI_Comm_size(comm,&size);
1543:   if (size > 1) {
1544:     xin_type = VEC_MPI_ID;
1545:   } else xin_type = VEC_SEQ_ID;
1546:   *xin_type1 = xin_type;

1548:   PetscObjectGetComm((PetscObject)yin,&ycomm);
1549:   MPI_Comm_size(ycomm,&size);
1550:   if (size > 1) {
1551:     yin_type = VEC_MPI_ID;
1552:   } else yin_type = VEC_SEQ_ID;
1553:   *yin_type1 = yin_type;
1554:   return(0);
1555: }

1557: static PetscErrorCode GetInputISType_private(VecScatter ctx,PetscInt xin_type,PetscInt yin_type,PetscInt *ix_type1,IS *tix1,PetscInt *iy_type1,IS *tiy1)
1558: {
1559:   PetscErrorCode    ierr;
1560:   MPI_Comm          comm;
1561:   Vec               xin = ctx->from_v,yin = ctx->to_v;
1562:   IS                tix = 0,tiy = 0,ix = ctx->from_is, iy = ctx->to_is;
1563:   PetscInt          ix_type  = IS_GENERAL_ID,iy_type = IS_GENERAL_ID;
1564:   PetscBool         flag;

1567:   PetscObjectGetComm((PetscObject)ctx,&comm);

1569:   /* if ix or iy is not included; assume just grabbing entire vector */
1570:   if (!ix && xin_type == VEC_SEQ_ID) {
1571:     ISCreateStride(comm,ctx->from_n,0,1,&ix);
1572:     tix  = ix;
1573:   } else if (!ix && xin_type == VEC_MPI_ID) {
1574:     if (yin_type == VEC_MPI_ID) {
1575:       PetscInt ntmp, low;
1576:       VecGetLocalSize(xin,&ntmp);
1577:       VecGetOwnershipRange(xin,&low,NULL);
1578:       ISCreateStride(comm,ntmp,low,1,&ix);
1579:     } else {
1580:       PetscInt Ntmp;
1581:       VecGetSize(xin,&Ntmp);
1582:       ISCreateStride(comm,Ntmp,0,1,&ix);
1583:     }
1584:     tix = ix;
1585:   } else if (!ix) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ix not given, but not Seq or MPI vector");

1587:   if (!iy && yin_type == VEC_SEQ_ID) {
1588:     ISCreateStride(comm,ctx->to_n,0,1,&iy);
1589:     tiy  = iy;
1590:   } else if (!iy && yin_type == VEC_MPI_ID) {
1591:     if (xin_type == VEC_MPI_ID) {
1592:       PetscInt ntmp, low;
1593:       VecGetLocalSize(yin,&ntmp);
1594:       VecGetOwnershipRange(yin,&low,NULL);
1595:       ISCreateStride(comm,ntmp,low,1,&iy);
1596:     } else {
1597:       PetscInt Ntmp;
1598:       VecGetSize(yin,&Ntmp);
1599:       ISCreateStride(comm,Ntmp,0,1,&iy);
1600:     }
1601:     tiy = iy;
1602:   } else if (!iy) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"iy not given, but not Seq or MPI vector");

1604:   /* Determine types of index sets */
1605:   PetscObjectTypeCompare((PetscObject)ix,ISBLOCK,&flag);
1606:   if (flag) ix_type = IS_BLOCK_ID;
1607:   PetscObjectTypeCompare((PetscObject)iy,ISBLOCK,&flag);
1608:   if (flag) iy_type = IS_BLOCK_ID;
1609:   PetscObjectTypeCompare((PetscObject)ix,ISSTRIDE,&flag);
1610:   if (flag) ix_type = IS_STRIDE_ID;
1611:   PetscObjectTypeCompare((PetscObject)iy,ISSTRIDE,&flag);
1612:   if (flag) iy_type = IS_STRIDE_ID;

1614:   if (ix_type1) *ix_type1 = ix_type;
1615:   if (iy_type1) *iy_type1 = iy_type;
1616:   if (tix1)     *tix1     = tix;
1617:   if (tiy1)     *tiy1     = tiy;
1618:   return(0);
1619: }

1621: static PetscErrorCode VecScatterCreate_vectype_private(VecScatter ctx)
1622: {
1623:   PetscErrorCode    ierr;
1624:   PetscInt          xin_type=-1,yin_type=-1;

1627:   VecScatterGetInputVecType_private(ctx,&xin_type,&yin_type);
1628:   if (xin_type == VEC_MPI_ID && yin_type == VEC_SEQ_ID) {
1629:     VecScatterCreate_PtoS(ctx);
1630:   } else if (xin_type == VEC_SEQ_ID && yin_type == VEC_MPI_ID) {
1631:     VecScatterCreate_StoP(ctx);
1632:   } else if (xin_type == VEC_MPI_ID && yin_type == VEC_MPI_ID) {
1633:     VecScatterCreate_PtoP(ctx);
1634:   }
1635:   return(0);
1636: }

1638: PetscErrorCode VecScatterCreate_MPI1(VecScatter ctx)
1639: {
1640:   PetscErrorCode    ierr;

1643:   /* subroutines called in VecScatterCreate_vectype_private() need scatter_type as an input */
1644:   PetscObjectChangeTypeName((PetscObject)ctx,VECSCATTERMPI1);
1645:   PetscInfo(ctx,"Using MPI1 for vector scatter\n");
1646:   VecScatterCreate_vectype_private(ctx);
1647:   return(0);
1648: }

1650: PetscErrorCode VecScatterCreate_MPI3(VecScatter ctx)
1651: {
1652:   PetscErrorCode    ierr;

1655:   /* subroutines called in VecScatterCreate_vectype_private() need scatter_type as an input */
1656:   PetscObjectChangeTypeName((PetscObject)ctx,VECSCATTERMPI3);
1657:   PetscInfo(ctx,"Using MPI3 for vector scatter\n");
1658:   VecScatterCreate_vectype_private(ctx);
1659:   return(0);
1660: }

1662: PetscErrorCode VecScatterCreate_MPI3Node(VecScatter ctx)
1663: {
1664:   PetscErrorCode    ierr;

1667:   /* subroutines called in VecScatterCreate_vectype_private() need scatter_type as an input */
1668:   PetscObjectChangeTypeName((PetscObject)ctx,VECSCATTERMPI3NODE);
1669:   PetscInfo(ctx,"Using MPI3NODE for vector scatter\n");
1670:   VecScatterCreate_vectype_private(ctx);
1671:   return(0);
1672: }

1674: /* ------------------------------------------------------------------*/
1675: /*@
1676:    VecScatterGetMerged - Returns true if the scatter is completed in the VecScatterBegin()
1677:       and the VecScatterEnd() does nothing

1679:    Not Collective

1681:    Input Parameter:
1682: .   ctx - scatter context created with VecScatterCreate()

1684:    Output Parameter:
1685: .   flg - PETSC_TRUE if the VecScatterBegin/End() are all done during the VecScatterBegin()

1687:    Level: developer

1689: .seealso: VecScatterCreate(), VecScatterEnd(), VecScatterBegin()
1690: @*/
1691: PetscErrorCode  VecScatterGetMerged(VecScatter ctx,PetscBool  *flg)
1692: {
1695:   *flg = ctx->beginandendtogether;
1696:   return(0);
1697: }

1699: /*@
1700:    VecScatterBegin - Begins a generalized scatter from one vector to
1701:    another. Complete the scattering phase with VecScatterEnd().

1703:    Neighbor-wise Collective on VecScatter and Vec

1705:    Input Parameters:
1706: +  inctx - scatter context generated by VecScatterCreate()
1707: .  x - the vector from which we scatter
1708: .  y - the vector to which we scatter
1709: .  addv - either ADD_VALUES or INSERT_VALUES, with INSERT_VALUES mode any location
1710:           not scattered to retains its old value; i.e. the vector is NOT first zeroed.
1711: -  mode - the scattering mode, usually SCATTER_FORWARD.  The available modes are:
1712:     SCATTER_FORWARD or SCATTER_REVERSE


1715:    Level: intermediate

1717:    Options Database: See VecScatterCreate()

1719:    Notes:
1720:    The vectors x and y need not be the same vectors used in the call
1721:    to VecScatterCreate(), but x must have the same parallel data layout
1722:    as that passed in as the x to VecScatterCreate(), similarly for the y.
1723:    Most likely they have been obtained from VecDuplicate().

1725:    You cannot change the values in the input vector between the calls to VecScatterBegin()
1726:    and VecScatterEnd().

1728:    If you use SCATTER_REVERSE the two arguments x and y should be reversed, from
1729:    the SCATTER_FORWARD.

1731:    y[iy[i]] = x[ix[i]], for i=0,...,ni-1

1733:    This scatter is far more general than the conventional
1734:    scatter, since it can be a gather or a scatter or a combination,
1735:    depending on the indices ix and iy.  If x is a parallel vector and y
1736:    is sequential, VecScatterBegin() can serve to gather values to a
1737:    single processor.  Similarly, if y is parallel and x sequential, the
1738:    routine can scatter from one processor to many processors.

1740:    Concepts: scatter^between vectors
1741:    Concepts: gather^between vectors

1743: .seealso: VecScatterCreate(), VecScatterEnd()
1744: @*/
1745: PetscErrorCode  VecScatterBegin(VecScatter inctx,Vec x,Vec y,InsertMode addv,ScatterMode mode)
1746: {
1748: #if defined(PETSC_USE_DEBUG)
1749:   PetscInt       to_n,from_n;
1750: #endif
1755:   if (inctx->inuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE," Scatter ctx already in use");

1757: #if defined(PETSC_USE_DEBUG)
1758:   /*
1759:      Error checking to make sure these vectors match the vectors used
1760:    to create the vector scatter context. -1 in the from_n and to_n indicate the
1761:    vector lengths are unknown (for example with mapped scatters) and thus
1762:    no error checking is performed.
1763:   */
1764:   if (inctx->from_n >= 0 && inctx->to_n >= 0) {
1765:     VecGetLocalSize(x,&from_n);
1766:     VecGetLocalSize(y,&to_n);
1767:     if (mode & SCATTER_REVERSE) {
1768:       if (to_n != inctx->from_n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Vector wrong size %D for scatter %D (scatter reverse and vector to != ctx from size)",to_n,inctx->from_n);
1769:       if (from_n != inctx->to_n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Vector wrong size %D for scatter %D (scatter reverse and vector from != ctx to size)",from_n,inctx->to_n);
1770:     } else {
1771:       if (to_n != inctx->to_n)     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Vector wrong size %D for scatter %D (scatter forward and vector to != ctx to size)",to_n,inctx->to_n);
1772:       if (from_n != inctx->from_n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Vector wrong size %D for scatter %D (scatter forward and vector from != ctx from size)",from_n,inctx->from_n);
1773:     }
1774:   }
1775: #endif

1777:   inctx->inuse = PETSC_TRUE;
1778:   PetscLogEventBarrierBegin(VEC_ScatterBarrier,0,0,0,0,PetscObjectComm((PetscObject)inctx));
1779:   (*inctx->ops->begin)(inctx,x,y,addv,mode);
1780:   if (inctx->beginandendtogether && inctx->ops->end) {
1781:     inctx->inuse = PETSC_FALSE;
1782:     (*inctx->ops->end)(inctx,x,y,addv,mode);
1783:   }
1784:   PetscLogEventBarrierEnd(VEC_ScatterBarrier,0,0,0,0,PetscObjectComm((PetscObject)inctx));
1785:   return(0);
1786: }

1788: /* --------------------------------------------------------------------*/
1789: /*@
1790:    VecScatterEnd - Ends a generalized scatter from one vector to another.  Call
1791:    after first calling VecScatterBegin().

1793:    Neighbor-wise Collective on VecScatter and Vec

1795:    Input Parameters:
1796: +  ctx - scatter context generated by VecScatterCreate()
1797: .  x - the vector from which we scatter
1798: .  y - the vector to which we scatter
1799: .  addv - either ADD_VALUES or INSERT_VALUES.
1800: -  mode - the scattering mode, usually SCATTER_FORWARD.  The available modes are:
1801:      SCATTER_FORWARD, SCATTER_REVERSE

1803:    Level: intermediate

1805:    Notes:
1806:    If you use SCATTER_REVERSE the arguments x and y should be reversed, from the SCATTER_FORWARD.

1808:    y[iy[i]] = x[ix[i]], for i=0,...,ni-1

1810: .seealso: VecScatterBegin(), VecScatterCreate()
1811: @*/
1812: PetscErrorCode  VecScatterEnd(VecScatter ctx,Vec x,Vec y,InsertMode addv,ScatterMode mode)
1813: {

1820:   ctx->inuse = PETSC_FALSE;
1821:   if (!ctx->ops->end) return(0);
1822:   if (!ctx->beginandendtogether) {
1823:     PetscLogEventBegin(VEC_ScatterEnd,ctx,x,y,0);
1824:     (*(ctx)->ops->end)(ctx,x,y,addv,mode);
1825:     PetscLogEventEnd(VEC_ScatterEnd,ctx,x,y,0);
1826:   }
1827:   return(0);
1828: }

1830: /*@C
1831:    VecScatterDestroy - Destroys a scatter context created by
1832:    VecScatterCreate().

1834:    Collective on VecScatter

1836:    Input Parameter:
1837: .  ctx - the scatter context

1839:    Level: intermediate

1841: .seealso: VecScatterCreate(), VecScatterCopy()
1842: @*/
1843: PetscErrorCode VecScatterDestroy(VecScatter *ctx)
1844: {

1848:   if (!*ctx) return(0);
1850:   if ((*ctx)->inuse && ((PetscObject)(*ctx))->refct == 1) SETERRQ(((PetscObject)(*ctx))->comm,PETSC_ERR_ARG_WRONGSTATE,"Scatter context is in use");
1851:   if (--((PetscObject)(*ctx))->refct > 0) {*ctx = 0; return(0);}

1853:   /* if memory was published with SAWs then destroy it */
1854:   PetscObjectSAWsViewOff((PetscObject)(*ctx));
1855:   if ((*ctx)->ops->destroy) {(*(*ctx)->ops->destroy)(*ctx);}
1856: #if defined(PETSC_HAVE_VECCUDA)
1857:   VecScatterCUDAIndicesDestroy((PetscCUDAIndices*)&((*ctx)->spptr));
1858: #endif
1859:   PetscHeaderDestroy(ctx);
1860:   return(0);
1861: }

1863: /*@
1864:    VecScatterCopy - Makes a copy of a scatter context.

1866:    Collective on VecScatter

1868:    Input Parameter:
1869: .  sctx - the scatter context

1871:    Output Parameter:
1872: .  ctx - the context copy

1874:    Level: advanced

1876: .seealso: VecScatterCreate(), VecScatterDestroy()
1877: @*/
1878: PetscErrorCode  VecScatterCopy(VecScatter sctx,VecScatter *ctx)
1879: {
1881:   VecScatterType type;

1886:   if (!sctx->ops->copy) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot copy this type");
1887:   PetscHeaderCreate(*ctx,VEC_SCATTER_CLASSID,"VecScatter","VecScatter","Vec",PetscObjectComm((PetscObject)sctx),VecScatterDestroy,VecScatterView);
1888:   (*ctx)->to_n   = sctx->to_n;
1889:   (*ctx)->from_n = sctx->from_n;
1890:   (*sctx->ops->copy)(sctx,*ctx);

1892:   VecScatterGetType(sctx,&type);
1893:   PetscObjectChangeTypeName((PetscObject)(*ctx),type);
1894:   return(0);
1895: }

1897: /* ------------------------------------------------------------------*/
1898: /*@C
1899:    VecScatterView - Views a vector scatter context.

1901:    Collective on VecScatter

1903:    Input Parameters:
1904: +  ctx - the scatter context
1905: -  viewer - the viewer for displaying the context

1907:    Level: intermediate

1909: @*/
1910: PetscErrorCode  VecScatterView(VecScatter ctx,PetscViewer viewer)
1911: {

1916:   if (!viewer) {
1917:     PetscViewerASCIIGetStdout(PetscObjectComm((PetscObject)ctx),&viewer);
1918:   }
1920:   if (ctx->ops->view) {
1921:     (*ctx->ops->view)(ctx,viewer);
1922:   }
1923:   return(0);
1924: }

1926: /*@C
1927:    VecScatterRemap - Remaps the "from" and "to" indices in a
1928:    vector scatter context. FOR EXPERTS ONLY!

1930:    Collective on VecScatter

1932:    Input Parameters:
1933: +  scat - vector scatter context
1934: .  from - remapping for "from" indices (may be NULL)
1935: -  to   - remapping for "to" indices (may be NULL)

1937:    Level: developer

1939:    Notes: In the parallel case the todata is actually the indices
1940:           from which the data is TAKEN! The from stuff is where the
1941:           data is finally put. This is VERY VERY confusing!

1943:           In the sequential case the todata is the indices where the
1944:           data is put and the fromdata is where it is taken from.
1945:           This is backwards from the paralllel case! CRY! CRY! CRY!

1947: @*/
1948: PetscErrorCode  VecScatterRemap(VecScatter scat,PetscInt *rto,PetscInt *rfrom)
1949: {
1950:   VecScatter_Seq_General *to,*from;
1951:   VecScatter_MPI_General *mto;
1952:   PetscInt               i;


1959:   from = (VecScatter_Seq_General*)scat->fromdata;
1960:   mto  = (VecScatter_MPI_General*)scat->todata;

1962:   if (mto->format == VEC_SCATTER_MPI_TOALL) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Not for to all scatter");

1964:   if (rto) {
1965:     if (mto->format == VEC_SCATTER_MPI_GENERAL) {
1966:       /* handle off processor parts */
1967:       for (i=0; i<mto->starts[mto->n]; i++) mto->indices[i] = rto[mto->indices[i]];

1969:       /* handle local part */
1970:       to = &mto->local;
1971:       for (i=0; i<to->n; i++) to->vslots[i] = rto[to->vslots[i]];
1972:     } else if (from->format == VEC_SCATTER_SEQ_GENERAL) {
1973:       for (i=0; i<from->n; i++) from->vslots[i] = rto[from->vslots[i]];
1974:     } else if (from->format == VEC_SCATTER_SEQ_STRIDE) {
1975:       VecScatter_Seq_Stride *sto = (VecScatter_Seq_Stride*)from;

1977:       /* if the remapping is the identity and stride is identity then skip remap */
1978:       if (sto->step == 1 && sto->first == 0) {
1979:         for (i=0; i<sto->n; i++) {
1980:           if (rto[i] != i) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Unable to remap such scatters");
1981:         }
1982:       } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Unable to remap such scatters");
1983:     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Unable to remap such scatters");
1984:   }

1986:   if (rfrom) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Unable to remap the FROM in scatters yet");

1988:   /*
1989:      Mark then vector lengths as unknown because we do not know the
1990:    lengths of the remapped vectors
1991:   */
1992:   scat->from_n = -1;
1993:   scat->to_n   = -1;
1994:   return(0);
1995: }

1997: /*
1998:  VecScatterGetTypes_Private - Returns the scatter types.

2000:  scatter - The scatter.
2001:  from    - Upon exit this contains the type of the from scatter.
2002:  to      - Upon exit this contains the type of the to scatter.
2003: */
2004: PetscErrorCode VecScatterGetTypes_Private(VecScatter scatter,VecScatterFormat *from,VecScatterFormat *to)
2005: {
2006:   VecScatter_Common* fromdata = (VecScatter_Common*)scatter->fromdata;
2007:   VecScatter_Common* todata   = (VecScatter_Common*)scatter->todata;

2010:   *from = fromdata->format;
2011:   *to = todata->format;
2012:   return(0);
2013: }


2016: /*
2017:   VecScatterIsSequential_Private - Returns true if the scatter is sequential.

2019:   scatter - The scatter.
2020:   flag    - Upon exit flag is true if the scatter is of type VecScatter_Seq_General
2021:             or VecScatter_Seq_Stride; otherwise flag is false.
2022: */
2023: PetscErrorCode VecScatterIsSequential_Private(VecScatter_Common *scatter,PetscBool *flag)
2024: {
2025:   VecScatterFormat scatterType = scatter->format;

2028:   if (scatterType == VEC_SCATTER_SEQ_GENERAL || scatterType == VEC_SCATTER_SEQ_STRIDE) {
2029:     *flag = PETSC_TRUE;
2030:   } else {
2031:     *flag = PETSC_FALSE;
2032:   }
2033:   return(0);
2034: }

2036: #if defined(PETSC_HAVE_VECCUDA)

2038: /*@C
2039:    VecScatterInitializeForGPU - Initializes a generalized scatter from one vector
2040:    to another for GPU based computation.

2042:    Input Parameters:
2043: +  inctx - scatter context generated by VecScatterCreate()
2044: .  x - the vector from which we scatter
2045: -  mode - the scattering mode, usually SCATTER_FORWARD.  The available modes are:
2046:     SCATTER_FORWARD or SCATTER_REVERSE

2048:   Level: intermediate

2050:   Notes:
2051:    Effectively, this function creates all the necessary indexing buffers and work
2052:    vectors needed to move data only those data points in a vector which need to
2053:    be communicated across ranks. This is done at the first time this function is
2054:    called. Currently, this only used in the context of the parallel SpMV call in
2055:    MatMult_MPIAIJCUSPARSE.

2057:    This function is executed before the call to MatMult. This enables the memory
2058:    transfers to be overlapped with the MatMult SpMV kernel call.

2060: .seealso: VecScatterFinalizeForGPU(), VecScatterCreate(), VecScatterEnd()
2061: @*/
2062: PETSC_EXTERN PetscErrorCode VecScatterInitializeForGPU(VecScatter inctx,Vec x,ScatterMode mode)
2063: {
2065:   VecScatter_MPI_General *to,*from;
2066:   PetscErrorCode         ierr;
2067:   PetscInt               i,*indices,*sstartsSends,*sstartsRecvs,nrecvs,nsends,bs;
2068:   PetscBool              isSeq1,isSeq2;

2071:   VecScatterIsSequential_Private((VecScatter_Common*)inctx->fromdata,&isSeq1);
2072:   VecScatterIsSequential_Private((VecScatter_Common*)inctx->todata,&isSeq2);
2073:   if (isSeq1 || isSeq2) {
2074:     return(0);
2075:   }
2076:   if (mode & SCATTER_REVERSE) {
2077:     to     = (VecScatter_MPI_General*)inctx->fromdata;
2078:     from   = (VecScatter_MPI_General*)inctx->todata;
2079:   } else {
2080:     to     = (VecScatter_MPI_General*)inctx->todata;
2081:     from   = (VecScatter_MPI_General*)inctx->fromdata;
2082:   }
2083:   bs           = to->bs;
2084:   nrecvs       = from->n;
2085:   nsends       = to->n;
2086:   indices      = to->indices;
2087:   sstartsSends = to->starts;
2088:   sstartsRecvs = from->starts;
2089:   if (x->valid_GPU_array != PETSC_OFFLOAD_UNALLOCATED && (nsends>0 || nrecvs>0)) {
2090:     if (!inctx->spptr) {
2091:       PetscInt k,*tindicesSends,*sindicesSends,*tindicesRecvs,*sindicesRecvs;
2092:       PetscInt ns = sstartsSends[nsends],nr = sstartsRecvs[nrecvs];
2093:       /* Here we create indices for both the senders and receivers. */
2094:       PetscMalloc1(ns,&tindicesSends);
2095:       PetscMalloc1(nr,&tindicesRecvs);

2097:       PetscMemcpy(tindicesSends,indices,ns*sizeof(PetscInt));
2098:       PetscMemcpy(tindicesRecvs,from->indices,nr*sizeof(PetscInt));

2100:       PetscSortRemoveDupsInt(&ns,tindicesSends);
2101:       PetscSortRemoveDupsInt(&nr,tindicesRecvs);

2103:       PetscMalloc1(bs*ns,&sindicesSends);
2104:       PetscMalloc1(from->bs*nr,&sindicesRecvs);

2106:       /* sender indices */
2107:       for (i=0; i<ns; i++) {
2108:         for (k=0; k<bs; k++) sindicesSends[i*bs+k] = tindicesSends[i]+k;
2109:       }
2110:       PetscFree(tindicesSends);

2112:       /* receiver indices */
2113:       for (i=0; i<nr; i++) {
2114:         for (k=0; k<from->bs; k++) sindicesRecvs[i*from->bs+k] = tindicesRecvs[i]+k;
2115:       }
2116:       PetscFree(tindicesRecvs);

2118:       /* create GPU indices, work vectors, ... */
2119:       VecScatterCUDAIndicesCreate_PtoP(ns*bs,sindicesSends,nr*from->bs,sindicesRecvs,(PetscCUDAIndices*)&inctx->spptr);
2120:       PetscFree(sindicesSends);
2121:       PetscFree(sindicesRecvs);
2122:     }
2123:   }
2124:   return(0);
2125: }

2127: /*@C
2128:    VecScatterFinalizeForGPU - Finalizes a generalized scatter from one vector to
2129:    another for GPU based computation.

2131:    Input Parameter:
2132: +  inctx - scatter context generated by VecScatterCreate()

2134:   Level: intermediate

2136:   Notes:
2137:    Effectively, this function resets the temporary buffer flags. Currently, this
2138:    only used in the context of the parallel SpMV call in in MatMult_MPIAIJCUDA
2139:    or MatMult_MPIAIJCUDAARSE. Once the MatMultAdd is finished, the GPU temporary
2140:    buffers used for messaging are no longer valid.

2142: .seealso: VecScatterInitializeForGPU(), VecScatterCreate(), VecScatterEnd()
2143: @*/
2144: PETSC_EXTERN PetscErrorCode VecScatterFinalizeForGPU(VecScatter inctx)
2145: {
2147:   return(0);
2148: }

2150: #endif