Actual source code: vscat.c

petsc-3.8.4 2018-03-24

  2: /*
  3:      Code for creating scatters between vectors. This file
  4:   includes the code for scattering between sequential vectors and
  5:   some special cases for parallel scatters.
  6: */

  8:  #include <petsc/private/isimpl.h>
  9:  #include <petsc/private/vecimpl.h>

 11: #if defined(PETSC_HAVE_CUSP)
 12: PETSC_INTERN PetscErrorCode VecScatterCUSPIndicesCreate_PtoP(PetscInt,PetscInt*,PetscInt,PetscInt*,PetscCUSPIndices*);
 13: PETSC_INTERN PetscErrorCode VecScatterCUSPIndicesCreate_StoS(PetscInt,PetscInt,PetscInt,PetscInt,PetscInt,PetscInt*,PetscInt*,PetscCUSPIndices*);
 14: PETSC_INTERN PetscErrorCode VecScatterCUSPIndicesDestroy(PetscCUSPIndices*);
 15: PETSC_INTERN PetscErrorCode VecScatterCUSP_StoS(Vec,Vec,PetscCUSPIndices,InsertMode,ScatterMode);
 16: #elif defined(PETSC_HAVE_VECCUDA)
 17: PETSC_INTERN PetscErrorCode VecScatterCUDAIndicesCreate_PtoP(PetscInt,PetscInt*,PetscInt,PetscInt*,PetscCUDAIndices*);
 18: PETSC_INTERN PetscErrorCode VecScatterCUDAIndicesCreate_StoS(PetscInt,PetscInt,PetscInt,PetscInt,PetscInt,PetscInt*,PetscInt*,PetscCUDAIndices*);
 19: PETSC_INTERN PetscErrorCode VecScatterCUDAIndicesDestroy(PetscCUDAIndices*);
 20: PETSC_INTERN PetscErrorCode VecScatterCUDA_StoS(Vec,Vec,PetscCUDAIndices,InsertMode,ScatterMode);
 21: #endif

 23: /* Logging support */
 24: PetscClassId VEC_SCATTER_CLASSID;

 26: #if defined(PETSC_USE_DEBUG)
 27: /*
 28:      Checks if any indices are negative or not less than nmax and generates an error
 29: */
 30: static PetscErrorCode VecScatterCheckIndices_Private(PetscInt nmax,PetscInt n,const PetscInt *idx)
 31: {
 32:   PetscInt i;

 35:   for (i=0; i<n; i++) {
 36:     if (idx[i] < 0)     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative index %D at %D location",idx[i],i);
 37:     if (idx[i] >= nmax) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index %D at %D location greater than max %D",idx[i],i,nmax);
 38:   }
 39:   return(0);
 40: }
 41: #endif

 43: /*
 44:       This is special scatter code for when the entire parallel vector is copied to each processor.

 46:    This code was written by Cameron Cooper, Occidental College, Fall 1995,
 47:    while working at ANL as a SERS student.
 48: */
 49: PetscErrorCode VecScatterBegin_MPI_ToAll(VecScatter ctx,Vec x,Vec y,InsertMode addv,ScatterMode mode)
 50: {
 52:   PetscInt       yy_n,xx_n;
 53:   PetscScalar    *xv,*yv;

 56:   VecGetLocalSize(y,&yy_n);
 57:   VecGetLocalSize(x,&xx_n);
 58:   VecGetArrayPair(x,y,&xv,&yv);

 60:   if (mode & SCATTER_REVERSE) {
 61:     PetscScalar          *xvt,*xvt2;
 62:     VecScatter_MPI_ToAll *scat = (VecScatter_MPI_ToAll*)ctx->todata;
 63:     PetscInt             i;
 64:     PetscMPIInt          *disply = scat->displx;

 66:     if (addv == INSERT_VALUES) {
 67:       PetscInt rstart,rend;
 68:       /*
 69:          copy the correct part of the local vector into the local storage of
 70:          the MPI one.  Note: this operation only makes sense if all the local
 71:          vectors have the same values
 72:       */
 73:       VecGetOwnershipRange(y,&rstart,&rend);
 74:       PetscMemcpy(yv,xv+rstart,yy_n*sizeof(PetscScalar));
 75:     } else {
 76:       MPI_Comm    comm;
 77:       PetscMPIInt rank;
 78:       PetscObjectGetComm((PetscObject)y,&comm);
 79:       MPI_Comm_rank(comm,&rank);
 80:       if (scat->work1) xvt = scat->work1;
 81:       else {
 82:         PetscMalloc1(xx_n,&xvt);
 83:         scat->work1 = xvt;
 84:       }
 85:       if (!rank) { /* I am the zeroth processor, values are accumulated here */
 86:         if (scat->work2) xvt2 = scat->work2;
 87:         else {
 88:           PetscMalloc1(xx_n,&xvt2);
 89:           scat->work2 = xvt2;
 90:         }
 91:         MPI_Gatherv(yv,yy_n,MPIU_SCALAR,xvt2,scat->count,disply,MPIU_SCALAR,0,PetscObjectComm((PetscObject)ctx));
 92:         MPI_Reduce(xv,xvt,xx_n,MPIU_SCALAR,MPIU_SUM,0,PetscObjectComm((PetscObject)ctx));
 93:         if (addv == ADD_VALUES) {
 94:           for (i=0; i<xx_n; i++) xvt[i] += xvt2[i];
 95: #if !defined(PETSC_USE_COMPLEX)
 96:         } else if (addv == MAX_VALUES) {
 97:           for (i=0; i<xx_n; i++) xvt[i] = PetscMax(xvt[i],xvt2[i]);
 98: #endif
 99:         } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_UNKNOWN_TYPE,"Wrong insert option");
100:         MPI_Scatterv(xvt,scat->count,disply,MPIU_SCALAR,yv,yy_n,MPIU_SCALAR,0,PetscObjectComm((PetscObject)ctx));
101:       } else {
102:         MPI_Gatherv(yv,yy_n,MPIU_SCALAR,0, 0,0,MPIU_SCALAR,0,PetscObjectComm((PetscObject)ctx));
103:         MPI_Reduce(xv,xvt,xx_n,MPIU_SCALAR,MPIU_SUM,0,PetscObjectComm((PetscObject)ctx));
104:         MPI_Scatterv(0,scat->count,disply,MPIU_SCALAR,yv,yy_n,MPIU_SCALAR,0,PetscObjectComm((PetscObject)ctx));
105:       }
106:     }
107:   } else {
108:     PetscScalar          *yvt;
109:     VecScatter_MPI_ToAll *scat = (VecScatter_MPI_ToAll*)ctx->todata;
110:     PetscInt             i;
111:     PetscMPIInt          *displx = scat->displx;

113:     if (addv == INSERT_VALUES) {
114:       MPI_Allgatherv(xv,xx_n,MPIU_SCALAR,yv,scat->count,displx,MPIU_SCALAR,PetscObjectComm((PetscObject)ctx));
115:     } else {
116:       if (scat->work1) yvt = scat->work1;
117:       else {
118:         PetscMalloc1(yy_n,&yvt);
119:         scat->work1 = yvt;
120:       }
121:       MPI_Allgatherv(xv,xx_n,MPIU_SCALAR,yvt,scat->count,displx,MPIU_SCALAR,PetscObjectComm((PetscObject)ctx));
122:       if (addv == ADD_VALUES) {
123:         for (i=0; i<yy_n; i++) yv[i] += yvt[i];
124: #if !defined(PETSC_USE_COMPLEX)
125:       } else if (addv == MAX_VALUES) {
126:         for (i=0; i<yy_n; i++) yv[i] = PetscMax(yv[i],yvt[i]);
127: #endif
128:       } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_UNKNOWN_TYPE,"Wrong insert option");
129:     }
130:   }
131:   VecRestoreArrayPair(x,y,&xv,&yv);
132:   return(0);
133: }
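
/*
   A minimal usage sketch, assuming x is an existing parallel (MPI) vector: this routine
   is normally reached through VecScatterCreateToAll(), which builds both the scatter
   context and a matching sequential vector on every process. Error checking is omitted.

       Vec        xall;
       VecScatter toall;

       VecScatterCreateToAll(x,&toall,&xall);
       VecScatterBegin(toall,x,xall,INSERT_VALUES,SCATTER_FORWARD);
       VecScatterEnd(toall,x,xall,INSERT_VALUES,SCATTER_FORWARD);
       ...
       VecScatterDestroy(&toall);
       VecDestroy(&xall);
*/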

135: PetscErrorCode VecScatterView_MPI_ToAll(VecScatter in,PetscViewer viewer)
136: {
138:   PetscBool      isascii;

141:   PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&isascii);
142:   if (isascii) {
143:     PetscViewerASCIIPrintf(viewer,"Entire parallel vector is copied to each process\n");
144:   }
145:   return(0);
146: }

148: /*
149:       This is special scatter code for when the entire parallel vector is copied to processor 0.

151: */
152: PetscErrorCode VecScatterBegin_MPI_ToOne(VecScatter ctx,Vec x,Vec y,InsertMode addv,ScatterMode mode)
153: {
154:   PetscErrorCode    ierr;
155:   PetscMPIInt       rank;
156:   PetscInt          yy_n,xx_n;
157:   PetscScalar       *yv;
158:   const PetscScalar *xv;
159:   MPI_Comm          comm;

162:   VecGetLocalSize(y,&yy_n);
163:   VecGetLocalSize(x,&xx_n);
164:   VecGetArrayRead(x,&xv);
165:   VecGetArray(y,&yv);

167:   PetscObjectGetComm((PetscObject)x,&comm);
168:   MPI_Comm_rank(comm,&rank);

170:   /* --------  Reverse scatter; spread from processor 0 to other processors */
171:   if (mode & SCATTER_REVERSE) {
172:     PetscScalar          *yvt;
173:     VecScatter_MPI_ToAll *scat = (VecScatter_MPI_ToAll*)ctx->todata;
174:     PetscInt             i;
175:     PetscMPIInt          *disply = scat->displx;

177:     if (addv == INSERT_VALUES) {
178:       MPI_Scatterv((PetscScalar*)xv,scat->count,disply,MPIU_SCALAR,yv,yy_n,MPIU_SCALAR,0,PetscObjectComm((PetscObject)ctx));
179:     } else {
180:       if (scat->work2) yvt = scat->work2;
181:       else {
182:         PetscMalloc1(xx_n,&yvt);
183:         scat->work2 = yvt;
184:       }
185:       MPI_Scatterv((PetscScalar*)xv,scat->count,disply,MPIU_SCALAR,yvt,yy_n,MPIU_SCALAR,0,PetscObjectComm((PetscObject)ctx));
186:       if (addv == ADD_VALUES) {
187:         for (i=0; i<yy_n; i++) yv[i] += yvt[i];
188: #if !defined(PETSC_USE_COMPLEX)
189:       } else if (addv == MAX_VALUES) {
190:         for (i=0; i<yy_n; i++) yv[i] = PetscMax(yv[i],yvt[i]);
191: #endif
192:       } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_UNKNOWN_TYPE,"Wrong insert option");
193:     }
194:   /* ---------  Forward scatter; gather all values onto processor 0 */
195:   } else {
196:     PetscScalar          *yvt  = 0;
197:     VecScatter_MPI_ToAll *scat = (VecScatter_MPI_ToAll*)ctx->todata;
198:     PetscInt             i;
199:     PetscMPIInt          *displx = scat->displx;

201:     if (addv == INSERT_VALUES) {
202:       MPI_Gatherv((PetscScalar*)xv,xx_n,MPIU_SCALAR,yv,scat->count,displx,MPIU_SCALAR,0,PetscObjectComm((PetscObject)ctx));
203:     } else {
204:       if (!rank) {
205:         if (scat->work1) yvt = scat->work1;
206:         else {
207:           PetscMalloc1(yy_n,&yvt);
208:           scat->work1 = yvt;
209:         }
210:       }
211:       MPI_Gatherv((PetscScalar*)xv,xx_n,MPIU_SCALAR,yvt,scat->count,displx,MPIU_SCALAR,0,PetscObjectComm((PetscObject)ctx));
212:       if (!rank) {
213:         if (addv == ADD_VALUES) {
214:           for (i=0; i<yy_n; i++) yv[i] += yvt[i];
215: #if !defined(PETSC_USE_COMPLEX)
216:         } else if (addv == MAX_VALUES) {
217:           for (i=0; i<yy_n; i++) yv[i] = PetscMax(yv[i],yvt[i]);
218: #endif
219:         }  else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_UNKNOWN_TYPE,"Wrong insert option");
220:       }
221:     }
222:   }
223:   VecRestoreArrayRead(x,&xv);
224:   VecRestoreArray(y,&yv);
225:   return(0);
226: }
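
/*
   A minimal usage sketch, assuming x is an existing parallel vector: this routine is
   normally reached through VecScatterCreateToZero(), which builds the context and a
   sequential vector that has the full length on process 0 and length 0 elsewhere.
   Applying the scatter with SCATTER_REVERSE spreads the values back from process 0.

       Vec        x0;
       VecScatter tozero;

       VecScatterCreateToZero(x,&tozero,&x0);
       VecScatterBegin(tozero,x,x0,INSERT_VALUES,SCATTER_FORWARD);
       VecScatterEnd(tozero,x,x0,INSERT_VALUES,SCATTER_FORWARD);
*/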

228: /*
229:        The following two are used for both VecScatterBegin_MPI_ToAll() and VecScatterBegin_MPI_ToOne()
230: */
231: PetscErrorCode VecScatterDestroy_MPI_ToAll(VecScatter ctx)
232: {
233:   VecScatter_MPI_ToAll *scat = (VecScatter_MPI_ToAll*)ctx->todata;
234:   PetscErrorCode       ierr;

237:   PetscFree(scat->work1);
238:   PetscFree(scat->work2);
239:   PetscFree3(ctx->todata,scat->count,scat->displx);
240:   return(0);
241: }

243: PetscErrorCode VecScatterDestroy_SGToSG(VecScatter ctx)
244: {

248:   PetscFree2(((VecScatter_Seq_General*)ctx->todata)->vslots,((VecScatter_Seq_General*)ctx->fromdata)->vslots);
249:   PetscFree2(ctx->todata,ctx->fromdata);
250:   return(0);
251: }

253: PetscErrorCode VecScatterDestroy_SGToSS(VecScatter ctx)
254: {

258:   PetscFree(((VecScatter_Seq_General*)ctx->fromdata)->vslots);
259:   PetscFree2(ctx->todata,ctx->fromdata);
260:   return(0);
261: }

263: PetscErrorCode VecScatterDestroy_SSToSG(VecScatter ctx)
264: {

268:   PetscFree(((VecScatter_Seq_General*)ctx->todata)->vslots);
269:   PetscFree2(ctx->todata,ctx->fromdata);
270:   return(0);
271: }

273: PetscErrorCode VecScatterDestroy_SSToSS(VecScatter ctx)
274: {

278:   PetscFree2(ctx->todata,ctx->fromdata);
279:   return(0);
280: }

282: /* -------------------------------------------------------------------------*/
283: PetscErrorCode VecScatterCopy_MPI_ToAll(VecScatter in,VecScatter out)
284: {
285:   VecScatter_MPI_ToAll *in_to = (VecScatter_MPI_ToAll*)in->todata,*sto;
286:   PetscErrorCode       ierr;
287:   PetscMPIInt          size,*count,*displx;
288:   PetscInt             i;

291:   out->ops->begin   = in->ops->begin;
292:   out->ops->end     = in->ops->end;
293:   out->ops->copy    = in->ops->copy;
294:   out->ops->destroy = in->ops->destroy;
295:   out->ops->view    = in->ops->view;

297:   MPI_Comm_size(PetscObjectComm((PetscObject)out),&size);
298:   PetscMalloc3(1,&sto,size,&count,size,&displx);
299:   sto->type   = in_to->type;
300:   sto->count  = count;
301:   sto->displx = displx;
302:   for (i=0; i<size; i++) {
303:     sto->count[i]  = in_to->count[i];
304:     sto->displx[i] = in_to->displx[i];
305:   }
306:   sto->work1    = 0;
307:   sto->work2    = 0;
308:   out->todata   = (void*)sto;
309:   out->fromdata = (void*)0;
310:   return(0);
311: }
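
/*
   For illustration: this copy routine is dispatched through the generic interface,
   e.g. (assuming ctx was created as one of the ToAll/ToOne scatters above)

       VecScatter copy;
       VecScatterCopy(ctx,&copy);

   VecScatterCopy() calls ops->copy, which VecScatterCreate() points at this routine
   for the MPI_ToAll case.
*/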

313: /* --------------------------------------------------------------------------------------*/
314: /*
315:         Scatter: sequential general to sequential general
316: */
317: PetscErrorCode VecScatterBegin_SGToSG(VecScatter ctx,Vec x,Vec y,InsertMode addv,ScatterMode mode)
318: {
319:   VecScatter_Seq_General *gen_to   = (VecScatter_Seq_General*)ctx->todata;
320:   VecScatter_Seq_General *gen_from = (VecScatter_Seq_General*)ctx->fromdata;
321:   PetscErrorCode         ierr;
322:   PetscInt               i,n = gen_from->n,*fslots,*tslots;
323:   PetscScalar            *xv,*yv;

326: #if defined(PETSC_HAVE_CUSP)
327:   if (x->valid_GPU_array == PETSC_CUSP_GPU) {
328:     /* create the scatter indices if not done already */
329:     if (!ctx->spptr) {
330:       PetscInt tofirst = 0,tostep = 0,fromfirst = 0,fromstep = 0;
331:       fslots = gen_from->vslots;
332:       tslots = gen_to->vslots;
333:       VecScatterCUSPIndicesCreate_StoS(n,tofirst,fromfirst,tostep,fromstep,tslots,fslots,(PetscCUSPIndices*)&(ctx->spptr));
334:     }
335:     /* next do the scatter */
336:     VecScatterCUSP_StoS(x,y,(PetscCUSPIndices)ctx->spptr,addv,mode);
337:     return(0);
338:   }
339: #elif defined(PETSC_HAVE_VECCUDA)
340:   if (x->valid_GPU_array == PETSC_CUDA_GPU) {
341:     /* create the scatter indices if not done already */
342:     if (!ctx->spptr) {
343:       PetscInt tofirst = 0,tostep = 0,fromfirst = 0,fromstep = 0;
344:       fslots = gen_from->vslots;
345:       tslots = gen_to->vslots;
346:       VecScatterCUDAIndicesCreate_StoS(n,tofirst,fromfirst,tostep,fromstep,tslots,fslots,(PetscCUDAIndices*)&(ctx->spptr));
347:     }
348:     /* next do the scatter */
349:     VecScatterCUDA_StoS(x,y,(PetscCUDAIndices)ctx->spptr,addv,mode);
350:     return(0);
351:   }
352: #endif

354:   VecGetArrayPair(x,y,&xv,&yv);
355:   if (mode & SCATTER_REVERSE) {
356:     gen_to   = (VecScatter_Seq_General*)ctx->fromdata;
357:     gen_from = (VecScatter_Seq_General*)ctx->todata;
358:   }
359:   fslots = gen_from->vslots;
360:   tslots = gen_to->vslots;

362:   if (addv == INSERT_VALUES) {
363:     for (i=0; i<n; i++) yv[tslots[i]]  = xv[fslots[i]];
364:   } else if (addv == ADD_VALUES) {
365:     for (i=0; i<n; i++) yv[tslots[i]] += xv[fslots[i]];
366: #if !defined(PETSC_USE_COMPLEX)
367:   } else if (addv == MAX_VALUES) {
368:     for (i=0; i<n; i++) yv[tslots[i]] = PetscMax(yv[tslots[i]],xv[fslots[i]]);
369: #endif
370:   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_UNKNOWN_TYPE,"Wrong insert option");
371:   VecRestoreArrayPair(x,y,&xv,&yv);
372:   return(0);
373: }
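
/*
   A minimal sketch of how this path is selected (index values here are illustrative):
   two sequential vectors with two general index sets trigger the SGToSG case in
   VecScatterCreate().

       Vec        x,y;                            sequential vectors, length >= 4
       IS         ix,iy;
       VecScatter sc;
       PetscInt   from[2] = {0,2},to[2] = {3,1};

       ISCreateGeneral(PETSC_COMM_SELF,2,from,PETSC_COPY_VALUES,&ix);
       ISCreateGeneral(PETSC_COMM_SELF,2,to,PETSC_COPY_VALUES,&iy);
       VecScatterCreate(x,ix,y,iy,&sc);
       VecScatterBegin(sc,x,y,INSERT_VALUES,SCATTER_FORWARD);
       VecScatterEnd(sc,x,y,INSERT_VALUES,SCATTER_FORWARD);

   after which y[3] = x[0] and y[1] = x[2].
*/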

375: /*
376:     Scatter: sequential general to sequential stride 1
377: */
378: PetscErrorCode VecScatterBegin_SGToSS_Stride1(VecScatter ctx,Vec x,Vec y,InsertMode addv,ScatterMode mode)
379: {
380:   VecScatter_Seq_Stride  *gen_to   = (VecScatter_Seq_Stride*)ctx->todata;
381:   VecScatter_Seq_General *gen_from = (VecScatter_Seq_General*)ctx->fromdata;
382:   PetscInt               i,n = gen_from->n,*fslots = gen_from->vslots;
383:   PetscErrorCode         ierr;
384:   PetscInt               first = gen_to->first;
385:   PetscScalar            *xv,*yv;

388: #if defined(PETSC_HAVE_CUSP)
389:   if (x->valid_GPU_array == PETSC_CUSP_GPU) {
390:     /* create the scatter indices if not done already */
391:     if (!ctx->spptr) {
392:       PetscInt tofirst = first,tostep = 1,fromfirst = 0,fromstep = 0;
393:       PetscInt *tslots = 0;
394:       VecScatterCUSPIndicesCreate_StoS(n,tofirst,fromfirst,tostep,fromstep,tslots,fslots,(PetscCUSPIndices*)&(ctx->spptr));
395:     }
396:     /* next do the scatter */
397:     VecScatterCUSP_StoS(x,y,(PetscCUSPIndices)ctx->spptr,addv,mode);
398:     return(0);
399:   }
400: #elif defined(PETSC_HAVE_VECCUDA)
401:   if (x->valid_GPU_array == PETSC_CUDA_GPU) {
402:     /* create the scatter indices if not done already */
403:     if (!ctx->spptr) {
404:       PetscInt tofirst = first,tostep = 1,fromfirst = 0,fromstep = 0;
405:       PetscInt *tslots = 0;
406:       VecScatterCUDAIndicesCreate_StoS(n,tofirst,fromfirst,tostep,fromstep,tslots,fslots,(PetscCUDAIndices*)&(ctx->spptr));
407:     }
408:     /* next do the scatter */
409:     VecScatterCUDA_StoS(x,y,(PetscCUDAIndices)ctx->spptr,addv,mode);
410:     return(0);
411:   }
412: #endif

414:   VecGetArrayPair(x,y,&xv,&yv);
415:   if (mode & SCATTER_REVERSE) {
416:     xv += first;
417:     if (addv == INSERT_VALUES) {
418:       for (i=0; i<n; i++) yv[fslots[i]] = xv[i];
419:     } else if (addv == ADD_VALUES) {
420:       for (i=0; i<n; i++) yv[fslots[i]] += xv[i];
421: #if !defined(PETSC_USE_COMPLEX)
422:     } else if (addv == MAX_VALUES) {
423:       for (i=0; i<n; i++) yv[fslots[i]] = PetscMax(yv[fslots[i]],xv[i]);
424: #endif
425:     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_UNKNOWN_TYPE,"Wrong insert option");
426:   } else {
427:     yv += first;
428:     if (addv == INSERT_VALUES) {
429:       for (i=0; i<n; i++) yv[i] = xv[fslots[i]];
430:     } else if (addv == ADD_VALUES) {
431:       for (i=0; i<n; i++) yv[i] += xv[fslots[i]];
432: #if !defined(PETSC_USE_COMPLEX)
433:     } else if (addv == MAX_VALUES) {
434:       for (i=0; i<n; i++) yv[i] = PetscMax(yv[i],xv[fslots[i]]);
435: #endif
436:     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_UNKNOWN_TYPE,"Wrong insert option");
437:   }
438:   VecRestoreArrayPair(x,y,&xv,&yv);
439:   return(0);
440: }

442: /*
443:    Scatter: sequential general to sequential stride
444: */
445: PetscErrorCode VecScatterBegin_SGToSS(VecScatter ctx,Vec x,Vec y,InsertMode addv,ScatterMode mode)
446: {
447:   VecScatter_Seq_Stride  *gen_to   = (VecScatter_Seq_Stride*)ctx->todata;
448:   VecScatter_Seq_General *gen_from = (VecScatter_Seq_General*)ctx->fromdata;
449:   PetscInt               i,n = gen_from->n,*fslots = gen_from->vslots;
450:   PetscErrorCode         ierr;
451:   PetscInt               first = gen_to->first,step = gen_to->step;
452:   PetscScalar            *xv,*yv;

455: #if defined(PETSC_HAVE_CUSP)
456:   if (x->valid_GPU_array == PETSC_CUSP_GPU) {
457:     /* create the scatter indices if not done already */
458:     if (!ctx->spptr) {
459:       PetscInt tofirst = first,tostep = step,fromfirst = 0,fromstep = 0;
460:       PetscInt * tslots = 0;
461:       VecScatterCUSPIndicesCreate_StoS(n,tofirst,fromfirst,tostep,fromstep,tslots,fslots,(PetscCUSPIndices*)&(ctx->spptr));
462:     }
463:     /* next do the scatter */
464:     VecScatterCUSP_StoS(x,y,(PetscCUSPIndices)ctx->spptr,addv,mode);
465:     return(0);
466:   }
467: #elif defined(PETSC_HAVE_VECCUDA)
468:   if (x->valid_GPU_array == PETSC_CUDA_GPU) {
469:     /* create the scatter indices if not done already */
470:     if (!ctx->spptr) {
471:       PetscInt tofirst = first,tostep = step,fromfirst = 0,fromstep = 0;
472:       PetscInt * tslots = 0;
473:       VecScatterCUDAIndicesCreate_StoS(n,tofirst,fromfirst,tostep,fromstep,tslots,fslots,(PetscCUDAIndices*)&(ctx->spptr));
474:     }
475:     /* next do the scatter */
476:     VecScatterCUDA_StoS(x,y,(PetscCUDAIndices)ctx->spptr,addv,mode);
477:     return(0);
478:   }
479: #endif

481:   VecGetArrayPair(x,y,&xv,&yv);
482:   if (mode & SCATTER_REVERSE) {
483:     if (addv == INSERT_VALUES) {
484:       for (i=0; i<n; i++) yv[fslots[i]] = xv[first + i*step];
485:     } else if (addv == ADD_VALUES) {
486:       for (i=0; i<n; i++) yv[fslots[i]] += xv[first + i*step];
487: #if !defined(PETSC_USE_COMPLEX)
488:     } else if (addv == MAX_VALUES) {
489:       for (i=0; i<n; i++) yv[fslots[i]] = PetscMax(yv[fslots[i]],xv[first + i*step]);
490: #endif
491:     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_UNKNOWN_TYPE,"Wrong insert option");
492:   } else {
493:     if (addv == INSERT_VALUES) {
494:       for (i=0; i<n; i++) yv[first + i*step] = xv[fslots[i]];
495:     } else if (addv == ADD_VALUES) {
496:       for (i=0; i<n; i++) yv[first + i*step] += xv[fslots[i]];
497: #if !defined(PETSC_USE_COMPLEX)
498:     } else if (addv == MAX_VALUES) {
499:       for (i=0; i<n; i++) yv[first + i*step] = PetscMax(yv[first + i*step],xv[fslots[i]]);
500: #endif
501:     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_UNKNOWN_TYPE,"Wrong insert option");
502:   }
503:   VecRestoreArrayPair(x,y,&xv,&yv);
504:   return(0);
505: }

507: /*
508:     Scatter: sequential stride 1 to sequential general
509: */
510: PetscErrorCode VecScatterBegin_SSToSG_Stride1(VecScatter ctx,Vec x,Vec y,InsertMode addv,ScatterMode mode)
511: {
512:   VecScatter_Seq_Stride  *gen_from = (VecScatter_Seq_Stride*)ctx->fromdata;
513:   VecScatter_Seq_General *gen_to   = (VecScatter_Seq_General*)ctx->todata;
514:   PetscInt               i,n = gen_from->n,*tslots = gen_to->vslots;
515:   PetscErrorCode         ierr;
516:   PetscInt               first = gen_from->first;
517:   PetscScalar            *xv,*yv;

520: #if defined(PETSC_HAVE_CUSP)
521:   if (x->valid_GPU_array == PETSC_CUSP_GPU) {
522:     /* create the scatter indices if not done already */
523:     if (!ctx->spptr) {
524:       PetscInt tofirst = 0,tostep = 0,fromfirst = first,fromstep = 1;
525:       PetscInt *fslots = 0;
526:       VecScatterCUSPIndicesCreate_StoS(n,tofirst,fromfirst,tostep,fromstep,tslots,fslots,(PetscCUSPIndices*)&(ctx->spptr));
527:     }
528:     /* next do the scatter */
529:     VecScatterCUSP_StoS(x,y,(PetscCUSPIndices)ctx->spptr,addv,mode);
530:     return(0);
531:   }
532: #elif defined(PETSC_HAVE_VECCUDA)
533:   if (x->valid_GPU_array == PETSC_CUDA_GPU) {
534:     /* create the scatter indices if not done already */
535:     if (!ctx->spptr) {
536:       PetscInt tofirst = 0,tostep = 0,fromfirst = first,fromstep = 1;
537:       PetscInt *fslots = 0;
538:       VecScatterCUDAIndicesCreate_StoS(n,tofirst,fromfirst,tostep,fromstep,tslots,fslots,(PetscCUDAIndices*)&(ctx->spptr));
539:     }
540:     /* next do the scatter */
541:     VecScatterCUDA_StoS(x,y,(PetscCUDAIndices)ctx->spptr,addv,mode);
542:     return(0);
543:   }
544: #endif

546:   VecGetArrayPair(x,y,&xv,&yv);
547:   if (mode & SCATTER_REVERSE) {
548:     yv += first;
549:     if (addv == INSERT_VALUES) {
550:       for (i=0; i<n; i++) yv[i] = xv[tslots[i]];
551:     } else  if (addv == ADD_VALUES) {
552:       for (i=0; i<n; i++) yv[i] += xv[tslots[i]];
553: #if !defined(PETSC_USE_COMPLEX)
554:     } else  if (addv == MAX_VALUES) {
555:       for (i=0; i<n; i++) yv[i] = PetscMax(yv[i],xv[tslots[i]]);
556: #endif
557:     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_UNKNOWN_TYPE,"Wrong insert option");
558:   } else {
559:     xv += first;
560:     if (addv == INSERT_VALUES) {
561:       for (i=0; i<n; i++) yv[tslots[i]] = xv[i];
562:     } else  if (addv == ADD_VALUES) {
563:       for (i=0; i<n; i++) yv[tslots[i]] += xv[i];
564: #if !defined(PETSC_USE_COMPLEX)
565:     } else  if (addv == MAX_VALUES) {
566:       for (i=0; i<n; i++) yv[tslots[i]] = PetscMax(yv[tslots[i]],xv[i]);
567: #endif
568:     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_UNKNOWN_TYPE,"Wrong insert option");
569:   }
570:   VecRestoreArrayPair(x,y,&xv,&yv);
571:   return(0);
572: }

574: /*
575:    Scatter: sequential stride to sequential general
576: */
577: PetscErrorCode VecScatterBegin_SSToSG(VecScatter ctx,Vec x,Vec y,InsertMode addv,ScatterMode mode)
578: {
579:   VecScatter_Seq_Stride  *gen_from = (VecScatter_Seq_Stride*)ctx->fromdata;
580:   VecScatter_Seq_General *gen_to   = (VecScatter_Seq_General*)ctx->todata;
581:   PetscInt               i,n = gen_from->n,*tslots = gen_to->vslots;
582:   PetscErrorCode         ierr;
583:   PetscInt               first = gen_from->first,step = gen_from->step;
584:   PetscScalar            *xv,*yv;

587: #if defined(PETSC_HAVE_CUSP)
588:   if (x->valid_GPU_array == PETSC_CUSP_GPU) {
589:     /* create the scatter indices if not done already */
590:     if (!ctx->spptr) {
591:       PetscInt tofirst = 0,tostep = 0,fromfirst = first,fromstep = step;
592:       PetscInt *fslots = 0;
593:       VecScatterCUSPIndicesCreate_StoS(n,tofirst,fromfirst,tostep,fromstep,tslots,fslots,(PetscCUSPIndices*)&(ctx->spptr));
594:     }
595:     /* next do the scatter */
596:     VecScatterCUSP_StoS(x,y,(PetscCUSPIndices)ctx->spptr,addv,mode);
597:     return(0);
598:   }
599: #elif defined(PETSC_HAVE_VECCUDA)
600:   if (x->valid_GPU_array == PETSC_CUDA_GPU) {
601:     /* create the scatter indices if not done already */
602:     if (!ctx->spptr) {
603:       PetscInt tofirst = 0,tostep = 0,fromfirst = first,fromstep = step;
604:       PetscInt *fslots = 0;
605:       VecScatterCUDAIndicesCreate_StoS(n,tofirst,fromfirst,tostep,fromstep,tslots,fslots,(PetscCUDAIndices*)&(ctx->spptr));
606:     }
607:     /* next do the scatter */
608:     VecScatterCUDA_StoS(x,y,(PetscCUDAIndices)ctx->spptr,addv,mode);
609:     return(0);
610:   }
611: #endif

613:   VecGetArrayPair(x,y,&xv,&yv);
614:   if (mode & SCATTER_REVERSE) {
615:     if (addv == INSERT_VALUES) {
616:       for (i=0; i<n; i++) yv[first + i*step] = xv[tslots[i]];
617:     } else if (addv == ADD_VALUES) {
618:       for (i=0; i<n; i++) yv[first + i*step] += xv[tslots[i]];
619: #if !defined(PETSC_USE_COMPLEX)
620:     } else if (addv == MAX_VALUES) {
621:       for (i=0; i<n; i++) yv[first + i*step] = PetscMax(yv[first + i*step],xv[tslots[i]]);
622: #endif
623:     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_UNKNOWN_TYPE,"Wrong insert option");
624:   } else {
625:     if (addv == INSERT_VALUES) {
626:       for (i=0; i<n; i++) yv[tslots[i]] = xv[first + i*step];
627:     } else if (addv == ADD_VALUES) {
628:       for (i=0; i<n; i++) yv[tslots[i]] += xv[first + i*step];
629: #if !defined(PETSC_USE_COMPLEX)
630:     } else if (addv == MAX_VALUES) {
631:       for (i=0; i<n; i++) yv[tslots[i]] = PetscMax(yv[tslots[i]],xv[first + i*step]);
632: #endif
633:     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_UNKNOWN_TYPE,"Wrong insert option");
634:   }
635:   VecRestoreArrayPair(x,y,&xv,&yv);
636:   return(0);
637: }

639: PetscErrorCode VecScatterView_SSToSG(VecScatter in,PetscViewer viewer)
640: {
641:   PetscErrorCode         ierr;
642:   VecScatter_Seq_Stride  *in_from = (VecScatter_Seq_Stride*)in->fromdata;
643:   VecScatter_Seq_General *in_to   = (VecScatter_Seq_General*)in->todata;
644:   PetscInt               i;
645:   PetscBool              isascii;

648:   PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&isascii);
649:   if (isascii) {
650:     PetscViewerASCIIPrintf(viewer,"Sequential stride to general scatter\n");
651:     for (i=0; i<in_to->n; i++) {
652:       PetscViewerASCIIPrintf(viewer,"%D to %D\n",in_from->first + in_from->step*i,in_to->vslots[i]);
653:     }
654:   }
655:   return(0);
656: }
657: /*
658:      Scatter: sequential stride to sequential stride
659: */
660: PetscErrorCode VecScatterBegin_SSToSS(VecScatter ctx,Vec x,Vec y,InsertMode addv,ScatterMode mode)
661: {
662:   VecScatter_Seq_Stride *gen_to   = (VecScatter_Seq_Stride*)ctx->todata;
663:   VecScatter_Seq_Stride *gen_from = (VecScatter_Seq_Stride*)ctx->fromdata;
664:   PetscInt              i,n = gen_from->n,to_first = gen_to->first,to_step = gen_to->step;
665:   PetscErrorCode        ierr;
666:   PetscInt              from_first = gen_from->first,from_step = gen_from->step;
667:   PetscScalar           *xv,*yv;

670: #if defined(PETSC_HAVE_CUSP)
671:   if (x->valid_GPU_array == PETSC_CUSP_GPU) {
672:     /* create the scatter indices if not done already */
673:     if (!ctx->spptr) {
674:       PetscInt *tslots = 0,*fslots = 0;
675:       VecScatterCUSPIndicesCreate_StoS(n,to_first,from_first,to_step,from_step,tslots,fslots,(PetscCUSPIndices*)&(ctx->spptr));
676:     }
677:     /* next do the scatter */
678:     VecScatterCUSP_StoS(x,y,(PetscCUSPIndices)ctx->spptr,addv,mode);
679:     return(0);
680:   }
681: #elif defined(PETSC_HAVE_VECCUDA)
682:   if (x->valid_GPU_array == PETSC_CUDA_GPU) {
683:     /* create the scatter indices if not done already */
684:     if (!ctx->spptr) {
685:       PetscInt *tslots = 0,*fslots = 0;
686:       VecScatterCUDAIndicesCreate_StoS(n,to_first,from_first,to_step,from_step,tslots,fslots,(PetscCUDAIndices*)&(ctx->spptr));
687:     }
688:     /* next do the scatter */
689:     VecScatterCUDA_StoS(x,y,(PetscCUDAIndices)ctx->spptr,addv,mode);
690:     return(0);
691:   }
692: #endif

694:   VecGetArrayPair(x,y,&xv,&yv);
695:   if (mode & SCATTER_REVERSE) {
696:     from_first = gen_to->first;
697:     to_first   = gen_from->first;
698:     from_step  = gen_to->step;
699:     to_step    = gen_from->step;
700:   }

702:   if (addv == INSERT_VALUES) {
703:     if (to_step == 1 && from_step == 1) {
704:       PetscMemcpy(yv+to_first,xv+from_first,n*sizeof(PetscScalar));
705:     } else  {
706:       for (i=0; i<n; i++) yv[to_first + i*to_step] = xv[from_first+i*from_step];
707:     }
708:   } else if (addv == ADD_VALUES) {
709:     if (to_step == 1 && from_step == 1) {
710:       yv += to_first; xv += from_first;
711:       for (i=0; i<n; i++) yv[i] += xv[i];
712:     } else {
713:       for (i=0; i<n; i++) yv[to_first + i*to_step] += xv[from_first+i*from_step];
714:     }
715: #if !defined(PETSC_USE_COMPLEX)
716:   } else if (addv == MAX_VALUES) {
717:     if (to_step == 1 && from_step == 1) {
718:       yv += to_first; xv += from_first;
719:       for (i=0; i<n; i++) yv[i] = PetscMax(yv[i],xv[i]);
720:     } else {
721:       for (i=0; i<n; i++) yv[to_first + i*to_step] = PetscMax(yv[to_first + i*to_step],xv[from_first+i*from_step]);
722:     }
723: #endif
724:   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_UNKNOWN_TYPE,"Wrong insert option");
725:   VecRestoreArrayPair(x,y,&xv,&yv);
726:   return(0);
727: }
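
/*
   A minimal sketch (illustrative indices): two stride index sets on sequential
   vectors select this SSToSS case, e.g. gathering every other entry of x into the
   first n entries of y.

       IS ix,iy;
       ISCreateStride(PETSC_COMM_SELF,n,0,2,&ix);        selects x[0],x[2],x[4],...
       ISCreateStride(PETSC_COMM_SELF,n,0,1,&iy);        selects y[0],y[1],y[2],...
       VecScatterCreate(x,ix,y,iy,&sc);
*/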

729: /* --------------------------------------------------------------------------*/


732: PetscErrorCode VecScatterCopy_SGToSG(VecScatter in,VecScatter out)
733: {
734:   PetscErrorCode         ierr;
735:   VecScatter_Seq_General *in_to   = (VecScatter_Seq_General*)in->todata,*out_to = NULL;
736:   VecScatter_Seq_General *in_from = (VecScatter_Seq_General*)in->fromdata,*out_from = NULL;

739:   out->ops->begin   = in->ops->begin;
740:   out->ops->end     = in->ops->end;
741:   out->ops->copy    = in->ops->copy;
742:   out->ops->destroy = in->ops->destroy;
743:   out->ops->view    = in->ops->view;

745:   PetscMalloc2(1,&out_to,1,&out_from);
746:   PetscMalloc2(in_to->n,&out_to->vslots,in_from->n,&out_from->vslots);
747:   out_to->n                    = in_to->n;
748:   out_to->type                 = in_to->type;
749:   out_to->nonmatching_computed = PETSC_FALSE;
750:   out_to->slots_nonmatching    = 0;
751:   out_to->is_copy              = PETSC_FALSE;
752:   PetscMemcpy(out_to->vslots,in_to->vslots,(out_to->n)*sizeof(PetscInt));

754:   out_from->n                    = in_from->n;
755:   out_from->type                 = in_from->type;
756:   out_from->nonmatching_computed = PETSC_FALSE;
757:   out_from->slots_nonmatching    = 0;
758:   out_from->is_copy              = PETSC_FALSE;
759:   PetscMemcpy(out_from->vslots,in_from->vslots,(out_from->n)*sizeof(PetscInt));

761:   out->todata   = (void*)out_to;
762:   out->fromdata = (void*)out_from;
763:   return(0);
764: }

766: PetscErrorCode VecScatterView_SGToSG(VecScatter in,PetscViewer viewer)
767: {
768:   PetscErrorCode         ierr;
769:   VecScatter_Seq_General *in_to   = (VecScatter_Seq_General*)in->todata;
770:   VecScatter_Seq_General *in_from = (VecScatter_Seq_General*)in->fromdata;
771:   PetscInt               i;
772:   PetscBool              isascii;

775:   PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&isascii);
776:   if (isascii) {
777:     PetscViewerASCIIPrintf(viewer,"Sequential general scatter\n");
778:     for (i=0; i<in_to->n; i++) {
779:       PetscViewerASCIIPrintf(viewer,"%D to %D\n",in_from->vslots[i],in_to->vslots[i]);
780:     }
781:   }
782:   return(0);
783: }


786: PetscErrorCode VecScatterCopy_SGToSS(VecScatter in,VecScatter out)
787: {
788:   PetscErrorCode         ierr;
789:   VecScatter_Seq_Stride  *in_to   = (VecScatter_Seq_Stride*)in->todata,*out_to = NULL;
790:   VecScatter_Seq_General *in_from = (VecScatter_Seq_General*)in->fromdata,*out_from = NULL;

793:   out->ops->begin   = in->ops->begin;
794:   out->ops->end     = in->ops->end;
795:   out->ops->copy    = in->ops->copy;
796:   out->ops->destroy = in->ops->destroy;
797:   out->ops->view    = in->ops->view;

799:   PetscMalloc2(1,&out_to,1,&out_from);
800:   PetscMalloc1(in_from->n,&out_from->vslots);
801:   out_to->n     = in_to->n;
802:   out_to->type  = in_to->type;
803:   out_to->first = in_to->first;
804:   out_to->step  = in_to->step;

807:   out_from->n                    = in_from->n;
808:   out_from->type                 = in_from->type;
809:   out_from->nonmatching_computed = PETSC_FALSE;
810:   out_from->slots_nonmatching    = 0;
811:   out_from->is_copy              = PETSC_FALSE;
812:   PetscMemcpy(out_from->vslots,in_from->vslots,(out_from->n)*sizeof(PetscInt));

814:   out->todata   = (void*)out_to;
815:   out->fromdata = (void*)out_from;
816:   return(0);
817: }

819: PetscErrorCode VecScatterView_SGToSS(VecScatter in,PetscViewer viewer)
820: {
821:   PetscErrorCode         ierr;
822:   VecScatter_Seq_Stride  *in_to   = (VecScatter_Seq_Stride*)in->todata;
823:   VecScatter_Seq_General *in_from = (VecScatter_Seq_General*)in->fromdata;
824:   PetscInt               i;
825:   PetscBool              isascii;

828:   PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&isascii);
829:   if (isascii) {
830:     PetscViewerASCIIPrintf(viewer,"Sequential general scatter to stride\n");
831:     for (i=0; i<in_to->n; i++) {
832:       PetscViewerASCIIPrintf(viewer,"%D to %D\n",in_from->vslots[i],in_to->first + in_to->step*i);
833:     }
834:   }
835:   return(0);
836: }

838: /* --------------------------------------------------------------------------*/
839: /*
840:     Scatter: parallel to sequential vector, sequential strides for both.
841: */
842: PetscErrorCode VecScatterCopy_SSToSS(VecScatter in,VecScatter out)
843: {
844:   VecScatter_Seq_Stride *in_to   = (VecScatter_Seq_Stride*)in->todata,*out_to = NULL;
845:   VecScatter_Seq_Stride *in_from = (VecScatter_Seq_Stride*)in->fromdata,*out_from = NULL;
846:   PetscErrorCode        ierr;

849:   out->ops->begin   = in->ops->begin;
850:   out->ops->end     = in->ops->end;
851:   out->ops->copy    = in->ops->copy;
852:   out->ops->destroy = in->ops->destroy;
853:   out->ops->view    = in->ops->view;

855:   PetscMalloc2(1,&out_to,1,&out_from);
856:   out_to->n       = in_to->n;
857:   out_to->type    = in_to->type;
858:   out_to->first   = in_to->first;
859:   out_to->step    = in_to->step;
861:   out_from->n     = in_from->n;
862:   out_from->type  = in_from->type;
863:   out_from->first = in_from->first;
864:   out_from->step  = in_from->step;
866:   out->todata     = (void*)out_to;
867:   out->fromdata   = (void*)out_from;
868:   return(0);
869: }

871: PetscErrorCode VecScatterView_SSToSS(VecScatter in,PetscViewer viewer)
872: {
873:   VecScatter_Seq_Stride *in_to   = (VecScatter_Seq_Stride*)in->todata;
874:   VecScatter_Seq_Stride *in_from = (VecScatter_Seq_Stride*)in->fromdata;
875:   PetscErrorCode        ierr;
876:   PetscBool             isascii;

879:   PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&isascii);
880:   if (isascii) {
881:     PetscViewerASCIIPrintf(viewer,"Sequential stride count %D start %D step %D to start %D stride %D\n",in_to->n,in_to->first,in_to->step,in_from->first,in_from->step);
882:   }
883:   return(0);
884: }


887: extern PetscErrorCode VecScatterCreate_PtoS(PetscInt,const PetscInt*,PetscInt,const PetscInt*,Vec,Vec,PetscInt,VecScatter);
888: extern PetscErrorCode VecScatterCreate_PtoP(PetscInt,const PetscInt*,PetscInt,const PetscInt*,Vec,Vec,PetscInt,VecScatter);
889: extern PetscErrorCode VecScatterCreate_StoP(PetscInt,const PetscInt*,PetscInt,const PetscInt*,Vec,Vec,PetscInt,VecScatter);

891: /* =======================================================================*/
892: #define VEC_SEQ_ID 0
893: #define VEC_MPI_ID 1
894: #define IS_GENERAL_ID 0
895: #define IS_STRIDE_ID  1
896: #define IS_BLOCK_ID   2

898: /*
899:    Blocksizes we have optimized scatters for
900: */

902: #define VecScatterOptimizedBS(mbs) (2 <= mbs)


905: PetscErrorCode  VecScatterCreateEmpty(MPI_Comm comm,VecScatter *newctx)
906: {
907:   VecScatter     ctx;

911:   PetscHeaderCreate(ctx,VEC_SCATTER_CLASSID,"VecScatter","VecScatter","Vec",comm,VecScatterDestroy,VecScatterView);
912:   ctx->inuse               = PETSC_FALSE;
913:   ctx->beginandendtogether = PETSC_FALSE;
914:   PetscOptionsGetBool(NULL,NULL,"-vecscatter_merge",&ctx->beginandendtogether,NULL);
915:   if (ctx->beginandendtogether) {
916:     PetscInfo(ctx,"Using combined (merged) vector scatter begin and end\n");
917:   }
918:   ctx->packtogether = PETSC_FALSE;
919:   PetscOptionsGetBool(NULL,NULL,"-vecscatter_packtogether",&ctx->packtogether,NULL);
920:   if (ctx->packtogether) {
921:     PetscInfo(ctx,"Pack all messages before sending\n");
922:   }
923:   *newctx = ctx;
924:   return(0);
925: }
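
/*
   For illustration: the two options read above come from the options database, so a
   typical way to exercise them is on the command line of any PETSc program, e.g.

       ./app -vecscatter_merge -vecscatter_packtogether

   (program name illustrative), which makes VecScatterBegin() perform all of the
   communication (VecScatterEnd() becomes a no-op) and packs all messages before sending.
*/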

927: /*@C
928:    VecScatterCreate - Creates a vector scatter context.

930:    Collective on Vec

932:    Input Parameters:
933: +  xin - a vector that defines the shape (parallel data layout of the vector)
934:          of vectors from which we scatter
935: .  yin - a vector that defines the shape (parallel data layout of the vector)
936:          of vectors to which we scatter
937: .  ix - the indices of xin to scatter (if NULL scatters all values)
938: -  iy - the indices of yin to hold results (if NULL fills entire vector yin)

940:    Output Parameter:
941: .  newctx - location to store the new scatter context

943:    Options Database Keys: (uses regular MPI_Sends by default)
944: +  -vecscatter_view         - Prints details of the communications
945: .  -vecscatter_view ::ascii_info    - Prints fewer details about the communications
946: .  -vecscatter_ssend        - Uses MPI_Ssend_init() instead of MPI_Send_init()
947: .  -vecscatter_rsend        - Uses ready-receiver mode for MPI sends
948: .  -vecscatter_merge        - VecScatterBegin() handles all of the communication, VecScatterEnd() is a no-op;
949:                               this eliminates the chance to overlap computation and communication
950: .  -vecscatter_sendfirst    - Posts sends before receives
951: .  -vecscatter_packtogether - Packs all messages before sending, receives all messages before unpacking
952: .  -vecscatter_alltoall     - Uses MPI all-to-all communication for the scatter
953: .  -vecscatter_window       - Uses MPI-2 window operations to move the data
954: .  -vecscatter_nopack       - Avoids packing into the work vector when possible (if used with -vecscatter_alltoall then MPI_Alltoallw() will be used)
955: -  -vecscatter_reproduce    - Ensures that the communications are done in the same order for each scatter; under certain circumstances
956:                               this makes the results of scatters deterministic when they otherwise are not (it may also be slower).

958: $
959: $                                                                                    --When packing is used--
960: $                               MPI Datatypes (no packing)  sendfirst   merge        packtogether  persistent*
961: $                                _nopack                   _sendfirst    _merge      _packtogether                -vecscatter_
962: $ ----------------------------------------------------------------------------------------------------------------------------
963: $    Message passing    Send       p                           X            X           X         always
964: $                      Ssend       p                           X            X           X         always          _ssend
965: $                      Rsend       p                        nonsense        X           X         always          _rsend
966: $    AlltoAll  v or w              X                        nonsense     always         X         nonsense        _alltoall
967: $    MPI_Win                       p                        nonsense        p           p         nonsense        _window
968: $
969: $   Since persistent sends and receives require a constant memory address, they can only be used when data is packed into the work vector,
970: $   because the in and out arrays may be different for each call to VecScatterBegin()/VecScatterEnd().
971: $
972: $    p indicates possible, but not implemented. X indicates implemented
973: $

975:     Level: intermediate

977:   Notes:
978:    In calls to VecScatterBegin() and VecScatterEnd() you can use different vectors than the xin and
979:    yin you used above; BUT they must have the same parallel data layout, for example,
980:    they could be obtained from VecDuplicate().
981:    A VecScatter context CANNOT be used in two or more simultaneous scatters;
982:    that is, you cannot call a second VecScatterBegin() with the same scatter
983:    context until VecScatterEnd() has been called on the first VecScatterBegin().
984:    In this case a separate VecScatter is needed for each concurrent scatter.

986:   Developer Notes:
987:    Currently MPI_Send(), MPI_Ssend() and MPI_Rsend() all use PERSISTENT versions.
988:    (This unfortunately requires that the same in and out arrays be used for each use; this
989:     is why, when not using MPI_Alltoallw(), we always need to pack the input into the work array before sending
990:     and unpack upon receiving, instead of using MPI datatypes, which would avoid the packing/unpacking.)

992:    ix and iy cannot both be NULL at the same time.

994:    Concepts: scatter^between vectors
995:    Concepts: gather^between vectors

997: .seealso: VecScatterDestroy(), VecScatterCreateToAll(), VecScatterCreateToZero()
998: @*/
999: PetscErrorCode  VecScatterCreate(Vec xin,IS ix,Vec yin,IS iy,VecScatter *newctx)
1000: {
1001:   VecScatter        ctx;
1002:   PetscErrorCode    ierr;
1003:   PetscMPIInt       size;
1004:   PetscInt          xin_type = VEC_SEQ_ID,yin_type = VEC_SEQ_ID,*range;
1005:   PetscInt          ix_type  = IS_GENERAL_ID,iy_type = IS_GENERAL_ID;
1006:   MPI_Comm          comm,ycomm;
1007:   PetscBool         totalv,ixblock,iyblock,iystride,islocal,cando,flag;
1008:   IS                tix = 0,tiy = 0;

1011:   if (!ix && !iy) SETERRQ(PetscObjectComm((PetscObject)xin),PETSC_ERR_SUP,"Cannot pass default in for both input and output indices");

1013:   /*
1014:       Determine if the vectors are "parallel", i.e. they share a comm with other processes, or
1015:       sequential (they do not share a comm). The difference is that parallel vectors treat the
1016:       index set as providing indices in the global parallel numbering of the vector, while
1017:       sequential vectors treat the index set as providing indices in the local sequential
1018:       numbering.
1019:   */
1020:   PetscObjectGetComm((PetscObject)xin,&comm);
1021:   MPI_Comm_size(comm,&size);
1022:   if (size > 1) xin_type = VEC_MPI_ID;

1024:   PetscObjectGetComm((PetscObject)yin,&ycomm);
1025:   MPI_Comm_size(ycomm,&size);
1026:   if (size > 1) {comm = ycomm; yin_type = VEC_MPI_ID;}

1028:   /* generate the Scatter context */
1029:   PetscHeaderCreate(ctx,VEC_SCATTER_CLASSID,"VecScatter","VecScatter","Vec",comm,VecScatterDestroy,VecScatterView);
1030:   ctx->inuse               = PETSC_FALSE;

1032:   ctx->beginandendtogether = PETSC_FALSE;
1033:   PetscOptionsGetBool(NULL,NULL,"-vecscatter_merge",&ctx->beginandendtogether,NULL);
1034:   if (ctx->beginandendtogether) {
1035:     PetscInfo(ctx,"Using combined (merged) vector scatter begin and end\n");
1036:   }
1037:   ctx->packtogether = PETSC_FALSE;
1038:   PetscOptionsGetBool(NULL,NULL,"-vecscatter_packtogether",&ctx->packtogether,NULL);
1039:   if (ctx->packtogether) {
1040:     PetscInfo(ctx,"Pack all messages before sending\n");
1041:   }

1043:   VecGetLocalSize(xin,&ctx->from_n);
1044:   VecGetLocalSize(yin,&ctx->to_n);

1046:   /*
1047:       if ix or iy is not provided, assume we are just grabbing the entire vector
1048:   */
1049:   if (!ix && xin_type == VEC_SEQ_ID) {
1050:     ISCreateStride(comm,ctx->from_n,0,1,&ix);
1051:     tix  = ix;
1052:   } else if (!ix && xin_type == VEC_MPI_ID) {
1053:     if (yin_type == VEC_MPI_ID) {
1054:       PetscInt ntmp, low;
1055:       VecGetLocalSize(xin,&ntmp);
1056:       VecGetOwnershipRange(xin,&low,NULL);
1057:       ISCreateStride(comm,ntmp,low,1,&ix);
1058:     } else {
1059:       PetscInt Ntmp;
1060:       VecGetSize(xin,&Ntmp);
1061:       ISCreateStride(comm,Ntmp,0,1,&ix);
1062:     }
1063:     tix = ix;
1064:   } else if (!ix) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ix not given, but vector is neither Seq nor MPI");

1066:   if (!iy && yin_type == VEC_SEQ_ID) {
1067:     ISCreateStride(comm,ctx->to_n,0,1,&iy);
1068:     tiy  = iy;
1069:   } else if (!iy && yin_type == VEC_MPI_ID) {
1070:     if (xin_type == VEC_MPI_ID) {
1071:       PetscInt ntmp, low;
1072:       VecGetLocalSize(yin,&ntmp);
1073:       VecGetOwnershipRange(yin,&low,NULL);
1074:       ISCreateStride(comm,ntmp,low,1,&iy);
1075:     } else {
1076:       PetscInt Ntmp;
1077:       VecGetSize(yin,&Ntmp);
1078:       ISCreateStride(comm,Ntmp,0,1,&iy);
1079:     }
1080:     tiy = iy;
1081:   } else if (!iy) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"iy not given, but vector is neither Seq nor MPI");

1083:   /*
1084:      Determine types of index sets
1085:   */
1086:   PetscObjectTypeCompare((PetscObject)ix,ISBLOCK,&flag);
1087:   if (flag) ix_type = IS_BLOCK_ID;
1088:   PetscObjectTypeCompare((PetscObject)iy,ISBLOCK,&flag);
1089:   if (flag) iy_type = IS_BLOCK_ID;
1090:   PetscObjectTypeCompare((PetscObject)ix,ISSTRIDE,&flag);
1091:   if (flag) ix_type = IS_STRIDE_ID;
1092:   PetscObjectTypeCompare((PetscObject)iy,ISSTRIDE,&flag);
1093:   if (flag) iy_type = IS_STRIDE_ID;

1095:   /* ===========================================================================================================
1096:         Check for special cases
1097:      ==========================================================================================================*/
1098:   /* ---------------------------------------------------------------------------*/
1099:   if (xin_type == VEC_SEQ_ID && yin_type == VEC_SEQ_ID) {
1100:     if (ix_type == IS_GENERAL_ID && iy_type == IS_GENERAL_ID) {
1101:       PetscInt               nx,ny;
1102:       const PetscInt         *idx,*idy;
1103:       VecScatter_Seq_General *to = NULL,*from = NULL;

1105:       ISGetLocalSize(ix,&nx);
1106:       ISGetLocalSize(iy,&ny);
1107:       if (nx != ny) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local scatter sizes don't match");
1108:       ISGetIndices(ix,&idx);
1109:       ISGetIndices(iy,&idy);
1110:       PetscMalloc2(1,&to,1,&from);
1111:       PetscMalloc2(nx,&to->vslots,nx,&from->vslots);
1112:       to->n = nx;
1113: #if defined(PETSC_USE_DEBUG)
1114:       VecScatterCheckIndices_Private(ctx->to_n,ny,idy);
1115: #endif
1116:       PetscMemcpy(to->vslots,idy,nx*sizeof(PetscInt));
1117:       from->n = nx;
1118: #if defined(PETSC_USE_DEBUG)
1119:       VecScatterCheckIndices_Private(ctx->from_n,nx,idx);
1120: #endif
1121:       PetscMemcpy(from->vslots,idx,nx*sizeof(PetscInt));
1122:       to->type          = VEC_SCATTER_SEQ_GENERAL;
1123:       from->type        = VEC_SCATTER_SEQ_GENERAL;
1124:       ctx->todata       = (void*)to;
1125:       ctx->fromdata     = (void*)from;
1126:       ctx->ops->begin   = VecScatterBegin_SGToSG;
1127:       ctx->ops->end     = 0;
1128:       ctx->ops->destroy = VecScatterDestroy_SGToSG;
1129:       ctx->ops->copy    = VecScatterCopy_SGToSG;
1130:       ctx->ops->view    = VecScatterView_SGToSG;
1131:       PetscInfo(xin,"Special case: sequential vector general scatter\n");
1132:       goto functionend;
1133:     } else if (ix_type == IS_STRIDE_ID &&  iy_type == IS_STRIDE_ID) {
1134:       PetscInt              nx,ny,to_first,to_step,from_first,from_step;
1135:       VecScatter_Seq_Stride *from8 = NULL,*to8 = NULL;

1137:       ISGetLocalSize(ix,&nx);
1138:       ISGetLocalSize(iy,&ny);
1139:       if (nx != ny) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local scatter sizes don't match");
1140:       ISStrideGetInfo(iy,&to_first,&to_step);
1141:       ISStrideGetInfo(ix,&from_first,&from_step);
1142:       PetscMalloc2(1,&to8,1,&from8);
1143:       to8->n            = nx;
1144:       to8->first        = to_first;
1145:       to8->step         = to_step;
1146:       from8->n          = nx;
1147:       from8->first      = from_first;
1148:       from8->step       = from_step;
1149:       to8->type         = VEC_SCATTER_SEQ_STRIDE;
1150:       from8->type       = VEC_SCATTER_SEQ_STRIDE;
1151:       ctx->todata       = (void*)to8;
1152:       ctx->fromdata     = (void*)from8;
1153:       ctx->ops->begin   = VecScatterBegin_SSToSS;
1154:       ctx->ops->end     = 0;
1155:       ctx->ops->destroy = VecScatterDestroy_SSToSS;
1156:       ctx->ops->copy    = VecScatterCopy_SSToSS;
1157:       ctx->ops->view    = VecScatterView_SSToSS;
1158:       PetscInfo(xin,"Special case: sequential vector stride to stride\n");
1159:       goto functionend;
1160:     } else if (ix_type == IS_GENERAL_ID && iy_type == IS_STRIDE_ID) {
1161:       PetscInt               nx,ny,first,step;
1162:       const PetscInt         *idx;
1163:       VecScatter_Seq_General *from9 = NULL;
1164:       VecScatter_Seq_Stride  *to9   = NULL;

1166:       ISGetLocalSize(ix,&nx);
1167:       ISGetIndices(ix,&idx);
1168:       ISGetLocalSize(iy,&ny);
1169:       ISStrideGetInfo(iy,&first,&step);
1170:       if (nx != ny) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local scatter sizes don't match");
1171:       PetscMalloc2(1,&to9,1,&from9);
1172:       PetscMalloc1(nx,&from9->vslots);
1173:       to9->n     = nx;
1174:       to9->first = first;
1175:       to9->step  = step;
1176:       from9->n   = nx;
1177: #if defined(PETSC_USE_DEBUG)
1178:       VecScatterCheckIndices_Private(ctx->from_n,nx,idx);
1179: #endif
1180:       PetscMemcpy(from9->vslots,idx,nx*sizeof(PetscInt));
1181:       ctx->todata    = (void*)to9; ctx->fromdata = (void*)from9;
1182:       if (step == 1)  ctx->ops->begin = VecScatterBegin_SGToSS_Stride1;
1183:       else            ctx->ops->begin = VecScatterBegin_SGToSS;
1184:       ctx->ops->destroy   = VecScatterDestroy_SGToSS;
1185:       ctx->ops->end       = 0;
1186:       ctx->ops->copy      = VecScatterCopy_SGToSS;
1187:       ctx->ops->view      = VecScatterView_SGToSS;
1188:       to9->type      = VEC_SCATTER_SEQ_STRIDE;
1189:       from9->type    = VEC_SCATTER_SEQ_GENERAL;
1190:       PetscInfo(xin,"Special case: sequential vector general to stride\n");
1191:       goto functionend;
1192:     } else if (ix_type == IS_STRIDE_ID && iy_type == IS_GENERAL_ID) {
1193:       PetscInt               nx,ny,first,step;
1194:       const PetscInt         *idy;
1195:       VecScatter_Seq_General *to10 = NULL;
1196:       VecScatter_Seq_Stride  *from10 = NULL;

1198:       ISGetLocalSize(ix,&nx);
1199:       ISGetIndices(iy,&idy);
1200:       ISGetLocalSize(iy,&ny);
1201:       ISStrideGetInfo(ix,&first,&step);
1202:       if (nx != ny) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local scatter sizes don't match");
1203:       PetscMalloc2(1,&to10,1,&from10);
1204:       PetscMalloc1(nx,&to10->vslots);
1205:       from10->n     = nx;
1206:       from10->first = first;
1207:       from10->step  = step;
1208:       to10->n       = nx;
1209: #if defined(PETSC_USE_DEBUG)
1210:       VecScatterCheckIndices_Private(ctx->to_n,ny,idy);
1211: #endif
1212:       PetscMemcpy(to10->vslots,idy,nx*sizeof(PetscInt));
1213:       ctx->todata   = (void*)to10;
1214:       ctx->fromdata = (void*)from10;
1215:       if (step == 1) ctx->ops->begin = VecScatterBegin_SSToSG_Stride1;
1216:       else           ctx->ops->begin = VecScatterBegin_SSToSG;
1217:       ctx->ops->destroy = VecScatterDestroy_SSToSG;
1218:       ctx->ops->end     = 0;
1219:       ctx->ops->copy    = 0;
1220:       ctx->ops->view    = VecScatterView_SSToSG;
1221:       to10->type   = VEC_SCATTER_SEQ_GENERAL;
1222:       from10->type = VEC_SCATTER_SEQ_STRIDE;
1223:       PetscInfo(xin,"Special case: sequential vector stride to general\n");
1224:       goto functionend;
1225:     } else {
1226:       PetscInt               nx,ny;
1227:       const PetscInt         *idx,*idy;
1228:       VecScatter_Seq_General *to11 = NULL,*from11 = NULL;
1229:       PetscBool              idnx,idny;

1231:       ISGetLocalSize(ix,&nx);
1232:       ISGetLocalSize(iy,&ny);
1233:       if (nx != ny) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local scatter sizes don't match, in %D out %D",nx,ny);

1235:       ISIdentity(ix,&idnx);
1236:       ISIdentity(iy,&idny);
1237:       if (idnx && idny) {
1238:         VecScatter_Seq_Stride *to13 = NULL,*from13 = NULL;
1239:         PetscMalloc2(1,&to13,1,&from13);
1240:         to13->n       = nx;
1241:         to13->first   = 0;
1242:         to13->step    = 1;
1243:         from13->n     = nx;
1244:         from13->first = 0;
1245:         from13->step  = 1;
1246:         to13->type    = VEC_SCATTER_SEQ_STRIDE;
1247:         from13->type  = VEC_SCATTER_SEQ_STRIDE;
1248:         ctx->todata   = (void*)to13;
1249:         ctx->fromdata = (void*)from13;
1250:         ctx->ops->begin    = VecScatterBegin_SSToSS;
1251:         ctx->ops->end      = 0;
1252:         ctx->ops->destroy  = VecScatterDestroy_SSToSS;
1253:         ctx->ops->copy     = VecScatterCopy_SSToSS;
1254:         ctx->ops->view     = VecScatterView_SSToSS;
1255:         PetscInfo(xin,"Special case: sequential copy\n");
1256:         goto functionend;
1257:       }

1259:       ISGetIndices(iy,&idy);
1260:       ISGetIndices(ix,&idx);
1261:       PetscMalloc2(1,&to11,1,&from11);
1262:       PetscMalloc2(nx,&to11->vslots,nx,&from11->vslots);
1263:       to11->n = nx;
1264: #if defined(PETSC_USE_DEBUG)
1265:       VecScatterCheckIndices_Private(ctx->to_n,ny,idy);
1266: #endif
1267:       PetscMemcpy(to11->vslots,idy,nx*sizeof(PetscInt));
1268:       from11->n = nx;
1269: #if defined(PETSC_USE_DEBUG)
1270:       VecScatterCheckIndices_Private(ctx->from_n,nx,idx);
1271: #endif
1272:       PetscMemcpy(from11->vslots,idx,nx*sizeof(PetscInt));
1273:       to11->type        = VEC_SCATTER_SEQ_GENERAL;
1274:       from11->type      = VEC_SCATTER_SEQ_GENERAL;
1275:       ctx->todata       = (void*)to11;
1276:       ctx->fromdata     = (void*)from11;
1277:       ctx->ops->begin   = VecScatterBegin_SGToSG;
1278:       ctx->ops->end     = 0;
1279:       ctx->ops->destroy = VecScatterDestroy_SGToSG;
1280:       ctx->ops->copy    = VecScatterCopy_SGToSG;
1281:       ctx->ops->view    = VecScatterView_SGToSG;
1282:       ISRestoreIndices(ix,&idx);
1283:       ISRestoreIndices(iy,&idy);
1284:       PetscInfo(xin,"Sequential vector scatter with block indices\n");
1285:       goto functionend;
1286:     }
1287:   }
1288:   /* ---------------------------------------------------------------------------*/
1289:   if (xin_type == VEC_MPI_ID && yin_type == VEC_SEQ_ID) {

1291:     /* ===========================================================================================================
1292:           Check for special cases
1293:        ==========================================================================================================*/
1294:     islocal = PETSC_FALSE;
1295:     /* special case extracting (subset of) local portion */
1296:     if (ix_type == IS_STRIDE_ID && iy_type == IS_STRIDE_ID) {
1297:       PetscInt              nx,ny,to_first,to_step,from_first,from_step;
1298:       PetscInt              start,end,min,max;
1299:       VecScatter_Seq_Stride *from12 = NULL,*to12 = NULL;

1301:       VecGetOwnershipRange(xin,&start,&end);
1302:       ISGetLocalSize(ix,&nx);
1303:       ISStrideGetInfo(ix,&from_first,&from_step);
1304:       ISGetLocalSize(iy,&ny);
1305:       ISStrideGetInfo(iy,&to_first,&to_step);
1306:       if (nx != ny) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local scatter sizes don't match");
1307:       ISGetMinMax(ix,&min,&max);
1308:       if (min >= start && max < end) islocal = PETSC_TRUE;
1309:       else islocal = PETSC_FALSE;
1310:       /* cannot use MPIU_Allreduce() since this call matches with the MPI_Allreduce() in the else statement below */
1311:       MPI_Allreduce(&islocal,&cando,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)xin));
1312:       if (cando) {
1313:         PetscMalloc2(1,&to12,1,&from12);
1314:         to12->n            = nx;
1315:         to12->first        = to_first;
1316:         to12->step         = to_step;
1317:         from12->n          = nx;
1318:         from12->first      = from_first-start;
1319:         from12->step       = from_step;
1320:         to12->type         = VEC_SCATTER_SEQ_STRIDE;
1321:         from12->type       = VEC_SCATTER_SEQ_STRIDE;
1322:         ctx->todata        = (void*)to12;
1323:         ctx->fromdata      = (void*)from12;
1324:         ctx->ops->begin    = VecScatterBegin_SSToSS;
1325:         ctx->ops->end      = 0;
1326:         ctx->ops->destroy  = VecScatterDestroy_SSToSS;
1327:         ctx->ops->copy     = VecScatterCopy_SSToSS;
1328:         ctx->ops->view     = VecScatterView_SSToSS;
1329:         PetscInfo(xin,"Special case: processors only getting local values\n");
1330:         goto functionend;
1331:       }
1332:     } else {
1333:       MPI_Allreduce(&islocal,&cando,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)xin));
1334:     }

1336:     /* test for special case of all processors getting entire vector */
1337:     /* contains check that PetscMPIInt can handle the sizes needed */
1338:     totalv = PETSC_FALSE;
1339:     if (ix_type == IS_STRIDE_ID && iy_type == IS_STRIDE_ID) {
1340:       PetscInt             i,nx,ny,to_first,to_step,from_first,from_step,N;
1341:       PetscMPIInt          *count = NULL,*displx;
1342:       VecScatter_MPI_ToAll *sto   = NULL;

1344:       ISGetLocalSize(ix,&nx);
1345:       ISStrideGetInfo(ix,&from_first,&from_step);
1346:       ISGetLocalSize(iy,&ny);
1347:       ISStrideGetInfo(iy,&to_first,&to_step);
1348:       if (nx != ny) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local scatter sizes don't match");
1349:       VecGetSize(xin,&N);
1350:       if (nx != N) totalv = PETSC_FALSE;
1351:       else if (from_first == 0 && from_step == 1 && from_first == to_first && from_step == to_step) totalv = PETSC_TRUE;
1352:       else totalv = PETSC_FALSE;
1353:       /* cannot use MPIU_Allreduce() since this call matches the MPI_Allreduce() in the else statement below */
1354:       MPI_Allreduce(&totalv,&cando,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)xin));

1356: #if defined(PETSC_USE_64BIT_INDICES)
1357:       if (cando && (yin->map->N < PETSC_MPI_INT_MAX)) {
1358: #else
1359:       if (cando) {
1360: #endif
1361:         MPI_Comm_size(PetscObjectComm((PetscObject)ctx),&size);
1362:         PetscMalloc3(1,&sto,size,&count,size,&displx);
1363:         range = xin->map->range;
1364:         for (i=0; i<size; i++) {
1365:           PetscMPIIntCast(range[i+1] - range[i],count+i);
1366:           PetscMPIIntCast(range[i],displx+i);
1367:         }
1368:         sto->count        = count;
1369:         sto->displx       = displx;
1370:         sto->work1        = 0;
1371:         sto->work2        = 0;
1372:         sto->type         = VEC_SCATTER_MPI_TOALL;
1373:         ctx->todata       = (void*)sto;
1374:         ctx->fromdata     = 0;
1375:         ctx->ops->begin   = VecScatterBegin_MPI_ToAll;
1376:         ctx->ops->end     = 0;
1377:         ctx->ops->destroy = VecScatterDestroy_MPI_ToAll;
1378:         ctx->ops->copy    = VecScatterCopy_MPI_ToAll;
1379:         ctx->ops->view    = VecScatterView_MPI_ToAll;
1380:         PetscInfo(xin,"Special case: all processors get entire parallel vector\n");
1381:         goto functionend;
1382:       }
1383:     } else {
1384:       MPI_Allreduce(&totalv,&cando,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)xin));
1385:     }

1387:     /* test for special case of processor 0 getting entire vector */
1388:     /* contains check that PetscMPIInt can handle the sizes needed */
1389:     totalv = PETSC_FALSE;
1390:     if (ix_type == IS_STRIDE_ID && iy_type == IS_STRIDE_ID) {
1391:       PetscInt             i,nx,ny,to_first,to_step,from_first,from_step,N;
1392:       PetscMPIInt          rank,*count = NULL,*displx;
1393:       VecScatter_MPI_ToAll *sto = NULL;

1395:       PetscObjectGetComm((PetscObject)xin,&comm);
1396:       MPI_Comm_rank(comm,&rank);
1397:       ISGetLocalSize(ix,&nx);
1398:       ISStrideGetInfo(ix,&from_first,&from_step);
1399:       ISGetLocalSize(iy,&ny);
1400:       ISStrideGetInfo(iy,&to_first,&to_step);
1401:       if (nx != ny) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local scatter sizes don't match");
1402:       if (!rank) {
1403:         VecGetSize(xin,&N);
1404:         if (nx != N) totalv = PETSC_FALSE;
1405:         else if (from_first == 0        && from_step == 1 &&
1406:                  from_first == to_first && from_step == to_step) totalv = PETSC_TRUE;
1407:         else totalv = PETSC_FALSE;
1408:       } else {
1409:         if (!nx) totalv = PETSC_TRUE;
1410:         else     totalv = PETSC_FALSE;
1411:       }
1412:       /* cannot use MPIU_Allreduce() since this call matches the MPI_Allreduce() in the else statement below */
1413:       MPI_Allreduce(&totalv,&cando,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)xin));

1415: #if defined(PETSC_USE_64BIT_INDICES)
1416:       if (cando && (yin->map->N < PETSC_MPI_INT_MAX)) {
1417: #else
1418:       if (cando) {
1419: #endif
1420:         MPI_Comm_size(PetscObjectComm((PetscObject)ctx),&size);
1421:         PetscMalloc3(1,&sto,size,&count,size,&displx);
1422:         range = xin->map->range;
1423:         for (i=0; i<size; i++) {
1424:           PetscMPIIntCast(range[i+1] - range[i],count+i);
1425:           PetscMPIIntCast(range[i],displx+i);
1426:         }
1427:         sto->count        = count;
1428:         sto->displx       = displx;
1429:         sto->work1        = 0;
1430:         sto->work2        = 0;
1431:         sto->type         = VEC_SCATTER_MPI_TOONE;
1432:         ctx->todata       = (void*)sto;
1433:         ctx->fromdata     = 0;
1434:         ctx->ops->begin   = VecScatterBegin_MPI_ToOne;
1435:         ctx->ops->end     = 0;
1436:         ctx->ops->destroy = VecScatterDestroy_MPI_ToAll;
1437:         ctx->ops->copy    = VecScatterCopy_MPI_ToAll;
1438:         ctx->ops->view    = VecScatterView_MPI_ToAll;
1439:         PetscInfo(xin,"Special case: processor zero gets entire parallel vector, rest get none\n");
1440:         goto functionend;
1441:       }
1442:     } else {
1443:       MPI_Allreduce(&totalv,&cando,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)xin));
1444:     }

1446:     PetscObjectTypeCompare((PetscObject)ix,ISBLOCK,&ixblock);
1447:     PetscObjectTypeCompare((PetscObject)iy,ISBLOCK,&iyblock);
1448:     PetscObjectTypeCompare((PetscObject)iy,ISSTRIDE,&iystride);
1449:     if (ixblock) {
1450:       /* special case block to block */
1451:       if (iyblock) {
1452:         PetscInt       nx,ny,bsx,bsy;
1453:         const PetscInt *idx,*idy;
1454:         ISGetBlockSize(iy,&bsy);
1455:         ISGetBlockSize(ix,&bsx);
1456:         if (bsx == bsy && VecScatterOptimizedBS(bsx)) {
1457:           ISBlockGetLocalSize(ix,&nx);
1458:           ISBlockGetIndices(ix,&idx);
1459:           ISBlockGetLocalSize(iy,&ny);
1460:           ISBlockGetIndices(iy,&idy);
1461:           if (nx != ny) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local scatter sizes don't match");
1462:           VecScatterCreate_PtoS(nx,idx,ny,idy,xin,yin,bsx,ctx);
1463:           ISBlockRestoreIndices(ix,&idx);
1464:           ISBlockRestoreIndices(iy,&idy);
1465:           PetscInfo(xin,"Special case: blocked indices\n");
1466:           goto functionend;
1467:         }
1468:         /* special case block to stride */
1469:       } else if (iystride) {
1470:         PetscInt ystart,ystride,ysize,bsx;
1471:         ISStrideGetInfo(iy,&ystart,&ystride);
1472:         ISGetLocalSize(iy,&ysize);
1473:         ISGetBlockSize(ix,&bsx);
1474:         /* see if stride index set is equivalent to block index set */
1475:         if (VecScatterOptimizedBS(bsx) && ((ystart % bsx) == 0) && (ystride == 1) && ((ysize % bsx) == 0)) {
1476:           PetscInt       nx,il,*idy;
1477:           const PetscInt *idx;
1478:           ISBlockGetLocalSize(ix,&nx);
1479:           ISBlockGetIndices(ix,&idx);
1480:           if (ysize != bsx*nx) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local scatter sizes don't match");
1481:           PetscMalloc1(nx,&idy);
1482:           if (nx) {
1483:             idy[0] = ystart/bsx;
1484:             for (il=1; il<nx; il++) idy[il] = idy[il-1] + 1;
1485:           }
1486:           VecScatterCreate_PtoS(nx,idx,nx,idy,xin,yin,bsx,ctx);
1487:           PetscFree(idy);
1488:           ISBlockRestoreIndices(ix,&idx);
1489:           PetscInfo(xin,"Special case: blocked indices to stride\n");
1490:           goto functionend;
1491:         }
1492:       }
1493:     }
1494:     /* leftover general case */
1495:     {
1496:       PetscInt       nx,ny;
1497:       const PetscInt *idx,*idy;
1498:       ISGetLocalSize(ix,&nx);
1499:       ISGetIndices(ix,&idx);
1500:       ISGetLocalSize(iy,&ny);
1501:       ISGetIndices(iy,&idy);
1502:       if (nx != ny) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local scatter sizes don't match (%D %D)",nx,ny);
1503:       VecScatterCreate_PtoS(nx,idx,ny,idy,xin,yin,1,ctx);
1504:       ISRestoreIndices(ix,&idx);
1505:       ISRestoreIndices(iy,&idy);
1506:       PetscInfo(xin,"General case: MPI to Seq\n");
1507:       goto functionend;
1508:     }
1509:   }
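
  /*
     Editor's note (illustrative sketch): the "all processors get entire vector"
     special case above is what the convenience routine VecScatterCreateToAll()
     is normally used to reach.  Assuming a parallel vector x, one might write

       Vec        y;
       VecScatter toall;

       VecScatterCreateToAll(x,&toall,&y);
       VecScatterBegin(toall,x,y,INSERT_VALUES,SCATTER_FORWARD);
       VecScatterEnd(toall,x,y,INSERT_VALUES,SCATTER_FORWARD);

     after which every process holds a sequential copy y of all of x.
  */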
1510:   /* ---------------------------------------------------------------------------*/
1511:   if (xin_type == VEC_SEQ_ID && yin_type == VEC_MPI_ID) {
1512:     /* ===========================================================================================================
1513:           Check for special cases
1514:        ==========================================================================================================*/
1515:     /* special case local copy portion */
1516:     islocal = PETSC_FALSE;
1517:     if (ix_type == IS_STRIDE_ID && iy_type == IS_STRIDE_ID) {
1518:       PetscInt              nx,ny,to_first,to_step,from_step,start,end,from_first,min,max;
1519:       VecScatter_Seq_Stride *from = NULL,*to = NULL;

1521:       VecGetOwnershipRange(yin,&start,&end);
1522:       ISGetLocalSize(ix,&nx);
1523:       ISStrideGetInfo(ix,&from_first,&from_step);
1524:       ISGetLocalSize(iy,&ny);
1525:       ISStrideGetInfo(iy,&to_first,&to_step);
1526:       if (nx != ny) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local scatter sizes don't match");
1527:       ISGetMinMax(iy,&min,&max);
1528:       if (min >= start && max < end) islocal = PETSC_TRUE;
1529:       else islocal = PETSC_FALSE;
1530:       /* cannot use MPIU_Allreduce() since this call matches the MPI_Allreduce() in the else statement below */
1531:       MPI_Allreduce(&islocal,&cando,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)yin));
1532:       if (cando) {
1533:         PetscMalloc2(1,&to,1,&from);
1534:         to->n             = nx;
1535:         to->first         = to_first-start;
1536:         to->step          = to_step;
1537:         from->n           = nx;
1538:         from->first       = from_first;
1539:         from->step        = from_step;
1540:         to->type          = VEC_SCATTER_SEQ_STRIDE;
1541:         from->type        = VEC_SCATTER_SEQ_STRIDE;
1542:         ctx->todata       = (void*)to;
1543:         ctx->fromdata     = (void*)from;
1544:         ctx->ops->begin   = VecScatterBegin_SSToSS;
1545:         ctx->ops->end     = 0;
1546:         ctx->ops->destroy = VecScatterDestroy_SSToSS;
1547:         ctx->ops->copy    = VecScatterCopy_SSToSS;
1548:         ctx->ops->view    = VecScatterView_SSToSS;
1549:         PetscInfo(xin,"Special case: sequential stride to MPI stride\n");
1550:         goto functionend;
1551:       }
1552:     } else {
1553:       MPI_Allreduce(&islocal,&cando,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)yin));
1554:     }
1555:     /* special case block to stride */
1556:     if (ix_type == IS_BLOCK_ID && iy_type == IS_STRIDE_ID) {
1557:       PetscInt ystart,ystride,ysize,bsx;
1558:       ISStrideGetInfo(iy,&ystart,&ystride);
1559:       ISGetLocalSize(iy,&ysize);
1560:       ISGetBlockSize(ix,&bsx);
1561:       /* see if stride index set is equivalent to block index set */
1562:       if (VecScatterOptimizedBS(bsx) && ((ystart % bsx) == 0) && (ystride == 1) && ((ysize % bsx) == 0)) {
1563:         PetscInt       nx,il,*idy;
1564:         const PetscInt *idx;
1565:         ISBlockGetLocalSize(ix,&nx);
1566:         ISBlockGetIndices(ix,&idx);
1567:         if (ysize != bsx*nx) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local scatter sizes don't match");
1568:         PetscMalloc1(nx,&idy);
1569:         if (nx) {
1570:           idy[0] = ystart/bsx;
1571:           for (il=1; il<nx; il++) idy[il] = idy[il-1] + 1;
1572:         }
1573:         VecScatterCreate_StoP(nx,idx,nx,idy,xin,yin,bsx,ctx);
1574:         PetscFree(idy);
1575:         ISBlockRestoreIndices(ix,&idx);
1576:         PetscInfo(xin,"Special case: blocked indices to stride\n");
1577:         goto functionend;
1578:       }
1579:     }

1581:     /* general case */
1582:     {
1583:       PetscInt       nx,ny;
1584:       const PetscInt *idx,*idy;
1585:       ISGetLocalSize(ix,&nx);
1586:       ISGetIndices(ix,&idx);
1587:       ISGetLocalSize(iy,&ny);
1588:       ISGetIndices(iy,&idy);
1589:       if (nx != ny) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local scatter sizes don't match");
1590:       VecScatterCreate_StoP(nx,idx,ny,idy,xin,yin,1,ctx);
1591:       ISRestoreIndices(ix,&idx);
1592:       ISRestoreIndices(iy,&idy);
1593:       PetscInfo(xin,"General case: Seq to MPI\n");
1594:       goto functionend;
1595:     }
1596:   }
1597:   /* ---------------------------------------------------------------------------*/
1598:   if (xin_type == VEC_MPI_ID && yin_type == VEC_MPI_ID) {
1599:     /* no special cases for now */
1600:     PetscInt       nx,ny;
1601:     const PetscInt *idx,*idy;
1602:     ISGetLocalSize(ix,&nx);
1603:     ISGetIndices(ix,&idx);
1604:     ISGetLocalSize(iy,&ny);
1605:     ISGetIndices(iy,&idy);
1606:     if (nx != ny) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local scatter sizes don't match");
1607:     VecScatterCreate_PtoP(nx,idx,ny,idy,xin,yin,1,ctx);
1608:     ISRestoreIndices(ix,&idx);
1609:     ISRestoreIndices(iy,&idy);
1610:     PetscInfo(xin,"General case: MPI to MPI\n");
1611:     goto functionend;
1612:   }

1614:   functionend:
1615:   *newctx = ctx;
1616:   ISDestroy(&tix);
1617:   ISDestroy(&tiy);
1618:   VecScatterViewFromOptions(ctx,NULL,"-vecscatter_view");
1619:   return(0);
1620: }

1622: /* ------------------------------------------------------------------*/
1623: /*@
1624:    VecScatterGetMerged - Returns true if the scatter is completed in the call to VecScatterBegin(),
1625:       so that the matching VecScatterEnd() does nothing

1627:    Not Collective

1629:    Input Parameter:
1630: .   ctx - scatter context created with VecScatterCreate()

1632:    Output Parameter:
1633: .   flg - PETSC_TRUE if both the begin and the end of the scatter are performed during VecScatterBegin()

1635:    Level: developer

1637: .seealso: VecScatterCreate(), VecScatterEnd(), VecScatterBegin()
1638: @*/
1639: PetscErrorCode  VecScatterGetMerged(VecScatter ctx,PetscBool  *flg)
1640: {
1643:   *flg = ctx->beginandendtogether;
1644:   return(0);
1645: }
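
/*
   Editor's note (illustrative sketch): a caller can use VecScatterGetMerged()
   to learn whether the end phase is a no-op; calling VecScatterEnd() anyway is
   always safe, since it does no communication for merged scatters.

     PetscBool merged;

     VecScatterGetMerged(sctx,&merged);
     VecScatterBegin(sctx,x,y,INSERT_VALUES,SCATTER_FORWARD);
     if (!merged) VecScatterEnd(sctx,x,y,INSERT_VALUES,SCATTER_FORWARD);
*/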

1647: /*@
1648:    VecScatterBegin - Begins a generalized scatter from one vector to
1649:    another. Complete the scattering phase with VecScatterEnd().

1651:    Neighbor-wise Collective on VecScatter and Vec

1653:    Input Parameters:
1654: +  inctx - scatter context generated by VecScatterCreate()
1655: .  x - the vector from which we scatter
1656: .  y - the vector to which we scatter
1657: .  addv - either ADD_VALUES or INSERT_VALUES; with INSERT_VALUES, any location
1658:           not scattered to retains its old value, i.e. the vector is NOT first zeroed.
1659: -  mode - the scattering mode, usually SCATTER_FORWARD.  The available modes are:
1660:     SCATTER_FORWARD or SCATTER_REVERSE


1663:    Level: intermediate

1665:    Options Database: See VecScatterCreate()

1667:    Notes:
1668:    The vectors x and y need not be the same vectors used in the call
1669:    to VecScatterCreate(), but x must have the same parallel data layout
1670:    as that passed in as the x to VecScatterCreate(), similarly for the y.
1671:    Most likely they have been obtained from VecDuplicate().

1673:    You cannot change the values in the input vector between the calls to VecScatterBegin()
1674:    and VecScatterEnd().

1676:    If you use SCATTER_REVERSE the two arguments x and y should be reversed relative
1677:    to the SCATTER_FORWARD call.

1679:    y[iy[i]] = x[ix[i]], for i=0,...,ni-1

1681:    This scatter is far more general than the conventional
1682:    scatter, since it can be a gather or a scatter or a combination,
1683:    depending on the indices ix and iy.  If x is a parallel vector and y
1684:    is sequential, VecScatterBegin() can serve to gather values to a
1685:    single processor.  Similarly, if y is parallel and x sequential, the
1686:    routine can scatter from one processor to many processors.

1688:    Concepts: scatter^between vectors
1689:    Concepts: gather^between vectors

1691: .seealso: VecScatterCreate(), VecScatterEnd()
1692: @*/
1693: PetscErrorCode  VecScatterBegin(VecScatter inctx,Vec x,Vec y,InsertMode addv,ScatterMode mode)
1694: {
1696: #if defined(PETSC_USE_DEBUG)
1697:   PetscInt       to_n,from_n;
1698: #endif
1703:   if (inctx->inuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE," Scatter ctx already in use");

1705: #if defined(PETSC_USE_DEBUG)
1706:   /*
1707:      Error checking to make sure these vectors match the vectors used
1708:    to create the vector scatter context. -1 in the from_n and to_n indicate the
1709:    vector lengths are unknown (for example with mapped scatters) and thus
1710:    no error checking is performed.
1711:   */
1712:   if (inctx->from_n >= 0 && inctx->to_n >= 0) {
1713:     VecGetLocalSize(x,&from_n);
1714:     VecGetLocalSize(y,&to_n);
1715:     if (mode & SCATTER_REVERSE) {
1716:       if (to_n != inctx->from_n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Vector wrong size %D for scatter %D (scatter reverse and vector to != ctx from size)",to_n,inctx->from_n);
1717:       if (from_n != inctx->to_n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Vector wrong size %D for scatter %D (scatter reverse and vector from != ctx to size)",from_n,inctx->to_n);
1718:     } else {
1719:       if (to_n != inctx->to_n)     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Vector wrong size %D for scatter %D (scatter forward and vector to != ctx to size)",to_n,inctx->to_n);
1720:       if (from_n != inctx->from_n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Vector wrong size %D for scatter %D (scatter forward and vector from != ctx from size)",from_n,inctx->from_n);
1721:     }
1722:   }
1723: #endif

1725:   inctx->inuse = PETSC_TRUE;
1726:   PetscLogEventBarrierBegin(VEC_ScatterBarrier,0,0,0,0,PetscObjectComm((PetscObject)inctx));
1727:   (*inctx->ops->begin)(inctx,x,y,addv,mode);
1728:   if (inctx->beginandendtogether && inctx->ops->end) {
1729:     inctx->inuse = PETSC_FALSE;
1730:     (*inctx->ops->end)(inctx,x,y,addv,mode);
1731:   }
1732:   PetscLogEventBarrierEnd(VEC_ScatterBarrier,0,0,0,0,PetscObjectComm((PetscObject)inctx));
1733:   return(0);
1734: }
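
/*
   Editor's note (illustrative sketch of the manual page above; error checking
   omitted): gather the first two entries of a parallel vector onto every
   process, then push them back with a reverse scatter.

     Vec        x,y;
     IS         ix,iy;
     VecScatter sctx;

     VecCreateMPI(PETSC_COMM_WORLD,PETSC_DECIDE,100,&x);
     VecCreateSeq(PETSC_COMM_SELF,2,&y);
     ISCreateStride(PETSC_COMM_SELF,2,0,1,&ix);
     ISCreateStride(PETSC_COMM_SELF,2,0,1,&iy);
     VecScatterCreate(x,ix,y,iy,&sctx);
     VecScatterBegin(sctx,x,y,INSERT_VALUES,SCATTER_FORWARD);
     VecScatterEnd(sctx,x,y,INSERT_VALUES,SCATTER_FORWARD);
     VecScatterBegin(sctx,y,x,INSERT_VALUES,SCATTER_REVERSE);
     VecScatterEnd(sctx,y,x,INSERT_VALUES,SCATTER_REVERSE);
     VecScatterDestroy(&sctx);
*/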

1736: /* --------------------------------------------------------------------*/
1737: /*@
1738:    VecScatterEnd - Ends a generalized scatter from one vector to another.  Call
1739:    after first calling VecScatterBegin().

1741:    Neighbor-wise Collective on VecScatter and Vec

1743:    Input Parameters:
1744: +  ctx - scatter context generated by VecScatterCreate()
1745: .  x - the vector from which we scatter
1746: .  y - the vector to which we scatter
1747: .  addv - either ADD_VALUES or INSERT_VALUES.
1748: -  mode - the scattering mode, usually SCATTER_FORWARD.  The available modes are:
1749:      SCATTER_FORWARD, SCATTER_REVERSE

1751:    Level: intermediate

1753:    Notes:
1754:    If you use SCATTER_REVERSE the arguments x and y should be reversed relative to the SCATTER_FORWARD call.

1756:    y[iy[i]] = x[ix[i]], for i=0,...,ni-1

1758: .seealso: VecScatterBegin(), VecScatterCreate()
1759: @*/
1760: PetscErrorCode  VecScatterEnd(VecScatter ctx,Vec x,Vec y,InsertMode addv,ScatterMode mode)
1761: {

1768:   ctx->inuse = PETSC_FALSE;
1769:   if (!ctx->ops->end) return(0);
1770:   if (!ctx->beginandendtogether) {
1771:     PetscLogEventBegin(VEC_ScatterEnd,ctx,x,y,0);
1772:     (*(ctx)->ops->end)(ctx,x,y,addv,mode);
1773:     PetscLogEventEnd(VEC_ScatterEnd,ctx,x,y,0);
1774:   }
1775:   return(0);
1776: }

1778: /*@C
1779:    VecScatterDestroy - Destroys a scatter context created by
1780:    VecScatterCreate().

1782:    Collective on VecScatter

1784:    Input Parameter:
1785: .  ctx - the scatter context

1787:    Level: intermediate

1789: .seealso: VecScatterCreate(), VecScatterCopy()
1790: @*/
1791: PetscErrorCode  VecScatterDestroy(VecScatter *ctx)
1792: {

1796:   if (!*ctx) return(0);
1798:   if ((*ctx)->inuse) SETERRQ(((PetscObject)(*ctx))->comm,PETSC_ERR_ARG_WRONGSTATE,"Scatter context is in use");
1799:   if (--((PetscObject)(*ctx))->refct > 0) {*ctx = 0; return(0);}

1801:   /* if memory was published with SAWs then destroy it */
1802:   PetscObjectSAWsViewOff((PetscObject)(*ctx));
1803:   (*(*ctx)->ops->destroy)(*ctx);
1804: #if defined(PETSC_HAVE_CUSP)
1805:   VecScatterCUSPIndicesDestroy((PetscCUSPIndices*)&((*ctx)->spptr));
1806: #elif defined(PETSC_HAVE_VECCUDA)
1807:   VecScatterCUDAIndicesDestroy((PetscCUDAIndices*)&((*ctx)->spptr));
1808: #endif
1809:   PetscHeaderDestroy(ctx);
1810:   return(0);
1811: }

1813: /*@
1814:    VecScatterCopy - Makes a copy of a scatter context.

1816:    Collective on VecScatter

1818:    Input Parameter:
1819: .  sctx - the scatter context

1821:    Output Parameter:
1822: .  ctx - the context copy

1824:    Level: advanced

1826: .seealso: VecScatterCreate(), VecScatterDestroy()
1827: @*/
1828: PetscErrorCode  VecScatterCopy(VecScatter sctx,VecScatter *ctx)
1829: {

1835:   if (!sctx->ops->copy) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot copy this type");
1836:   PetscHeaderCreate(*ctx,VEC_SCATTER_CLASSID,"VecScatter","VecScatter","Vec",PetscObjectComm((PetscObject)sctx),VecScatterDestroy,VecScatterView);
1837:   (*ctx)->to_n   = sctx->to_n;
1838:   (*ctx)->from_n = sctx->from_n;
1839:   (*sctx->ops->copy)(sctx,*ctx);
1840:   return(0);
1841: }
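
/*
   Editor's note (illustrative sketch): since a scatter context may not be
   reused while it is still in use (see the check in VecScatterBegin()), one
   might copy a context to keep two begin/end pairs outstanding at once.

     VecScatter sctx2;

     VecScatterCopy(sctx,&sctx2);
     VecScatterBegin(sctx,x1,y1,INSERT_VALUES,SCATTER_FORWARD);
     VecScatterBegin(sctx2,x2,y2,INSERT_VALUES,SCATTER_FORWARD);
     VecScatterEnd(sctx,x1,y1,INSERT_VALUES,SCATTER_FORWARD);
     VecScatterEnd(sctx2,x2,y2,INSERT_VALUES,SCATTER_FORWARD);
     VecScatterDestroy(&sctx2);
*/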


1844: /* ------------------------------------------------------------------*/
1845: /*@C
1846:    VecScatterView - Views a vector scatter context.

1848:    Collective on VecScatter

1850:    Input Parameters:
1851: +  ctx - the scatter context
1852: -  viewer - the viewer for displaying the context

1854:    Level: intermediate

1856: @*/
1857: PetscErrorCode  VecScatterView(VecScatter ctx,PetscViewer viewer)
1858: {

1863:   if (!viewer) {
1864:     PetscViewerASCIIGetStdout(PetscObjectComm((PetscObject)ctx),&viewer);
1865:   }
1867:   if (ctx->ops->view) {
1868:     (*ctx->ops->view)(ctx,viewer);
1869:   }
1870:   return(0);
1871: }

1873: /*@C
1874:    VecScatterRemap - Remaps the "from" and "to" indices in a
1875:    vector scatter context. FOR EXPERTS ONLY!

1877:    Collective on VecScatter

1879:    Input Parameters:
1880: +  scat - vector scatter context
1881: .  rto   - remapping for "to" indices (may be NULL)
1882: -  rfrom - remapping for "from" indices (may be NULL)

1884:    Level: developer

1886:    Notes: In the parallel case the todata is actually the indices
1887:           from which the data is TAKEN! The fromdata is where the
1888:           data is finally put. This is VERY VERY confusing!

1890:           In the sequential case the todata is the indices where the
1891:           data is put and the fromdata is where it is taken from.
1892:           This is backwards from the parallel case! CRY! CRY! CRY!

1894: @*/
1895: PetscErrorCode  VecScatterRemap(VecScatter scat,PetscInt *rto,PetscInt *rfrom)
1896: {
1897:   VecScatter_Seq_General *to,*from;
1898:   VecScatter_MPI_General *mto;
1899:   PetscInt               i;


1906:   from = (VecScatter_Seq_General*)scat->fromdata;
1907:   mto  = (VecScatter_MPI_General*)scat->todata;

1909:   if (mto->type == VEC_SCATTER_MPI_TOALL) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Not for to all scatter");

1911:   if (rto) {
1912:     if (mto->type == VEC_SCATTER_MPI_GENERAL) {
1913:       /* handle off processor parts */
1914:       for (i=0; i<mto->starts[mto->n]; i++) mto->indices[i] = rto[mto->indices[i]];

1916:       /* handle local part */
1917:       to = &mto->local;
1918:       for (i=0; i<to->n; i++) to->vslots[i] = rto[to->vslots[i]];
1919:     } else if (from->type == VEC_SCATTER_SEQ_GENERAL) {
1920:       for (i=0; i<from->n; i++) from->vslots[i] = rto[from->vslots[i]];
1921:     } else if (from->type == VEC_SCATTER_SEQ_STRIDE) {
1922:       VecScatter_Seq_Stride *sto = (VecScatter_Seq_Stride*)from;

1924:       /* if the remapping is the identity and stride is identity then skip remap */
1925:       if (sto->step == 1 && sto->first == 0) {
1926:         for (i=0; i<sto->n; i++) {
1927:           if (rto[i] != i) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Unable to remap such scatters");
1928:         }
1929:       } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Unable to remap such scatters");
1930:     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Unable to remap such scatters");
1931:   }

1933:   if (rfrom) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Unable to remap the FROM in scatters yet");

1935:   /*
1936:      Mark the vector lengths as unknown because we do not know the
1937:    lengths of the remapped vectors
1938:   */
1939:   scat->from_n = -1;
1940:   scat->to_n   = -1;
1941:   return(0);
1942: }
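
/*
   Editor's note (illustrative sketch): rto is indexed by the scatter's current
   "to" slots, so it must cover every existing index value.  For example, to
   apply a hypothetical permutation perm[] to the destination indices:

     VecScatterRemap(scat,perm,NULL);

   Remapping the "from" indices (rfrom) is not supported, as the error above shows.
*/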

1944: /*
1945:  VecScatterGetTypes_Private - Returns the scatter types.

1947:  scatter - The scatter.
1948:  from    - Upon exit this contains the type of the from scatter.
1949:  to      - Upon exit this contains the type of the to scatter.
1950: */
1951: PetscErrorCode VecScatterGetTypes_Private(VecScatter scatter,VecScatterType *from,VecScatterType *to)
1952: {
1953:   VecScatter_Common* fromdata = (VecScatter_Common*)scatter->fromdata;
1954:   VecScatter_Common* todata   = (VecScatter_Common*)scatter->todata;

1957:   *from = fromdata->type;
1958:   *to = todata->type;
1959:   return(0);
1960: }


1963: /*
1964:   VecScatterIsSequential_Private - Returns true if the scatter is sequential.

1966:   scatter - The scatter.
1967:   flag    - Upon exit flag is true if the scatter is of type VecScatter_Seq_General 
1968:             or VecScatter_Seq_Stride; otherwise flag is false.
1969: */
1970: PetscErrorCode VecScatterIsSequential_Private(VecScatter_Common *scatter,PetscBool *flag)
1971: {
1972:   VecScatterType scatterType = scatter->type;

1975:   if (scatterType == VEC_SCATTER_SEQ_GENERAL || scatterType == VEC_SCATTER_SEQ_STRIDE) {
1976:     *flag = PETSC_TRUE;
1977:   } else {
1978:     *flag = PETSC_FALSE;
1979:   }
1980:   return(0);
1981: }

1983: #if defined(PETSC_HAVE_CUSP) || defined(PETSC_HAVE_VECCUDA)

1985: /*@C
1986:    VecScatterInitializeForGPU - Initializes a generalized scatter from one vector
1987:    to another for GPU-based computation.

1989:    Input Parameters:
1990: +  inctx - scatter context generated by VecScatterCreate()
1991: .  x - the vector from which we scatter
1992: -  mode - the scattering mode, usually SCATTER_FORWARD.  The available modes are:
1993:     SCATTER_FORWARD or SCATTER_REVERSE

1995:   Level: intermediate

1997:   Notes:
1998:    Effectively, this function creates all the necessary indexing buffers and work
1999:    vectors needed to move only those data points in a vector which need to
2000:    be communicated across ranks. This is done the first time this function is
2001:    called. Currently, this is only used in the context of the parallel SpMV call in
2002:    MatMult_MPIAIJCUSP or MatMult_MPIAIJCUSPARSE.

2004:    This function is executed before the call to MatMult. This enables the memory
2005:    transfers to be overlapped with the MatMult SpMV kernel call.

2007: .seealso: VecScatterFinalizeForGPU(), VecScatterCreate(), VecScatterEnd()
2008: @*/
2009: PETSC_EXTERN PetscErrorCode VecScatterInitializeForGPU(VecScatter inctx,Vec x,ScatterMode mode)
2010: {
2012:   VecScatter_MPI_General *to,*from;
2013:   PetscErrorCode         ierr;
2014:   PetscInt               i,*indices,*sstartsSends,*sstartsRecvs,nrecvs,nsends,bs;
2015:   PetscBool              isSeq1,isSeq2;

2018:   VecScatterIsSequential_Private((VecScatter_Common*)inctx->fromdata,&isSeq1);
2019:   VecScatterIsSequential_Private((VecScatter_Common*)inctx->todata,&isSeq2);
2020:   if (isSeq1 || isSeq2) {
2021:     return(0);
2022:   }
2023:   if (mode & SCATTER_REVERSE) {
2024:     to     = (VecScatter_MPI_General*)inctx->fromdata;
2025:     from   = (VecScatter_MPI_General*)inctx->todata;
2026:   } else {
2027:     to     = (VecScatter_MPI_General*)inctx->todata;
2028:     from   = (VecScatter_MPI_General*)inctx->fromdata;
2029:   }
2030:   bs           = to->bs;
2031:   nrecvs       = from->n;
2032:   nsends       = to->n;
2033:   indices      = to->indices;
2034:   sstartsSends = to->starts;
2035:   sstartsRecvs = from->starts;
2036: #if defined(PETSC_HAVE_CUSP)
2037:   if (x->valid_GPU_array != PETSC_CUSP_UNALLOCATED && (nsends>0 || nrecvs>0)) {
2038: #else
2039:   if (x->valid_GPU_array != PETSC_CUDA_UNALLOCATED && (nsends>0 || nrecvs>0)) {
2040: #endif
2041:     if (!inctx->spptr) {
2042:       PetscInt k,*tindicesSends,*sindicesSends,*tindicesRecvs,*sindicesRecvs;
2043:       PetscInt ns = sstartsSends[nsends],nr = sstartsRecvs[nrecvs];
2044:       /* Here we create indices for both the senders and receivers. */
2045:       PetscMalloc1(ns,&tindicesSends);
2046:       PetscMalloc1(nr,&tindicesRecvs);

2048:       PetscMemcpy(tindicesSends,indices,ns*sizeof(PetscInt));
2049:       PetscMemcpy(tindicesRecvs,from->indices,nr*sizeof(PetscInt));

2051:       PetscSortRemoveDupsInt(&ns,tindicesSends);
2052:       PetscSortRemoveDupsInt(&nr,tindicesRecvs);

2054:       PetscMalloc1(bs*ns,&sindicesSends);
2055:       PetscMalloc1(from->bs*nr,&sindicesRecvs);

2057:       /* sender indices */
2058:       for (i=0; i<ns; i++) {
2059:         for (k=0; k<bs; k++) sindicesSends[i*bs+k] = tindicesSends[i]+k;
2060:       }
2061:       PetscFree(tindicesSends);

2063:       /* receiver indices */
2064:       for (i=0; i<nr; i++) {
2065:         for (k=0; k<from->bs; k++) sindicesRecvs[i*from->bs+k] = tindicesRecvs[i]+k;
2066:       }
2067:       PetscFree(tindicesRecvs);

2069:       /* create GPU indices, work vectors, ... */
2070: #if defined(PETSC_HAVE_CUSP)
2071:       VecScatterCUSPIndicesCreate_PtoP(ns*bs,sindicesSends,nr*from->bs,sindicesRecvs,(PetscCUSPIndices*)&inctx->spptr);
2072: #else
2073:       VecScatterCUDAIndicesCreate_PtoP(ns*bs,sindicesSends,nr*from->bs,sindicesRecvs,(PetscCUDAIndices*)&inctx->spptr);
2074: #endif
2075:       PetscFree(sindicesSends);
2076:       PetscFree(sindicesRecvs);
2077:     }
2078:   }
2079:   return(0);
2080: }
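
/*
   Editor's note (illustrative sketch of the call sequence described in the
   manual page above, roughly as it appears inside a parallel SpMV; a->Mvctx
   and a->lvec are the conventional scatter and ghost-vector members of the
   MPIAIJ matrix data structure):

     VecScatterInitializeForGPU(a->Mvctx,xx,SCATTER_FORWARD);
     VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);
     ... local (diagonal-block) product overlaps the message passing ...
     VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);
     ... off-diagonal-block product consumes a->lvec ...
     VecScatterFinalizeForGPU(a->Mvctx);
*/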

2082: /*@C
2083:    VecScatterFinalizeForGPU - Finalizes a generalized scatter from one vector to
2084:    another for GPU-based computation.

2086:    Input Parameter:
2087: .  inctx - scatter context generated by VecScatterCreate()

2089:   Level: intermediate

2091:   Notes:
2092:    Effectively, this function resets the temporary buffer flags. Currently, this is
2093:    only used in the context of the parallel SpMV call in MatMult_MPIAIJCUSP
2094:    or MatMult_MPIAIJCUSPARSE. Once the MatMultAdd is finished, the GPU temporary
2095:    buffers used for messaging are no longer valid.

2097: .seealso: VecScatterInitializeForGPU(), VecScatterCreate(), VecScatterEnd()
2098: @*/
2099: PETSC_EXTERN PetscErrorCode VecScatterFinalizeForGPU(VecScatter inctx)
2100: {
2102:   return(0);
2103: }

2105: #endif