Actual source code: vscat.c

petsc-3.14.6 2021-03-30
Report Typos and Errors

  2: /*
  3:      Higher level code for creating scatters between vectors called by some of the implementations.

  5:      The routines check for special cases and then call the implementation function for the general cases
  6: */

  8: #include <petsc/private/vecscatterimpl.h>

 10: #if defined(PETSC_HAVE_CUDA)
 11: #include <petsc/private/cudavecimpl.h>
 12: #endif

 14: /*
 15:       This is special scatter code for when the entire parallel vector is copied to each processor.

 17:    This code was written by Cameron Cooper, Occidental College, Fall 1995,
 18:    will working at ANL as a SERS student.
 19: */
 20: static PetscErrorCode VecScatterBegin_MPI_ToAll(VecScatter ctx,Vec x,Vec y,InsertMode addv,ScatterMode mode)
 21: {
 23:   PetscInt       yy_n,xx_n;
 24:   PetscScalar    *xv,*yv;

 27:   VecGetLocalSize(y,&yy_n);
 28:   VecGetLocalSize(x,&xx_n);
 29:   VecGetArrayPair(x,y,&xv,&yv);

 31:   if (mode & SCATTER_REVERSE) {
 32:     PetscScalar          *xvt,*xvt2;
 33:     VecScatter_MPI_ToAll *scat = (VecScatter_MPI_ToAll*)ctx->todata;
 34:     PetscInt             i;
 35:     PetscMPIInt          *disply = scat->displx;

 37:     if (addv == INSERT_VALUES) {
 38:       PetscInt rstart,rend;
 39:       /*
 40:          copy the correct part of the local vector into the local storage of
 41:          the MPI one.  Note: this operation only makes sense if all the local
 42:          vectors have the same values
 43:       */
 44:       VecGetOwnershipRange(y,&rstart,&rend);
 45:       PetscArraycpy(yv,xv+rstart,yy_n);
 46:     } else {
 47:       MPI_Comm    comm;
 48:       PetscMPIInt rank;
 49:       PetscObjectGetComm((PetscObject)y,&comm);
 50:       MPI_Comm_rank(comm,&rank);
 51:       if (scat->work1) xvt = scat->work1;
 52:       else {
 53:         PetscMalloc1(xx_n,&xvt);
 54:         scat->work1 = xvt;
 55:       }
 56:       if (!rank) { /* I am the zeroth processor, values are accumulated here */
 57:         if   (scat->work2) xvt2 = scat->work2;
 58:         else {
 59:           PetscMalloc1(xx_n,&xvt2);
 60:           scat->work2 = xvt2;
 61:         }
 62:         MPI_Gatherv(yv,yy_n,MPIU_SCALAR,xvt2,scat->count,disply,MPIU_SCALAR,0,PetscObjectComm((PetscObject)ctx));
 63:         MPI_Reduce(xv,xvt,xx_n,MPIU_SCALAR,MPIU_SUM,0,PetscObjectComm((PetscObject)ctx));
 64:         if (addv == ADD_VALUES) {
 65:           for (i=0; i<xx_n; i++) xvt[i] += xvt2[i];
 66: #if !defined(PETSC_USE_COMPLEX)
 67:         } else if (addv == MAX_VALUES) {
 68:           for (i=0; i<xx_n; i++) xvt[i] = PetscMax(xvt[i],xvt2[i]);
 69: #endif
 70:         } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_UNKNOWN_TYPE,"Wrong insert option");
 71:         MPI_Scatterv(xvt,scat->count,disply,MPIU_SCALAR,yv,yy_n,MPIU_SCALAR,0,PetscObjectComm((PetscObject)ctx));
 72:       } else {
 73:         MPI_Gatherv(yv,yy_n,MPIU_SCALAR,NULL,NULL,NULL,MPIU_SCALAR,0,PetscObjectComm((PetscObject)ctx));
 74:         MPI_Reduce(xv,xvt,xx_n,MPIU_SCALAR,MPIU_SUM,0,PetscObjectComm((PetscObject)ctx));
 75:         MPI_Scatterv(NULL,scat->count,disply,MPIU_SCALAR,yv,yy_n,MPIU_SCALAR,0,PetscObjectComm((PetscObject)ctx));
 76:       }
 77:     }
 78:   } else {
 79:     PetscScalar          *yvt;
 80:     VecScatter_MPI_ToAll *scat = (VecScatter_MPI_ToAll*)ctx->todata;
 81:     PetscInt             i;
 82:     PetscMPIInt          *displx = scat->displx;

 84:     if (addv == INSERT_VALUES) {
 85:       MPI_Allgatherv(xv,xx_n,MPIU_SCALAR,yv,scat->count,displx,MPIU_SCALAR,PetscObjectComm((PetscObject)ctx));
 86:     } else {
 87:       if (scat->work1) yvt = scat->work1;
 88:       else {
 89:         PetscMalloc1(yy_n,&yvt);
 90:         scat->work1 = yvt;
 91:       }
 92:       MPI_Allgatherv(xv,xx_n,MPIU_SCALAR,yvt,scat->count,displx,MPIU_SCALAR,PetscObjectComm((PetscObject)ctx));
 93:       if (addv == ADD_VALUES) {
 94:         for (i=0; i<yy_n; i++) yv[i] += yvt[i];
 95: #if !defined(PETSC_USE_COMPLEX)
 96:       } else if (addv == MAX_VALUES) {
 97:         for (i=0; i<yy_n; i++) yv[i] = PetscMax(yv[i],yvt[i]);
 98: #endif
 99:       } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_UNKNOWN_TYPE,"Wrong insert option");
100:     }
101:   }
102:   VecRestoreArrayPair(x,y,&xv,&yv);
103:   return(0);
104: }

106: PetscErrorCode VecScatterView_MPI_ToAll(VecScatter in,PetscViewer viewer)
107: {
109:   PetscBool      isascii;

112:   PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&isascii);
113:   if (isascii) {
114:     PetscViewerASCIIPrintf(viewer,"Entire parallel vector is copied to each process\n");
115:   }
116:   return(0);
117: }

119: /*
120:       This is special scatter code for when the entire parallel vector is  copied to processor 0.

122: */
123: static PetscErrorCode VecScatterBegin_MPI_ToOne(VecScatter ctx,Vec x,Vec y,InsertMode addv,ScatterMode mode)
124: {
125:   PetscErrorCode    ierr;
126:   PetscMPIInt       rank;
127:   PetscInt          yy_n,xx_n;
128:   PetscScalar       *yv;
129:   const PetscScalar *xv;
130:   MPI_Comm          comm;

133:   VecGetLocalSize(y,&yy_n);
134:   VecGetLocalSize(x,&xx_n);
135:   VecGetArrayRead(x,&xv);
136:   VecGetArray(y,&yv);

138:   PetscObjectGetComm((PetscObject)x,&comm);
139:   MPI_Comm_rank(comm,&rank);

141:   /* --------  Reverse scatter; spread from processor 0 to other processors */
142:   if (mode & SCATTER_REVERSE) {
143:     PetscScalar          *yvt;
144:     VecScatter_MPI_ToAll *scat = (VecScatter_MPI_ToAll*)ctx->todata;
145:     PetscInt             i;
146:     PetscMPIInt          *disply = scat->displx;

148:     if (addv == INSERT_VALUES) {
149:       MPI_Scatterv((PetscScalar*)xv,scat->count,disply,MPIU_SCALAR,yv,yy_n,MPIU_SCALAR,0,PetscObjectComm((PetscObject)ctx));
150:     } else {
151:       if (scat->work2) yvt = scat->work2;
152:       else {
153:         PetscInt xx_nt;
154:         MPI_Allreduce(&xx_n,&xx_nt,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)y));
155:         PetscMalloc1(xx_nt,&yvt);
156:         scat->work2 = yvt;
157:       }
158:       MPI_Scatterv((PetscScalar*)xv,scat->count,disply,MPIU_SCALAR,yvt,yy_n,MPIU_SCALAR,0,PetscObjectComm((PetscObject)ctx));
159:       if (addv == ADD_VALUES) {
160:         for (i=0; i<yy_n; i++) yv[i] += yvt[i];
161: #if !defined(PETSC_USE_COMPLEX)
162:       } else if (addv == MAX_VALUES) {
163:         for (i=0; i<yy_n; i++) yv[i] = PetscMax(yv[i],yvt[i]);
164: #endif
165:       } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_UNKNOWN_TYPE,"Wrong insert option");
166:     }
167:   /* ---------  Forward scatter; gather all values onto processor 0 */
168:   } else {
169:     PetscScalar          *yvt  = NULL;
170:     VecScatter_MPI_ToAll *scat = (VecScatter_MPI_ToAll*)ctx->todata;
171:     PetscInt             i;
172:     PetscMPIInt          *displx = scat->displx;

174:     if (addv == INSERT_VALUES) {
175:       MPI_Gatherv((PetscScalar*)xv,xx_n,MPIU_SCALAR,yv,scat->count,displx,MPIU_SCALAR,0,PetscObjectComm((PetscObject)ctx));
176:     } else {
177:       if (!rank) {
178:         if (scat->work1) yvt = scat->work1;
179:         else {
180:           PetscMalloc1(yy_n,&yvt);
181:           scat->work1 = yvt;
182:         }
183:       }
184:       MPI_Gatherv((PetscScalar*)xv,xx_n,MPIU_SCALAR,yvt,scat->count,displx,MPIU_SCALAR,0,PetscObjectComm((PetscObject)ctx));
185:       if (!rank) {
186:         if (addv == ADD_VALUES) {
187:           for (i=0; i<yy_n; i++) yv[i] += yvt[i];
188: #if !defined(PETSC_USE_COMPLEX)
189:         } else if (addv == MAX_VALUES) {
190:           for (i=0; i<yy_n; i++) yv[i] = PetscMax(yv[i],yvt[i]);
191: #endif
192:         }  else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_UNKNOWN_TYPE,"Wrong insert option");
193:       }
194:     }
195:   }
196:   VecRestoreArrayRead(x,&xv);
197:   VecRestoreArray(y,&yv);
198:   return(0);
199: }

201: /*
202:        The follow to are used for both VecScatterBegin_MPI_ToAll() and VecScatterBegin_MPI_ToOne()
203: */
204: static PetscErrorCode VecScatterDestroy_MPI_ToAll(VecScatter ctx)
205: {
206:   VecScatter_MPI_ToAll *scat = (VecScatter_MPI_ToAll*)ctx->todata;
207:   PetscErrorCode       ierr;

210:   PetscFree(scat->work1);
211:   PetscFree(scat->work2);
212:   PetscFree3(ctx->todata,scat->count,scat->displx);
213:   return(0);
214: }
215: /* -------------------------------------------------------------------------*/
216: static PetscErrorCode VecScatterCopy_MPI_ToAll(VecScatter in,VecScatter out)
217: {
218:   VecScatter_MPI_ToAll *in_to = (VecScatter_MPI_ToAll*)in->todata,*sto;
219:   PetscErrorCode       ierr;
220:   PetscMPIInt          size,*count,*displx;
221:   PetscInt             i;

224:   out->ops->begin   = in->ops->begin;
225:   out->ops->end     = in->ops->end;
226:   out->ops->copy    = in->ops->copy;
227:   out->ops->destroy = in->ops->destroy;
228:   out->ops->view    = in->ops->view;

230:   MPI_Comm_size(PetscObjectComm((PetscObject)out),&size);
231:   PetscMalloc3(1,&sto,size,&count,size,&displx);
232:   sto->format = in_to->format;
233:   sto->count  = count;
234:   sto->displx = displx;
235:   for (i=0; i<size; i++) {
236:     sto->count[i]  = in_to->count[i];
237:     sto->displx[i] = in_to->displx[i];
238:   }
239:   sto->work1    = NULL;
240:   sto->work2    = NULL;
241:   out->todata   = (void*)sto;
242:   out->fromdata = (void*)0;
243:   return(0);
244: }

246: /*
247:   Create memcpy optimization plans based on indices of vector entries we want to scatter

249:    Input Parameters:
250:   +  n       - number of target processors
251:   .  starts  - [n+1] for the i-th processor, its associated indices are indices[starts[i], starts[i+1])
252:   .  indices - [] array storing indices. Its length is starts[n+1]
253:   -  bs      - block size

255:    Output Parameters:
256:   +  plan    - the memcpy plan
257: */
258: PetscErrorCode VecScatterMemcpyPlanCreate_Index(PetscInt n,const PetscInt *starts,const PetscInt *indices,PetscInt bs,VecScatterMemcpyPlan *plan)
259: {
261:   PetscInt       i,j,k,my_copies,n_copies=0,step;
262:   PetscBool      strided,has_strided;

265:   PetscMemzero(plan,sizeof(VecScatterMemcpyPlan));
266:   plan->n = n;
267:   PetscMalloc2(n,&plan->optimized,n+1,&plan->copy_offsets);

269:   /* check if each remote part of the scatter is made of copies, and count total_copies */
270:   for (i=0; i<n; i++) { /* for each target processor procs[i] */
271:     my_copies = 1; /* count num. of copies for procs[i] */
272:     for (j=starts[i]; j<starts[i+1]-1; j++) { /* go through indices from the first to the second to last */
273:       if (indices[j]+bs != indices[j+1]) my_copies++;
274:     }
275:     if (bs*(starts[i+1]-starts[i])*sizeof(PetscScalar)/my_copies >= 256) { /* worth using memcpy? */
276:       plan->optimized[i] = PETSC_TRUE;
277:       n_copies += my_copies;
278:     } else {
279:       plan->optimized[i] = PETSC_FALSE;
280:     }
281:   }

283:   /* do malloc with the recently known n_copies */
284:   PetscMalloc2(n_copies,&plan->copy_starts,n_copies,&plan->copy_lengths);

286:   /* analyze the total_copies one by one */
287:   k                     = 0; /* k-th copy */
288:   plan->copy_offsets[0] = 0;
289:   for (i=0; i<n; i++) { /* for each target processor procs[i] */
290:     if (plan->optimized[i]) {
291:       my_copies            = 1;
292:       plan->copy_starts[k] = indices[starts[i]];
293:       for (j=starts[i]; j<starts[i+1]-1; j++) {
294:         if (indices[j]+bs != indices[j+1]) { /* meet end of a copy (and next copy must exist) */
295:           my_copies++;
296:           plan->copy_starts[k+1] = indices[j+1];
297:           plan->copy_lengths[k]  = indices[j]+bs-plan->copy_starts[k];
298:           k++;
299:         }
300:       }
301:       /* set copy length of the last copy for this remote proc */
302:       plan->copy_lengths[k] = indices[j]+bs-plan->copy_starts[k];
303:       k++;
304:     }

306:     /* set offset for next proc. When optimized[i] is false, copy_offsets[i] = copy_offsets[i+1] */
307:     plan->copy_offsets[i+1] = k;
308:   }

310:   /* try the last chance to optimize. If a scatter is not memory copies, then is it strided? */
311:   has_strided = PETSC_FALSE;
312:   PetscMalloc3(n,&plan->stride_first,n,&plan->stride_step,n,&plan->stride_n);
313:   for (i=0; i<n; i++) { /* for each target processor procs[i] */
314:     if (!plan->optimized[i] && starts[i+1] - starts[i] >= 16) { /* few indices (<16) are not worth striding */
315:       strided = PETSC_TRUE;
316:       step    = indices[starts[i]+1] - indices[starts[i]];
317:       for (j=starts[i]; j<starts[i+1]-1; j++) {
318:         if (indices[j]+step != indices[j+1]) { strided = PETSC_FALSE; break; }
319:       }
320:       if (strided) {
321:         plan->optimized[i]    = PETSC_TRUE;
322:         plan->stride_first[i] = indices[starts[i]];
323:         plan->stride_step[i]  = step;
324:         plan->stride_n[i]     = starts[i+1] - starts[i];
325:         has_strided           = PETSC_TRUE;
326:       }
327:     }
328:   }
329:   /* if none is strided, free the arrays to save memory here and also in plan copying */
330:   if (!has_strided) { PetscFree3(plan->stride_first,plan->stride_step,plan->stride_n); }
331:   return(0);
332: }

334: /* Copy the memcpy plan from in to out */
335: PetscErrorCode VecScatterMemcpyPlanCopy(const VecScatterMemcpyPlan *in,VecScatterMemcpyPlan *out)
336: {

340:   PetscMemzero(out,sizeof(VecScatterMemcpyPlan));
341:   out->n = in->n;
342:   PetscMalloc2(in->n,&out->optimized,in->n+1,&out->copy_offsets);
343:   PetscMalloc2(in->copy_offsets[in->n],&out->copy_starts,in->copy_offsets[in->n],&out->copy_lengths);
344:   PetscArraycpy(out->optimized,in->optimized,in->n);
345:   PetscArraycpy(out->copy_offsets,in->copy_offsets,in->n+1);
346:   PetscArraycpy(out->copy_starts,in->copy_starts,in->copy_offsets[in->n]);
347:   PetscArraycpy(out->copy_lengths,in->copy_lengths,in->copy_offsets[in->n]);
348:   if (in->stride_first) {
349:     PetscMalloc3(in->n,&out->stride_first,in->n,&out->stride_step,in->n,&out->stride_n);
350:     PetscArraycpy(out->stride_first,in->stride_first,in->n);
351:     PetscArraycpy(out->stride_step,in->stride_step,in->n);
352:     PetscArraycpy(out->stride_n,in->stride_n,in->n);
353:   }
354:   return(0);
355: }

357: /* Destroy the vecscatter memcpy plan */
358: PetscErrorCode VecScatterMemcpyPlanDestroy(VecScatterMemcpyPlan *plan)
359: {

363:   PetscFree2(plan->optimized,plan->copy_offsets);
364:   PetscFree2(plan->copy_starts,plan->copy_lengths);
365:   PetscFree3(plan->stride_first,plan->stride_step,plan->stride_n);
366:   return(0);
367: }

369: /* =======================================================================*/
370: /*
371:    Blocksizes we have optimized scatters for
372: */

374: #define VecScatterOptimizedBS(mbs) (2 <= mbs)


377: static PetscErrorCode VecScatterCreate_PtoS(VecScatter ctx,PetscErrorCode (*vecscattercreatelocal_ptos)(PetscInt,const PetscInt*,PetscInt,const PetscInt*,Vec,Vec,PetscInt,VecScatter))
378: {
379:   PetscErrorCode    ierr;
380:   PetscMPIInt       size;
381:   PetscInt          ix_type=-1,iy_type=-1,*range;
382:   MPI_Comm          comm;
383:   IS                tix=NULL,tiy=NULL,ix=ctx->from_is,iy=ctx->to_is;
384:   Vec               xin=ctx->from_v,yin=ctx->to_v;
385:   PetscBool         totalv,ixblock,iyblock,iystride,islocal,cando;

388:   PetscObjectGetComm((PetscObject)ctx,&comm);
389:   GetInputISType_private(ctx,VEC_MPI_ID,VEC_SEQ_ID,&ix_type,&tix,&iy_type,&tiy);
390:   if (tix) ix = tix;
391:   if (tiy) iy = tiy;

393:   islocal = PETSC_FALSE;
394:   /* special case extracting (subset of) local portion */
395:   if (ix_type == IS_STRIDE_ID && iy_type == IS_STRIDE_ID) {
396:     /* Case (2-a) */
397:     PetscInt              nx,ny,to_first,to_step,from_first,from_step;
398:     PetscInt              start,end,min,max;
399:     VecScatter_Seq_Stride *from12 = NULL,*to12 = NULL;

401:     VecGetOwnershipRange(xin,&start,&end);
402:     ISGetLocalSize(ix,&nx);
403:     ISStrideGetInfo(ix,&from_first,&from_step);
404:     ISGetLocalSize(iy,&ny);
405:     ISStrideGetInfo(iy,&to_first,&to_step);
406:     if (nx != ny) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local scatter sizes don't match");
407:     ISGetMinMax(ix,&min,&max);
408:     if (min >= start && max < end) islocal = PETSC_TRUE;
409:     else islocal = PETSC_FALSE;
410:     /* cannot use MPIU_Allreduce() since this call matches with the MPI_Allreduce() in the else statement below */
411:     MPI_Allreduce(&islocal,&cando,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)xin));
412:     if (cando) {
413:       PetscMalloc2(1,&to12,1,&from12);
414:       to12->n            = nx;
415:       to12->first        = to_first;
416:       to12->step         = to_step;
417:       from12->n          = nx;
418:       from12->first      = from_first-start;
419:       from12->step       = from_step;
420:       to12->format       = VEC_SCATTER_SEQ_STRIDE;
421:       from12->format     = VEC_SCATTER_SEQ_STRIDE;
422:       ctx->todata        = (void*)to12;
423:       ctx->fromdata      = (void*)from12;
424:       ctx->ops->begin    = VecScatterBegin_SSToSS;
425:       ctx->ops->end      = NULL;
426:       ctx->ops->destroy  = VecScatterDestroy_SSToSS;
427:       ctx->ops->copy     = VecScatterCopy_SSToSS;
428:       ctx->ops->view     = VecScatterView_SSToSS;
429:       PetscInfo(xin,"Special case: processors only getting local values\n");
430:       goto functionend;
431:     }
432:   } else {
433:     MPI_Allreduce(&islocal,&cando,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)xin));
434:   }

436:   /* test for special case of all processors getting entire vector */
437:   /* contains check that PetscMPIInt can handle the sizes needed */
438:   totalv = PETSC_FALSE;
439:   if (ix_type == IS_STRIDE_ID && iy_type == IS_STRIDE_ID) {
440:     /* Case (2-b) */
441:     PetscInt             i,nx,ny,to_first,to_step,from_first,from_step,N;
442:     PetscMPIInt          *count = NULL,*displx;
443:     VecScatter_MPI_ToAll *sto   = NULL;

445:     ISGetLocalSize(ix,&nx);
446:     ISStrideGetInfo(ix,&from_first,&from_step);
447:     ISGetLocalSize(iy,&ny);
448:     ISStrideGetInfo(iy,&to_first,&to_step);
449:     if (nx != ny) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local scatter sizes don't match");
450:     VecGetSize(xin,&N);
451:     if (nx != N) totalv = PETSC_FALSE;
452:     else if (from_first == 0 && from_step == 1 && from_first == to_first && from_step == to_step) totalv = PETSC_TRUE;
453:     else totalv = PETSC_FALSE;
454:     /* cannot use MPIU_Allreduce() since this call matches with the MPI_Allreduce() in the else statement below */
455:     MPI_Allreduce(&totalv,&cando,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)xin));

457: #if defined(PETSC_USE_64BIT_INDICES)
458:     if (cando && (yin->map->N < PETSC_MPI_INT_MAX)) {
459: #else
460:     if (cando) {
461: #endif
462:       MPI_Comm_size(PetscObjectComm((PetscObject)ctx),&size);
463:       PetscMalloc3(1,&sto,size,&count,size,&displx);
464:       range = xin->map->range;
465:       for (i=0; i<size; i++) {
466:         PetscMPIIntCast(range[i+1] - range[i],count+i);
467:         PetscMPIIntCast(range[i],displx+i);
468:       }
469:       sto->count        = count;
470:       sto->displx       = displx;
471:       sto->work1        = NULL;
472:       sto->work2        = NULL;
473:       sto->format         = VEC_SCATTER_MPI_TOALL;
474:       ctx->todata       = (void*)sto;
475:       ctx->fromdata     = NULL;
476:       ctx->ops->begin   = VecScatterBegin_MPI_ToAll;
477:       ctx->ops->end     = NULL;
478:       ctx->ops->destroy = VecScatterDestroy_MPI_ToAll;
479:       ctx->ops->copy    = VecScatterCopy_MPI_ToAll;
480:       ctx->ops->view    = VecScatterView_MPI_ToAll;
481:       PetscInfo(xin,"Special case: all processors get entire parallel vector\n");
482:       goto functionend;
483:     }
484:   } else {
485:     MPI_Allreduce(&totalv,&cando,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)xin));
486:   }

488:   /* test for special case of processor 0 getting entire vector */
489:   /* contains check that PetscMPIInt can handle the sizes needed */
490:   totalv = PETSC_FALSE;
491:   if (ix_type == IS_STRIDE_ID && iy_type == IS_STRIDE_ID) {
492:     /* Case (2-c) */
493:     PetscInt             i,nx,ny,to_first,to_step,from_first,from_step,N;
494:     PetscMPIInt          rank,*count = NULL,*displx;
495:     VecScatter_MPI_ToAll *sto = NULL;

497:     PetscObjectGetComm((PetscObject)xin,&comm);
498:     MPI_Comm_rank(comm,&rank);
499:     ISGetLocalSize(ix,&nx);
500:     ISStrideGetInfo(ix,&from_first,&from_step);
501:     ISGetLocalSize(iy,&ny);
502:     ISStrideGetInfo(iy,&to_first,&to_step);
503:     if (nx != ny) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local scatter sizes don't match");
504:     if (!rank) {
505:       VecGetSize(xin,&N);
506:       if (nx != N) totalv = PETSC_FALSE;
507:       else if (from_first == 0        && from_step == 1 &&
508:                from_first == to_first && from_step == to_step) totalv = PETSC_TRUE;
509:       else totalv = PETSC_FALSE;
510:     } else {
511:       if (!nx) totalv = PETSC_TRUE;
512:       else     totalv = PETSC_FALSE;
513:     }
514:     /* cannot use MPIU_Allreduce() since this call matches with the MPI_Allreduce() in the else statement below */
515:     MPI_Allreduce(&totalv,&cando,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)xin));

517: #if defined(PETSC_USE_64BIT_INDICES)
518:     if (cando && (yin->map->N < PETSC_MPI_INT_MAX)) {
519: #else
520:     if (cando) {
521: #endif
522:       MPI_Comm_size(PetscObjectComm((PetscObject)ctx),&size);
523:       PetscMalloc3(1,&sto,size,&count,size,&displx);
524:       range = xin->map->range;
525:       for (i=0; i<size; i++) {
526:         PetscMPIIntCast(range[i+1] - range[i],count+i);
527:         PetscMPIIntCast(range[i],displx+i);
528:       }
529:       sto->count        = count;
530:       sto->displx       = displx;
531:       sto->work1        = NULL;
532:       sto->work2        = NULL;
533:       sto->format       = VEC_SCATTER_MPI_TOONE;
534:       ctx->todata       = (void*)sto;
535:       ctx->fromdata     = NULL;
536:       ctx->ops->begin   = VecScatterBegin_MPI_ToOne;
537:       ctx->ops->end     = NULL;
538:       ctx->ops->destroy = VecScatterDestroy_MPI_ToAll;
539:       ctx->ops->copy    = VecScatterCopy_MPI_ToAll;
540:       ctx->ops->view    = VecScatterView_MPI_ToAll;
541:       PetscInfo(xin,"Special case: processor zero gets entire parallel vector, rest get none\n");
542:       goto functionend;
543:     }
544:   } else {
545:     MPI_Allreduce(&totalv,&cando,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)xin));
546:   }

548:   /* Case 2-d */
549:   PetscObjectTypeCompare((PetscObject)ix,ISBLOCK,&ixblock);
550:   PetscObjectTypeCompare((PetscObject)iy,ISBLOCK,&iyblock);
551:   PetscObjectTypeCompare((PetscObject)iy,ISSTRIDE,&iystride);
552:   if (ixblock) {
553:     /* special case block to block */
554:     if (iyblock) {
555:       PetscInt       nx,ny,bsx,bsy,min,max;
556:       const PetscInt *idx,*idy;
557:       ISGetBlockSize(iy,&bsy);
558:       ISGetBlockSize(ix,&bsx);
559:       min  = PetscMin(bsx,bsy);
560:       max  = PetscMax(bsx,bsy);
561:       MPIU_Allreduce(MPI_IN_PLACE,&min,1,MPIU_INT,MPI_MIN,comm);
562:       MPIU_Allreduce(MPI_IN_PLACE,&max,1,MPIU_INT,MPI_MAX,comm);
563:       if (min == max && VecScatterOptimizedBS(bsx)) {
564:         ISBlockGetLocalSize(ix,&nx);
565:         ISBlockGetIndices(ix,&idx);
566:         ISBlockGetLocalSize(iy,&ny);
567:         ISBlockGetIndices(iy,&idy);
568:         if (nx != ny) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local scatter sizes don't match");
569:         (*vecscattercreatelocal_ptos)(nx,idx,ny,idy,xin,yin,bsx,ctx);
570:         ISBlockRestoreIndices(ix,&idx);
571:         ISBlockRestoreIndices(iy,&idy);
572:         PetscInfo(xin,"Special case: blocked indices\n");
573:         goto functionend;
574:       }
575:       /* special case block to stride */
576:     } else if (iystride) {
577:       /* Case (2-e) */
578:       PetscInt ystart,ystride,ysize,bsx;
579:       ISStrideGetInfo(iy,&ystart,&ystride);
580:       ISGetLocalSize(iy,&ysize);
581:       ISGetBlockSize(ix,&bsx);
582:       /* see if stride index set is equivalent to block index set */
583:       if (VecScatterOptimizedBS(bsx) && ((ystart % bsx) == 0) && (ystride == 1) && ((ysize % bsx) == 0)) {
584:         PetscInt       nx,il,*idy;
585:         const PetscInt *idx;
586:         ISBlockGetLocalSize(ix,&nx);
587:         ISBlockGetIndices(ix,&idx);
588:         if (ysize != bsx*nx) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local scatter sizes don't match");
589:         PetscMalloc1(nx,&idy);
590:         if (nx) {
591:           idy[0] = ystart/bsx;
592:           for (il=1; il<nx; il++) idy[il] = idy[il-1] + 1;
593:         }
594:         (*vecscattercreatelocal_ptos)(nx,idx,nx,idy,xin,yin,bsx,ctx);
595:         PetscFree(idy);
596:         ISBlockRestoreIndices(ix,&idx);
597:         PetscInfo(xin,"Special case: blocked indices to stride\n");
598:         goto functionend;
599:       }
600:     }
601:   }

603:   /* left over general case (2-f) */
604:   {
605:     PetscInt       nx,ny;
606:     const PetscInt *idx,*idy;
607:     ISGetLocalSize(ix,&nx);
608:     ISGetIndices(ix,&idx);
609:     ISGetLocalSize(iy,&ny);
610:     ISGetIndices(iy,&idy);
611:     if (nx != ny) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local scatter sizes don't match (%D %D)",nx,ny);
612:     (*vecscattercreatelocal_ptos)(nx,idx,ny,idy,xin,yin,1,ctx);
613:     ISRestoreIndices(ix,&idx);
614:     ISRestoreIndices(iy,&idy);
615:     PetscInfo(xin,"General case: MPI to Seq\n");
616:     goto functionend;
617:   }
618:   functionend:
619:   ISDestroy(&tix);
620:   ISDestroy(&tiy);
621:   VecScatterViewFromOptions(ctx,NULL,"-vecscatter_view");
622:   return(0);
623: }

625:   static PetscErrorCode VecScatterCreate_StoP(VecScatter ctx,PetscErrorCode (*vecscattercreatelocal_stop)(PetscInt,const PetscInt*,PetscInt,const PetscInt*,Vec,Vec,PetscInt,VecScatter))
626: {
627:   PetscErrorCode    ierr;
628:   PetscInt          ix_type=-1,iy_type=-1;
629:   IS                tix=NULL,tiy=NULL,ix=ctx->from_is,iy=ctx->to_is;
630:   Vec               xin=ctx->from_v,yin=ctx->to_v;
631:   PetscBool         islocal,cando;

634:   GetInputISType_private(ctx,VEC_MPI_ID,VEC_SEQ_ID,&ix_type,&tix,&iy_type,&tiy);
635:   if (tix) ix = tix;
636:   if (tiy) iy = tiy;

638:   /* special case local copy portion */
639:   islocal = PETSC_FALSE;
640:   if (ix_type == IS_STRIDE_ID && iy_type == IS_STRIDE_ID) {
641:     PetscInt              nx,ny,to_first,to_step,from_step,start,end,from_first,min,max;
642:     VecScatter_Seq_Stride *from = NULL,*to = NULL;

644:     VecGetOwnershipRange(yin,&start,&end);
645:     ISGetLocalSize(ix,&nx);
646:     ISStrideGetInfo(ix,&from_first,&from_step);
647:     ISGetLocalSize(iy,&ny);
648:     ISStrideGetInfo(iy,&to_first,&to_step);
649:     if (nx != ny) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local scatter sizes don't match");
650:     ISGetMinMax(iy,&min,&max);
651:     if (min >= start && max < end) islocal = PETSC_TRUE;
652:     else islocal = PETSC_FALSE;
653:     /* cannot use MPIU_Allreduce() since this call matches with the MPI_Allreduce() in the else statement below */
654:     MPI_Allreduce(&islocal,&cando,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)yin));
655:     if (cando) {
656:       PetscMalloc2(1,&to,1,&from);
657:       to->n             = nx;
658:       to->first         = to_first-start;
659:       to->step          = to_step;
660:       from->n           = nx;
661:       from->first       = from_first;
662:       from->step        = from_step;
663:       to->format          = VEC_SCATTER_SEQ_STRIDE;
664:       from->format        = VEC_SCATTER_SEQ_STRIDE;
665:       ctx->todata       = (void*)to;
666:       ctx->fromdata     = (void*)from;
667:       ctx->ops->begin   = VecScatterBegin_SSToSS;
668:       ctx->ops->end     = NULL;
669:       ctx->ops->destroy = VecScatterDestroy_SSToSS;
670:       ctx->ops->copy    = VecScatterCopy_SSToSS;
671:       ctx->ops->view    = VecScatterView_SSToSS;
672:       PetscInfo(xin,"Special case: sequential stride to MPI stride\n");
673:       goto functionend;
674:     }
675:   } else {
676:     MPI_Allreduce(&islocal,&cando,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)yin));
677:   }
678:   /* special case block to stride */
679:   if (ix_type == IS_BLOCK_ID && iy_type == IS_STRIDE_ID) {
680:     PetscInt ystart,ystride,ysize,bsx;
681:     ISStrideGetInfo(iy,&ystart,&ystride);
682:     ISGetLocalSize(iy,&ysize);
683:     ISGetBlockSize(ix,&bsx);
684:     /* see if stride index set is equivalent to block index set */
685:     if (VecScatterOptimizedBS(bsx) && ((ystart % bsx) == 0) && (ystride == 1) && ((ysize % bsx) == 0)) {
686:       PetscInt       nx,il,*idy;
687:       const PetscInt *idx;
688:       ISBlockGetLocalSize(ix,&nx);
689:       ISBlockGetIndices(ix,&idx);
690:       if (ysize != bsx*nx) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local scatter sizes don't match");
691:       PetscMalloc1(nx,&idy);
692:       if (nx) {
693:         idy[0] = ystart/bsx;
694:         for (il=1; il<nx; il++) idy[il] = idy[il-1] + 1;
695:       }
696:       (*vecscattercreatelocal_stop)(nx,idx,nx,idy,xin,yin,bsx,ctx);
697:       PetscFree(idy);
698:       ISBlockRestoreIndices(ix,&idx);
699:       PetscInfo(xin,"Special case: Blocked indices to stride\n");
700:       goto functionend;
701:     }
702:   }

704:   /* general case */
705:   {
706:     PetscInt       nx,ny;
707:     const PetscInt *idx,*idy;
708:     ISGetLocalSize(ix,&nx);
709:     ISGetIndices(ix,&idx);
710:     ISGetLocalSize(iy,&ny);
711:     ISGetIndices(iy,&idy);
712:     if (nx != ny) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local scatter sizes don't match");
713:     (*vecscattercreatelocal_stop)(nx,idx,ny,idy,xin,yin,1,ctx);
714:     ISRestoreIndices(ix,&idx);
715:     ISRestoreIndices(iy,&idy);
716:     PetscInfo(xin,"General case: Seq to MPI\n");
717:     goto functionend;
718:   }
719:   functionend:
720:   ISDestroy(&tix);
721:   ISDestroy(&tiy);
722:   VecScatterViewFromOptions(ctx,NULL,"-vecscatter_view");
723:   return(0);
724: }

726: static PetscErrorCode VecScatterCreate_PtoP(VecScatter ctx,PetscErrorCode (*vecscattercreatelocal_ptop)(PetscInt,const PetscInt*,PetscInt,const PetscInt*,Vec,Vec,PetscInt,VecScatter))
727: {
728:   PetscErrorCode    ierr;
729:   PetscInt          ix_type=-1,iy_type=-1;
730:   IS                tix=NULL,tiy=NULL,ix=ctx->from_is,iy=ctx->to_is;
731:   Vec               xin=ctx->from_v,yin=ctx->to_v;
732:   PetscInt          nx,ny;
733:   const PetscInt    *idx,*idy;

736:   GetInputISType_private(ctx,VEC_MPI_ID,VEC_MPI_ID,&ix_type,&tix,&iy_type,&tiy);
737:   if (tix) ix = tix;
738:   if (tiy) iy = tiy;

740:   /* no special cases for now */
741:   ISGetLocalSize(ix,&nx);
742:   ISGetIndices(ix,&idx);
743:   ISGetLocalSize(iy,&ny);
744:   ISGetIndices(iy,&idy);
745:   if (nx != ny) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local scatter sizes don't match");
746:   (*vecscattercreatelocal_ptop)(nx,idx,ny,idy,xin,yin,1,ctx);
747:   ISRestoreIndices(ix,&idx);
748:   ISRestoreIndices(iy,&idy);
749:   PetscInfo(xin,"General case: MPI to MPI\n");

751:   ISDestroy(&tix);
752:   ISDestroy(&tiy);
753:   VecScatterViewFromOptions(ctx,NULL,"-vecscatter_view");
754:   return(0);
755: }

757: static PetscErrorCode VecScatterGetInputVecType_private(VecScatter ctx,PetscInt *xin_type1,PetscInt *yin_type1)
758: {
759:   PetscErrorCode    ierr;
760:   MPI_Comm          comm,ycomm;
761:   PetscMPIInt       size;
762:   Vec               xin = ctx->from_v,yin = ctx->to_v;
763:   PetscInt          xin_type,yin_type;

766:   /*
767:       Determine if the vectors are "parallel", ie. it shares a comm with other processors, or
768:       sequential (it does not share a comm). The difference is that parallel vectors treat the
769:       index set as providing indices in the global parallel numbering of the vector, with
770:       sequential vectors treat the index set as providing indices in the local sequential
771:       numbering
772:   */
773:   PetscObjectGetComm((PetscObject)xin,&comm);
774:   MPI_Comm_size(comm,&size);
775:   if (size > 1) {
776:     xin_type = VEC_MPI_ID;
777:   } else xin_type = VEC_SEQ_ID;
778:   *xin_type1 = xin_type;

780:   PetscObjectGetComm((PetscObject)yin,&ycomm);
781:   MPI_Comm_size(ycomm,&size);
782:   if (size > 1) {
783:     yin_type = VEC_MPI_ID;
784:   } else yin_type = VEC_SEQ_ID;
785:   *yin_type1 = yin_type;
786:   return(0);
787: }

789: PetscErrorCode GetInputISType_private(VecScatter ctx,PetscInt xin_type,PetscInt yin_type,PetscInt *ix_type1,IS *tix1,PetscInt *iy_type1,IS *tiy1)
790: {
791:   PetscErrorCode    ierr;
792:   MPI_Comm          comm;
793:   Vec               xin = ctx->from_v,yin = ctx->to_v;
794:   IS                tix = NULL,tiy = NULL,ix = ctx->from_is, iy = ctx->to_is;
795:   PetscInt          ix_type  = IS_GENERAL_ID,iy_type = IS_GENERAL_ID;
796:   PetscBool         flag;

799:   PetscObjectGetComm((PetscObject)ctx,&comm);

801:   /* if ix or iy is not included; assume just grabbing entire vector */
802:   if (!ix && xin_type == VEC_SEQ_ID) {
803:     ISCreateStride(comm,ctx->from_n,0,1,&ix);
804:     tix  = ix;
805:   } else if (!ix && xin_type == VEC_MPI_ID) {
806:     if (yin_type == VEC_MPI_ID) {
807:       PetscInt ntmp, low;
808:       VecGetLocalSize(xin,&ntmp);
809:       VecGetOwnershipRange(xin,&low,NULL);
810:       ISCreateStride(comm,ntmp,low,1,&ix);
811:     } else {
812:       PetscInt Ntmp;
813:       VecGetSize(xin,&Ntmp);
814:       ISCreateStride(comm,Ntmp,0,1,&ix);
815:     }
816:     tix = ix;
817:   } else if (!ix) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ix not given, but not Seq or MPI vector");

819:   if (!iy && yin_type == VEC_SEQ_ID) {
820:     ISCreateStride(comm,ctx->to_n,0,1,&iy);
821:     tiy  = iy;
822:   } else if (!iy && yin_type == VEC_MPI_ID) {
823:     if (xin_type == VEC_MPI_ID) {
824:       PetscInt ntmp, low;
825:       VecGetLocalSize(yin,&ntmp);
826:       VecGetOwnershipRange(yin,&low,NULL);
827:       ISCreateStride(comm,ntmp,low,1,&iy);
828:     } else {
829:       PetscInt Ntmp;
830:       VecGetSize(yin,&Ntmp);
831:       ISCreateStride(comm,Ntmp,0,1,&iy);
832:     }
833:     tiy = iy;
834:   } else if (!iy) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"iy not given, but not Seq or MPI vector");

836:   /* Determine types of index sets */
837:   PetscObjectTypeCompare((PetscObject)ix,ISBLOCK,&flag);
838:   if (flag) ix_type = IS_BLOCK_ID;
839:   PetscObjectTypeCompare((PetscObject)iy,ISBLOCK,&flag);
840:   if (flag) iy_type = IS_BLOCK_ID;
841:   PetscObjectTypeCompare((PetscObject)ix,ISSTRIDE,&flag);
842:   if (flag) ix_type = IS_STRIDE_ID;
843:   PetscObjectTypeCompare((PetscObject)iy,ISSTRIDE,&flag);
844:   if (flag) iy_type = IS_STRIDE_ID;

846:   if (ix_type1) *ix_type1 = ix_type;
847:   if (iy_type1) *iy_type1 = iy_type;
848:   if (tix1)     *tix1     = tix;
849:   if (tiy1)     *tiy1     = tiy;
850:   return(0);
851: }

853: PetscErrorCode VecScatterSetUp_vectype_private(VecScatter ctx,PetscErrorCode (*vecscattercreatelocal_ptos)(PetscInt,const PetscInt*,PetscInt,const PetscInt*,Vec,Vec,PetscInt,VecScatter),PetscErrorCode (*vecscattercreatelocal_stop)(PetscInt,const PetscInt*,PetscInt,const PetscInt*,Vec,Vec,PetscInt,VecScatter),PetscErrorCode (*vecscattercreatelocal_ptop)(PetscInt,const PetscInt*,PetscInt,const PetscInt*,Vec,Vec,PetscInt,VecScatter))
854: {
855:   PetscErrorCode    ierr;
856:   PetscInt          xin_type=-1,yin_type=-1;

859:   VecScatterGetInputVecType_private(ctx,&xin_type,&yin_type);
860:   if (xin_type == VEC_MPI_ID && yin_type == VEC_SEQ_ID) {
861:     VecScatterCreate_PtoS(ctx,vecscattercreatelocal_ptos);
862:   } else if (xin_type == VEC_SEQ_ID && yin_type == VEC_MPI_ID) {
863:     VecScatterCreate_StoP(ctx,vecscattercreatelocal_stop);
864:   } else if (xin_type == VEC_MPI_ID && yin_type == VEC_MPI_ID) {
865:     VecScatterCreate_PtoP(ctx,vecscattercreatelocal_ptop);
866:   }
867:   return(0);
868: }