Actual source code: vscat.c

  1: #include "petscsf.h"
  2: #include "petscsystypes.h"
  3: #include "petscvec.h"
  4: #include <petsc/private/sfimpl.h>
  5: #include <../src/vec/is/sf/impls/basic/sfbasic.h>
  6: #include <../src/vec/is/sf/impls/basic/sfpack.h>
  7: #include <petsc/private/vecimpl.h>

  9: typedef enum {IS_INVALID, IS_GENERAL, IS_BLOCK, IS_STRIDE} ISTypeID;

 11: static inline PetscErrorCode ISGetTypeID_Private(IS is,ISTypeID *id)
 12: {
 13:   PetscBool      same;

 15:   *id  = IS_INVALID;
 16:   PetscObjectTypeCompare((PetscObject)is,ISGENERAL,&same);
 17:   if (same) {*id = IS_GENERAL; goto functionend;}
 18:   PetscObjectTypeCompare((PetscObject)is,ISBLOCK,&same);
 19:   if (same) {*id = IS_BLOCK; goto functionend;}
 20:   PetscObjectTypeCompare((PetscObject)is,ISSTRIDE,&same);
 21:   if (same) {*id = IS_STRIDE; goto functionend;}
 22: functionend:
 23:   return 0;
 24: }

 26: static PetscErrorCode VecScatterBegin_Internal(VecScatter sf,Vec x,Vec y,InsertMode addv,ScatterMode mode)
 27: {
 28:   PetscSF        wsf=NULL; /* either sf or its local part */
 29:   MPI_Op         mop=MPI_OP_NULL;
 30:   PetscMPIInt    size;
 31:   PetscMemType   xmtype=PETSC_MEMTYPE_HOST,ymtype=PETSC_MEMTYPE_HOST;

 33:   if (x != y) VecLockReadPush(x);
 34:   VecGetArrayReadAndMemType(x,&sf->vscat.xdata,&xmtype);
 35:   VecGetArrayAndMemType(y,&sf->vscat.ydata,&ymtype);
 36:   VecLockWriteSet_Private(y,PETSC_TRUE);

 38:   /* SCATTER_LOCAL indicates ignoring inter-process communication */
 39:   MPI_Comm_size(PetscObjectComm((PetscObject)sf),&size);
 40:   if ((mode & SCATTER_LOCAL) && size > 1) { /* Lazy creation of sf->vscat.lsf since SCATTER_LOCAL is uncommon */
 41:     if (!sf->vscat.lsf) PetscSFCreateLocalSF_Private(sf,&sf->vscat.lsf);
 42:     wsf = sf->vscat.lsf;
 43:   } else {
 44:     wsf = sf;
 45:   }

 47:   /* Note xdata/ydata is always recorded on sf (not lsf) above */
 48:   if (addv == INSERT_VALUES)   mop = MPI_REPLACE;
 49:   else if (addv == ADD_VALUES) mop = MPIU_SUM; /* Petsc defines its own MPI datatype and SUM operation for __float128 etc. */
 50:   else if (addv == MAX_VALUES) mop = MPIU_MAX;
 51:   else if (addv == MIN_VALUES) mop = MPIU_MIN;
 52:   else SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_SUP,"Unsupported InsertMode %d in VecScatterBegin/End",addv);

 54:   if (mode & SCATTER_REVERSE) { /* REVERSE indicates leaves to root scatter. Note that x and y are swapped in input */
 55:     PetscSFReduceWithMemTypeBegin(wsf,sf->vscat.unit,xmtype,sf->vscat.xdata,ymtype,sf->vscat.ydata,mop);
 56:   } else { /* FORWARD indicates x to y scatter, where x is root and y is leaf */
 57:     PetscSFBcastWithMemTypeBegin(wsf,sf->vscat.unit,xmtype,sf->vscat.xdata,ymtype,sf->vscat.ydata,mop);
 58:   }
 59:   return 0;
 60: }

 62: static PetscErrorCode VecScatterEnd_Internal(VecScatter sf,Vec x,Vec y,InsertMode addv,ScatterMode mode)
 63: {
 64:   PetscSF        wsf=NULL;
 65:   MPI_Op         mop=MPI_OP_NULL;
 66:   PetscMPIInt    size;

 68:   /* SCATTER_LOCAL indicates ignoring inter-process communication */
 69:   MPI_Comm_size(PetscObjectComm((PetscObject)sf),&size);
 70:   wsf  = ((mode & SCATTER_LOCAL) && size > 1) ? sf->vscat.lsf : sf;

 72:   if (addv == INSERT_VALUES)   mop = MPI_REPLACE;
 73:   else if (addv == ADD_VALUES) mop = MPIU_SUM;
 74:   else if (addv == MAX_VALUES) mop = MPIU_MAX;
 75:   else if (addv == MIN_VALUES) mop = MPIU_MIN;
 76:   else SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_SUP,"Unsupported InsertMode %d in VecScatterBegin/End",addv);

 78:   if (mode & SCATTER_REVERSE) { /* reverse scatter sends leaves to roots. Note that x and y are swapped in input */
 79:     PetscSFReduceEnd(wsf,sf->vscat.unit,sf->vscat.xdata,sf->vscat.ydata,mop);
 80:   } else { /* forward scatter sends roots to leaves, i.e., x to y */
 81:     PetscSFBcastEnd(wsf,sf->vscat.unit,sf->vscat.xdata,sf->vscat.ydata,mop);
 82:   }

 84:   VecRestoreArrayReadAndMemType(x,&sf->vscat.xdata);
 85:   if (x != y) VecLockReadPop(x);
 86:   VecRestoreArrayAndMemType(y,&sf->vscat.ydata);
 87:   VecLockWriteSet_Private(y,PETSC_FALSE);
 88:   return 0;
 89: }

 91: /* VecScatterRemap provides a lightweight way to slightly modify a VecScatter. Suppose the input sf scatters
 92:    x[i] to y[j]; tomap gives a plan to change the scatter to take x[tomap[i]] to y[j]. Note that in SF,
 93:    x holds the roots. That means we need to change incoming data such as bas->irootloc[].
 94:  */
 95: static PetscErrorCode VecScatterRemap_Internal(VecScatter sf,const PetscInt *tomap,const PetscInt *frommap)
 96: {
 97:   PetscInt       i,bs = sf->vscat.bs;
 98:   PetscMPIInt    size;
 99:   PetscBool      ident = PETSC_TRUE,isbasic,isneighbor;
100:   PetscSFType    type;
101:   PetscSF_Basic  *bas = NULL;

103:   /* check if it is an identity map. If it is, do nothing */
104:   if (tomap) {
105:     for (i=0; i<sf->nroots*bs; i++) {if (i != tomap[i]) {ident = PETSC_FALSE; break; } }
106:     if (ident) return 0;
107:   }
109:   if (!tomap) return 0;

111:   MPI_Comm_size(PetscObjectComm((PetscObject)sf),&size);

113:   /* Since the indices changed, we must also update the local SF. But we do not do that here since
114:      lsf is rarely used; we simply destroy lsf and rebuild it on demand from the updated sf.
115:   */
116:   if (sf->vscat.lsf) PetscSFDestroy(&sf->vscat.lsf);

118:   PetscSFGetType(sf,&type);
119:   PetscObjectTypeCompare((PetscObject)sf,PETSCSFBASIC,&isbasic);
120:   PetscObjectTypeCompare((PetscObject)sf,PETSCSFNEIGHBOR,&isneighbor);

123:   PetscSFSetUp(sf); /* to build bas->irootloc if SetUp has not yet been called */

125:   /* Root indices are going to be remapped. This is tricky for SF. Root indices are used in sf->rremote,
126:     sf->remote and bas->irootloc. The latter one is cheap to remap, but the former two are not.
127:     To remap them, we have to do a bcast from roots to leaves, to let leaves know their updated roots.
128:     Since VecScatterRemap is supposed to be a cheap routine to adapt a vecscatter by only changing where
129:     x[] data is taken, we do not remap sf->rremote, sf->remote. The consequence is that operations
130:     accessing them (such as PetscSFCompose) may get stale info. Considering VecScatter does not need
131:     such complicated SF operations, we do not remap sf->rremote and sf->remote; instead we destroy them
132:     so that code accessing them (if any) will crash (instead of getting silent errors). Note that BcastAndOp/Reduce,
133:     which are used by VecScatter and only rely on bas->irootloc, are updated and correct.
134:   */
135:   sf->remote = NULL;
136:   PetscFree(sf->remote_alloc);
137:   /* Not easy to free sf->rremote since it was allocated with PetscMalloc4(), so just give it crazy values */
138:   for (i=0; i<sf->roffset[sf->nranks]; i++) sf->rremote[i] = PETSC_MIN_INT;

140:   /* Indices in tomap[] are for each individual vector entry. But indices in sf are for each
141:      block in the vector. So before the remapping, we have to expand indices in sf by bs, and
142:      after the remapping, we have to shrink them back.
143:    */
144:   bas = (PetscSF_Basic*)sf->data;
145:   for (i=0; i<bas->ioffset[bas->niranks]; i++) bas->irootloc[i] = tomap[bas->irootloc[i]*bs]/bs;
146: #if defined(PETSC_HAVE_DEVICE)
147:   /* Free the irootloc copy on device. We allocate a new copy and get the updated value on demand. See PetscSFLinkGetRootPackOptAndIndices() */
148:   for (i=0; i<2; i++) PetscSFFree(sf,PETSC_MEMTYPE_DEVICE,bas->irootloc_d[i]);
149: #endif
150:   /* Destroy and then rebuild root packing optimizations since indices are changed */
151:   PetscSFResetPackFields(sf);
152:   PetscSFSetUpPackFields(sf);
153:   return 0;
154: }

156: /* Given a parallel VecScatter context, return number of procs and vector entries involved in remote (i.e., off-process) communication

158:   Input Parameters:
159: + sf   - the context (must be a parallel vecscatter)
160: - send  - true to select the send info (i.e., todata), otherwise to select the recv info (i.e., fromdata)

162:   Output parameters:
163: + num_procs   - number of remote processors
164: - num_entries - number of vector entries to send or recv

166:   .seealso: VecScatterGetRemote_Private(), VecScatterGetRemoteOrdered_Private()

168:   Notes:
169:   Sometimes PETSc internally needs to use the matrix-vector-multiply vecscatter context for other purposes. The client code
170:   in those cases usually only uses MPI_Send/Recv; this group of subroutines provides the info needed for such uses.
171:  */
172: PetscErrorCode VecScatterGetRemoteCount_Private(VecScatter sf,PetscBool send,PetscInt *num_procs,PetscInt *num_entries)
173: {
174:   PetscInt          nranks,remote_start;
175:   PetscMPIInt       rank;
176:   const PetscInt    *offset;
177:   const PetscMPIInt *ranks;

179:   PetscSFSetUp(sf);
180:   MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);

182:   /* This routine is mainly used for MatMult's Mvctx. In Mvctx, we scatter an MPI vector x to a sequential vector lvec.
183:      Remember x is roots and lvec is leaves. 'send' means roots to leaves communication. If 'send' is true, we need to
184:      get info about which ranks this processor needs to send to. In other words, we need to call PetscSFGetLeafRanks().
185:      If send is false, we do the opposite, calling PetscSFGetRootRanks().
186:   */
187:   if (send) PetscSFGetLeafRanks(sf,&nranks,&ranks,&offset,NULL);
188:   else PetscSFGetRootRanks(sf,&nranks,&ranks,&offset,NULL,NULL);
189:   if (nranks) {
190:     remote_start = (rank == ranks[0])? 1 : 0;
191:     if (num_procs)   *num_procs   = nranks - remote_start;
192:     if (num_entries) *num_entries = offset[nranks] - offset[remote_start];
193:   } else {
194:     if (num_procs)   *num_procs   = 0;
195:     if (num_entries) *num_entries = 0;
196:   }
197:   return 0;
198: }
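
/* Illustrative sketch (not part of vscat.c), assuming a hypothetical caller that does its own
   MPI_Send/Recv as described in the Notes above: the remote counts are exactly what is needed
   to size an MPI request array and a contiguous send buffer. All names below are hypothetical. */
static PetscErrorCode VecScatterAllocSendBuffers_Sketch(VecScatter sf,MPI_Request **reqs,PetscScalar **sendbuf)
{
  PetscInt nsendranks,nsendentries;

  VecScatterGetRemoteCount_Private(sf,PETSC_TRUE,&nsendranks,&nsendentries);
  PetscMalloc1(nsendranks,reqs);      /* one request per remote rank we send to */
  PetscMalloc1(nsendentries,sendbuf); /* room for every vector entry we send */
  return 0;
}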

200: /* Given a parallel VecScatter context, return a plan that represents the remote communication.
201:    Any output parameter can be NULL.

203:   Input Parameters:
204: + sf   - the context
205: - send  - true to select the send info (i.e., todata), otherwise to select the recv info (i.e., fromdata)

207:   Output parameters:
208: + n        - number of remote processors
209: . starts   - starting point in indices for each proc. ATTENTION: starts[0] is not necessarily zero.
210:              Therefore, expressions like starts[i+1]-starts[i] and indices[starts[i]+j] work as
211:              expected for a CSR structure but buf[starts[i]+j] may be out of range if buf was allocated
212:              with length starts[n]-starts[0]. One should use buf[starts[i]-starts[0]+j] instead.
213: . indices  - indices of entries to send/recv
214: . procs    - ranks of remote processors
215: - bs       - block size

217:   .seealso: VecScatterRestoreRemote_Private(), VecScatterGetRemoteOrdered_Private()
218:  */
219: PetscErrorCode VecScatterGetRemote_Private(VecScatter sf,PetscBool send,PetscInt *n,const PetscInt **starts,const PetscInt **indices,const PetscMPIInt **procs,PetscInt *bs)
220: {
221:   PetscInt          nranks,remote_start;
222:   PetscMPIInt       rank;
223:   const PetscInt    *offset,*location;
224:   const PetscMPIInt *ranks;

226:   PetscSFSetUp(sf);
227:   MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);

229:   if (send) PetscSFGetLeafRanks(sf,&nranks,&ranks,&offset,&location);
230:   else PetscSFGetRootRanks(sf,&nranks,&ranks,&offset,&location,NULL);

232:   if (nranks) {
233:     remote_start = (rank == ranks[0])? 1 : 0;
234:     if (n)       *n       = nranks - remote_start;
235:     if (starts)  *starts  = &offset[remote_start];
236:     if (indices) *indices = location; /* not &location[offset[remote_start]]; starts[0] may point to the middle of indices[] */
237:     if (procs)   *procs   = &ranks[remote_start];
238:   } else {
239:     if (n)       *n       = 0;
240:     if (starts)  *starts  = NULL;
241:     if (indices) *indices = NULL;
242:     if (procs)   *procs   = NULL;
243:   }

245:   if (bs) *bs = 1;
246:   return 0;
247: }
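
/* Illustrative sketch (not part of vscat.c), assuming a hypothetical caller that packs its own
   send buffer from the plan returned above. Note the starts[i]-starts[0] shift when indexing a
   buffer allocated with length starts[n]-starts[0], as warned in the comment preceding this
   function. buf and xarray are hypothetical caller-provided arrays; bs is returned as 1 here. */
static PetscErrorCode VecScatterPackSendBuffer_Sketch(VecScatter sf,const PetscScalar *xarray,PetscScalar *buf)
{
  PetscInt          n,i,j,bs;
  const PetscInt    *starts,*indices;
  const PetscMPIInt *procs;

  VecScatterGetRemote_Private(sf,PETSC_TRUE,&n,&starts,&indices,&procs,&bs);
  for (i=0; i<n; i++) {                       /* for each remote rank procs[i] */
    for (j=0; j<starts[i+1]-starts[i]; j++) { /* entries destined to that rank */
      buf[starts[i]-starts[0]+j] = xarray[indices[starts[i]+j]]; /* indices[] is indexed with the unshifted starts[] */
    }
  }
  VecScatterRestoreRemote_Private(sf,PETSC_TRUE,&n,&starts,&indices,&procs,&bs);
  return 0;
}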

249: /* Given a parallel VecScatter context, return a plan that represents the remote communication. Ranks of remote
250:    processors returned in procs must be sorted in ascending order. Any output parameter can be NULL.

252:   Input Parameters:
253: + sf   - the context
254: - send  - true to select the send info (i.e., todata), otherwise to select the recv info (i.e., fromdata)

256:   Output parameters:
257: + n        - number of remote processors
258: . starts   - starting point in indices for each proc. ATTENTION: starts[0] is not necessarily zero.
259:              Therefore, expressions like starts[i+1]-starts[i] and indices[starts[i]+j] work as
260:              expected for a CSR structure but buf[starts[i]+j] may be out of range if buf was allocated
261:              with length starts[n]-starts[0]. One should use buf[starts[i]-starts[0]+j] instead.
262: . indices  - indices of entries to send/recv
263: . procs    - ranks of remote processors
264: - bs       - block size

266:   .seealso: VecScatterRestoreRemoteOrdered_Private(), VecScatterGetRemote_Private()

268:   Notes:
269:   The output parameters starts and indices are adapted accordingly to match the sorted ranks.
270:  */
271: PetscErrorCode VecScatterGetRemoteOrdered_Private(VecScatter sf,PetscBool send,PetscInt *n,const PetscInt **starts,const PetscInt **indices,const PetscMPIInt **procs,PetscInt *bs)
272: {
273:   VecScatterGetRemote_Private(sf,send,n,starts,indices,procs,bs);
274:   if (PetscUnlikelyDebug(n && procs)) {
275:     PetscInt i;
276:     /* from back to front to also handle cases *n=0 */
277:     for (i=*n-1; i>0; i--) { if ((*procs)[i-1] > (*procs)[i]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"procs[] are not ordered"); }
278:   }
279:   return 0;
280: }

282: /* Given a parallel VecScatter context, restore the plan returned by VecScatterGetRemote_Private. This gives a chance for
283:    an implementation to free memory allocated in the VecScatterGetRemote_Private call.

285:   Input Parameters:
286: + sf   - the context
287: - send  - true to select the send info (i.e., todata), otherwise to select the recv info (i.e., fromdata)

289:   Output parameters:
290: + n        - number of remote processors
291: . starts   - starting point in indices for each proc
292: . indices  - indices of entries to send/recv
293: . procs    - ranks of remote processors
294: - bs       - block size

296:   .seealso: VecScatterGetRemote_Private()
297:  */
298: PetscErrorCode VecScatterRestoreRemote_Private(VecScatter sf,PetscBool send,PetscInt *n,const PetscInt **starts,const PetscInt **indices,const PetscMPIInt **procs,PetscInt *bs)
299: {
300:   if (starts)   *starts  = NULL;
301:   if (indices)  *indices = NULL;
302:   if (procs)    *procs   = NULL;
303:   return 0;
304: }

306: /* Given a parallel VecScatter context, restore the plan returned by VecScatterGetRemoteOrdered_Private. This gives a chance for
307:    an implementation to free memory allocated in the VecScatterGetRemoteOrdered_Private call.

309:   Input Parameters:
310: + sf   - the context
311: - send  - true to select the send info (i.e., todata), otherwise to select the recv info (i.e., fromdata)

313:   Output parameters:
314: + n        - number of remote processors
315: . starts   - starting point in indices for each proc
316: . indices  - indices of entries to send/recv
317: . procs    - ranks of remote processors
318: - bs       - block size

320:   .seealso: VecScatterGetRemoteOrdered_Private()
321:  */
322: PetscErrorCode VecScatterRestoreRemoteOrdered_Private(VecScatter sf,PetscBool send,PetscInt *n,const PetscInt **starts,const PetscInt **indices,const PetscMPIInt **procs,PetscInt *bs)
323: {
324:   VecScatterRestoreRemote_Private(sf,send,n,starts,indices,procs,bs);
325:   return 0;
326: }

328: /*@
329:    VecScatterSetUp - Sets up the VecScatter to be able to actually scatter information between vectors

331:    Collective on VecScatter

333:    Input Parameter:
334: .  sf - the scatter context

336:    Level: intermediate

338: .seealso: VecScatterCreate(), VecScatterCopy()
339: @*/
340: PetscErrorCode VecScatterSetUp(VecScatter sf)
341: {
342:   PetscSFSetUp(sf);
343:   return 0;
344: }

346: /*@C
347:   VecScatterSetType - Builds a vector scatter, for a particular vector scatter implementation.

349:   Collective on VecScatter

351:   Input Parameters:
352: + sf - The VecScatter (SF) object
353: - type - The name of the vector scatter type

355:   Options Database Key:
356: . -sf_type <type> - Sets the VecScatter (SF) type

358:   Notes:
359:   Use VecScatterDuplicate() to form additional vector scatters of the same type as an existing vector scatter.

361:   Level: intermediate

363: .seealso: VecScatterGetType(), VecScatterCreate()
364: @*/
365: PetscErrorCode VecScatterSetType(VecScatter sf, VecScatterType type)
366: {
367:   PetscSFSetType(sf,type);
368:   return 0;
369: }
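
/* Illustrative sketch (not part of vscat.c): since a VecScatter is just a PetscSF, the type names
   accepted here are PetscSF type names. Assuming PETSCSFBASIC is the desired implementation, a
   hypothetical caller could force it like this before setting the scatter up. */
static PetscErrorCode VecScatterForceBasic_Sketch(VecScatter sf)
{
  VecScatterSetType(sf,PETSCSFBASIC);
  return 0;
}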

371: /*@C
372:   VecScatterGetType - Gets the vector scatter type name (as a string) from the VecScatter.

374:   Not Collective

376:   Input Parameter:
377: . sf  - The vector scatter (SF)

379:   Output Parameter:
380: . type - The vector scatter type name

382:   Level: intermediate

384: .seealso: VecScatterSetType(), VecScatterCreate()
385: @*/
386: PetscErrorCode VecScatterGetType(VecScatter sf, VecScatterType *type)
387: {
388:   PetscSFGetType(sf,type);
389:   return 0;
390: }

392: /*@C
393:   VecScatterRegister -  Adds a new vector scatter component implementation

395:   Not Collective

397:   Input Parameters:
398: + name        - The name of a new user-defined creation routine
399: - create_func - The creation routine itself

401:   Level: advanced

403: .seealso: VecRegister()
404: @*/
405: PetscErrorCode VecScatterRegister(const char sname[], PetscErrorCode (*function)(VecScatter))
406: {
407:   PetscSFRegister(sname,function);
408:   return 0;
409: }

411: /* ------------------------------------------------------------------*/
412: /*@
413:    VecScatterGetMerged - Returns true if the scatter is completed in the VecScatterBegin()
414:       and the VecScatterEnd() does nothing

416:    Not Collective

418:    Input Parameter:
419: .   sf - scatter context created with VecScatterCreate()

421:    Output Parameter:
422: .   flg - PETSC_TRUE if the VecScatterBegin/End() are all done during the VecScatterBegin()

424:    Level: developer

426: .seealso: VecScatterCreate(), VecScatterEnd(), VecScatterBegin()
427: @*/
428: PetscErrorCode  VecScatterGetMerged(VecScatter sf,PetscBool *flg)
429: {
431:   if (flg) *flg = sf->vscat.beginandendtogether;
432:   return 0;
433: }
434: /*@C
435:    VecScatterDestroy - Destroys a scatter context created by VecScatterCreate()

437:    Collective on VecScatter

439:    Input Parameter:
440: .  sf - the scatter context

442:    Level: intermediate

444: .seealso: VecScatterCreate(), VecScatterCopy()
445: @*/
446: PetscErrorCode VecScatterDestroy(VecScatter *sf)
447: {
448:   PetscSFDestroy(sf);
449:   return 0;
450: }

452: /*@
453:    VecScatterCopy - Makes a copy of a scatter context.

455:    Collective on VecScatter

457:    Input Parameter:
458: .  sf - the scatter context

460:    Output Parameter:
461: .  newsf - the context copy

463:    Level: advanced

465: .seealso: VecScatterCreate(), VecScatterDestroy()
466: @*/
467: PetscErrorCode  VecScatterCopy(VecScatter sf,VecScatter *newsf)
468: {
470:   PetscSFDuplicate(sf,PETSCSF_DUPLICATE_GRAPH,newsf);
471:   PetscSFSetUp(*newsf);
472:   return 0;
473: }

475: /*@C
476:    VecScatterViewFromOptions - Views a VecScatter object based on values in the options database

478:    Collective on VecScatter

480:    Input Parameters:
481: +  sf - the scatter context
482: .  obj - Optional object
483: -  name - command line option

485:    Level: intermediate
486: .seealso:  VecScatter, VecScatterView, PetscObjectViewFromOptions(), VecScatterCreate()
487: @*/
488: PetscErrorCode  VecScatterViewFromOptions(VecScatter sf,PetscObject obj,const char name[])
489: {
491:   PetscObjectViewFromOptions((PetscObject)sf,obj,name);
492:   return 0;
493: }

495: /* ------------------------------------------------------------------*/
496: /*@C
497:    VecScatterView - Views a vector scatter context.

499:    Collective on VecScatter

501:    Input Parameters:
502: +  sf - the scatter context
503: -  viewer - the viewer for displaying the context

505:    Level: intermediate

507: @*/
508: PetscErrorCode  VecScatterView(VecScatter sf,PetscViewer viewer)
509: {
510:   PetscSFView(sf,viewer);
511:   return 0;
512: }

514: /*@C
515:    VecScatterRemap - Remaps the "from" and "to" indices in a
516:    vector scatter context. FOR EXPERTS ONLY!

518:    Collective on VecScatter

520:    Input Parameters:
521: +  sf    - vector scatter context
522: .  tomap   - remapping plan for "to" indices (may be NULL).
523: -  frommap - remapping plan for "from" indices (may be NULL)

525:    Level: developer

527:    Notes:
528:      In the parallel case the todata contains indices from where the data is taken
529:      (and then sent to others)! The fromdata contains indices from where the received
530:      data is finally put locally.

532:      In the sequential case the todata contains indices from where the data is put
533:      and the fromdata contains indices from where the data is taken.
534:      This is backwards from the parallel case!

536: @*/
537: PetscErrorCode  VecScatterRemap(VecScatter sf,PetscInt tomap[],PetscInt frommap[])
538: {
541:   VecScatterRemap_Internal(sf,tomap,frommap);
543:   /* Mark the vector lengths as unknown because we do not know the lengths of the remapped vectors */
544:   sf->vscat.from_n = -1;
545:   sf->vscat.to_n   = -1;
546:   return 0;
547: }
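
/* Illustrative sketch (not part of vscat.c): a hypothetical use of VecScatterRemap() that makes an
   existing scatter read the local part of x in reverse order, i.e., take x[tomap[i]] instead of x[i]. */
static PetscErrorCode VecScatterRemapReverse_Sketch(VecScatter sf,Vec x)
{
  PetscInt i,nx,*tomap;

  VecGetLocalSize(x,&nx);
  PetscMalloc1(nx,&tomap);
  for (i=0; i<nx; i++) tomap[i] = nx-1-i; /* a permutation of the local entries of x */
  VecScatterRemap(sf,tomap,NULL);         /* pass NULL for frommap */
  PetscFree(tomap);
  return 0;
}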

549: /*@
550:   VecScatterSetFromOptions - Configures the vector scatter from the options database.

552:   Collective on VecScatter

554:   Input Parameter:
555: . sf - The vector scatter

557:   Notes:
558:     To see all options, run your program with the -help option, or consult the users manual.
559:           Must be called before VecScatterSetUp() and before the vector scatter is used.

561:   Level: beginner

563: .seealso: VecScatterCreate(), VecScatterDestroy(), VecScatterSetUp()
564: @*/
565: PetscErrorCode VecScatterSetFromOptions(VecScatter sf)
566: {

570:   PetscObjectOptionsBegin((PetscObject)sf);

572:   sf->vscat.beginandendtogether = PETSC_FALSE;
573:   PetscOptionsBool("-vecscatter_merge","Use combined (merged) vector scatter begin and end","VecScatterCreate",sf->vscat.beginandendtogether,&sf->vscat.beginandendtogether,NULL);
574:   if (sf->vscat.beginandendtogether) PetscInfo(sf,"Using combined (merged) vector scatter begin and end\n");
575:   PetscOptionsEnd();
576:   return 0;
577: }
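
/* Illustrative sketch (not part of vscat.c): a hypothetical way to request the merged begin/end
   behavior that -vecscatter_merge selects, then confirm it with VecScatterGetMerged(). */
static PetscErrorCode VecScatterUseMergedBeginEnd_Sketch(VecScatter sf)
{
  PetscBool merged;

  PetscOptionsSetValue(NULL,"-vecscatter_merge","true"); /* set the option programmatically */
  VecScatterSetFromOptions(sf);                          /* picks the option up */
  VecScatterGetMerged(sf,&merged);                       /* merged is now PETSC_TRUE */
  PetscInfo(sf,"merged begin/end: %d\n",(int)merged);
  return 0;
}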

579: /* ---------------------------------------------------------------- */
580: /*@
581:    VecScatterCreate - Creates a vector scatter context.

583:    Collective on Vec

585:    Input Parameters:
586: +  xin - a vector that defines the shape (parallel data layout of the vector)
587:          of vectors from which we scatter
588: .  yin - a vector that defines the shape (parallel data layout of the vector)
589:          of vectors to which we scatter
590: .  ix - the indices of xin to scatter (if NULL scatters all values)
591: -  iy - the indices of yin to hold results (if NULL fills entire vector yin)

593:    Output Parameter:
594: .  newsf - location to store the new scatter (SF) context

596:    Options Database Keys:
597: +  -vecscatter_view         - Prints details of the communication
598: .  -vecscatter_view ::ascii_info    - Prints fewer details about the communication
599: -  -vecscatter_merge        - VecScatterBegin() handles all of the communication and VecScatterEnd() is a no-op;
600:                               this eliminates the chance to overlap computation and communication

602:   Level: intermediate

604:   Notes:
605:    If both xin and yin are parallel, their communicator must be on the same
606:    set of processes, but their process order can be different.
607:    In calls to VecScatterBegin() and VecScatterEnd() you can use different vectors than the xin and
608:    yin you used above; BUT they must have the same parallel data layout, for example,
609:    they could be obtained from VecDuplicate().
610:    A VecScatter context CANNOT be used in two or more simultaneous scatters;
611:    that is you cannot call a second VecScatterBegin() with the same scatter
612:    context until the VecScatterEnd() has been called on the first VecScatterBegin().
613:    In this case a separate VecScatter is needed for each concurrent scatter.

615:    Currently the MPI_Send() calls use PERSISTENT versions.
616:    (This unfortunately requires that the same in and out arrays be used for each use; this
617:     is why we always need to pack the input into the work array before sending
618:     and unpack upon receiving, instead of using MPI datatypes to avoid the packing/unpacking.)

620:    ix and iy cannot both be NULL at the same time.

622:    Use VecScatterCreateToAll() to create a vecscatter that copies an MPI vector to sequential vectors on all MPI ranks.
623:    Use VecScatterCreateToZero() to create a vecscatter that copies an MPI vector to a sequential vector on MPI rank 0.
624:    These special vecscatters have better performance than general ones.

626: .seealso: VecScatterDestroy(), VecScatterCreateToAll(), VecScatterCreateToZero(), PetscSFCreate()
627: @*/
628: PetscErrorCode VecScatterCreate(Vec x,IS ix,Vec y,IS iy,VecScatter *newsf)
629: {
630:   MPI_Comm       xcomm,ycomm,bigcomm;
631:   Vec            xx,yy;
632:   IS             ix_old=ix,iy_old=iy,ixx,iyy;
633:   PetscMPIInt    xcommsize,ycommsize,rank,result;
634:   PetscInt       i,n,N,nroots,nleaves,*ilocal,xstart,ystart,ixsize,iysize,xlen,ylen;
635:   const PetscInt *xindices,*yindices;
636:   PetscSFNode    *iremote;
637:   PetscLayout    xlayout,ylayout;
638:   ISTypeID       ixid,iyid;
639:   PetscInt       bs,bsx,bsy,min,max,m[2],ixfirst,ixstep,iyfirst,iystep;
640:   PetscBool      can_do_block_opt=PETSC_FALSE;
641:   PetscSF        sf;


646:   /* Get comm from x and y */
647:   PetscObjectGetComm((PetscObject)x,&xcomm);
648:   MPI_Comm_size(xcomm,&xcommsize);
649:   PetscObjectGetComm((PetscObject)y,&ycomm);
650:   MPI_Comm_size(ycomm,&ycommsize);
651:   if (xcommsize > 1 && ycommsize > 1) {
652:     MPI_Comm_compare(xcomm,ycomm,&result);
654:   }
655:   bs = 1; /* default, no blocking */

657:   /*
658:    Let P and S stand for parallel and sequential vectors respectively. There are four combinations of vecscatters: PtoP, PtoS,
659:    StoP and StoS. The assumption of VecScatterCreate(Vec x,IS ix,Vec y,IS iy,VecScatter *newctx) is: if x is parallel, then ix
660:    contains global indices of x. If x is sequential, ix contains local indices of x. Similarly for y and iy.

662:    SF is built around the concepts of local leaves and remote roots. We treat the source vector x as roots and the destination vector y as
663:    leaves. A PtoS scatter can be naturally mapped to SF. We transform PtoP and StoP to PtoS, and treat StoS as trivial PtoS.
664:   */

666:   /* NULL ix or iy in VecScatterCreate(x,ix,y,iy,newctx) has special meaning. Recover them for these cases */
667:   if (!ix) {
668:     if (xcommsize > 1 && ycommsize == 1) { /* PtoS: null ix means the whole x will be scattered to each seq y */
669:       VecGetSize(x,&N);
670:       ISCreateStride(PETSC_COMM_SELF,N,0,1,&ix);
671:     } else { /* PtoP, StoP or StoS: null ix means the whole local part of x will be scattered */
672:       VecGetLocalSize(x,&n);
673:       VecGetOwnershipRange(x,&xstart,NULL);
674:       ISCreateStride(PETSC_COMM_SELF,n,xstart,1,&ix);
675:     }
676:   }

678:   if (!iy) {
679:     if (xcommsize == 1 && ycommsize > 1) { /* StoP: null iy means the whole y will be scattered to from each seq x */
680:       VecGetSize(y,&N);
681:       ISCreateStride(PETSC_COMM_SELF,N,0,1,&iy);
682:     } else { /* PtoP, StoP or StoS: null iy means the whole local part of y will be scattered to */
683:       VecGetLocalSize(y,&n);
684:       VecGetOwnershipRange(y,&ystart,NULL);
685:       ISCreateStride(PETSC_COMM_SELF,n,ystart,1,&iy);
686:     }
687:   }

689:   /* Do error checking immediately after we have non-empty ix, iy */
690:   ISGetLocalSize(ix,&ixsize);
691:   ISGetLocalSize(iy,&iysize);
692:   VecGetSize(x,&xlen);
693:   VecGetSize(y,&ylen);
695:   ISGetMinMax(ix,&min,&max);
697:   ISGetMinMax(iy,&min,&max);

700:   /* Extract info about ix, iy for further test */
701:   ISGetTypeID_Private(ix,&ixid);
702:   ISGetTypeID_Private(iy,&iyid);
703:   if (ixid == IS_BLOCK)       ISGetBlockSize(ix,&bsx);
704:   else if (ixid == IS_STRIDE) ISStrideGetInfo(ix,&ixfirst,&ixstep);

706:   if (iyid == IS_BLOCK)      ISGetBlockSize(iy,&bsy);
707:   else if (iyid == IS_STRIDE) ISStrideGetInfo(iy,&iyfirst,&iystep);

709:   /* Check if the PtoS scatter is a special ToAll/ToZero scatter, which can result from VecScatterCreateToAll/Zero.
710:      ToAll means the whole MPI vector is copied to a seq vector on every process. ToZero means the whole MPI
711:      vector is copied to a seq vector on rank 0 and the other processes do nothing (i.e., they pass in empty ix, iy).

713:      We can optimize these scatters with MPI collectives. We can also avoid costly analysis used for general scatters.
714:   */
715:   if (xcommsize > 1 && ycommsize == 1) { /* Ranks do not diverge at this if-test */
716:     PetscInt    pattern[2] = {0, 0}; /* A boolean array with pattern[0] for allgather-like (ToAll) and pattern[1] for gather-like (ToZero) */
717:     PetscLayout map;

719:     MPI_Comm_rank(xcomm,&rank);
720:     VecGetLayout(x,&map);
721:     if (rank == 0) {
722:       if (ixid == IS_STRIDE && iyid == IS_STRIDE && ixsize == xlen && ixfirst == 0 && ixstep == 1 && iyfirst == 0 && iystep == 1) {
723:         /* Rank 0 scatters the whole mpi x to seq y, so it is either a ToAll or a ToZero candidate in its view */
724:         pattern[0] = pattern[1] = 1;
725:       }
726:     } else {
727:       if (ixid == IS_STRIDE && iyid == IS_STRIDE && ixsize == xlen && ixfirst == 0 && ixstep == 1 && iyfirst == 0 && iystep == 1) {
728:         /* Other ranks also scatter the whole mpi x to seq y, so it is a ToAll candidate in their view */
729:         pattern[0] = 1;
730:       } else if (ixsize == 0) {
731:         /* Other ranks do nothing, so it is a ToZero candidate */
732:         pattern[1] = 1;
733:       }
734:     }

736:     /* One stone (the expensive allreduce) two birds: pattern[] tells if it is ToAll or ToZero */
737:     MPIU_Allreduce(MPI_IN_PLACE,pattern,2,MPIU_INT,MPI_LAND,xcomm);

739:     if (pattern[0] || pattern[1]) {
740:       PetscSFCreate(xcomm,&sf);
741:       PetscSFSetFromOptions(sf);
742:       PetscSFSetGraphWithPattern(sf,map,pattern[0] ? PETSCSF_PATTERN_ALLGATHER : PETSCSF_PATTERN_GATHER);
743:       goto functionend; /* No further analysis needed. What a big win! */
744:     }
745:   }

747:   /* Continue ...
748:      Do block optimization by taking advantage of high level info available in ix, iy.
749:      The block optimization is valid when all of the following conditions are met:
750:      1) ix, iy are blocked or can be blocked (i.e., strided with step=1);
751:      2) ix, iy have the same block size;
752:      3) all processors agree on one block size;
753:      4) no blocks span more than one process;
754:    */
755:   bigcomm = (xcommsize == 1) ? ycomm : xcomm;

757:   /* Processors could go through different paths in this if-else test */
758:   m[0] = m[1] = PETSC_MPI_INT_MIN;
759:   if (ixid == IS_BLOCK && iyid == IS_BLOCK) {
760:     m[0] = PetscMax(bsx,bsy);
761:     m[1] = -PetscMin(bsx,bsy);
762:   } else if (ixid == IS_BLOCK  && iyid == IS_STRIDE && iystep==1 && iyfirst%bsx==0) {
763:     m[0] = bsx;
764:     m[1] = -bsx;
765:   } else if (ixid == IS_STRIDE && iyid == IS_BLOCK  && ixstep==1 && ixfirst%bsy==0) {
766:     m[0] = bsy;
767:     m[1] = -bsy;
768:   }
769:   /* Get max and min of bsx,bsy over all processes in one allreduce */
770:   MPIU_Allreduce(MPI_IN_PLACE,m,2,MPIU_INT,MPI_MAX,bigcomm);
771:   max = m[0]; min = -m[1];

773:   /* Since we used allreduce above, all ranks will have the same min and max. min==max
774:      implies all ranks have the same bs. Do a further test to see if the local vector sizes are divisible
775:      by bs on ALL ranks. If they are, we are ensured that no blocks span more than one processor.
776:    */
777:   if (min == max && min > 1) {
778:     VecGetLocalSize(x,&xlen);
779:     VecGetLocalSize(y,&ylen);
780:     m[0] = xlen%min;
781:     m[1] = ylen%min;
782:     MPIU_Allreduce(MPI_IN_PLACE,m,2,MPIU_INT,MPI_LOR,bigcomm);
783:     if (!m[0] && !m[1]) can_do_block_opt = PETSC_TRUE;
784:   }

786:   /* If can_do_block_opt, then shrink x, y, ix and iy by bs to get xx, yy, ixx and iyy, whose indices
787:      and layout are actually used in building SF. Suppose blocked ix representing {0,1,2,6,7,8} has
788:      indices {0,2} and bs=3, then ixx = {0,2}; suppose strided iy={3,4,5,6,7,8}, then iyy={1,2}.

790:      xx is a little special. If x is seq, then xx is the concatenation of seq x's on ycomm. In this way,
791:      we can treat PtoP and StoP uniformly as PtoS.
792:    */
793:   if (can_do_block_opt) {
794:     const PetscInt *indices;

796:     /* Shrink x and ix */
797:     bs   = min;
798:     VecCreateMPIWithArray(bigcomm,1,xlen/bs,PETSC_DECIDE,NULL,&xx); /* We only care xx's layout */
799:     if (ixid == IS_BLOCK) {
800:       ISBlockGetIndices(ix,&indices);
801:       ISBlockGetLocalSize(ix,&ixsize);
802:       ISCreateGeneral(PETSC_COMM_SELF,ixsize,indices,PETSC_COPY_VALUES,&ixx);
803:       ISBlockRestoreIndices(ix,&indices);
804:     } else { /* ixid == IS_STRIDE */
805:       ISGetLocalSize(ix,&ixsize);
806:       ISCreateStride(PETSC_COMM_SELF,ixsize/bs,ixfirst/bs,1,&ixx);
807:     }

809:     /* Shrink y and iy */
810:     VecCreateMPIWithArray(ycomm,1,ylen/bs,PETSC_DECIDE,NULL,&yy);
811:     if (iyid == IS_BLOCK) {
812:       ISBlockGetIndices(iy,&indices);
813:       ISBlockGetLocalSize(iy,&iysize);
814:       ISCreateGeneral(PETSC_COMM_SELF,iysize,indices,PETSC_COPY_VALUES,&iyy);
815:       ISBlockRestoreIndices(iy,&indices);
816:     } else { /* iyid == IS_STRIDE */
817:       ISGetLocalSize(iy,&iysize);
818:       ISCreateStride(PETSC_COMM_SELF,iysize/bs,iyfirst/bs,1,&iyy);
819:     }
820:   } else {
821:     ixx = ix;
822:     iyy = iy;
823:     yy  = y;
824:     if (xcommsize == 1) VecCreateMPIWithArray(bigcomm,1,xlen,PETSC_DECIDE,NULL,&xx); else xx = x;
825:   }

827:   /* Now it is ready to build SF with preprocessed (xx, yy) and (ixx, iyy) */
828:   ISGetIndices(ixx,&xindices);
829:   ISGetIndices(iyy,&yindices);
830:   VecGetLayout(xx,&xlayout);

832:   if (ycommsize > 1) {
833:     /* PtoP or StoP */

835:     /* Below is a piece of complex code with a very simple goal: move the global index pairs (xindices[i], yindices[i])
836:        to the owner process of yindices[i] according to ylayout, for i = 0..n-1.

838:        I did it through a temp sf, but later I thought the old design was inefficient and also distorted the log view.
839:        We want to map one VecScatterCreate() call to one PetscSFCreate() call. The old design mapped to three
840:        PetscSFCreate() calls. This code is on the critical path of VecScatterSetUp and is used by every VecScatterCreate.
841:        So I commented it out and did another optimized implementation. The commented code is left here for reference.
842:      */
843: #if 0
844:     const PetscInt *degree;
845:     PetscSF        tmpsf;
846:     PetscInt       inedges=0,*leafdata,*rootdata;

848:     VecGetOwnershipRange(xx,&xstart,NULL);
849:     VecGetLayout(yy,&ylayout);
850:     VecGetOwnershipRange(yy,&ystart,NULL);

852:     VecGetLocalSize(yy,&nroots);
853:     ISGetLocalSize(iyy,&nleaves);
854:     PetscMalloc2(nleaves,&iremote,nleaves*2,&leafdata);

856:     for (i=0; i<nleaves; i++) {
857:       PetscLayoutFindOwnerIndex(ylayout,yindices[i],&iremote[i].rank,&iremote[i].index);
858:       leafdata[2*i]   = yindices[i];
859:       leafdata[2*i+1] = (xcommsize > 1)? xindices[i] : xindices[i] + xstart;
860:     }

862:     PetscSFCreate(ycomm,&tmpsf);
863:     PetscSFSetGraph(tmpsf,nroots,nleaves,NULL,PETSC_USE_POINTER,iremote,PETSC_USE_POINTER);

865:     PetscSFComputeDegreeBegin(tmpsf,&degree);
866:     PetscSFComputeDegreeEnd(tmpsf,&degree);

868:     for (i=0; i<nroots; i++) inedges += degree[i];
869:     PetscMalloc1(inedges*2,&rootdata);
870:     PetscSFGatherBegin(tmpsf,MPIU_2INT,leafdata,rootdata);
871:     PetscSFGatherEnd(tmpsf,MPIU_2INT,leafdata,rootdata);

873:     PetscFree2(iremote,leafdata);
874:     PetscSFDestroy(&tmpsf);

876:     /* rootdata contains global index pairs (i, j). j's are owned by the current process, but i's can point to anywhere.
877:        We convert j to local, and convert i to (rank, index). In the end, we get a PtoS suitable for building SF.
878:      */
879:     nleaves = inedges;
880:     VecGetLocalSize(xx,&nroots);
881:     PetscMalloc1(nleaves,&ilocal);
882:     PetscMalloc1(nleaves,&iremote);

884:     for (i=0; i<inedges; i++) {
885:       ilocal[i] = rootdata[2*i] - ystart; /* convert y's global index to local index */
886:       PetscLayoutFindOwnerIndex(xlayout,rootdata[2*i+1],&iremote[i].rank,&iremote[i].index); /* convert x's global index to (rank, index) */
887:     }
888:     PetscFree(rootdata);
889: #else
890:     PetscInt       j,k,n,disp,rlentotal,*sstart,*xindices_sorted,*yindices_sorted;
891:     const PetscInt *yrange;
892:     PetscMPIInt    nsend,nrecv,nreq,yrank,*sendto,*recvfrom,tag1,tag2;
893:     PetscInt       *slens,*rlens,count;
894:     PetscInt       *rxindices,*ryindices;
895:     MPI_Request    *reqs,*sreqs,*rreqs;

897:     /* Sorting makes the code simpler and faster, and also helps get rid of many O(P) arrays, which hurt scalability at large scale
898:        yindices_sorted - sorted yindices
899:        xindices_sorted - xindices sorted along with yindices
900:      */
901:     ISGetLocalSize(ixx,&n); /*ixx, iyy have the same local size */
902:     PetscMalloc2(n,&xindices_sorted,n,&yindices_sorted);
903:     PetscArraycpy(xindices_sorted,xindices,n);
904:     PetscArraycpy(yindices_sorted,yindices,n);
905:     PetscSortIntWithArray(n,yindices_sorted,xindices_sorted);
906:     VecGetOwnershipRange(xx,&xstart,NULL);
907:     if (xcommsize == 1) {for (i=0; i<n; i++) xindices_sorted[i] += xstart;} /* Convert to global indices */

909:     /*=============================================================================
910:              Calculate info about messages I need to send
911:       =============================================================================*/
912:     /* nsend    - number of non-empty messages to send
913:        sendto   - [nsend] ranks I will send messages to
914:        sstart   - [nsend+1] sstart[i] is the start index in xindices_sorted[] I send to rank sendto[i]
915:        slens    - [ycommsize] I want to send slens[i] entries to rank i.
916:      */
917:     VecGetLayout(yy,&ylayout);
918:     PetscLayoutGetRanges(ylayout,&yrange);
919:     PetscCalloc1(ycommsize,&slens); /* The only O(P) array in this algorithm */

921:     i = j = nsend = 0;
922:     while (i < n) {
923:       if (yindices_sorted[i] >= yrange[j+1]) { /* If i-th index is out of rank j's bound */
924:         do {j++;} while (yindices_sorted[i] >= yrange[j+1] && j < ycommsize); /* Increase j until i-th index falls in rank j's bound */
926:       }
927:       i++;
928:       if (!slens[j]++) nsend++;
929:     }

931:     PetscMalloc2(nsend+1,&sstart,nsend,&sendto);

933:     sstart[0] = 0;
934:     for (i=j=0; i<ycommsize; i++) {
935:       if (slens[i]) {
936:         sendto[j]   = (PetscMPIInt)i;
937:         sstart[j+1] = sstart[j] + slens[i];
938:         j++;
939:       }
940:     }

942:     /*=============================================================================
943:       Calculate the reverse info about messages I will recv
944:       =============================================================================*/
945:     /* nrecv     - number of messages I will recv
946:        recvfrom  - [nrecv] ranks I recv from
947:        rlens     - [nrecv] I will recv rlens[i] entries from rank recvfrom[i]
948:        rlentotal - sum of rlens[]
949:        rxindices - [rlentotal] recv buffer for xindices_sorted
950:        ryindices - [rlentotal] recv buffer for yindices_sorted
951:      */
952:     PetscGatherNumberOfMessages_Private(ycomm,NULL,slens,&nrecv);
953:     PetscGatherMessageLengths_Private(ycomm,nsend,nrecv,slens,&recvfrom,&rlens);
954:     PetscFree(slens); /* Free the O(P) array ASAP */
955:     rlentotal = 0; for (i=0; i<nrecv; i++) rlentotal += rlens[i];

957:     /*=============================================================================
958:       Communicate with processors in recvfrom[] to populate rxindices and ryindices
959:       ============================================================================*/
960:     PetscCommGetNewTag(ycomm,&tag1);
961:     PetscCommGetNewTag(ycomm,&tag2);
962:     PetscMalloc2(rlentotal,&rxindices,rlentotal,&ryindices);
963:     PetscMPIIntCast((nsend+nrecv)*2,&nreq);
964:     PetscMalloc1(nreq,&reqs);
965:     sreqs = reqs;
966:     rreqs = reqs + nsend*2;

968:     for (i=disp=0; i<nrecv; i++) {
969:       count = rlens[i];
970:       MPIU_Irecv(rxindices+disp,count,MPIU_INT,recvfrom[i],tag1,ycomm,rreqs+i);
971:       MPIU_Irecv(ryindices+disp,count,MPIU_INT,recvfrom[i],tag2,ycomm,rreqs+nrecv+i);
972:       disp += rlens[i];
973:     }

975:     for (i=0; i<nsend; i++) {
976:       count = sstart[i+1]-sstart[i];
977:       MPIU_Isend(xindices_sorted+sstart[i],count,MPIU_INT,sendto[i],tag1,ycomm,sreqs+i);
978:       MPIU_Isend(yindices_sorted+sstart[i],count,MPIU_INT,sendto[i],tag2,ycomm,sreqs+nsend+i);
979:     }
980:     MPI_Waitall(nreq,reqs,MPI_STATUSES_IGNORE);

982:     /* Transform VecScatter into SF */
983:     nleaves = rlentotal;
984:     PetscMalloc1(nleaves,&ilocal);
985:     PetscMalloc1(nleaves,&iremote);
986:     MPI_Comm_rank(ycomm,&yrank);
987:     for (i=disp=0; i<nrecv; i++) {
988:       for (j=0; j<rlens[i]; j++) {
989:         k               = disp + j; /* k-th index pair */
990:         ilocal[k]       = ryindices[k] - yrange[yrank]; /* Convert y's global index to local index */
991:         PetscLayoutFindOwnerIndex(xlayout,rxindices[k],&rank,&iremote[k].index); /* Convert x's global index to (rank, index) */
992:         iremote[k].rank = rank;
993:       }
994:       disp += rlens[i];
995:     }

997:     PetscFree2(sstart,sendto);
998:     PetscFree(rlens);
999:     PetscFree(recvfrom);
1000:     PetscFree(reqs);
1001:     PetscFree2(rxindices,ryindices);
1002:     PetscFree2(xindices_sorted,yindices_sorted);
1003: #endif
1004:   } else {
1005:     /* PtoS or StoS */
1006:     ISGetLocalSize(iyy,&nleaves);
1007:     PetscMalloc1(nleaves,&ilocal);
1008:     PetscMalloc1(nleaves,&iremote);
1009:     PetscArraycpy(ilocal,yindices,nleaves);
1010:     for (i=0; i<nleaves; i++) {
1011:       PetscLayoutFindOwnerIndex(xlayout,xindices[i],&rank,&iremote[i].index);
1012:       iremote[i].rank = rank;
1013:     }
1014:   }

1016:   /* MUST build SF on xx's comm, which is not necessarily identical to yy's comm.
1017:      In SF's view, xx contains the roots (i.e., the remote) and iremote[].rank are ranks in xx's comm.
1018:      yy contains leaves, which are local and can be thought of as part of PETSC_COMM_SELF. */
1019:   PetscSFCreate(PetscObjectComm((PetscObject)xx),&sf);
1020:   sf->allow_multi_leaves = PETSC_TRUE;
1021:   PetscSFSetFromOptions(sf);
1022:   VecGetLocalSize(xx,&nroots);
1023:   PetscSFSetGraph(sf,nroots,nleaves,ilocal,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER); /* Give ilocal/iremote to petsc and no need to free them here */

1025:   /* Free memory no longer needed */
1026:   ISRestoreIndices(ixx,&xindices);
1027:   ISRestoreIndices(iyy,&yindices);
1028:   if (can_do_block_opt) {
1029:     VecDestroy(&xx);
1030:     VecDestroy(&yy);
1031:     ISDestroy(&ixx);
1032:     ISDestroy(&iyy);
1033:   } else if (xcommsize == 1) {
1034:     VecDestroy(&xx);
1035:   }

1037: functionend:
1038:   sf->vscat.bs = bs;
1039:   if (sf->vscat.bs > 1) {
1040:     MPI_Type_contiguous(sf->vscat.bs,MPIU_SCALAR,&sf->vscat.unit);
1041:     MPI_Type_commit(&sf->vscat.unit);
1042:   } else {
1043:     sf->vscat.unit = MPIU_SCALAR;
1044:   }
1045:   VecGetLocalSize(x,&sf->vscat.from_n);
1046:   VecGetLocalSize(y,&sf->vscat.to_n);
1047:   if (!ix_old) ISDestroy(&ix); /* We created helper ix, iy. Free them */
1048:   if (!iy_old) ISDestroy(&iy);

1050:   /* Set default */
1051:   VecScatterSetFromOptions(sf);

1053:   *newsf = sf;
1054:   return 0;
1055: }
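
/* Illustrative sketch (not part of vscat.c): a typical PtoS use of VecScatterCreate(), gathering the
   first nhead global entries of an MPI vector x into a sequential vector y on every process.
   The names nhead, y and scat are hypothetical. */
static PetscErrorCode VecScatterGatherHead_Sketch(Vec x,PetscInt nhead,Vec *y,VecScatter *scat)
{
  IS ix;

  VecCreateSeq(PETSC_COMM_SELF,nhead,y);
  ISCreateStride(PETSC_COMM_SELF,nhead,0,1,&ix); /* global indices 0..nhead-1 of x */
  VecScatterCreate(x,ix,*y,NULL,scat);           /* NULL iy: fill the whole y */
  ISDestroy(&ix);
  VecScatterBegin(*scat,x,*y,INSERT_VALUES,SCATTER_FORWARD);
  VecScatterEnd(*scat,x,*y,INSERT_VALUES,SCATTER_FORWARD);
  return 0;
}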

1057: /*@C
1058:       VecScatterCreateToAll - Creates a vector and a scatter context that copies all
1059:           vector values to each processor

1061:   Collective on Vec

1063:   Input Parameter:
1064: .  vin  - input MPIVEC

1066:   Output Parameters:
1067: +  ctx - scatter context
1068: -  vout - output SEQVEC that is large enough to scatter into

1070:   Level: intermediate

1072:    Note: vout may be NULL [PETSC_NULL_VEC from fortran] if you do not
1073:    need to have it created

1075:    Usage:
1076: $        VecScatterCreateToAll(vin,&ctx,&vout);
1077: $
1078: $        // scatter as many times as you need
1079: $        VecScatterBegin(ctx,vin,vout,INSERT_VALUES,SCATTER_FORWARD);
1080: $        VecScatterEnd(ctx,vin,vout,INSERT_VALUES,SCATTER_FORWARD);
1081: $
1082: $        // destroy scatter context and local vector when no longer needed
1083: $        VecScatterDestroy(&ctx);
1084: $        VecDestroy(&vout);

1086:     Do NOT create a vector and then pass it in as the final argument vout! vout is created by this routine
1087:   automatically (unless you pass NULL in for that argument if you do not need it).

1089: .seealso: VecScatterCreate(), VecScatterCreateToZero(), VecScatterBegin(), VecScatterEnd()

1091: @*/
1092: PetscErrorCode  VecScatterCreateToAll(Vec vin,VecScatter *ctx,Vec *vout)
1093: {
1094:   PetscInt       N;
1095:   IS             is;
1096:   Vec            tmp;
1097:   Vec            *tmpv;
1098:   PetscBool      tmpvout = PETSC_FALSE;
1099:   VecType        roottype;

1104:   if (vout) {
1106:     tmpv = vout;
1107:   } else {
1108:     tmpvout = PETSC_TRUE;
1109:     tmpv    = &tmp;
1110:   }

1112:   /* Create a seq vec on each proc, with the same size as the original vec */
1113:   VecGetSize(vin,&N);
1114:   VecGetRootType_Private(vin,&roottype);
1115:   VecCreate(PETSC_COMM_SELF,tmpv);
1116:   VecSetSizes(*tmpv,N,PETSC_DECIDE);
1117:   VecSetType(*tmpv,roottype);
1118:   /* Create the VecScatter ctx with the communication info */
1119:   ISCreateStride(PETSC_COMM_SELF,N,0,1,&is);
1120:   VecScatterCreate(vin,is,*tmpv,is,ctx);
1121:   ISDestroy(&is);
1122:   if (tmpvout) VecDestroy(tmpv);
1123:   return 0;
1124: }

1126: /*@C
1127:       VecScatterCreateToZero - Creates an output vector and a scatter context used to
1128:               copy all vector values into the output vector on the zeroth processor

1130:   Collective on Vec

1132:   Input Parameter:
1133: .  vin  - input MPIVEC

1135:   Output Parameters:
1136: +  ctx - scatter context
1137: -  vout - output SEQVEC that is large enough to scatter into on processor 0 and
1138:           of length zero on all other processors

1140:   Level: intermediate

1142:    Note: vout may be NULL [PETSC_NULL_VEC from fortran] if you do not
1143:    need to have it created

1145:    Usage:
1146: $        VecScatterCreateToZero(vin,&ctx,&vout);
1147: $
1148: $        // scatter as many times as you need
1149: $        VecScatterBegin(ctx,vin,vout,INSERT_VALUES,SCATTER_FORWARD);
1150: $        VecScatterEnd(ctx,vin,vout,INSERT_VALUES,SCATTER_FORWARD);
1151: $
1152: $        // destroy scatter context and local vector when no longer needed
1153: $        VecScatterDestroy(&ctx);
1154: $        VecDestroy(&vout);

1156: .seealso: VecScatterCreate(), VecScatterCreateToAll(), VecScatterBegin(), VecScatterEnd()

1158:     Do NOT create a vector and then pass it in as the final argument vout! vout is created by this routine
1159:   automatically (unless you pass NULL in for that argument if you do not need it).

1161: @*/
1162: PetscErrorCode  VecScatterCreateToZero(Vec vin,VecScatter *ctx,Vec *vout)
1163: {

1165:   PetscInt       N;
1166:   PetscMPIInt    rank;
1167:   IS             is;
1168:   Vec            tmp;
1169:   Vec            *tmpv;
1170:   PetscBool      tmpvout = PETSC_FALSE;
1171:   VecType        roottype;

1176:   if (vout) {
1178:     tmpv = vout;
1179:   } else {
1180:     tmpvout = PETSC_TRUE;
1181:     tmpv    = &tmp;
1182:   }

1184:   /* Create a seq vec on each proc; it has the same size as the original vec on process 0 and size zero elsewhere */
1185:   VecGetSize(vin,&N);
1186:   MPI_Comm_rank(PetscObjectComm((PetscObject)vin),&rank);
1187:   if (rank) N = 0;
1188:   VecGetRootType_Private(vin,&roottype);
1189:   VecCreate(PETSC_COMM_SELF,tmpv);
1190:   VecSetSizes(*tmpv,N,PETSC_DECIDE);
1191:   VecSetType(*tmpv,roottype);
1192:   /* Create the VecScatter ctx with the communication info */
1193:   ISCreateStride(PETSC_COMM_SELF,N,0,1,&is);
1194:   VecScatterCreate(vin,is,*tmpv,is,ctx);
1195:   ISDestroy(&is);
1196:   if (tmpvout) VecDestroy(tmpv);
1197:   return 0;
1198: }

1200: /*@
1201:    VecScatterBegin - Begins a generalized scatter from one vector to
1202:    another. Complete the scattering phase with VecScatterEnd().

1204:    Neighbor-wise Collective on VecScatter

1206:    Input Parameters:
1207: +  sf - scatter context generated by VecScatterCreate()
1208: .  x - the vector from which we scatter
1209: .  y - the vector to which we scatter
1210: .  addv - either ADD_VALUES, MAX_VALUES, MIN_VALUES or INSERT_VALUES, with INSERT_VALUES mode any location
1211:           not scattered to retains its old value; i.e. the vector is NOT first zeroed.
1212: -  mode - the scattering mode, usually SCATTER_FORWARD.  The available modes are:
1213:     SCATTER_FORWARD or SCATTER_REVERSE

1215:    Level: intermediate

1217:    Options Database: See VecScatterCreate()

1219:    Notes:
1220:    The vectors x and y need not be the same vectors used in the call
1221:    to VecScatterCreate(), but x must have the same parallel data layout
1222:    as that passed in as the x to VecScatterCreate(), similarly for the y.
1223:    Most likely they have been obtained from VecDuplicate().

1225:    You cannot change the values in the input vector between the calls to VecScatterBegin()
1226:    and VecScatterEnd().

1228:    If you use SCATTER_REVERSE the two arguments x and y should be reversed relative to
1229:    the SCATTER_FORWARD case.

1231:    y[iy[i]] = x[ix[i]], for i=0,...,ni-1

1233:    This scatter is far more general than the conventional
1234:    scatter, since it can be a gather or a scatter or a combination,
1235:    depending on the indices ix and iy.  If x is a parallel vector and y
1236:    is sequential, VecScatterBegin() can serve to gather values to a
1237:    single processor.  Similarly, if y is parallel and x sequential, the
1238:    routine can scatter from one processor to many processors.

1240: .seealso: VecScatterCreate(), VecScatterEnd()
1241: @*/
1242: PetscErrorCode  VecScatterBegin(VecScatter sf,Vec x,Vec y,InsertMode addv,ScatterMode mode)
1243: {
1244:   PetscInt       to_n,from_n;

1249:   if (PetscDefined(USE_DEBUG)) {
1250:     /*
1251:      Error checking to make sure these vectors match the vectors used
1252:      to create the vector scatter context. -1 in the from_n and to_n indicate the
1253:      vector lengths are unknown (for example with mapped scatters) and thus
1254:      no error checking is performed.
1255:      */
1256:     if (sf->vscat.from_n >= 0 && sf->vscat.to_n >= 0) {
1257:       VecGetLocalSize(x,&from_n);
1258:       VecGetLocalSize(y,&to_n);
1259:       if (mode & SCATTER_REVERSE) {
1262:       } else {
1265:       }
1266:     }
1267:   }

1269:   sf->vscat.logging = PETSC_TRUE;
1270:   PetscLogEventBegin(VEC_ScatterBegin,sf,x,y,0);
1271:   VecScatterBegin_Internal(sf,x,y,addv,mode);
1272:   if (sf->vscat.beginandendtogether) {
1273:     VecScatterEnd_Internal(sf,x,y,addv,mode);
1274:   }
1275:   PetscLogEventEnd(VEC_ScatterBegin,sf,x,y,0);
1276:   sf->vscat.logging = PETSC_FALSE;
1277:   return 0;
1278: }
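
/* Illustrative sketch (not part of vscat.c): with SCATTER_REVERSE the two vector arguments are
   swapped relative to the forward case. Assuming 'scat' was created by
   VecScatterCreate(x,ix,y,iy,&scat), this hypothetical helper accumulates y back into x. */
static PetscErrorCode VecScatterReverseAdd_Sketch(VecScatter scat,Vec x,Vec y)
{
  VecScatterBegin(scat,y,x,ADD_VALUES,SCATTER_REVERSE); /* note: y first, then x */
  VecScatterEnd(scat,y,x,ADD_VALUES,SCATTER_REVERSE);
  return 0;
}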

1280: /*@
1281:    VecScatterEnd - Ends a generalized scatter from one vector to another.  Call
1282:    after first calling VecScatterBegin().

1284:    Neighbor-wise Collective on VecScatter

1286:    Input Parameters:
1287: +  sf - scatter context generated by VecScatterCreate()
1288: .  x - the vector from which we scatter
1289: .  y - the vector to which we scatter
1290: .  addv - one of ADD_VALUES, MAX_VALUES, MIN_VALUES or INSERT_VALUES
1291: -  mode - the scattering mode, usually SCATTER_FORWARD.  The available modes are:
1292:      SCATTER_FORWARD, SCATTER_REVERSE

1294:    Level: intermediate

1296:    Notes:
1297:    If you use SCATTER_REVERSE the arguments x and y should be reversed relative to the SCATTER_FORWARD case.

1299:    y[iy[i]] = x[ix[i]], for i=0,...,ni-1

1301: .seealso: VecScatterBegin(), VecScatterCreate()
1302: @*/
1303: PetscErrorCode  VecScatterEnd(VecScatter sf,Vec x,Vec y,InsertMode addv,ScatterMode mode)
1304: {
1308:   if (!sf->vscat.beginandendtogether) {
1309:     sf->vscat.logging = PETSC_TRUE;
1310:     PetscLogEventBegin(VEC_ScatterEnd,sf,x,y,0);
1311:     VecScatterEnd_Internal(sf,x,y,addv,mode);
1312:     PetscLogEventEnd(VEC_ScatterEnd,sf,x,y,0);
1313:     sf->vscat.logging = PETSC_FALSE;
1314:   }
1315:   return 0;
1316: }