Actual source code: vscat.c
1: #include "petscsf.h"
2: #include "petscsystypes.h"
3: #include "petscvec.h"
4: #include <petsc/private/sfimpl.h>
5: #include <../src/vec/is/sf/impls/basic/sfbasic.h>
6: #include <../src/vec/is/sf/impls/basic/sfpack.h>
7: #include <petsc/private/vecimpl.h>
9: typedef enum {IS_INVALID, IS_GENERAL, IS_BLOCK, IS_STRIDE} ISTypeID;
11: PETSC_STATIC_INLINE PetscErrorCode ISGetTypeID_Private(IS is,ISTypeID *id)
12: {
14: PetscBool same;
17: *id = IS_INVALID;
18: PetscObjectTypeCompare((PetscObject)is,ISGENERAL,&same);
19: if (same) {*id = IS_GENERAL; goto functionend;}
20: PetscObjectTypeCompare((PetscObject)is,ISBLOCK,&same);
21: if (same) {*id = IS_BLOCK; goto functionend;}
22: PetscObjectTypeCompare((PetscObject)is,ISSTRIDE,&same);
23: if (same) {*id = IS_STRIDE; goto functionend;}
24: functionend:
25: return(0);
26: }
28: static PetscErrorCode VecScatterBegin_Internal(VecScatter sf,Vec x,Vec y,InsertMode addv,ScatterMode mode)
29: {
31: PetscSF wsf=NULL; /* either sf or its local part */
32: MPI_Op mop=MPI_OP_NULL;
33: PetscMPIInt size;
34: PetscMemType xmtype=PETSC_MEMTYPE_HOST,ymtype=PETSC_MEMTYPE_HOST;
37: if (x != y) {VecLockReadPush(x);}
38: if (sf->use_gpu_aware_mpi || sf->vscat.packongpu) {
39: VecGetArrayReadAndMemType(x,&sf->vscat.xdata,&xmtype);
40: } else {
41: VecGetArrayRead(x,&sf->vscat.xdata);
42: }
44: if (x != y) {
45: if (sf->use_gpu_aware_mpi || sf->vscat.packongpu) {VecGetArrayAndMemType(y,&sf->vscat.ydata,&ymtype);}
46: else {VecGetArray(y,&sf->vscat.ydata);}
47: } else {
48: sf->vscat.ydata = (PetscScalar *)sf->vscat.xdata;
49: ymtype = xmtype;
50: }
51: VecLockWriteSet_Private(y,PETSC_TRUE);
53: /* SCATTER_LOCAL indicates ignoring inter-process communication */
54: MPI_Comm_size(PetscObjectComm((PetscObject)sf),&size);
55: if ((mode & SCATTER_LOCAL) && size > 1) { /* Lazy creation of sf->vscat.lsf since SCATTER_LOCAL is uncommon */
56: if (!sf->vscat.lsf) {PetscSFCreateLocalSF_Private(sf,&sf->vscat.lsf);}
57: wsf = sf->vscat.lsf;
58: } else {
59: wsf = sf;
60: }
62: /* Note xdata/ydata is always recorded on sf (not lsf) above */
63: if (addv == INSERT_VALUES) mop = MPI_REPLACE;
64: else if (addv == ADD_VALUES) mop = MPIU_SUM; /* Petsc defines its own MPI datatype and SUM operation for __float128 etc. */
65: else if (addv == MAX_VALUES) mop = MPIU_MAX;
66: else if (addv == MIN_VALUES) mop = MPIU_MIN;
67: else SETERRQ1(PetscObjectComm((PetscObject)sf),PETSC_ERR_SUP,"Unsupported InsertMode %D in VecScatterBegin/End",addv);
69: if (mode & SCATTER_REVERSE) { /* REVERSE indicates leaves to root scatter. Note that x and y are swapped in input */
70: PetscSFReduceWithMemTypeBegin(wsf,sf->vscat.unit,xmtype,sf->vscat.xdata,ymtype,sf->vscat.ydata,mop);
71: } else { /* FORWARD indicates x to y scatter, where x is root and y is leaf */
72: PetscSFBcastWithMemTypeBegin(wsf,sf->vscat.unit,xmtype,sf->vscat.xdata,ymtype,sf->vscat.ydata,mop);
73: }
74: return(0);
75: }
77: static PetscErrorCode VecScatterEnd_Internal(VecScatter sf,Vec x,Vec y,InsertMode addv,ScatterMode mode)
78: {
80: PetscSF wsf=NULL;
81: MPI_Op mop=MPI_OP_NULL;
82: PetscMPIInt size;
85: /* SCATTER_LOCAL indicates ignoring inter-process communication */
86: MPI_Comm_size(PetscObjectComm((PetscObject)sf),&size);
87: wsf = ((mode & SCATTER_LOCAL) && size > 1) ? sf->vscat.lsf : sf;
89: if (addv == INSERT_VALUES) mop = MPI_REPLACE;
90: else if (addv == ADD_VALUES) mop = MPIU_SUM;
91: else if (addv == MAX_VALUES) mop = MPIU_MAX;
92: else if (addv == MIN_VALUES) mop = MPIU_MIN;
93: else SETERRQ1(PetscObjectComm((PetscObject)sf),PETSC_ERR_SUP,"Unsupported InsertMode %D in VecScatterBegin/End",addv);
95: if (mode & SCATTER_REVERSE) { /* reverse scatter sends leaves to roots. Note that x and y are swapped in input */
96: PetscSFReduceEnd(wsf,sf->vscat.unit,sf->vscat.xdata,sf->vscat.ydata,mop);
97: } else { /* forward scatter sends roots to leaves, i.e., x to y */
98: PetscSFBcastEnd(wsf,sf->vscat.unit,sf->vscat.xdata,sf->vscat.ydata,mop);
99: }
101: if (x != y) {
102: if (sf->use_gpu_aware_mpi || sf->vscat.packongpu) {VecRestoreArrayReadAndMemType(x,&sf->vscat.xdata);}
103: else {VecRestoreArrayRead(x,&sf->vscat.xdata);}
104: VecLockReadPop(x);
105: }
107: if (sf->use_gpu_aware_mpi || sf->vscat.packongpu) {VecRestoreArrayAndMemType(y,&sf->vscat.ydata);}
108: else {VecRestoreArray(y,&sf->vscat.ydata);}
109: VecLockWriteSet_Private(y,PETSC_FALSE);
111: return(0);
112: }
114: /* VecScatterRemap provides a lightweight way to modify a VecScatter. Suppose the input sf scatters
115:    x[i] to y[j]; tomap gives a plan to change the scatter so that it takes x[tomap[i]] to y[j] instead. Note that in SF,
116:    x is the roots. That means we need to change incoming data structures such as bas->irootloc[].
117: */
118: static PetscErrorCode VecScatterRemap_Internal(VecScatter sf,const PetscInt *tomap,const PetscInt *frommap)
119: {
120: PetscInt i,bs = sf->vscat.bs;
121: PetscMPIInt size;
122: PetscBool ident = PETSC_TRUE,isbasic,isneighbor;
123: PetscSFType type;
124: PetscSF_Basic *bas = NULL;
128: /* check if it is an identity map. If it is, do nothing */
129: if (tomap) {
130: for (i=0; i<sf->nroots*bs; i++) {if (i != tomap[i]) {ident = PETSC_FALSE; break; } }
131: if (ident) return(0);
132: }
133: if (frommap) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Unable to remap the FROM in scatters yet");
134: if (!tomap) return(0);
136: MPI_Comm_size(PetscObjectComm((PetscObject)sf),&size);
138: /* Since the indices changed, we must also update the local SF. But we do not do it since
139: lsf is rarely used. We just destroy lsf and rebuild it on demand from updated sf.
140: */
141: if (sf->vscat.lsf) {PetscSFDestroy(&sf->vscat.lsf);}
143: PetscSFGetType(sf,&type);
144: PetscObjectTypeCompare((PetscObject)sf,PETSCSFBASIC,&isbasic);
145: PetscObjectTypeCompare((PetscObject)sf,PETSCSFNEIGHBOR,&isneighbor);
146: if (!isbasic && !isneighbor) SETERRQ1(PetscObjectComm((PetscObject)sf),PETSC_ERR_SUP,"VecScatterRemap on SF type %s is not supported",type);
148:   PetscSFSetUp(sf); /* to build sf->irootloc if SetUp is not yet called */
150:   /* Root indices are going to be remapped. This is tricky for SF. Root indices are used in sf->rremote,
151:      sf->remote and bas->irootloc. The last is cheap to remap, but the former two are not.
152:      To remap them, we would have to do a bcast from roots to leaves, to let leaves know their updated roots.
153:      Since VecScatterRemap is supposed to be a cheap routine that adapts a vecscatter by only changing where
154:      x[] data is taken, we do not remap sf->rremote and sf->remote. The consequence is that operations
155:      accessing them (such as PetscSFCompose) may get stale info. Considering VecScatter does not need
156:      such complicated SF operations, instead of remapping sf->rremote and sf->remote we simply destroy them,
157:      so that code accessing them (if any) will crash (instead of getting silent errors). Note that BcastAndOp/Reduce,
158:      which are used by VecScatter and rely only on bas->irootloc, are updated and correct.
159: */
160: sf->remote = NULL;
161: PetscFree(sf->remote_alloc);
162: /* Not easy to free sf->rremote since it was allocated with PetscMalloc4(), so just give it crazy values */
163: for (i=0; i<sf->roffset[sf->nranks]; i++) sf->rremote[i] = PETSC_MIN_INT;
165:   /* Indices in tomap[] are for each individual vector entry. But indices in sf are for each
166: block in the vector. So before the remapping, we have to expand indices in sf by bs, and
167: after the remapping, we have to shrink them back.
168: */
169: bas = (PetscSF_Basic*)sf->data;
170: for (i=0; i<bas->ioffset[bas->niranks]; i++) bas->irootloc[i] = tomap[bas->irootloc[i]*bs]/bs;
171: #if defined(PETSC_HAVE_DEVICE)
172: /* Free the irootloc copy on device. We allocate a new copy and get the updated value on demand. See PetscSFLinkGetRootPackOptAndIndices() */
173: for (i=0; i<2; i++) {PetscSFFree(sf,PETSC_MEMTYPE_DEVICE,bas->irootloc_d[i]);}
174: #endif
175: /* Destroy and then rebuild root packing optimizations since indices are changed */
176: PetscSFResetPackFields(sf);
177: PetscSFSetUpPackFields(sf);
178: return(0);
179: }
181: /* Given a parallel VecScatter context, return number of procs and vector entries involved in remote (i.e., off-process) communication
183: Input Parameters:
184: + sf - the context (must be a parallel vecscatter)
185: - send - true to select the send info (i.e., todata), otherwise to select the recv info (i.e., fromdata)
187: Output parameters:
188: + num_procs - number of remote processors
189: - num_entries - number of vector entries to send or recv
191: .seealso: VecScatterGetRemote_Private(), VecScatterGetRemoteOrdered_Private()
193: Notes:
194:    Sometimes PETSc internally needs to use the matrix-vector-multiply vecscatter context for other purposes. The client code
195:    in those cases usually only uses plain MPI_Send/Recv. This group of subroutines provides the info needed for such uses.
196: */
197: PetscErrorCode VecScatterGetRemoteCount_Private(VecScatter sf,PetscBool send,PetscInt *num_procs,PetscInt *num_entries)
198: {
199: PetscErrorCode ierr;
200: PetscInt nranks,remote_start;
201: PetscMPIInt rank;
202: const PetscInt *offset;
203: const PetscMPIInt *ranks;
206: PetscSFSetUp(sf);
207: MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);
209: /* This routine is mainly used for MatMult's Mvctx. In Mvctx, we scatter an MPI vector x to a sequential vector lvec.
210: Remember x is roots and lvec is leaves. 'send' means roots to leaves communication. If 'send' is true, we need to
211: get info about which ranks this processor needs to send to. In other words, we need to call PetscSFGetLeafRanks().
212: If send is false, we do the opposite, calling PetscSFGetRootRanks().
213: */
214: if (send) {PetscSFGetLeafRanks(sf,&nranks,&ranks,&offset,NULL);}
215: else {PetscSFGetRootRanks(sf,&nranks,&ranks,&offset,NULL,NULL);}
216: if (nranks) {
217: remote_start = (rank == ranks[0])? 1 : 0;
218: if (num_procs) *num_procs = nranks - remote_start;
219: if (num_entries) *num_entries = offset[nranks] - offset[remote_start];
220: } else {
221: if (num_procs) *num_procs = 0;
222: if (num_entries) *num_entries = 0;
223: }
224: return(0);
225: }
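/* A hedged usage sketch (illustrative only; the count and buffer variables below are hypothetical and not part of this file):
   internal code that wants to post its own MPI sends based on this scatter could size its buffers with the counts above.

     PetscInt    nsendranks,nsendentries;
     PetscScalar *sendbuf;
     VecScatterGetRemoteCount_Private(sf,PETSC_TRUE,&nsendranks,&nsendentries);
     PetscMalloc1(nsendentries,&sendbuf);  // one buffer large enough for all outgoing entries
*/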
227: /* Given a parallel VecScatter context, return a plan that represents the remote communication.
228: Any output parameter can be NULL.
230: Input Parameters:
231: + sf - the context
232: - send - true to select the send info (i.e., todata), otherwise to select the recv info (i.e., fromdata)
234: Output parameters:
235: + n - number of remote processors
236: . starts - starting point in indices for each proc. ATTENTION: starts[0] is not necessarily zero.
237: Therefore, expressions like starts[i+1]-starts[i] and indices[starts[i]+j] work as
238: expected for a CSR structure but buf[starts[i]+j] may be out of range if buf was allocated
239: with length starts[n]-starts[0]. One should use buf[starts[i]-starts[0]+j] instead.
240: . indices - indices of entries to send/recv
241: . procs - ranks of remote processors
242: - bs - block size
244: .seealso: VecScatterRestoreRemote_Private(), VecScatterGetRemoteOrdered_Private()
245: */
246: PetscErrorCode VecScatterGetRemote_Private(VecScatter sf,PetscBool send,PetscInt *n,const PetscInt **starts,const PetscInt **indices,const PetscMPIInt **procs,PetscInt *bs)
247: {
248: PetscErrorCode ierr;
249: PetscInt nranks,remote_start;
250: PetscMPIInt rank;
251: const PetscInt *offset,*location;
252: const PetscMPIInt *ranks;
255: PetscSFSetUp(sf);
256: MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);
258: if (send) {PetscSFGetLeafRanks(sf,&nranks,&ranks,&offset,&location);}
259: else {PetscSFGetRootRanks(sf,&nranks,&ranks,&offset,&location,NULL);}
261: if (nranks) {
262: remote_start = (rank == ranks[0])? 1 : 0;
263: if (n) *n = nranks - remote_start;
264: if (starts) *starts = &offset[remote_start];
265: if (indices) *indices = location; /* not &location[offset[remote_start]]. Starts[0] may point to the middle of indices[] */
266: if (procs) *procs = &ranks[remote_start];
267: } else {
268: if (n) *n = 0;
269: if (starts) *starts = NULL;
270: if (indices) *indices = NULL;
271: if (procs) *procs = NULL;
272: }
274: if (bs) *bs = 1;
275: return(0);
276: }
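/* A hedged sketch of the ATTENTION note above (variable names are illustrative only): because starts[0] is not
   necessarily zero, a pack loop over a buffer allocated with length starts[n]-starts[0] must subtract starts[0]
   when indexing that buffer.

     VecScatterGetRemote_Private(sf,PETSC_TRUE,&n,&starts,&indices,&procs,&bs);
     for (i=0; i<n; i++) {
       for (j=0; j<starts[i+1]-starts[i]; j++) {
         buf[starts[i]-starts[0]+j] = xarray[indices[starts[i]+j]];  // note the -starts[0] shift into buf
       }
     }
     VecScatterRestoreRemote_Private(sf,PETSC_TRUE,&n,&starts,&indices,&procs,&bs);
*/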
278: /* Given a parallel VecScatter context, return a plan that represents the remote communication. Ranks of remote
279: processors returned in procs must be sorted in ascending order. Any output parameter can be NULL.
281: Input Parameters:
282: + sf - the context
283: - send - true to select the send info (i.e., todata), otherwise to select the recv info (i.e., fromdata)
285: Output parameters:
286: + n - number of remote processors
287: . starts - starting point in indices for each proc. ATTENTION: starts[0] is not necessarily zero.
288: Therefore, expressions like starts[i+1]-starts[i] and indices[starts[i]+j] work as
289: expected for a CSR structure but buf[starts[i]+j] may be out of range if buf was allocated
290: with length starts[n]-starts[0]. One should use buf[starts[i]-starts[0]+j] instead.
291: . indices - indices of entries to send/recv
292: . procs - ranks of remote processors
293: - bs - block size
295: .seealso: VecScatterRestoreRemoteOrdered_Private(), VecScatterGetRemote_Private()
297: Notes:
298:    Output parameters such as starts[] and indices[] must also be adapted to match the sorted order of procs[].
299: */
300: PetscErrorCode VecScatterGetRemoteOrdered_Private(VecScatter sf,PetscBool send,PetscInt *n,const PetscInt **starts,const PetscInt **indices,const PetscMPIInt **procs,PetscInt *bs)
301: {
305: VecScatterGetRemote_Private(sf,send,n,starts,indices,procs,bs);
306: if (PetscUnlikelyDebug(n && procs)) {
307: PetscInt i;
308:       /* go from back to front so that the case *n = 0 is also handled */
309: for (i=*n-1; i>0; i--) { if ((*procs)[i-1] > (*procs)[i]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"procs[] are not ordered"); }
310: }
311: return(0);
312: }
314: /* Given a parallel VecScatter context, restore the plan returned by VecScatterGetRemote_Private. This gives a chance for
315: an implementation to free memory allocated in the VecScatterGetRemote_Private call.
317: Input Parameters:
318: + sf - the context
319: - send - true to select the send info (i.e., todata), otherwise to select the recv info (i.e., fromdata)
321: Output parameters:
322: + n - number of remote processors
323: . starts - starting point in indices for each proc
324: . indices - indices of entries to send/recv
325: . procs - ranks of remote processors
326: - bs - block size
328: .seealso: VecScatterGetRemote_Private()
329: */
330: PetscErrorCode VecScatterRestoreRemote_Private(VecScatter sf,PetscBool send,PetscInt *n,const PetscInt **starts,const PetscInt **indices,const PetscMPIInt **procs,PetscInt *bs)
331: {
333: if (starts) *starts = NULL;
334: if (indices) *indices = NULL;
335: if (procs) *procs = NULL;
336: return(0);
337: }
339: /* Given a parallel VecScatter context, restore the plan returned by VecScatterGetRemoteOrdered_Private. This gives a chance for
340: an implementation to free memory allocated in the VecScatterGetRemoteOrdered_Private call.
342: Input Parameters:
343: + sf - the context
344: - send - true to select the send info (i.e., todata), otherwise to select the recv info (i.e., fromdata)
346: Output parameters:
347: + n - number of remote processors
348: . starts - starting point in indices for each proc
349: . indices - indices of entries to send/recv
350: . procs - ranks of remote processors
351: - bs - block size
353: .seealso: VecScatterGetRemoteOrdered_Private()
354: */
355: PetscErrorCode VecScatterRestoreRemoteOrdered_Private(VecScatter sf,PetscBool send,PetscInt *n,const PetscInt **starts,const PetscInt **indices,const PetscMPIInt **procs,PetscInt *bs)
356: {
359: VecScatterRestoreRemote_Private(sf,send,n,starts,indices,procs,bs);
360: return(0);
361: }
363: /*@
364: VecScatterSetUp - Sets up the VecScatter to be able to actually scatter information between vectors
366: Collective on VecScatter
368: Input Parameter:
369: . sf - the scatter context
371: Level: intermediate
373: .seealso: VecScatterCreate(), VecScatterCopy()
374: @*/
375: PetscErrorCode VecScatterSetUp(VecScatter sf)
376: {
379: PetscSFSetUp(sf);
380: return(0);
381: }
383: /*@C
384: VecScatterSetType - Builds a vector scatter, for a particular vector scatter implementation.
386: Collective on VecScatter
388: Input Parameters:
389: + sf - The VecScatter (SF) object
390: - type - The name of the vector scatter type
392: Options Database Key:
393: . -sf_type <type> - Sets the VecScatter (SF) type
395: Notes:
396:    Use VecScatterCopy() to form additional vector scatters of the same type as an existing vector scatter.
398: Level: intermediate
400: .seealso: VecScatterGetType(), VecScatterCreate()
401: @*/
402: PetscErrorCode VecScatterSetType(VecScatter sf, VecScatterType type)
403: {
407: PetscSFSetType(sf,type);
408: return(0);
409: }
411: /*@C
412: VecScatterGetType - Gets the vector scatter type name (as a string) from the VecScatter.
414: Not Collective
416: Input Parameter:
417: . sf - The vector scatter (SF)
419: Output Parameter:
420: . type - The vector scatter type name
422: Level: intermediate
424: .seealso: VecScatterSetType(), VecScatterCreate()
425: @*/
426: PetscErrorCode VecScatterGetType(VecScatter sf, VecScatterType *type)
427: {
430: PetscSFGetType(sf,type);
431: return(0);
432: }
434: /*@C
435: VecScatterRegister - Adds a new vector scatter component implementation
437: Not Collective
439: Input Parameters:
440: +  sname - The name of a new user-defined creation routine
441: -  function - The creation routine itself
443: Level: advanced
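   Example Usage (an illustrative sketch; MyScatterCreate is a hypothetical user routine):
$    VecScatterRegister("my_scatter",MyScatterCreate);
   The new type can then be chosen with
$    VecScatterSetType(sf,"my_scatter");
   or at runtime with the option -sf_type my_scatter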
445: .seealso: VecRegister()
446: @*/
447: PetscErrorCode VecScatterRegister(const char sname[], PetscErrorCode (*function)(VecScatter))
448: {
451: PetscSFRegister(sname,function);
452: return(0);
453: }
455: /* ------------------------------------------------------------------*/
456: /*@
457: VecScatterGetMerged - Returns true if the scatter is completed in the VecScatterBegin()
458: and the VecScatterEnd() does nothing
460: Not Collective
462: Input Parameter:
463: . sf - scatter context created with VecScatterCreate()
465: Output Parameter:
466: .  flg - PETSC_TRUE if both the begin and end phases of the scatter are completed during VecScatterBegin()
468: Level: developer
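   Example Usage (an illustrative sketch; ctx, x and y are placeholders created elsewhere):
$    PetscBool flg;
$    VecScatterGetMerged(ctx,&flg);
$    VecScatterBegin(ctx,x,y,INSERT_VALUES,SCATTER_FORWARD);
$    if (!flg) {VecScatterEnd(ctx,x,y,INSERT_VALUES,SCATTER_FORWARD);} // when flg is true the end phase already ran inside VecScatterBegin()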
470: .seealso: VecScatterCreate(), VecScatterEnd(), VecScatterBegin()
471: @*/
472: PetscErrorCode VecScatterGetMerged(VecScatter sf,PetscBool *flg)
473: {
476: if (flg) *flg = sf->vscat.beginandendtogether;
477: return(0);
478: }
479: /*@C
480: VecScatterDestroy - Destroys a scatter context created by VecScatterCreate()
482: Collective on VecScatter
484: Input Parameter:
485: . sf - the scatter context
487: Level: intermediate
489: .seealso: VecScatterCreate(), VecScatterCopy()
490: @*/
491: PetscErrorCode VecScatterDestroy(VecScatter *sf)
492: {
496: PetscSFDestroy(sf);
497: return(0);
498: }
500: /*@
501: VecScatterCopy - Makes a copy of a scatter context.
503: Collective on VecScatter
505: Input Parameter:
506: . sf - the scatter context
508: Output Parameter:
509: . newsf - the context copy
511: Level: advanced
513: .seealso: VecScatterCreate(), VecScatterDestroy()
514: @*/
515: PetscErrorCode VecScatterCopy(VecScatter sf,VecScatter *newsf)
516: {
521: PetscSFDuplicate(sf,PETSCSF_DUPLICATE_GRAPH,newsf);
522: PetscSFSetUp(*newsf);
523: return(0);
524: }
526: /*@C
527:    VecScatterViewFromOptions - Views a vector scatter context based on values in the options database
529: Collective on VecScatter
531: Input Parameters:
532: + sf - the scatter context
533: .  obj - Optional object that provides the prefix for the option name
534: - name - command line option
536: Level: intermediate
537: .seealso: VecScatter, VecScatterView, PetscObjectViewFromOptions(), VecScatterCreate()
538: @*/
539: PetscErrorCode VecScatterViewFromOptions(VecScatter sf,PetscObject obj,const char name[])
540: {
545: PetscObjectViewFromOptions((PetscObject)sf,obj,name);
546: return(0);
547: }
549: /* ------------------------------------------------------------------*/
550: /*@C
551: VecScatterView - Views a vector scatter context.
553: Collective on VecScatter
555: Input Parameters:
556: + sf - the scatter context
557: - viewer - the viewer for displaying the context
559: Level: intermediate
561: @*/
562: PetscErrorCode VecScatterView(VecScatter sf,PetscViewer viewer)
563: {
567: PetscSFView(sf,viewer);
568: return(0);
569: }
571: /*@C
572: VecScatterRemap - Remaps the "from" and "to" indices in a
573: vector scatter context. FOR EXPERTS ONLY!
575: Collective on VecScatter
577: Input Parameters:
578: + sf - vector scatter context
579: . tomap - remapping plan for "to" indices (may be NULL).
580: - frommap - remapping plan for "from" indices (may be NULL)
582: Level: developer
584: Notes:
585: In the parallel case the todata contains indices from where the data is taken
586: (and then sent to others)! The fromdata contains indices from where the received
587: data is finally put locally.
589:    In the sequential case the todata contains indices where the data is put
590:    and the fromdata contains indices from where the data is taken.
591:    This is backwards from the parallel case!
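   Example Usage (an illustrative sketch; ctx and tomap are placeholders, with tomap[] a permutation of the "to" indices):
$    // in the parallel case, make the scatter take its data from x[tomap[i]] wherever it used to take x[i]
$    VecScatterRemap(ctx,tomap,NULL);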
593: @*/
594: PetscErrorCode VecScatterRemap(VecScatter sf,PetscInt tomap[],PetscInt frommap[])
595: {
596:   PetscErrorCode ierr;
601: VecScatterRemap_Internal(sf,tomap,frommap);
602: if (frommap) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Unable to remap the FROM in scatters yet");
603:   /* Mark the vector lengths as unknown because we do not know the lengths of the remapped vectors */
604: sf->vscat.from_n = -1;
605: sf->vscat.to_n = -1;
606: return(0);
607: }
609: /*@
610: VecScatterSetFromOptions - Configures the vector scatter from the options database.
612: Collective on VecScatter
614: Input Parameter:
615: . sf - The vector scatter
617: Notes:
618: To see all options, run your program with the -help option, or consult the users manual.
619:      Must be called before VecScatterSetUp() and before the vector scatter is used.
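   Example Usage (an illustrative sketch; ctx is a placeholder scatter created elsewhere):
$    VecScatterSetFromOptions(ctx); // e.g. picks up -vecscatter_merge or -vecscatter_packongpu from the options database
$    VecScatterSetUp(ctx);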
621: Level: beginner
623: .seealso: VecScatterCreate(), VecScatterDestroy(), VecScatterSetUp()
624: @*/
625: PetscErrorCode VecScatterSetFromOptions(VecScatter sf)
626: {
631: PetscObjectOptionsBegin((PetscObject)sf);
633: sf->vscat.beginandendtogether = PETSC_FALSE;
634: PetscOptionsBool("-vecscatter_merge","Use combined (merged) vector scatter begin and end","VecScatterCreate",sf->vscat.beginandendtogether,&sf->vscat.beginandendtogether,NULL);
635: if (sf->vscat.beginandendtogether) {PetscInfo(sf,"Using combined (merged) vector scatter begin and end\n");}
637: sf->vscat.packongpu = PETSC_TRUE;
638: PetscOptionsBool("-vecscatter_packongpu","For GPU vectors, pack needed entries on GPU, then copy packed data to CPU, then do MPI","VecScatterCreate",sf->vscat.packongpu,&sf->vscat.packongpu,NULL);
639: if (sf->vscat.packongpu) {PetscInfo(sf,"For GPU vectors, pack needed entries on GPU, then copy packed data to CPU, then do MPI\n");}
640: PetscOptionsEnd();
641: return(0);
642: }
644: /* ---------------------------------------------------------------- */
645: /*@
646: VecScatterCreate - Creates a vector scatter context.
648: Collective on Vec
650: Input Parameters:
651: + xin - a vector that defines the shape (parallel data layout of the vector)
652: of vectors from which we scatter
653: . yin - a vector that defines the shape (parallel data layout of the vector)
654: of vectors to which we scatter
655: . ix - the indices of xin to scatter (if NULL scatters all values)
656: - iy - the indices of yin to hold results (if NULL fills entire vector yin)
658: Output Parameter:
659: . newsf - location to store the new scatter (SF) context
661: Options Database Keys:
662: +  -vecscatter_view - Prints details of the communication
663: .  -vecscatter_view ::ascii_info - Prints fewer details about the communication
664: .  -vecscatter_merge - VecScatterBegin() handles all of the communication, VecScatterEnd() is a no-op;
665:                        this eliminates the chance to overlap computation and communication
666: -  -vecscatter_packongpu - For GPU vectors, pack needed entries on GPU, then copy packed data to CPU, then do MPI.
667:                        Otherwise, we might copy a whole segment encompassing the needed entries. Default is TRUE.
669: Level: intermediate
671: Notes:
672: If both xin and yin are parallel, their communicator must be on the same
673: set of processes, but their process order can be different.
674:    In calls to VecScatterBegin()/VecScatterEnd() you can use vectors different from the xin and
675: yin you used above; BUT they must have the same parallel data layout, for example,
676: they could be obtained from VecDuplicate().
677: A VecScatter context CANNOT be used in two or more simultaneous scatters;
678: that is you cannot call a second VecScatterBegin() with the same scatter
679: context until the VecScatterEnd() has been called on the first VecScatterBegin().
680: In this case a separate VecScatter is needed for each concurrent scatter.
682:    Currently the sends use PERSISTENT MPI requests.
683:    (This unfortunately requires that the same in and out arrays be used for each use; this
684:    is why we always need to pack the input into a work array before sending
685:    and unpack it upon receiving, instead of using MPI datatypes to avoid the packing/unpacking.)
687:    ix and iy cannot both be NULL at the same time.
689: Use VecScatterCreateToAll() to create a vecscatter that copies an MPI vector to sequential vectors on all MPI ranks.
690: Use VecScatterCreateToZero() to create a vecscatter that copies an MPI vector to a sequential vector on MPI rank 0.
691: These special vecscatters have better performance than general ones.
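   Example Usage (an illustrative sketch; the vectors, index set and size n are placeholders, not taken from this file):
$    VecScatter ctx;
$    IS         ix;
$    // copy the first n entries of the parallel vector x into the sequential vector y of local size n
$    ISCreateStride(PETSC_COMM_SELF,n,0,1,&ix);
$    VecScatterCreate(x,ix,y,NULL,&ctx);
$    VecScatterBegin(ctx,x,y,INSERT_VALUES,SCATTER_FORWARD);
$    VecScatterEnd(ctx,x,y,INSERT_VALUES,SCATTER_FORWARD);
$    ISDestroy(&ix);
$    VecScatterDestroy(&ctx);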
693: .seealso: VecScatterDestroy(), VecScatterCreateToAll(), VecScatterCreateToZero(), PetscSFCreate()
694: @*/
695: PetscErrorCode VecScatterCreate(Vec x,IS ix,Vec y,IS iy,VecScatter *newsf)
696: {
698: MPI_Comm xcomm,ycomm,bigcomm;
699: Vec xx,yy;
700: IS ix_old=ix,iy_old=iy,ixx,iyy;
701: PetscMPIInt xcommsize,ycommsize,rank,result;
702: PetscInt i,n,N,nroots,nleaves,*ilocal,xstart,ystart,ixsize,iysize,xlen,ylen;
703: const PetscInt *xindices,*yindices;
704: PetscSFNode *iremote;
705: PetscLayout xlayout,ylayout;
706: ISTypeID ixid,iyid;
707: PetscInt bs,bsx,bsy,min,max,m[2],ixfirst,ixstep,iyfirst,iystep;
708: PetscBool can_do_block_opt=PETSC_FALSE;
709: PetscSF sf;
713: if (!ix && !iy) SETERRQ(PetscObjectComm((PetscObject)x),PETSC_ERR_SUP,"Cannot pass default in for both input and output indices");
715: /* Get comm from x and y */
716: PetscObjectGetComm((PetscObject)x,&xcomm);
717: MPI_Comm_size(xcomm,&xcommsize);
718: PetscObjectGetComm((PetscObject)y,&ycomm);
719: MPI_Comm_size(ycomm,&ycommsize);
720: if (xcommsize > 1 && ycommsize > 1) {
721: MPI_Comm_compare(xcomm,ycomm,&result);
722: if (result == MPI_UNEQUAL) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_NOTSAMECOMM,"VecScatterCreate: parallel vectors x and y must have identical/congruent/similar communicators");
723: }
724: bs = 1; /* default, no blocking */
726: /*
727: Let P and S stand for parallel and sequential vectors respectively. There are four combinations of vecscatters: PtoP, PtoS,
728: StoP and StoS. The assumption of VecScatterCreate(Vec x,IS ix,Vec y,IS iy,VecScatter *newctx) is: if x is parallel, then ix
729:    contains global indices of x. If x is sequential, ix contains local indices of x. Similarly for y and iy.
731:    SF is built around the concepts of local leaves and remote roots. We treat the source vector x as roots and the destination vector y as
732: leaves. A PtoS scatter can be naturally mapped to SF. We transform PtoP and StoP to PtoS, and treat StoS as trivial PtoS.
733: */
735: /* NULL ix or iy in VecScatterCreate(x,ix,y,iy,newctx) has special meaning. Recover them for these cases */
736: if (!ix) {
737: if (xcommsize > 1 && ycommsize == 1) { /* PtoS: null ix means the whole x will be scattered to each seq y */
738: VecGetSize(x,&N);
739: ISCreateStride(PETSC_COMM_SELF,N,0,1,&ix);
740: } else { /* PtoP, StoP or StoS: null ix means the whole local part of x will be scattered */
741: VecGetLocalSize(x,&n);
742: VecGetOwnershipRange(x,&xstart,NULL);
743: ISCreateStride(PETSC_COMM_SELF,n,xstart,1,&ix);
744: }
745: }
747: if (!iy) {
748: if (xcommsize == 1 && ycommsize > 1) { /* StoP: null iy means the whole y will be scattered to from each seq x */
749: VecGetSize(y,&N);
750: ISCreateStride(PETSC_COMM_SELF,N,0,1,&iy);
751: } else { /* PtoP, StoP or StoS: null iy means the whole local part of y will be scattered to */
752: VecGetLocalSize(y,&n);
753: VecGetOwnershipRange(y,&ystart,NULL);
754: ISCreateStride(PETSC_COMM_SELF,n,ystart,1,&iy);
755: }
756: }
758: /* Do error checking immediately after we have non-empty ix, iy */
759: ISGetLocalSize(ix,&ixsize);
760: ISGetLocalSize(iy,&iysize);
761: VecGetSize(x,&xlen);
762: VecGetSize(y,&ylen);
763: if (ixsize != iysize) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Scatter sizes of ix and iy don't match locally ix=%D iy=%D",ixsize,iysize);
764: ISGetMinMax(ix,&min,&max);
765: if (min < 0 || max >= xlen) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Scatter indices in ix are out of range");
766: ISGetMinMax(iy,&min,&max);
767: if (min < 0 || max >= ylen) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Scatter indices in iy are out of range");
769: /* Extract info about ix, iy for further test */
770: ISGetTypeID_Private(ix,&ixid);
771: ISGetTypeID_Private(iy,&iyid);
772: if (ixid == IS_BLOCK) {ISGetBlockSize(ix,&bsx);}
773: else if (ixid == IS_STRIDE) {ISStrideGetInfo(ix,&ixfirst,&ixstep);}
775: if (iyid == IS_BLOCK) {ISGetBlockSize(iy,&bsy);}
776: else if (iyid == IS_STRIDE) {ISStrideGetInfo(iy,&iyfirst,&iystep);}
778:   /* Check if the PtoS scatter is one of the special ToAll/ToZero scatters, which can be the result of VecScatterCreateToAll/Zero.
779:      ToAll means the whole MPI vector is copied to a seq vector on every process. ToZero means the whole MPI
780:      vector is copied to a seq vector on rank 0 and the other processes do nothing (i.e., they input empty ix, iy).
782: We can optimize these scatters with MPI collectives. We can also avoid costly analysis used for general scatters.
783: */
784: if (xcommsize > 1 && ycommsize == 1) { /* Ranks do not diverge at this if-test */
785: PetscInt pattern[2] = {0, 0}; /* A boolean array with pattern[0] for allgather-like (ToAll) and pattern[1] for gather-like (ToZero) */
786: PetscLayout map;
788: MPI_Comm_rank(xcomm,&rank);
789: VecGetLayout(x,&map);
790: if (rank == 0) {
791: if (ixid == IS_STRIDE && iyid == IS_STRIDE && ixsize == xlen && ixfirst == 0 && ixstep == 1 && iyfirst == 0 && iystep == 1) {
792: /* Rank 0 scatters the whole mpi x to seq y, so it is either a ToAll or a ToZero candidate in its view */
793: pattern[0] = pattern[1] = 1;
794: }
795: } else {
796: if (ixid == IS_STRIDE && iyid == IS_STRIDE && ixsize == xlen && ixfirst == 0 && ixstep == 1 && iyfirst == 0 && iystep == 1) {
797: /* Other ranks also scatter the whole mpi x to seq y, so it is a ToAll candidate in their view */
798: pattern[0] = 1;
799: } else if (ixsize == 0) {
800:         /* Other ranks do nothing, so it is a ToZero candidate */
801: pattern[1] = 1;
802: }
803: }
805:     /* Kill two birds with one stone (the expensive allreduce): pattern[] tells if it is ToAll or ToZero */
806: MPIU_Allreduce(MPI_IN_PLACE,pattern,2,MPIU_INT,MPI_LAND,xcomm);
808: if (pattern[0] || pattern[1]) {
809: PetscSFCreate(xcomm,&sf);
810: PetscSFSetFromOptions(sf);
811: PetscSFSetGraphWithPattern(sf,map,pattern[0] ? PETSCSF_PATTERN_ALLGATHER : PETSCSF_PATTERN_GATHER);
812: goto functionend; /* No further analysis needed. What a big win! */
813: }
814: }
816: /* Continue ...
817: Do block optimization by taking advantage of high level info available in ix, iy.
818: The block optimization is valid when all of the following conditions are met:
819: 1) ix, iy are blocked or can be blocked (i.e., strided with step=1);
820: 2) ix, iy have the same block size;
821: 3) all processors agree on one block size;
822: 4) no blocks span more than one process;
823: */
824: bigcomm = (xcommsize == 1) ? ycomm : xcomm;
826:   /* Processors could go through different paths in this if-else test */
827: m[0] = m[1] = PETSC_MPI_INT_MIN;
828: if (ixid == IS_BLOCK && iyid == IS_BLOCK) {
829: m[0] = PetscMax(bsx,bsy);
830: m[1] = -PetscMin(bsx,bsy);
831: } else if (ixid == IS_BLOCK && iyid == IS_STRIDE && iystep==1 && iyfirst%bsx==0) {
832: m[0] = bsx;
833: m[1] = -bsx;
834: } else if (ixid == IS_STRIDE && iyid == IS_BLOCK && ixstep==1 && ixfirst%bsy==0) {
835: m[0] = bsy;
836: m[1] = -bsy;
837: }
838: /* Get max and min of bsx,bsy over all processes in one allreduce */
839: MPIU_Allreduce(MPI_IN_PLACE,m,2,MPIU_INT,MPI_MAX,bigcomm);
840: max = m[0]; min = -m[1];
842: /* Since we used allreduce above, all ranks will have the same min and max. min==max
843:      implies all ranks have the same bs. Do a further test to see if the local vectors are divisible
844:      by bs on ALL ranks. If they are, we are ensured that no block spans more than one processor.
845: */
846: if (min == max && min > 1) {
847: VecGetLocalSize(x,&xlen);
848: VecGetLocalSize(y,&ylen);
849: m[0] = xlen%min;
850: m[1] = ylen%min;
851: MPIU_Allreduce(MPI_IN_PLACE,m,2,MPIU_INT,MPI_LOR,bigcomm);
852: if (!m[0] && !m[1]) can_do_block_opt = PETSC_TRUE;
853: }
855: /* If can_do_block_opt, then shrink x, y, ix and iy by bs to get xx, yy, ixx and iyy, whose indices
856: and layout are actually used in building SF. Suppose blocked ix representing {0,1,2,6,7,8} has
857: indices {0,2} and bs=3, then ixx = {0,2}; suppose strided iy={3,4,5,6,7,8}, then iyy={1,2}.
859: xx is a little special. If x is seq, then xx is the concatenation of seq x's on ycomm. In this way,
860: we can treat PtoP and StoP uniformly as PtoS.
861: */
862: if (can_do_block_opt) {
863: const PetscInt *indices;
865: /* Shrink x and ix */
866: bs = min;
867: VecCreateMPIWithArray(bigcomm,1,xlen/bs,PETSC_DECIDE,NULL,&xx); /* We only care xx's layout */
868: if (ixid == IS_BLOCK) {
869: ISBlockGetIndices(ix,&indices);
870: ISBlockGetLocalSize(ix,&ixsize);
871: ISCreateGeneral(PETSC_COMM_SELF,ixsize,indices,PETSC_COPY_VALUES,&ixx);
872: ISBlockRestoreIndices(ix,&indices);
873: } else { /* ixid == IS_STRIDE */
874: ISGetLocalSize(ix,&ixsize);
875: ISCreateStride(PETSC_COMM_SELF,ixsize/bs,ixfirst/bs,1,&ixx);
876: }
878: /* Shrink y and iy */
879: VecCreateMPIWithArray(ycomm,1,ylen/bs,PETSC_DECIDE,NULL,&yy);
880: if (iyid == IS_BLOCK) {
881: ISBlockGetIndices(iy,&indices);
882: ISBlockGetLocalSize(iy,&iysize);
883: ISCreateGeneral(PETSC_COMM_SELF,iysize,indices,PETSC_COPY_VALUES,&iyy);
884: ISBlockRestoreIndices(iy,&indices);
885: } else { /* iyid == IS_STRIDE */
886: ISGetLocalSize(iy,&iysize);
887: ISCreateStride(PETSC_COMM_SELF,iysize/bs,iyfirst/bs,1,&iyy);
888: }
889: } else {
890: ixx = ix;
891: iyy = iy;
892: yy = y;
893: if (xcommsize == 1) {VecCreateMPIWithArray(bigcomm,1,xlen,PETSC_DECIDE,NULL,&xx);} else xx = x;
894: }
896: /* Now it is ready to build SF with preprocessed (xx, yy) and (ixx, iyy) */
897: ISGetIndices(ixx,&xindices);
898: ISGetIndices(iyy,&yindices);
899: VecGetLayout(xx,&xlayout);
901: if (ycommsize > 1) {
902: /* PtoP or StoP */
904:     /* Below is a piece of complex code with a very simple goal: move the global index pairs (xindices[i], yindices[i])
905:        to the owner process of yindices[i] according to ylayout, for i = 0..n-1.
907:        I did it through a temp sf, but later I realized the old design was inefficient and also distorted the log view.
908:        We want to map one VecScatterCreate() call to one PetscSFCreate() call. The old design mapped to three
909:        PetscSFCreate() calls. This code is on the critical path of VecScatterSetUp and is used by every VecScatterCreate.
910:        So I commented it out and did another, optimized implementation. The commented code is left here for reference.
911: */
912: #if 0
913: const PetscInt *degree;
914: PetscSF tmpsf;
915: PetscInt inedges=0,*leafdata,*rootdata;
917: VecGetOwnershipRange(xx,&xstart,NULL);
918: VecGetLayout(yy,&ylayout);
919: VecGetOwnershipRange(yy,&ystart,NULL);
921: VecGetLocalSize(yy,&nroots);
922: ISGetLocalSize(iyy,&nleaves);
923: PetscMalloc2(nleaves,&iremote,nleaves*2,&leafdata);
925: for (i=0; i<nleaves; i++) {
926: PetscLayoutFindOwnerIndex(ylayout,yindices[i],&iremote[i].rank,&iremote[i].index);
927: leafdata[2*i] = yindices[i];
928: leafdata[2*i+1] = (xcommsize > 1)? xindices[i] : xindices[i] + xstart;
929: }
931: PetscSFCreate(ycomm,&tmpsf);
932: PetscSFSetGraph(tmpsf,nroots,nleaves,NULL,PETSC_USE_POINTER,iremote,PETSC_USE_POINTER);
934:     PetscSFComputeDegreeBegin(tmpsf,&degree);
935:     PetscSFComputeDegreeEnd(tmpsf,&degree);
937: for (i=0; i<nroots; i++) inedges += degree[i];
938: PetscMalloc1(inedges*2,&rootdata);
939: PetscSFGatherBegin(tmpsf,MPIU_2INT,leafdata,rootdata);
940: PetscSFGatherEnd(tmpsf,MPIU_2INT,leafdata,rootdata);
942: PetscFree2(iremote,leafdata);
943: PetscSFDestroy(&tmpsf);
945: /* rootdata contains global index pairs (i, j). j's are owned by the current process, but i's can point to anywhere.
946: We convert j to local, and convert i to (rank, index). In the end, we get an PtoS suitable for building SF.
947: */
948: nleaves = inedges;
949: VecGetLocalSize(xx,&nroots);
950: PetscMalloc1(nleaves,&ilocal);
951: PetscMalloc1(nleaves,&iremote);
953: for (i=0; i<inedges; i++) {
954:       ilocal[i] = rootdata[2*i] - ystart; /* convert y's global index to local index */
955: PetscLayoutFindOwnerIndex(xlayout,rootdata[2*i+1],&iremote[i].rank,&iremote[i].index); /* convert x's global index to (rank, index) */
956: }
957: PetscFree(rootdata);
958: #else
959: PetscInt j,k,n,disp,rlentotal,*sstart,*xindices_sorted,*yindices_sorted;
960: const PetscInt *yrange;
961: PetscMPIInt nsend,nrecv,nreq,count,yrank,*slens,*rlens,*sendto,*recvfrom,tag1,tag2;
962: PetscInt *rxindices,*ryindices;
963: MPI_Request *reqs,*sreqs,*rreqs;
965:     /* Sorting makes the code simpler and faster, and also helps get rid of many O(P) arrays, which hurt scalability at large scale
966:        yindices_sorted - sorted yindices
967:        xindices_sorted - xindices sorted along with yindices
968: */
969: ISGetLocalSize(ixx,&n); /*ixx, iyy have the same local size */
970: PetscMalloc2(n,&xindices_sorted,n,&yindices_sorted);
971: PetscArraycpy(xindices_sorted,xindices,n);
972: PetscArraycpy(yindices_sorted,yindices,n);
973: PetscSortIntWithArray(n,yindices_sorted,xindices_sorted);
974: VecGetOwnershipRange(xx,&xstart,NULL);
975: if (xcommsize == 1) {for (i=0; i<n; i++) xindices_sorted[i] += xstart;} /* Convert to global indices */
977: /*=============================================================================
978: Calculate info about messages I need to send
979: =============================================================================*/
980: /* nsend - number of non-empty messages to send
981: sendto - [nsend] ranks I will send messages to
982:        sstart - [nsend+1] sstart[i] is the start index in xindices_sorted[] I send to rank sendto[i]
983: slens - [ycommsize] I want to send slens[i] entries to rank i.
984: */
985: VecGetLayout(yy,&ylayout);
986: PetscLayoutGetRanges(ylayout,&yrange);
987: PetscCalloc1(ycommsize,&slens); /* The only O(P) array in this algorithm */
989: i = j = nsend = 0;
990: while (i < n) {
991: if (yindices_sorted[i] >= yrange[j+1]) { /* If i-th index is out of rank j's bound */
992: do {j++;} while (yindices_sorted[i] >= yrange[j+1] && j < ycommsize); /* Increase j until i-th index falls in rank j's bound */
993: if (j == ycommsize) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Index %D not owned by any process, upper bound %D",yindices_sorted[i],yrange[ycommsize]);
994: }
995: i++;
996: if (!slens[j]++) nsend++;
997: }
999: PetscMalloc2(nsend+1,&sstart,nsend,&sendto);
1001: sstart[0] = 0;
1002: for (i=j=0; i<ycommsize; i++) {
1003: if (slens[i]) {
1004: sendto[j] = (PetscMPIInt)i;
1005: sstart[j+1] = sstart[j] + slens[i];
1006: j++;
1007: }
1008: }
1010: /*=============================================================================
1011: Calculate the reverse info about messages I will recv
1012: =============================================================================*/
1013: /* nrecv - number of messages I will recv
1014: recvfrom - [nrecv] ranks I recv from
1015: rlens - [nrecv] I will recv rlens[i] entries from rank recvfrom[i]
1016: rlentotal - sum of rlens[]
1017: rxindices - [rlentotal] recv buffer for xindices_sorted
1018: ryindices - [rlentotal] recv buffer for yindices_sorted
1019: */
1020: PetscGatherNumberOfMessages(ycomm,NULL,slens,&nrecv);
1021: PetscGatherMessageLengths(ycomm,nsend,nrecv,slens,&recvfrom,&rlens);
1022: PetscFree(slens); /* Free the O(P) array ASAP */
1023: rlentotal = 0; for (i=0; i<nrecv; i++) rlentotal += rlens[i];
1025: /*=============================================================================
1026: Communicate with processors in recvfrom[] to populate rxindices and ryindices
1027: ============================================================================*/
1028: PetscCommGetNewTag(ycomm,&tag1);
1029: PetscCommGetNewTag(ycomm,&tag2);
1030: PetscMalloc2(rlentotal,&rxindices,rlentotal,&ryindices);
1031: PetscMPIIntCast((nsend+nrecv)*2,&nreq);
1032: PetscMalloc1(nreq,&reqs);
1033: sreqs = reqs;
1034: rreqs = reqs + nsend*2;
1036: for (i=disp=0; i<nrecv; i++) {
1037: count = rlens[i];
1038: MPI_Irecv(rxindices+disp,count,MPIU_INT,recvfrom[i],tag1,ycomm,rreqs+i);
1039: MPI_Irecv(ryindices+disp,count,MPIU_INT,recvfrom[i],tag2,ycomm,rreqs+nrecv+i);
1040: disp += rlens[i];
1041: }
1043: for (i=0; i<nsend; i++) {
1044: PetscMPIIntCast(sstart[i+1]-sstart[i],&count);
1045: MPI_Isend(xindices_sorted+sstart[i],count,MPIU_INT,sendto[i],tag1,ycomm,sreqs+i);
1046: MPI_Isend(yindices_sorted+sstart[i],count,MPIU_INT,sendto[i],tag2,ycomm,sreqs+nsend+i);
1047: }
1048:     MPI_Waitall(nreq,reqs,MPI_STATUSES_IGNORE);
1050: /* Transform VecScatter into SF */
1051: nleaves = rlentotal;
1052: PetscMalloc1(nleaves,&ilocal);
1053: PetscMalloc1(nleaves,&iremote);
1054: MPI_Comm_rank(ycomm,&yrank);
1055: for (i=disp=0; i<nrecv; i++) {
1056: for (j=0; j<rlens[i]; j++) {
1057: k = disp + j; /* k-th index pair */
1058: ilocal[k] = ryindices[k] - yrange[yrank]; /* Convert y's global index to local index */
1059: PetscLayoutFindOwnerIndex(xlayout,rxindices[k],&rank,&iremote[k].index); /* Convert x's global index to (rank, index) */
1060: iremote[k].rank = rank;
1061: }
1062: disp += rlens[i];
1063: }
1065: PetscFree2(sstart,sendto);
1066: PetscFree(slens);
1067: PetscFree(rlens);
1068: PetscFree(recvfrom);
1069: PetscFree(reqs);
1070: PetscFree2(rxindices,ryindices);
1071: PetscFree2(xindices_sorted,yindices_sorted);
1072: #endif
1073: } else {
1074: /* PtoS or StoS */
1075: ISGetLocalSize(iyy,&nleaves);
1076: PetscMalloc1(nleaves,&ilocal);
1077: PetscMalloc1(nleaves,&iremote);
1078: PetscArraycpy(ilocal,yindices,nleaves);
1079: for (i=0; i<nleaves; i++) {
1080: PetscLayoutFindOwnerIndex(xlayout,xindices[i],&rank,&iremote[i].index);
1081: iremote[i].rank = rank;
1082: }
1083: }
1085: /* MUST build SF on xx's comm, which is not necessarily identical to yy's comm.
1086: In SF's view, xx contains the roots (i.e., the remote) and iremote[].rank are ranks in xx's comm.
1087:      yy contains leaves, which are local and can be thought of as part of PETSC_COMM_SELF. */
1088: PetscSFCreate(PetscObjectComm((PetscObject)xx),&sf);
1089: PetscSFSetFromOptions(sf);
1090: VecGetLocalSize(xx,&nroots);
1091: PetscSFSetGraph(sf,nroots,nleaves,ilocal,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER); /* Give ilocal/iremote to petsc and no need to free them here */
1093: /* Free memory no longer needed */
1094: ISRestoreIndices(ixx,&xindices);
1095: ISRestoreIndices(iyy,&yindices);
1096: if (can_do_block_opt) {
1097: VecDestroy(&xx);
1098: VecDestroy(&yy);
1099: ISDestroy(&ixx);
1100: ISDestroy(&iyy);
1101: } else if (xcommsize == 1) {
1102: VecDestroy(&xx);
1103: }
1105: functionend:
1106: sf->vscat.bs = bs;
1107: if (sf->vscat.bs > 1) {
1108: MPI_Type_contiguous(sf->vscat.bs,MPIU_SCALAR,&sf->vscat.unit);
1109: MPI_Type_commit(&sf->vscat.unit);
1110: } else {
1111: sf->vscat.unit = MPIU_SCALAR;
1112: }
1113: VecGetLocalSize(x,&sf->vscat.from_n);
1114: VecGetLocalSize(y,&sf->vscat.to_n);
1115: if (!ix_old) {ISDestroy(&ix);} /* We created helper ix, iy. Free them */
1116: if (!iy_old) {ISDestroy(&iy);}
1118: /* Set default */
1119: VecScatterSetFromOptions(sf);
1121: *newsf = sf;
1122: return(0);
1123: }
1125: /*@C
1126: VecScatterCreateToAll - Creates a vector and a scatter context that copies all
1127: vector values to each processor
1129: Collective on Vec
1131: Input Parameter:
1132: . vin - input MPIVEC
1134: Output Parameters:
1135: + ctx - scatter context
1136: - vout - output SEQVEC that is large enough to scatter into
1138: Level: intermediate
1140: Note: vout may be NULL [PETSC_NULL_VEC from fortran] if you do not
1141: need to have it created
1143: Usage:
1144: $ VecScatterCreateToAll(vin,&ctx,&vout);
1145: $
1146: $ // scatter as many times as you need
1147: $ VecScatterBegin(ctx,vin,vout,INSERT_VALUES,SCATTER_FORWARD);
1148: $ VecScatterEnd(ctx,vin,vout,INSERT_VALUES,SCATTER_FORWARD);
1149: $
1150: $ // destroy scatter context and local vector when no longer needed
1151: $ VecScatterDestroy(&ctx);
1152: $ VecDestroy(&vout);
1154: Do NOT create a vector and then pass it in as the final argument vout! vout is created by this routine
1155: automatically (unless you pass NULL in for that argument if you do not need it).
1157: .seealso: VecScatterCreate(), VecScatterCreateToZero(), VecScatterBegin(), VecScatterEnd()
1159: @*/
1160: PetscErrorCode VecScatterCreateToAll(Vec vin,VecScatter *ctx,Vec *vout)
1161: {
1164: PetscInt N;
1165: IS is;
1166: Vec tmp;
1167: Vec *tmpv;
1168: PetscBool tmpvout = PETSC_FALSE;
1174: if (vout) {
1176: tmpv = vout;
1177: } else {
1178: tmpvout = PETSC_TRUE;
1179: tmpv = &tmp;
1180: }
1182:   /* Create a seq vec on each proc, with the same size as the original mpi vec */
1183: VecGetSize(vin,&N);
1184: VecCreateSeq(PETSC_COMM_SELF,N,tmpv);
1185: VecSetFromOptions(*tmpv);
1186: /* Create the VecScatter ctx with the communication info */
1187: ISCreateStride(PETSC_COMM_SELF,N,0,1,&is);
1188: VecScatterCreate(vin,is,*tmpv,is,ctx);
1189: ISDestroy(&is);
1190: if (tmpvout) {VecDestroy(tmpv);}
1191: return(0);
1192: }
1194: /*@C
1195: VecScatterCreateToZero - Creates an output vector and a scatter context used to
1196: copy all vector values into the output vector on the zeroth processor
1198: Collective on Vec
1200: Input Parameter:
1201: . vin - input MPIVEC
1203: Output Parameters:
1204: + ctx - scatter context
1205: - vout - output SEQVEC that is large enough to scatter into on processor 0 and
1206: of length zero on all other processors
1208: Level: intermediate
1210: Note: vout may be NULL [PETSC_NULL_VEC from fortran] if you do not
1211: need to have it created
1213: Usage:
1214: $ VecScatterCreateToZero(vin,&ctx,&vout);
1215: $
1216: $ // scatter as many times as you need
1217: $ VecScatterBegin(ctx,vin,vout,INSERT_VALUES,SCATTER_FORWARD);
1218: $ VecScatterEnd(ctx,vin,vout,INSERT_VALUES,SCATTER_FORWARD);
1219: $
1220: $ // destroy scatter context and local vector when no longer needed
1221: $ VecScatterDestroy(&ctx);
1222: $ VecDestroy(&vout);
1224: .seealso: VecScatterCreate(), VecScatterCreateToAll(), VecScatterBegin(), VecScatterEnd()
1226: Do NOT create a vector and then pass it in as the final argument vout! vout is created by this routine
1227: automatically (unless you pass NULL in for that argument if you do not need it).
1229: @*/
1230: PetscErrorCode VecScatterCreateToZero(Vec vin,VecScatter *ctx,Vec *vout)
1231: {
1234: PetscInt N;
1235: PetscMPIInt rank;
1236: IS is;
1237: Vec tmp;
1238: Vec *tmpv;
1239: PetscBool tmpvout = PETSC_FALSE;
1245: if (vout) {
1247: tmpv = vout;
1248: } else {
1249: tmpvout = PETSC_TRUE;
1250: tmpv = &tmp;
1251: }
1253:   /* Create a seq vec on each proc: the same size as the original mpi vec on rank 0, and of size zero on all other ranks */
1254: VecGetSize(vin,&N);
1255: MPI_Comm_rank(PetscObjectComm((PetscObject)vin),&rank);
1256: if (rank) N = 0;
1257: VecCreateSeq(PETSC_COMM_SELF,N,tmpv);
1258: VecSetFromOptions(*tmpv);
1259: /* Create the VecScatter ctx with the communication info */
1260: ISCreateStride(PETSC_COMM_SELF,N,0,1,&is);
1261: VecScatterCreate(vin,is,*tmpv,is,ctx);
1262: ISDestroy(&is);
1263: if (tmpvout) {VecDestroy(tmpv);}
1264: return(0);
1265: }
1267: /*@
1268: VecScatterBegin - Begins a generalized scatter from one vector to
1269: another. Complete the scattering phase with VecScatterEnd().
1271: Neighbor-wise Collective on VecScatter
1273: Input Parameters:
1274: + sf - scatter context generated by VecScatterCreate()
1275: . x - the vector from which we scatter
1276: . y - the vector to which we scatter
1277: .  addv - either ADD_VALUES, MAX_VALUES, MIN_VALUES or INSERT_VALUES; with INSERT_VALUES mode, any location
1278:          not scattered to retains its old value, i.e., the vector is NOT first zeroed.
1279: - mode - the scattering mode, usually SCATTER_FORWARD. The available modes are:
1280: SCATTER_FORWARD or SCATTER_REVERSE
1282: Level: intermediate
1284: Options Database: See VecScatterCreate()
1286: Notes:
1287: The vectors x and y need not be the same vectors used in the call
1288: to VecScatterCreate(), but x must have the same parallel data layout
1289: as that passed in as the x to VecScatterCreate(), similarly for the y.
1290: Most likely they have been obtained from VecDuplicate().
1292: You cannot change the values in the input vector between the calls to VecScatterBegin()
1293: and VecScatterEnd().
1295:    If you use SCATTER_REVERSE the two arguments x and y should be reversed relative to
1296:    the SCATTER_FORWARD case.
1298: y[iy[i]] = x[ix[i]], for i=0,...,ni-1
1300: This scatter is far more general than the conventional
1301: scatter, since it can be a gather or a scatter or a combination,
1302: depending on the indices ix and iy. If x is a parallel vector and y
1303: is sequential, VecScatterBegin() can serve to gather values to a
1304: single processor. Similarly, if y is parallel and x sequential, the
1305: routine can scatter from one processor to many processors.
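   Example Usage (an illustrative sketch; ctx, x and y are placeholders, e.g. created with VecScatterCreate()):
$    VecScatterBegin(ctx,x,y,INSERT_VALUES,SCATTER_FORWARD);
$    // computation that touches neither x nor y may overlap the communication here
$    VecScatterEnd(ctx,x,y,INSERT_VALUES,SCATTER_FORWARD);
$
$    // the same context used in reverse: note that x and y swap positions
$    VecScatterBegin(ctx,y,x,ADD_VALUES,SCATTER_REVERSE);
$    VecScatterEnd(ctx,y,x,ADD_VALUES,SCATTER_REVERSE);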
1307: .seealso: VecScatterCreate(), VecScatterEnd()
1308: @*/
1309: PetscErrorCode VecScatterBegin(VecScatter sf,Vec x,Vec y,InsertMode addv,ScatterMode mode)
1310: {
1312: PetscInt to_n,from_n;
1318: if (PetscDefined(USE_DEBUG)) {
1319: /*
1320: Error checking to make sure these vectors match the vectors used
1321: to create the vector scatter context. -1 in the from_n and to_n indicate the
1322: vector lengths are unknown (for example with mapped scatters) and thus
1323: no error checking is performed.
1324: */
1325: if (sf->vscat.from_n >= 0 && sf->vscat.to_n >= 0) {
1326: VecGetLocalSize(x,&from_n);
1327: VecGetLocalSize(y,&to_n);
1328: if (mode & SCATTER_REVERSE) {
1329: if (to_n != sf->vscat.from_n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Vector wrong size %D for scatter %D (scatter reverse and vector to != sf from size)",to_n,sf->vscat.from_n);
1330: if (from_n != sf->vscat.to_n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Vector wrong size %D for scatter %D (scatter reverse and vector from != sf to size)",from_n,sf->vscat.to_n);
1331: } else {
1332: if (to_n != sf->vscat.to_n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Vector wrong size %D for scatter %D (scatter forward and vector to != sf to size)",to_n,sf->vscat.to_n);
1333: if (from_n != sf->vscat.from_n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Vector wrong size %D for scatter %D (scatter forward and vector from != sf from size)",from_n,sf->vscat.from_n);
1334: }
1335: }
1336: }
1338: sf->vscat.logging = PETSC_TRUE;
1339: PetscLogEventBegin(VEC_ScatterBegin,sf,x,y,0);
1340: VecScatterBegin_Internal(sf,x,y,addv,mode);
1341: if (sf->vscat.beginandendtogether) {
1342: VecScatterEnd_Internal(sf,x,y,addv,mode);
1343: }
1344: PetscLogEventEnd(VEC_ScatterBegin,sf,x,y,0);
1345: sf->vscat.logging = PETSC_FALSE;
1346: return(0);
1347: }
1349: /*@
1350: VecScatterEnd - Ends a generalized scatter from one vector to another. Call
1351: after first calling VecScatterBegin().
1353: Neighbor-wise Collective on VecScatter
1355: Input Parameters:
1356: + sf - scatter context generated by VecScatterCreate()
1357: . x - the vector from which we scatter
1358: . y - the vector to which we scatter
1359: . addv - one of ADD_VALUES, MAX_VALUES, MIN_VALUES or INSERT_VALUES
1360: - mode - the scattering mode, usually SCATTER_FORWARD. The available modes are:
1361: SCATTER_FORWARD, SCATTER_REVERSE
1363: Level: intermediate
1365: Notes:
1366:    If you use SCATTER_REVERSE the arguments x and y should be reversed relative to the SCATTER_FORWARD case.
1368: y[iy[i]] = x[ix[i]], for i=0,...,ni-1
1370: .seealso: VecScatterBegin(), VecScatterCreate()
1371: @*/
1372: PetscErrorCode VecScatterEnd(VecScatter sf,Vec x,Vec y,InsertMode addv,ScatterMode mode)
1373: {
1380: if (!sf->vscat.beginandendtogether) {
1381: sf->vscat.logging = PETSC_TRUE;
1382: PetscLogEventBegin(VEC_ScatterEnd,sf,x,y,0);
1383: VecScatterEnd_Internal(sf,x,y,addv,mode);
1384: PetscLogEventEnd(VEC_ScatterEnd,sf,x,y,0);
1385: sf->vscat.logging = PETSC_FALSE;
1386: }
1387: return(0);
1388: }