Actual source code: vscat.c
1: #include "petscsf.h"
2: #include "petscsystypes.h"
3: #include "petscvec.h"
4: #include <petsc/private/sfimpl.h>
5: #include <../src/vec/is/sf/impls/basic/sfbasic.h>
6: #include <../src/vec/is/sf/impls/basic/sfpack.h>
7: #include <petsc/private/vecimpl.h>
9: typedef enum {IS_INVALID, IS_GENERAL, IS_BLOCK, IS_STRIDE} ISTypeID;
11: static inline PetscErrorCode ISGetTypeID_Private(IS is,ISTypeID *id)
12: {
13: PetscBool same;
15: *id = IS_INVALID;
16: PetscObjectTypeCompare((PetscObject)is,ISGENERAL,&same);
17: if (same) {*id = IS_GENERAL; goto functionend;}
18: PetscObjectTypeCompare((PetscObject)is,ISBLOCK,&same);
19: if (same) {*id = IS_BLOCK; goto functionend;}
20: PetscObjectTypeCompare((PetscObject)is,ISSTRIDE,&same);
21: if (same) {*id = IS_STRIDE; goto functionend;}
22: functionend:
23: return 0;
24: }
26: static PetscErrorCode VecScatterBegin_Internal(VecScatter sf,Vec x,Vec y,InsertMode addv,ScatterMode mode)
27: {
28: PetscSF wsf=NULL; /* either sf or its local part */
29: MPI_Op mop=MPI_OP_NULL;
30: PetscMPIInt size;
31: PetscMemType xmtype=PETSC_MEMTYPE_HOST,ymtype=PETSC_MEMTYPE_HOST;
33: if (x != y) VecLockReadPush(x);
34: VecGetArrayReadAndMemType(x,&sf->vscat.xdata,&xmtype);
35: VecGetArrayAndMemType(y,&sf->vscat.ydata,&ymtype);
36: VecLockWriteSet_Private(y,PETSC_TRUE);
38: /* SCATTER_LOCAL indicates ignoring inter-process communication */
39: MPI_Comm_size(PetscObjectComm((PetscObject)sf),&size);
40: if ((mode & SCATTER_LOCAL) && size > 1) { /* Lazy creation of sf->vscat.lsf since SCATTER_LOCAL is uncommon */
41: if (!sf->vscat.lsf) PetscSFCreateLocalSF_Private(sf,&sf->vscat.lsf);
42: wsf = sf->vscat.lsf;
43: } else {
44: wsf = sf;
45: }
47: /* Note xdata/ydata is always recorded on sf (not lsf) above */
48: if (addv == INSERT_VALUES) mop = MPI_REPLACE;
49: else if (addv == ADD_VALUES) mop = MPIU_SUM; /* Petsc defines its own MPI datatype and SUM operation for __float128 etc. */
50: else if (addv == MAX_VALUES) mop = MPIU_MAX;
51: else if (addv == MIN_VALUES) mop = MPIU_MIN;
52: else SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_SUP,"Unsupported InsertMode %d in VecScatterBegin/End",addv);
54: if (mode & SCATTER_REVERSE) { /* REVERSE indicates leaves to root scatter. Note that x and y are swapped in input */
55: PetscSFReduceWithMemTypeBegin(wsf,sf->vscat.unit,xmtype,sf->vscat.xdata,ymtype,sf->vscat.ydata,mop);
56: } else { /* FORWARD indicates x to y scatter, where x is root and y is leaf */
57: PetscSFBcastWithMemTypeBegin(wsf,sf->vscat.unit,xmtype,sf->vscat.xdata,ymtype,sf->vscat.ydata,mop);
58: }
59: return 0;
60: }
62: static PetscErrorCode VecScatterEnd_Internal(VecScatter sf,Vec x,Vec y,InsertMode addv,ScatterMode mode)
63: {
64: PetscSF wsf=NULL;
65: MPI_Op mop=MPI_OP_NULL;
66: PetscMPIInt size;
68: /* SCATTER_LOCAL indicates ignoring inter-process communication */
69: MPI_Comm_size(PetscObjectComm((PetscObject)sf),&size);
70: wsf = ((mode & SCATTER_LOCAL) && size > 1) ? sf->vscat.lsf : sf;
72: if (addv == INSERT_VALUES) mop = MPI_REPLACE;
73: else if (addv == ADD_VALUES) mop = MPIU_SUM;
74: else if (addv == MAX_VALUES) mop = MPIU_MAX;
75: else if (addv == MIN_VALUES) mop = MPIU_MIN;
76: else SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_SUP,"Unsupported InsertMode %d in VecScatterBegin/End",addv);
78: if (mode & SCATTER_REVERSE) { /* reverse scatter sends leaves to roots. Note that x and y are swapped in input */
79: PetscSFReduceEnd(wsf,sf->vscat.unit,sf->vscat.xdata,sf->vscat.ydata,mop);
80: } else { /* forward scatter sends roots to leaves, i.e., x to y */
81: PetscSFBcastEnd(wsf,sf->vscat.unit,sf->vscat.xdata,sf->vscat.ydata,mop);
82: }
84: VecRestoreArrayReadAndMemType(x,&sf->vscat.xdata);
85: if (x != y) VecLockReadPop(x);
86: VecRestoreArrayAndMemType(y,&sf->vscat.ydata);
87: VecLockWriteSet_Private(y,PETSC_FALSE);
88: return 0;
89: }
91: /* VecScatterRemap provides a lightweight way to slightly modify a VecScatter. Suppose the input sf scatters
92: x[i] to y[j]; tomap gives a plan to change vscat so that it scatters x[tomap[i]] to y[j]. Note that in SF,
93: x provides the roots. That means we need to change root-side data structures such as bas->irootloc[].
94: */
95: static PetscErrorCode VecScatterRemap_Internal(VecScatter sf,const PetscInt *tomap,const PetscInt *frommap)
96: {
97: PetscInt i,bs = sf->vscat.bs;
98: PetscMPIInt size;
99: PetscBool ident = PETSC_TRUE,isbasic,isneighbor;
100: PetscSFType type;
101: PetscSF_Basic *bas = NULL;
103: /* check if it is an identity map. If it is, do nothing */
104: if (tomap) {
105: for (i=0; i<sf->nroots*bs; i++) {if (i != tomap[i]) {ident = PETSC_FALSE; break; } }
106: if (ident) return 0;
107: }
109: if (!tomap) return 0;
111: MPI_Comm_size(PetscObjectComm((PetscObject)sf),&size);
113: /* Since the indices changed, we must also update the local SF. But since lsf is rarely used, we do not
114: do that here. We just destroy lsf and rebuild it on demand from the updated sf.
115: */
116: if (sf->vscat.lsf) PetscSFDestroy(&sf->vscat.lsf);
118: PetscSFGetType(sf,&type);
119: PetscObjectTypeCompare((PetscObject)sf,PETSCSFBASIC,&isbasic);
120: PetscObjectTypeCompare((PetscObject)sf,PETSCSFNEIGHBOR,&isneighbor);
123: PetscSFSetUp(sf); /* to build bas->irootloc[] in case SetUp has not been called yet */
125: /* Root indices are going to be remapped. This is tricky for SF. Root indices are used in sf->rremote,
126: sf->remote and bas->irootloc. The latter one is cheap to remap, but the former two are not.
127: To remap them, we have to do a bcast from roots to leaves, to let leaves know their updated roots.
128: Since VecScatterRemap is supposed to be a cheap routine to adapt a vecscatter by only changing where
129: x[] data is taken, we do not remap sf->rremote, sf->remote. The consequence is that operations
130: accessing them (such as PetscSFCompose) may get stale info. Since VecScatter does not need
131: such complicated SF operations, we do not remap sf->rremote and sf->remote; instead we destroy them
132: so that code accessing them (if any) will crash instead of getting silent errors. Note that BcastAndOp/Reduce,
133: which are used by VecScatter and only rely on bas->irootloc, are updated and correct.
134: */
135: sf->remote = NULL;
136: PetscFree(sf->remote_alloc);
137: /* Not easy to free sf->rremote since it was allocated with PetscMalloc4(), so just give it crazy values */
138: for (i=0; i<sf->roffset[sf->nranks]; i++) sf->rremote[i] = PETSC_MIN_INT;
140: /* Indices in tomap[] are for each individual vector entry. But indices in sf are for each
141: block in the vector. So before the remapping, we have to expand indices in sf by bs, and
142: after the remapping, we have to shrink them back.
143: */
144: bas = (PetscSF_Basic*)sf->data;
145: for (i=0; i<bas->ioffset[bas->niranks]; i++) bas->irootloc[i] = tomap[bas->irootloc[i]*bs]/bs;
146: #if defined(PETSC_HAVE_DEVICE)
147: /* Free the irootloc copy on device. We allocate a new copy and get the updated value on demand. See PetscSFLinkGetRootPackOptAndIndices() */
148: for (i=0; i<2; i++) PetscSFFree(sf,PETSC_MEMTYPE_DEVICE,bas->irootloc_d[i]);
149: #endif
150: /* Destroy and then rebuild root packing optimizations since indices are changed */
151: PetscSFResetPackFields(sf);
152: PetscSFSetUpPackFields(sf);
153: return 0;
154: }
156: /* Given a parallel VecScatter context, return number of procs and vector entries involved in remote (i.e., off-process) communication
158: Input Parameters:
159: + sf - the context (must be a parallel vecscatter)
160: - send - true to select the send info (i.e., todata), otherwise to select the recv info (i.e., fromdata)
162: Output parameters:
163: + num_procs - number of remote processors
164: - num_entries - number of vector entries to send or recv
166: .seealso: VecScatterGetRemote_Private(), VecScatterGetRemoteOrdered_Private()
168: Notes:
169: Sometimes PETSc internally needs to use the matrix-vector-multiply vecscatter context for other purposes. Such client code
170: usually only uses plain MPI_Send/Recv. This group of subroutines provides the info needed for such uses.
171: */
172: PetscErrorCode VecScatterGetRemoteCount_Private(VecScatter sf,PetscBool send,PetscInt *num_procs,PetscInt *num_entries)
173: {
174: PetscInt nranks,remote_start;
175: PetscMPIInt rank;
176: const PetscInt *offset;
177: const PetscMPIInt *ranks;
179: PetscSFSetUp(sf);
180: MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);
182: /* This routine is mainly used for MatMult's Mvctx. In Mvctx, we scatter an MPI vector x to a sequential vector lvec.
183: Remember x is roots and lvec is leaves. 'send' means roots to leaves communication. If 'send' is true, we need to
184: get info about which ranks this processor needs to send to. In other words, we need to call PetscSFGetLeafRanks().
185: If send is false, we do the opposite, calling PetscSFGetRootRanks().
186: */
187: if (send) PetscSFGetLeafRanks(sf,&nranks,&ranks,&offset,NULL);
188: else PetscSFGetRootRanks(sf,&nranks,&ranks,&offset,NULL,NULL);
189: if (nranks) {
190: remote_start = (rank == ranks[0])? 1 : 0;
191: if (num_procs) *num_procs = nranks - remote_start;
192: if (num_entries) *num_entries = offset[nranks] - offset[remote_start];
193: } else {
194: if (num_procs) *num_procs = 0;
195: if (num_entries) *num_entries = 0;
196: }
197: return 0;
198: }
200: /* Given a parallel VecScatter context, return a plan that represents the remote communication.
201: Any output parameter can be NULL.
203: Input Parameters:
204: + sf - the context
205: - send - true to select the send info (i.e., todata), otherwise to select the recv info (i.e., fromdata)
207: Output parameters:
208: + n - number of remote processors
209: . starts - starting point in indices for each proc. ATTENTION: starts[0] is not necessarily zero.
210: Therefore, expressions like starts[i+1]-starts[i] and indices[starts[i]+j] work as
211: expected for a CSR structure but buf[starts[i]+j] may be out of range if buf was allocated
212: with length starts[n]-starts[0]. One should use buf[starts[i]-starts[0]+j] instead.
213: . indices - indices of entries to send/recv
214: . procs - ranks of remote processors
215: - bs - block size
217: .seealso: VecScatterRestoreRemote_Private(), VecScatterGetRemoteOrdered_Private()
218: */
219: PetscErrorCode VecScatterGetRemote_Private(VecScatter sf,PetscBool send,PetscInt *n,const PetscInt **starts,const PetscInt **indices,const PetscMPIInt **procs,PetscInt *bs)
220: {
221: PetscInt nranks,remote_start;
222: PetscMPIInt rank;
223: const PetscInt *offset,*location;
224: const PetscMPIInt *ranks;
226: PetscSFSetUp(sf);
227: MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);
229: if (send) PetscSFGetLeafRanks(sf,&nranks,&ranks,&offset,&location);
230: else PetscSFGetRootRanks(sf,&nranks,&ranks,&offset,&location,NULL);
232: if (nranks) {
233: remote_start = (rank == ranks[0])? 1 : 0;
234: if (n) *n = nranks - remote_start;
235: if (starts) *starts = &offset[remote_start];
236: if (indices) *indices = location; /* not &location[offset[remote_start]]. Starts[0] may point to the middle of indices[] */
237: if (procs) *procs = &ranks[remote_start];
238: } else {
239: if (n) *n = 0;
240: if (starts) *starts = NULL;
241: if (indices) *indices = NULL;
242: if (procs) *procs = NULL;
243: }
245: if (bs) *bs = 1;
246: return 0;
247: }
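/* A minimal sketch (assumed caller code, not part of vscat.c) of how an internal client might combine
   VecScatterGetRemoteCount_Private() and VecScatterGetRemote_Private() to drive plain MPI sends. The send
   buffer is sized by num_entries = starts[n]-starts[0], hence the -starts[0] shift when indexing buf[]. */
static PetscErrorCode RemoteSendSketch(VecScatter sf,const PetscScalar *xarray,MPI_Comm comm,PetscMPIInt tag)
{
  PetscInt          n,i,j,bs,nprocs,nentries;
  const PetscInt    *starts,*indices;
  const PetscMPIInt *procs;
  PetscScalar       *buf;

  VecScatterGetRemoteCount_Private(sf,PETSC_TRUE,&nprocs,&nentries);
  VecScatterGetRemote_Private(sf,PETSC_TRUE,&n,&starts,&indices,&procs,&bs); /* bs is returned as 1 here, so entries are packed one scalar at a time */
  PetscMalloc1(nentries,&buf);
  for (i=0; i<n; i++) {
    for (j=starts[i]; j<starts[i+1]; j++) buf[j-starts[0]] = xarray[indices[j]]; /* pack entries destined for procs[i] */
    MPI_Send(buf+(starts[i]-starts[0]),(PetscMPIInt)(starts[i+1]-starts[i]),MPIU_SCALAR,procs[i],tag,comm);
  }
  PetscFree(buf);
  VecScatterRestoreRemote_Private(sf,PETSC_TRUE,&n,&starts,&indices,&procs,&bs);
  return 0;
}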
249: /* Given a parallel VecScatter context, return a plan that represents the remote communication. Ranks of remote
250: processors returned in procs must be sorted in ascending order. Any output parameter can be NULL.
252: Input Parameters:
253: + sf - the context
254: - send - true to select the send info (i.e., todata), otherwise to select the recv info (i.e., fromdata)
256: Output parameters:
257: + n - number of remote processors
258: . starts - starting point in indices for each proc. ATTENTION: starts[0] is not necessarily zero.
259: Therefore, expressions like starts[i+1]-starts[i] and indices[starts[i]+j] work as
260: expected for a CSR structure but buf[starts[i]+j] may be out of range if buf was allocated
261: with length starts[n]-starts[0]. One should use buf[starts[i]-starts[0]+j] instead.
262: . indices - indices of entries to send/recv
263: . procs - ranks of remote processors
264: - bs - block size
266: .seealso: VecScatterRestoreRemoteOrdered_Private(), VecScatterGetRemote_Private()
268: Notes:
269: Output parameters like starts, indices must also be adapted according to the sorted ranks.
270: */
271: PetscErrorCode VecScatterGetRemoteOrdered_Private(VecScatter sf,PetscBool send,PetscInt *n,const PetscInt **starts,const PetscInt **indices,const PetscMPIInt **procs,PetscInt *bs)
272: {
273: VecScatterGetRemote_Private(sf,send,n,starts,indices,procs,bs);
274: if (PetscUnlikelyDebug(n && procs)) {
275: PetscInt i;
276: /* check procs[] is in ascending order; go from back to front to also handle cases *n=0 */
277: for (i=*n-1; i>0; i--) {if ((*procs)[i-1] > (*procs)[i]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"procs[] are not ordered");}
278: }
279: return 0;
280: }
282: /* Given a parallel VecScatter context, restore the plan returned by VecScatterGetRemote_Private. This gives a chance for
283: an implementation to free memory allocated in the VecScatterGetRemote_Private call.
285: Input Parameters:
286: + sf - the context
287: - send - true to select the send info (i.e., todata), otherwise to select the recv info (i.e., fromdata)
289: Output parameters:
290: + n - number of remote processors
291: . starts - starting point in indices for each proc
292: . indices - indices of entries to send/recv
293: . procs - ranks of remote processors
294: - bs - block size
296: .seealso: VecScatterGetRemote_Private()
297: */
298: PetscErrorCode VecScatterRestoreRemote_Private(VecScatter sf,PetscBool send,PetscInt *n,const PetscInt **starts,const PetscInt **indices,const PetscMPIInt **procs,PetscInt *bs)
299: {
300: if (starts) *starts = NULL;
301: if (indices) *indices = NULL;
302: if (procs) *procs = NULL;
303: return 0;
304: }
306: /* Given a parallel VecScatter context, restore the plan returned by VecScatterGetRemoteOrdered_Private. This gives a chance for
307: an implementation to free memory allocated in the VecScatterGetRemoteOrdered_Private call.
309: Input Parameters:
310: + sf - the context
311: - send - true to select the send info (i.e., todata), otherwise to select the recv info (i.e., fromdata)
313: Output parameters:
314: + n - number of remote processors
315: . starts - starting point in indices for each proc
316: . indices - indices of entries to send/recv
317: . procs - ranks of remote processors
318: - bs - block size
320: .seealso: VecScatterGetRemoteOrdered_Private()
321: */
322: PetscErrorCode VecScatterRestoreRemoteOrdered_Private(VecScatter sf,PetscBool send,PetscInt *n,const PetscInt **starts,const PetscInt **indices,const PetscMPIInt **procs,PetscInt *bs)
323: {
324: VecScatterRestoreRemote_Private(sf,send,n,starts,indices,procs,bs);
325: return 0;
326: }
328: /*@
329: VecScatterSetUp - Sets up the VecScatter to be able to actually scatter information between vectors
331: Collective on VecScatter
333: Input Parameter:
334: . sf - the scatter context
336: Level: intermediate
338: .seealso: VecScatterCreate(), VecScatterCopy()
339: @*/
340: PetscErrorCode VecScatterSetUp(VecScatter sf)
341: {
342: PetscSFSetUp(sf);
343: return 0;
344: }
346: /*@C
347: VecScatterSetType - Builds a vector scatter, for a particular vector scatter implementation.
349: Collective on VecScatter
351: Input Parameters:
352: + sf - The VecScatter (SF) object
353: - type - The name of the vector scatter type
355: Options Database Key:
356: . -sf_type <type> - Sets the VecScatter (SF) type
358: Notes:
359: Use VecScatterCopy() to form additional vector scatters of the same type as an existing vector scatter.
361: Level: intermediate
363: .seealso: VecScatterGetType(), VecScatterCreate()
364: @*/
365: PetscErrorCode VecScatterSetType(VecScatter sf, VecScatterType type)
366: {
367: PetscSFSetType(sf,type);
368: return 0;
369: }
371: /*@C
372: VecScatterGetType - Gets the vector scatter type name (as a string) from the VecScatter.
374: Not Collective
376: Input Parameter:
377: . sf - The vector scatter (SF)
379: Output Parameter:
380: . type - The vector scatter type name
382: Level: intermediate
384: .seealso: VecScatterSetType(), VecScatterCreate()
385: @*/
386: PetscErrorCode VecScatterGetType(VecScatter sf, VecScatterType *type)
387: {
388: PetscSFGetType(sf,type);
389: return 0;
390: }
392: /*@C
393: VecScatterRegister - Adds a new vector scatter component implementation
395: Not Collective
397: Input Parameters:
398: + name - The name of a new user-defined creation routine
399: - create_func - The creation routine itself
401: Level: advanced
403: .seealso: VecRegister()
404: @*/
405: PetscErrorCode VecScatterRegister(const char sname[], PetscErrorCode (*function)(VecScatter))
406: {
407: PetscSFRegister(sname,function);
408: return 0;
409: }
411: /* ------------------------------------------------------------------*/
412: /*@
413: VecScatterGetMerged - Returns true if the scatter is completed in the VecScatterBegin()
414: and the VecScatterEnd() does nothing
416: Not Collective
418: Input Parameter:
419: . sf - scatter context created with VecScatterCreate()
421: Output Parameter:
422: . flg - PETSC_TRUE if the entire scatter is completed during VecScatterBegin() and VecScatterEnd() does nothing
424: Level: developer
426: .seealso: VecScatterCreate(), VecScatterEnd(), VecScatterBegin()
427: @*/
428: PetscErrorCode VecScatterGetMerged(VecScatter sf,PetscBool *flg)
429: {
431: if (flg) *flg = sf->vscat.beginandendtogether;
432: return 0;
433: }
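/* A short usage sketch (ctx, x, y assumed to be an existing scatter context and matching vectors): with
   -vecscatter_merge the communication completes inside VecScatterBegin(), so VecScatterEnd() degenerates
   to a no-op; calling it unconditionally remains correct. */
PetscBool merged;
VecScatterGetMerged(ctx,&merged);
VecScatterBegin(ctx,x,y,INSERT_VALUES,SCATTER_FORWARD);
if (!merged) VecScatterEnd(ctx,x,y,INSERT_VALUES,SCATTER_FORWARD); /* may be skipped when merged is PETSC_TRUE */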
434: /*@C
435: VecScatterDestroy - Destroys a scatter context created by VecScatterCreate()
437: Collective on VecScatter
439: Input Parameter:
440: . sf - the scatter context
442: Level: intermediate
444: .seealso: VecScatterCreate(), VecScatterCopy()
445: @*/
446: PetscErrorCode VecScatterDestroy(VecScatter *sf)
447: {
448: PetscSFDestroy(sf);
449: return 0;
450: }
452: /*@
453: VecScatterCopy - Makes a copy of a scatter context.
455: Collective on VecScatter
457: Input Parameter:
458: . sf - the scatter context
460: Output Parameter:
461: . newsf - the context copy
463: Level: advanced
465: .seealso: VecScatterCreate(), VecScatterDestroy()
466: @*/
467: PetscErrorCode VecScatterCopy(VecScatter sf,VecScatter *newsf)
468: {
470: PetscSFDuplicate(sf,PETSCSF_DUPLICATE_GRAPH,newsf);
471: PetscSFSetUp(*newsf);
472: return 0;
473: }
475: /*@C
476: VecScatterViewFromOptions - Views a VecScatter object based on values in the options database
478: Collective on VecScatter
480: Input Parameters:
481: + sf - the scatter context
482: . obj - Optional object that provides the prefix for the option name
483: - name - command line option
485: Level: intermediate
486: .seealso: VecScatter, VecScatterView, PetscObjectViewFromOptions(), VecScatterCreate()
487: @*/
488: PetscErrorCode VecScatterViewFromOptions(VecScatter sf,PetscObject obj,const char name[])
489: {
491: PetscObjectViewFromOptions((PetscObject)sf,obj,name);
492: return 0;
493: }
495: /* ------------------------------------------------------------------*/
496: /*@C
497: VecScatterView - Views a vector scatter context.
499: Collective on VecScatter
501: Input Parameters:
502: + sf - the scatter context
503: - viewer - the viewer for displaying the context
505: Level: intermediate
507: @*/
508: PetscErrorCode VecScatterView(VecScatter sf,PetscViewer viewer)
509: {
510: PetscSFView(sf,viewer);
511: return 0;
512: }
514: /*@C
515: VecScatterRemap - Remaps the "from" and "to" indices in a
516: vector scatter context. FOR EXPERTS ONLY!
518: Collective on VecScatter
520: Input Parameters:
521: + sf - vector scatter context
522: . tomap - remapping plan for "to" indices (may be NULL).
523: - frommap - remapping plan for "from" indices (may be NULL)
525: Level: developer
527: Notes:
528: In the parallel case the todata contains indices from where the data is taken
529: (and then sent to others)! The fromdata contains indices from where the received
530: data is finally put locally.
532: In the sequential case the todata contains indices where the data is put
533: and the fromdata contains indices from where the data is taken.
534: This is backwards from the parallel case!
536: @*/
537: PetscErrorCode VecScatterRemap(VecScatter sf,PetscInt tomap[],PetscInt frommap[])
538: {
541: VecScatterRemap_Internal(sf,tomap,frommap);
543: /* Mark the vector lengths as unknown because we do not know the lengths of the remapped vectors */
544: sf->vscat.from_n = -1;
545: sf->vscat.to_n = -1;
546: return 0;
547: }
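/* A small sketch of VecScatterRemap() usage, assuming ctx was built from a vector x with local size nx on
   this process: after the call, wherever the scatter previously read x[i] it now reads x[tomap[i]].
   Remapping of the "from" indices is not exercised here, so frommap is passed as NULL. */
PetscInt i,nx,*tomap;
VecGetLocalSize(x,&nx);
PetscMalloc1(nx,&tomap);
for (i=0; i<nx; i++) tomap[i] = (i+1)%nx;   /* e.g., cyclically shift where the data is taken from */
VecScatterRemap(ctx,tomap,NULL);
PetscFree(tomap);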
549: /*@
550: VecScatterSetFromOptions - Configures the vector scatter from the options database.
552: Collective on VecScatter
554: Input Parameter:
555: . sf - The vector scatter
557: Notes:
558: To see all options, run your program with the -help option, or consult the users manual.
559: Must be called before VecScatterSetUp() and before the vector scatter is used.
561: Level: beginner
563: .seealso: VecScatterCreate(), VecScatterDestroy(), VecScatterSetUp()
564: @*/
565: PetscErrorCode VecScatterSetFromOptions(VecScatter sf)
566: {
570: PetscObjectOptionsBegin((PetscObject)sf);
572: sf->vscat.beginandendtogether = PETSC_FALSE;
573: PetscOptionsBool("-vecscatter_merge","Use combined (merged) vector scatter begin and end","VecScatterCreate",sf->vscat.beginandendtogether,&sf->vscat.beginandendtogether,NULL);
574: if (sf->vscat.beginandendtogether) PetscInfo(sf,"Using combined (merged) vector scatter begin and end\n");
575: PetscOptionsEnd();
576: return 0;
577: }
579: /* ---------------------------------------------------------------- */
580: /*@
581: VecScatterCreate - Creates a vector scatter context.
583: Collective on Vec
585: Input Parameters:
586: + xin - a vector that defines the shape (parallel data layout of the vector)
587: of vectors from which we scatter
588: . yin - a vector that defines the shape (parallel data layout of the vector)
589: of vectors to which we scatter
590: . ix - the indices of xin to scatter (if NULL scatters all values)
591: - iy - the indices of yin to hold results (if NULL fills entire vector yin)
593: Output Parameter:
594: . newsf - location to store the new scatter (SF) context
596: Options Database Keys:
597: + -vecscatter_view - Prints details of the communication
598: . -vecscatter_view ::ascii_info - Prints fewer details about the communication
599: - -vecscatter_merge - VecScatterBegin() handles all of the communication and VecScatterEnd() is a no-op;
600: this eliminates the chance to overlap computation and communication
602: Level: intermediate
604: Notes:
605: If both xin and yin are parallel, their communicator must be on the same
606: set of processes, but their process order can be different.
607: In calls to VecScatterBegin() and VecScatterEnd() you can use different vectors than the xin and
608: yin you used above; BUT they must have the same parallel data layout, for example,
609: they could be obtained from VecDuplicate().
610: A VecScatter context CANNOT be used in two or more simultaneous scatters;
611: that is you cannot call a second VecScatterBegin() with the same scatter
612: context until the VecScatterEnd() has been called on the first VecScatterBegin().
613: In this case a separate VecScatter is needed for each concurrent scatter.
615: Currently the MPI_Send() calls use the PERSISTENT versions.
616: (This unfortunately requires that the same in and out arrays be used for each use; this
617: is why we always need to pack the input into the work array before sending
618: and unpack upon receiving, instead of using MPI datatypes to avoid the packing/unpacking.)
620: ix and iy cannot both be NULL at the same time.
622: Use VecScatterCreateToAll() to create a vecscatter that copies an MPI vector to sequential vectors on all MPI ranks.
623: Use VecScatterCreateToZero() to create a vecscatter that copies an MPI vector to a sequential vector on MPI rank 0.
624: These special vecscatters have better performance than general ones.
626: .seealso: VecScatterDestroy(), VecScatterCreateToAll(), VecScatterCreateToZero(), PetscSFCreate()
627: @*/
628: PetscErrorCode VecScatterCreate(Vec x,IS ix,Vec y,IS iy,VecScatter *newsf)
629: {
630: MPI_Comm xcomm,ycomm,bigcomm;
631: Vec xx,yy;
632: IS ix_old=ix,iy_old=iy,ixx,iyy;
633: PetscMPIInt xcommsize,ycommsize,rank,result;
634: PetscInt i,n,N,nroots,nleaves,*ilocal,xstart,ystart,ixsize,iysize,xlen,ylen;
635: const PetscInt *xindices,*yindices;
636: PetscSFNode *iremote;
637: PetscLayout xlayout,ylayout;
638: ISTypeID ixid,iyid;
639: PetscInt bs,bsx,bsy,min,max,m[2],ixfirst,ixstep,iyfirst,iystep;
640: PetscBool can_do_block_opt=PETSC_FALSE;
641: PetscSF sf;
646: /* Get comm from x and y */
647: PetscObjectGetComm((PetscObject)x,&xcomm);
648: MPI_Comm_size(xcomm,&xcommsize);
649: PetscObjectGetComm((PetscObject)y,&ycomm);
650: MPI_Comm_size(ycomm,&ycommsize);
651: if (xcommsize > 1 && ycommsize > 1) {
652: MPI_Comm_compare(xcomm,ycomm,&result);
654: }
655: bs = 1; /* default, no blocking */
657: /*
658: Let P and S stand for parallel and sequential vectors respectively. There are four combinations of vecscatters: PtoP, PtoS,
659: StoP and StoS. The assumption of VecScatterCreate(Vec x,IS ix,Vec y,IS iy,VecScatter *newctx) is: if x is parallel, then ix
660: contains global indices of x. If x is sequential, ix contains local indices of x. Similarly for y and iy.
662: SF builds around concepts of local leaves and remote roots. We treat source vector x as roots and destination vector y as
663: leaves. A PtoS scatter can be naturally mapped to SF. We transform PtoP and StoP to PtoS, and treat StoS as trivial PtoS.
664: */
666: /* NULL ix or iy in VecScatterCreate(x,ix,y,iy,newctx) has special meaning. Recover them for these cases */
667: if (!ix) {
668: if (xcommsize > 1 && ycommsize == 1) { /* PtoS: null ix means the whole x will be scattered to each seq y */
669: VecGetSize(x,&N);
670: ISCreateStride(PETSC_COMM_SELF,N,0,1,&ix);
671: } else { /* PtoP, StoP or StoS: null ix means the whole local part of x will be scattered */
672: VecGetLocalSize(x,&n);
673: VecGetOwnershipRange(x,&xstart,NULL);
674: ISCreateStride(PETSC_COMM_SELF,n,xstart,1,&ix);
675: }
676: }
678: if (!iy) {
679: if (xcommsize == 1 && ycommsize > 1) { /* StoP: null iy means each seq x will be scattered to the whole y */
680: VecGetSize(y,&N);
681: ISCreateStride(PETSC_COMM_SELF,N,0,1,&iy);
682: } else { /* PtoP, StoP or StoS: null iy means the whole local part of y will be scattered to */
683: VecGetLocalSize(y,&n);
684: VecGetOwnershipRange(y,&ystart,NULL);
685: ISCreateStride(PETSC_COMM_SELF,n,ystart,1,&iy);
686: }
687: }
689: /* Do error checking immediately after we have non-empty ix, iy */
690: ISGetLocalSize(ix,&ixsize);
691: ISGetLocalSize(iy,&iysize);
692: VecGetSize(x,&xlen);
693: VecGetSize(y,&ylen);
695: ISGetMinMax(ix,&min,&max);
697: ISGetMinMax(iy,&min,&max);
700: /* Extract info about ix, iy for further test */
701: ISGetTypeID_Private(ix,&ixid);
702: ISGetTypeID_Private(iy,&iyid);
703: if (ixid == IS_BLOCK) ISGetBlockSize(ix,&bsx);
704: else if (ixid == IS_STRIDE) ISStrideGetInfo(ix,&ixfirst,&ixstep);
706: if (iyid == IS_BLOCK) ISGetBlockSize(iy,&bsy);
707: else if (iyid == IS_STRIDE) ISStrideGetInfo(iy,&iyfirst,&iystep);
709: /* Check if a PtoS scatter is a special ToAll/ToZero scatter, which can be the result of VecScatterCreateToAll/Zero.
710: ToAll means a whole MPI vector is copied to a seq vector on every process. ToZero means a whole MPI
711: vector is copied to a seq vector on rank 0 and other processes do nothing (i.e., they input empty ix, iy).
713: We can optimize these scatters with MPI collectives. We can also avoid costly analysis used for general scatters.
714: */
715: if (xcommsize > 1 && ycommsize == 1) { /* Ranks do not diverge at this if-test */
716: PetscInt pattern[2] = {0, 0}; /* A boolean array with pattern[0] for allgather-like (ToAll) and pattern[1] for gather-like (ToZero) */
717: PetscLayout map;
719: MPI_Comm_rank(xcomm,&rank);
720: VecGetLayout(x,&map);
721: if (rank == 0) {
722: if (ixid == IS_STRIDE && iyid == IS_STRIDE && ixsize == xlen && ixfirst == 0 && ixstep == 1 && iyfirst == 0 && iystep == 1) {
723: /* Rank 0 scatters the whole mpi x to seq y, so it is either a ToAll or a ToZero candidate in its view */
724: pattern[0] = pattern[1] = 1;
725: }
726: } else {
727: if (ixid == IS_STRIDE && iyid == IS_STRIDE && ixsize == xlen && ixfirst == 0 && ixstep == 1 && iyfirst == 0 && iystep == 1) {
728: /* Other ranks also scatter the whole mpi x to seq y, so it is a ToAll candidate in their view */
729: pattern[0] = 1;
730: } else if (ixsize == 0) {
731: /* Other ranks do nothing, so it is a ToZero candidate */
732: pattern[1] = 1;
733: }
734: }
736: /* Kill two birds with one stone (the expensive allreduce): pattern[] tells whether it is ToAll or ToZero */
737: MPIU_Allreduce(MPI_IN_PLACE,pattern,2,MPIU_INT,MPI_LAND,xcomm);
739: if (pattern[0] || pattern[1]) {
740: PetscSFCreate(xcomm,&sf);
741: PetscSFSetFromOptions(sf);
742: PetscSFSetGraphWithPattern(sf,map,pattern[0] ? PETSCSF_PATTERN_ALLGATHER : PETSCSF_PATTERN_GATHER);
743: goto functionend; /* No further analysis needed. What a big win! */
744: }
745: }
747: /* Continue ...
748: Do block optimization by taking advantage of high level info available in ix, iy.
749: The block optimization is valid when all of the following conditions are met:
750: 1) ix, iy are blocked or can be blocked (i.e., strided with step=1);
751: 2) ix, iy have the same block size;
752: 3) all processors agree on one block size;
753: 4) no blocks span more than one process;
754: */
755: bigcomm = (xcommsize == 1) ? ycomm : xcomm;
757: /* Processors could go through different paths in this if-else test */
758: m[0] = m[1] = PETSC_MPI_INT_MIN;
759: if (ixid == IS_BLOCK && iyid == IS_BLOCK) {
760: m[0] = PetscMax(bsx,bsy);
761: m[1] = -PetscMin(bsx,bsy);
762: } else if (ixid == IS_BLOCK && iyid == IS_STRIDE && iystep==1 && iyfirst%bsx==0) {
763: m[0] = bsx;
764: m[1] = -bsx;
765: } else if (ixid == IS_STRIDE && iyid == IS_BLOCK && ixstep==1 && ixfirst%bsy==0) {
766: m[0] = bsy;
767: m[1] = -bsy;
768: }
769: /* Get max and min of bsx,bsy over all processes in one allreduce */
770: MPIU_Allreduce(MPI_IN_PLACE,m,2,MPIU_INT,MPI_MAX,bigcomm);
771: max = m[0]; min = -m[1];
773: /* Since we used allreduce above, all ranks will have the same min and max. min==max
774: implies all ranks have the same bs. Do a further test to see if local vector sizes are divisible
775: by bs on ALL ranks. If they are, we are assured that no block spans more than one process.
776: */
777: if (min == max && min > 1) {
778: VecGetLocalSize(x,&xlen);
779: VecGetLocalSize(y,&ylen);
780: m[0] = xlen%min;
781: m[1] = ylen%min;
782: MPIU_Allreduce(MPI_IN_PLACE,m,2,MPIU_INT,MPI_LOR,bigcomm);
783: if (!m[0] && !m[1]) can_do_block_opt = PETSC_TRUE;
784: }
786: /* If can_do_block_opt, then shrink x, y, ix and iy by bs to get xx, yy, ixx and iyy, whose indices
787: and layout are actually used in building SF. Suppose blocked ix representing {0,1,2,6,7,8} has
788: indices {0,2} and bs=3, then ixx = {0,2}; suppose strided iy={3,4,5,6,7,8}, then iyy={1,2}.
790: xx is a little special. If x is seq, then xx is the concatenation of seq x's on ycomm. In this way,
791: we can treat PtoP and StoP uniformly as PtoS.
792: */
793: if (can_do_block_opt) {
794: const PetscInt *indices;
796: /* Shrink x and ix */
797: bs = min;
798: VecCreateMPIWithArray(bigcomm,1,xlen/bs,PETSC_DECIDE,NULL,&xx); /* We only care about xx's layout */
799: if (ixid == IS_BLOCK) {
800: ISBlockGetIndices(ix,&indices);
801: ISBlockGetLocalSize(ix,&ixsize);
802: ISCreateGeneral(PETSC_COMM_SELF,ixsize,indices,PETSC_COPY_VALUES,&ixx);
803: ISBlockRestoreIndices(ix,&indices);
804: } else { /* ixid == IS_STRIDE */
805: ISGetLocalSize(ix,&ixsize);
806: ISCreateStride(PETSC_COMM_SELF,ixsize/bs,ixfirst/bs,1,&ixx);
807: }
809: /* Shrink y and iy */
810: VecCreateMPIWithArray(ycomm,1,ylen/bs,PETSC_DECIDE,NULL,&yy);
811: if (iyid == IS_BLOCK) {
812: ISBlockGetIndices(iy,&indices);
813: ISBlockGetLocalSize(iy,&iysize);
814: ISCreateGeneral(PETSC_COMM_SELF,iysize,indices,PETSC_COPY_VALUES,&iyy);
815: ISBlockRestoreIndices(iy,&indices);
816: } else { /* iyid == IS_STRIDE */
817: ISGetLocalSize(iy,&iysize);
818: ISCreateStride(PETSC_COMM_SELF,iysize/bs,iyfirst/bs,1,&iyy);
819: }
820: } else {
821: ixx = ix;
822: iyy = iy;
823: yy = y;
824: if (xcommsize == 1) VecCreateMPIWithArray(bigcomm,1,xlen,PETSC_DECIDE,NULL,&xx); else xx = x;
825: }
827: /* Now it is ready to build SF with preprocessed (xx, yy) and (ixx, iyy) */
828: ISGetIndices(ixx,&xindices);
829: ISGetIndices(iyy,&yindices);
830: VecGetLayout(xx,&xlayout);
832: if (ycommsize > 1) {
833: /* PtoP or StoP */
835: /* Below is a piece of complex code with a very simple goal: move global index pairs (xindices[i], yindices[i])
836: to the owner process of yindices[i] according to ylayout, for i = 0..n-1.
838: I originally did it through a temp sf, but later I realized the old design was inefficient and also distorted the log view.
839: We want to map one VecScatterCreate() call to one PetscSFCreate() call. The old design mapped to three
840: PetscSFCreate() calls. This code is on the critical path of VecScatterSetUp and is used by every VecScatterCreate.
841: So I commented it out and did another optimized implementation. The commented code is left here for reference.
842: */
843: #if 0
844: const PetscInt *degree;
845: PetscSF tmpsf;
846: PetscInt inedges=0,*leafdata,*rootdata;
848: VecGetOwnershipRange(xx,&xstart,NULL);
849: VecGetLayout(yy,&ylayout);
850: VecGetOwnershipRange(yy,&ystart,NULL);
852: VecGetLocalSize(yy,&nroots);
853: ISGetLocalSize(iyy,&nleaves);
854: PetscMalloc2(nleaves,&iremote,nleaves*2,&leafdata);
856: for (i=0; i<nleaves; i++) {
857: PetscLayoutFindOwnerIndex(ylayout,yindices[i],&iremote[i].rank,&iremote[i].index);
858: leafdata[2*i] = yindices[i];
859: leafdata[2*i+1] = (xcommsize > 1)? xindices[i] : xindices[i] + xstart;
860: }
862: PetscSFCreate(ycomm,&tmpsf);
863: PetscSFSetGraph(tmpsf,nroots,nleaves,NULL,PETSC_USE_POINTER,iremote,PETSC_USE_POINTER);
865: PetscSFComputeDegreeBegin(tmpsf,&degree);
866: PetscSFComputeDegreeEnd(tmpsf,&degree);
868: for (i=0; i<nroots; i++) inedges += degree[i];
869: PetscMalloc1(inedges*2,&rootdata);
870: PetscSFGatherBegin(tmpsf,MPIU_2INT,leafdata,rootdata);
871: PetscSFGatherEnd(tmpsf,MPIU_2INT,leafdata,rootdata);
873: PetscFree2(iremote,leafdata);
874: PetscSFDestroy(&tmpsf);
876: /* rootdata contains global index pairs (i, j). j's are owned by the current process, but i's can point to anywhere.
877: We convert j to local, and convert i to (rank, index). In the end, we get an PtoS suitable for building SF.
878: */
879: nleaves = inedges;
880: VecGetLocalSize(xx,&nroots);
881: PetscMalloc1(nleaves,&ilocal);
882: PetscMalloc1(nleaves,&iremote);
884: for (i=0; i<inedges; i++) {
885: ilocal[i] = rootdata[2*i] - ystart; /* convert y's global index to local index */
886: PetscLayoutFindOwnerIndex(xlayout,rootdata[2*i+1],&iremote[i].rank,&iremote[i].index); /* convert x's global index to (rank, index) */
887: }
888: PetscFree(rootdata);
889: #else
890: PetscInt j,k,n,disp,rlentotal,*sstart,*xindices_sorted,*yindices_sorted;
891: const PetscInt *yrange;
892: PetscMPIInt nsend,nrecv,nreq,yrank,*sendto,*recvfrom,tag1,tag2;
893: PetscInt *slens,*rlens,count;
894: PetscInt *rxindices,*ryindices;
895: MPI_Request *reqs,*sreqs,*rreqs;
897: /* Sorting makes the code simpler and faster, and also helps get rid of many O(P) arrays, which hurt scalability at large scale
898: yindices_sorted - sorted yindices
899: xindices_sorted - xindices sorted along with yindices
900: */
901: ISGetLocalSize(ixx,&n); /*ixx, iyy have the same local size */
902: PetscMalloc2(n,&xindices_sorted,n,&yindices_sorted);
903: PetscArraycpy(xindices_sorted,xindices,n);
904: PetscArraycpy(yindices_sorted,yindices,n);
905: PetscSortIntWithArray(n,yindices_sorted,xindices_sorted);
906: VecGetOwnershipRange(xx,&xstart,NULL);
907: if (xcommsize == 1) {for (i=0; i<n; i++) xindices_sorted[i] += xstart;} /* Convert to global indices */
909: /*=============================================================================
910: Calculate info about messages I need to send
911: =============================================================================*/
912: /* nsend - number of non-empty messages to send
913: sendto - [nsend] ranks I will send messages to
914: sstart - [nsend+1] sstart[i] is the start index in xindices_sorted[] of the entries I send to rank sendto[i]
915: slens - [ycommsize] I want to send slens[i] entries to rank i.
916: */
917: VecGetLayout(yy,&ylayout);
918: PetscLayoutGetRanges(ylayout,&yrange);
919: PetscCalloc1(ycommsize,&slens); /* The only O(P) array in this algorithm */
921: i = j = nsend = 0;
922: while (i < n) {
923: if (yindices_sorted[i] >= yrange[j+1]) { /* If i-th index is out of rank j's bound */
924: do {j++;} while (yindices_sorted[i] >= yrange[j+1] && j < ycommsize); /* Increase j until i-th index falls in rank j's bound */
926: }
927: i++;
928: if (!slens[j]++) nsend++;
929: }
931: PetscMalloc2(nsend+1,&sstart,nsend,&sendto);
933: sstart[0] = 0;
934: for (i=j=0; i<ycommsize; i++) {
935: if (slens[i]) {
936: sendto[j] = (PetscMPIInt)i;
937: sstart[j+1] = sstart[j] + slens[i];
938: j++;
939: }
940: }
942: /*=============================================================================
943: Calculate the reverse info about messages I will recv
944: =============================================================================*/
945: /* nrecv - number of messages I will recv
946: recvfrom - [nrecv] ranks I recv from
947: rlens - [nrecv] I will recv rlens[i] entries from rank recvfrom[i]
948: rlentotal - sum of rlens[]
949: rxindices - [rlentotal] recv buffer for xindices_sorted
950: ryindices - [rlentotal] recv buffer for yindices_sorted
951: */
952: PetscGatherNumberOfMessages_Private(ycomm,NULL,slens,&nrecv);
953: PetscGatherMessageLengths_Private(ycomm,nsend,nrecv,slens,&recvfrom,&rlens);
954: PetscFree(slens); /* Free the O(P) array ASAP */
955: rlentotal = 0; for (i=0; i<nrecv; i++) rlentotal += rlens[i];
957: /*=============================================================================
958: Communicate with processors in recvfrom[] to populate rxindices and ryindices
959: ============================================================================*/
960: PetscCommGetNewTag(ycomm,&tag1);
961: PetscCommGetNewTag(ycomm,&tag2);
962: PetscMalloc2(rlentotal,&rxindices,rlentotal,&ryindices);
963: PetscMPIIntCast((nsend+nrecv)*2,&nreq);
964: PetscMalloc1(nreq,&reqs);
965: sreqs = reqs;
966: rreqs = reqs + nsend*2;
968: for (i=disp=0; i<nrecv; i++) {
969: count = rlens[i];
970: MPIU_Irecv(rxindices+disp,count,MPIU_INT,recvfrom[i],tag1,ycomm,rreqs+i);
971: MPIU_Irecv(ryindices+disp,count,MPIU_INT,recvfrom[i],tag2,ycomm,rreqs+nrecv+i);
972: disp += rlens[i];
973: }
975: for (i=0; i<nsend; i++) {
976: count = sstart[i+1]-sstart[i];
977: MPIU_Isend(xindices_sorted+sstart[i],count,MPIU_INT,sendto[i],tag1,ycomm,sreqs+i);
978: MPIU_Isend(yindices_sorted+sstart[i],count,MPIU_INT,sendto[i],tag2,ycomm,sreqs+nsend+i);
979: }
980: MPI_Waitall(nreq,reqs,MPI_STATUSES_IGNORE);
982: /* Transform VecScatter into SF */
983: nleaves = rlentotal;
984: PetscMalloc1(nleaves,&ilocal);
985: PetscMalloc1(nleaves,&iremote);
986: MPI_Comm_rank(ycomm,&yrank);
987: for (i=disp=0; i<nrecv; i++) {
988: for (j=0; j<rlens[i]; j++) {
989: k = disp + j; /* k-th index pair */
990: ilocal[k] = ryindices[k] - yrange[yrank]; /* Convert y's global index to local index */
991: PetscLayoutFindOwnerIndex(xlayout,rxindices[k],&rank,&iremote[k].index); /* Convert x's global index to (rank, index) */
992: iremote[k].rank = rank;
993: }
994: disp += rlens[i];
995: }
997: PetscFree2(sstart,sendto);
998: PetscFree(rlens);
999: PetscFree(recvfrom);
1000: PetscFree(reqs);
1001: PetscFree2(rxindices,ryindices);
1002: PetscFree2(xindices_sorted,yindices_sorted);
1003: #endif
1004: } else {
1005: /* PtoS or StoS */
1006: ISGetLocalSize(iyy,&nleaves);
1007: PetscMalloc1(nleaves,&ilocal);
1008: PetscMalloc1(nleaves,&iremote);
1009: PetscArraycpy(ilocal,yindices,nleaves);
1010: for (i=0; i<nleaves; i++) {
1011: PetscLayoutFindOwnerIndex(xlayout,xindices[i],&rank,&iremote[i].index);
1012: iremote[i].rank = rank;
1013: }
1014: }
1016: /* MUST build SF on xx's comm, which is not necessarily identical to yy's comm.
1017: In SF's view, xx contains the roots (i.e., the remote) and iremote[].rank are ranks in xx's comm.
1018: yy contains leaves, which are local and can be thought of as part of PETSC_COMM_SELF. */
1019: PetscSFCreate(PetscObjectComm((PetscObject)xx),&sf);
1020: sf->allow_multi_leaves = PETSC_TRUE;
1021: PetscSFSetFromOptions(sf);
1022: VecGetLocalSize(xx,&nroots);
1023: PetscSFSetGraph(sf,nroots,nleaves,ilocal,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER); /* Give ilocal/iremote to petsc and no need to free them here */
1025: /* Free memory no longer needed */
1026: ISRestoreIndices(ixx,&xindices);
1027: ISRestoreIndices(iyy,&yindices);
1028: if (can_do_block_opt) {
1029: VecDestroy(&xx);
1030: VecDestroy(&yy);
1031: ISDestroy(&ixx);
1032: ISDestroy(&iyy);
1033: } else if (xcommsize == 1) {
1034: VecDestroy(&xx);
1035: }
1037: functionend:
1038: sf->vscat.bs = bs;
1039: if (sf->vscat.bs > 1) {
1040: MPI_Type_contiguous(sf->vscat.bs,MPIU_SCALAR,&sf->vscat.unit);
1041: MPI_Type_commit(&sf->vscat.unit);
1042: } else {
1043: sf->vscat.unit = MPIU_SCALAR;
1044: }
1045: VecGetLocalSize(x,&sf->vscat.from_n);
1046: VecGetLocalSize(y,&sf->vscat.to_n);
1047: if (!ix_old) ISDestroy(&ix); /* We created helper ix, iy. Free them */
1048: if (!iy_old) ISDestroy(&iy);
1050: /* Set default */
1051: VecScatterSetFromOptions(sf);
1053: *newsf = sf;
1054: return 0;
1055: }
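/* A minimal creation sketch (index values and sizes here are illustrative assumptions): gather two chosen
   global entries of an MPI vector x into a length-2 sequential vector y on every process. */
Vec        x,y;
IS         ix,iy;
VecScatter ctx;
PetscInt   idx[2] = {0,5};                    /* global indices of x to fetch */

VecCreateMPI(PETSC_COMM_WORLD,PETSC_DECIDE,10,&x);
VecCreateSeq(PETSC_COMM_SELF,2,&y);
ISCreateGeneral(PETSC_COMM_SELF,2,idx,PETSC_COPY_VALUES,&ix);
ISCreateStride(PETSC_COMM_SELF,2,0,1,&iy);    /* y[0], y[1] receive the values */
VecScatterCreate(x,ix,y,iy,&ctx);
VecScatterBegin(ctx,x,y,INSERT_VALUES,SCATTER_FORWARD);
VecScatterEnd(ctx,x,y,INSERT_VALUES,SCATTER_FORWARD);
VecScatterDestroy(&ctx);
ISDestroy(&ix); ISDestroy(&iy);
VecDestroy(&x); VecDestroy(&y);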
1057: /*@C
1058: VecScatterCreateToAll - Creates a vector and a scatter context that copies all
1059: vector values to each processor
1061: Collective on Vec
1063: Input Parameter:
1064: . vin - input MPIVEC
1066: Output Parameters:
1067: + ctx - scatter context
1068: - vout - output SEQVEC that is large enough to scatter into
1070: Level: intermediate
1072: Note: vout may be NULL [PETSC_NULL_VEC from fortran] if you do not
1073: need to have it created
1075: Usage:
1076: $ VecScatterCreateToAll(vin,&ctx,&vout);
1077: $
1078: $ // scatter as many times as you need
1079: $ VecScatterBegin(ctx,vin,vout,INSERT_VALUES,SCATTER_FORWARD);
1080: $ VecScatterEnd(ctx,vin,vout,INSERT_VALUES,SCATTER_FORWARD);
1081: $
1082: $ // destroy scatter context and local vector when no longer needed
1083: $ VecScatterDestroy(&ctx);
1084: $ VecDestroy(&vout);
1086: Do NOT create a vector and then pass it in as the final argument vout! vout is created by this routine
1087: automatically (unless you pass NULL in for that argument because you do not need it).
1089: .seealso VecScatterCreate(), VecScatterCreateToZero(), VecScatterBegin(), VecScatterEnd()
1091: @*/
1092: PetscErrorCode VecScatterCreateToAll(Vec vin,VecScatter *ctx,Vec *vout)
1093: {
1094: PetscInt N;
1095: IS is;
1096: Vec tmp;
1097: Vec *tmpv;
1098: PetscBool tmpvout = PETSC_FALSE;
1099: VecType roottype;
1104: if (vout) {
1106: tmpv = vout;
1107: } else {
1108: tmpvout = PETSC_TRUE;
1109: tmpv = &tmp;
1110: }
1112: /* Create a seq vec on each proc, with the same size as the original vec */
1113: VecGetSize(vin,&N);
1114: VecGetRootType_Private(vin,&roottype);
1115: VecCreate(PETSC_COMM_SELF,tmpv);
1116: VecSetSizes(*tmpv,N,PETSC_DECIDE);
1117: VecSetType(*tmpv,roottype);
1118: /* Create the VecScatter ctx with the communication info */
1119: ISCreateStride(PETSC_COMM_SELF,N,0,1,&is);
1120: VecScatterCreate(vin,is,*tmpv,is,ctx);
1121: ISDestroy(&is);
1122: if (tmpvout) VecDestroy(tmpv);
1123: return 0;
1124: }
1126: /*@C
1127: VecScatterCreateToZero - Creates an output vector and a scatter context used to
1128: copy all vector values into the output vector on the zeroth processor
1130: Collective on Vec
1132: Input Parameter:
1133: . vin - input MPIVEC
1135: Output Parameters:
1136: + ctx - scatter context
1137: - vout - output SEQVEC that is large enough to scatter into on processor 0 and
1138: of length zero on all other processors
1140: Level: intermediate
1142: Note: vout may be NULL [PETSC_NULL_VEC from fortran] if you do not
1143: need to have it created
1145: Usage:
1146: $ VecScatterCreateToZero(vin,&ctx,&vout);
1147: $
1148: $ // scatter as many times as you need
1149: $ VecScatterBegin(ctx,vin,vout,INSERT_VALUES,SCATTER_FORWARD);
1150: $ VecScatterEnd(ctx,vin,vout,INSERT_VALUES,SCATTER_FORWARD);
1151: $
1152: $ // destroy scatter context and local vector when no longer needed
1153: $ VecScatterDestroy(&ctx);
1154: $ VecDestroy(&vout);
1156: .seealso VecScatterCreate(), VecScatterCreateToAll(), VecScatterBegin(), VecScatterEnd()
1158: Do NOT create a vector and then pass it in as the final argument vout! vout is created by this routine
1159: automatically (unless you pass NULL in for that argument because you do not need it).
1161: @*/
1162: PetscErrorCode VecScatterCreateToZero(Vec vin,VecScatter *ctx,Vec *vout)
1163: {
1165: PetscInt N;
1166: PetscMPIInt rank;
1167: IS is;
1168: Vec tmp;
1169: Vec *tmpv;
1170: PetscBool tmpvout = PETSC_FALSE;
1171: VecType roottype;
1176: if (vout) {
1178: tmpv = vout;
1179: } else {
1180: tmpvout = PETSC_TRUE;
1181: tmpv = &tmp;
1182: }
1184: /* Create a vec on each proc; on process 0 it has the same size as the original vec, and length zero elsewhere */
1185: VecGetSize(vin,&N);
1186: MPI_Comm_rank(PetscObjectComm((PetscObject)vin),&rank);
1187: if (rank) N = 0;
1188: VecGetRootType_Private(vin,&roottype);
1189: VecCreate(PETSC_COMM_SELF,tmpv);
1190: VecSetSizes(*tmpv,N,PETSC_DECIDE);
1191: VecSetType(*tmpv,roottype);
1192: /* Create the VecScatter ctx with the communication info */
1193: ISCreateStride(PETSC_COMM_SELF,N,0,1,&is);
1194: VecScatterCreate(vin,is,*tmpv,is,ctx);
1195: ISDestroy(&is);
1196: if (tmpvout) VecDestroy(tmpv);
1197: return 0;
1198: }
1200: /*@
1201: VecScatterBegin - Begins a generalized scatter from one vector to
1202: another. Complete the scattering phase with VecScatterEnd().
1204: Neighbor-wise Collective on VecScatter
1206: Input Parameters:
1207: + sf - scatter context generated by VecScatterCreate()
1208: . x - the vector from which we scatter
1209: . y - the vector to which we scatter
1210: . addv - either ADD_VALUES, MAX_VALUES, MIN_VALUES or INSERT_VALUES, with INSERT_VALUES mode any location
1211: not scattered to retains its old value; i.e. the vector is NOT first zeroed.
1212: - mode - the scattering mode, usually SCATTER_FORWARD. The available modes are:
1213: SCATTER_FORWARD or SCATTER_REVERSE
1215: Level: intermediate
1217: Options Database: See VecScatterCreate()
1219: Notes:
1220: The vectors x and y need not be the same vectors used in the call
1221: to VecScatterCreate(), but x must have the same parallel data layout
1222: as that passed in as the x to VecScatterCreate(), similarly for the y.
1223: Most likely they have been obtained from VecDuplicate().
1225: You cannot change the values in the input vector between the calls to VecScatterBegin()
1226: and VecScatterEnd().
1228: If you use SCATTER_REVERSE the two arguments x and y should be reversed relative to
1229: the SCATTER_FORWARD call.
1231: y[iy[i]] = x[ix[i]], for i=0,...,ni-1
1233: This scatter is far more general than the conventional
1234: scatter, since it can be a gather or a scatter or a combination,
1235: depending on the indices ix and iy. If x is a parallel vector and y
1236: is sequential, VecScatterBegin() can serve to gather values to a
1237: single processor. Similarly, if y is parallel and x sequential, the
1238: routine can scatter from one processor to many processors.
1240: .seealso: VecScatterCreate(), VecScatterEnd()
1241: @*/
1242: PetscErrorCode VecScatterBegin(VecScatter sf,Vec x,Vec y,InsertMode addv,ScatterMode mode)
1243: {
1244: PetscInt to_n,from_n;
1249: if (PetscDefined(USE_DEBUG)) {
1250: /*
1251: Error checking to make sure these vectors match the vectors used
1252: to create the vector scatter context. -1 in the from_n and to_n indicate the
1253: vector lengths are unknown (for example with mapped scatters) and thus
1254: no error checking is performed.
1255: */
1256: if (sf->vscat.from_n >= 0 && sf->vscat.to_n >= 0) {
1257: VecGetLocalSize(x,&from_n);
1258: VecGetLocalSize(y,&to_n);
1259: if (mode & SCATTER_REVERSE) {
1262: } else {
1265: }
1266: }
1267: }
1269: sf->vscat.logging = PETSC_TRUE;
1270: PetscLogEventBegin(VEC_ScatterBegin,sf,x,y,0);
1271: VecScatterBegin_Internal(sf,x,y,addv,mode);
1272: if (sf->vscat.beginandendtogether) {
1273: VecScatterEnd_Internal(sf,x,y,addv,mode);
1274: }
1275: PetscLogEventEnd(VEC_ScatterBegin,sf,x,y,0);
1276: sf->vscat.logging = PETSC_FALSE;
1277: return 0;
1278: }
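/* A short sketch contrasting forward and reverse use of the same context, reusing ctx, x, y from the
   creation sketch above: the vector arguments are swapped for SCATTER_REVERSE. */
VecScatterBegin(ctx,x,y,INSERT_VALUES,SCATTER_FORWARD);   /* y[iy[i]] = x[ix[i]] */
VecScatterEnd(ctx,x,y,INSERT_VALUES,SCATTER_FORWARD);

VecScatterBegin(ctx,y,x,ADD_VALUES,SCATTER_REVERSE);      /* x[ix[i]] += y[iy[i]] */
VecScatterEnd(ctx,y,x,ADD_VALUES,SCATTER_REVERSE);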
1280: /*@
1281: VecScatterEnd - Ends a generalized scatter from one vector to another. Call
1282: after first calling VecScatterBegin().
1284: Neighbor-wise Collective on VecScatter
1286: Input Parameters:
1287: + sf - scatter context generated by VecScatterCreate()
1288: . x - the vector from which we scatter
1289: . y - the vector to which we scatter
1290: . addv - one of ADD_VALUES, MAX_VALUES, MIN_VALUES or INSERT_VALUES
1291: - mode - the scattering mode, usually SCATTER_FORWARD. The available modes are:
1292: SCATTER_FORWARD, SCATTER_REVERSE
1294: Level: intermediate
1296: Notes:
1297: If you use SCATTER_REVERSE the arguments x and y should be reversed relative to the SCATTER_FORWARD call.
1299: y[iy[i]] = x[ix[i]], for i=0,...,ni-1
1301: .seealso: VecScatterBegin(), VecScatterCreate()
1302: @*/
1303: PetscErrorCode VecScatterEnd(VecScatter sf,Vec x,Vec y,InsertMode addv,ScatterMode mode)
1304: {
1308: if (!sf->vscat.beginandendtogether) {
1309: sf->vscat.logging = PETSC_TRUE;
1310: PetscLogEventBegin(VEC_ScatterEnd,sf,x,y,0);
1311: VecScatterEnd_Internal(sf,x,y,addv,mode);
1312: PetscLogEventEnd(VEC_ScatterEnd,sf,x,y,0);
1313: sf->vscat.logging = PETSC_FALSE;
1314: }
1315: return 0;
1316: }