Actual source code: vscat.c

#include "petscsf.h"
#include "petscsystypes.h"
#include "petscvec.h"
#include <petsc/private/sfimpl.h>
#include <../src/vec/is/sf/impls/basic/sfbasic.h>
#include <../src/vec/is/sf/impls/basic/sfpack.h>
#include <petsc/private/vecimpl.h>

typedef enum {
  IS_INVALID,
  IS_GENERAL,
  IS_BLOCK,
  IS_STRIDE
} ISTypeID;

static inline PetscErrorCode ISGetTypeID_Private(IS is, ISTypeID *id)
{
  PetscBool same;

  PetscFunctionBegin;
  *id = IS_INVALID;
  PetscCall(PetscObjectTypeCompare((PetscObject)is, ISGENERAL, &same));
  if (same) {
    *id = IS_GENERAL;
    goto functionend;
  }
  PetscCall(PetscObjectTypeCompare((PetscObject)is, ISBLOCK, &same));
  if (same) {
    *id = IS_BLOCK;
    goto functionend;
  }
  PetscCall(PetscObjectTypeCompare((PetscObject)is, ISSTRIDE, &same));
  if (same) {
    *id = IS_STRIDE;
    goto functionend;
  }
functionend:
  PetscFunctionReturn(PETSC_SUCCESS);
}
static PetscErrorCode VecScatterBegin_Internal(VecScatter sf, Vec x, Vec y, InsertMode addv, ScatterMode mode)
{
  PetscSF      wsf = NULL; /* either sf or its local part */
  MPI_Op       mop = MPI_OP_NULL;
  PetscMPIInt  size;
  PetscMemType xmtype = PETSC_MEMTYPE_HOST, ymtype = PETSC_MEMTYPE_HOST;

  PetscFunctionBegin;
  if (x != y) PetscCall(VecLockReadPush(x));
  PetscCall(VecGetArrayReadAndMemType(x, &sf->vscat.xdata, &xmtype));
  PetscCall(VecGetArrayAndMemType(y, &sf->vscat.ydata, &ymtype));
  PetscCall(VecLockWriteSet(y, PETSC_TRUE));

  /* SCATTER_FORWARD_LOCAL indicates ignoring inter-process communication */
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)sf), &size));
  if ((mode & SCATTER_FORWARD_LOCAL) && size > 1) { /* Lazy creation of sf->vscat.lsf since SCATTER_FORWARD_LOCAL is uncommon */
    if (!sf->vscat.lsf) PetscCall(PetscSFCreateLocalSF_Private(sf, &sf->vscat.lsf));
    wsf = sf->vscat.lsf;
  } else {
    wsf = sf;
  }

  /* Note xdata/ydata is always recorded on sf (not lsf) above */
  if (addv == INSERT_VALUES) mop = MPI_REPLACE;
  else if (addv == ADD_VALUES) mop = MPIU_SUM; /* PETSc defines its own MPI datatype and SUM operation for __float128 etc. */
  else if (addv == MAX_VALUES) mop = MPIU_MAX;
  else if (addv == MIN_VALUES) mop = MPIU_MIN;
  else SETERRQ(PetscObjectComm((PetscObject)sf), PETSC_ERR_SUP, "Unsupported InsertMode %d in VecScatterBegin/End", addv);

  if (mode & SCATTER_REVERSE) { /* REVERSE indicates leaves to root scatter. Note that x and y are swapped in input */
    PetscCall(PetscSFReduceWithMemTypeBegin(wsf, sf->vscat.unit, xmtype, sf->vscat.xdata, ymtype, sf->vscat.ydata, mop));
  } else { /* FORWARD indicates x to y scatter, where x is root and y is leaf */
    PetscCall(PetscSFBcastWithMemTypeBegin(wsf, sf->vscat.unit, xmtype, sf->vscat.xdata, ymtype, sf->vscat.ydata, mop));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode VecScatterEnd_Internal(VecScatter sf, Vec x, Vec y, InsertMode addv, ScatterMode mode)
{
  PetscSF     wsf = NULL;
  MPI_Op      mop = MPI_OP_NULL;
  PetscMPIInt size;

  PetscFunctionBegin;
  /* SCATTER_FORWARD_LOCAL indicates ignoring inter-process communication */
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)sf), &size));
  wsf = ((mode & SCATTER_FORWARD_LOCAL) && size > 1) ? sf->vscat.lsf : sf;

  if (addv == INSERT_VALUES) mop = MPI_REPLACE;
  else if (addv == ADD_VALUES) mop = MPIU_SUM;
  else if (addv == MAX_VALUES) mop = MPIU_MAX;
  else if (addv == MIN_VALUES) mop = MPIU_MIN;
  else SETERRQ(PetscObjectComm((PetscObject)sf), PETSC_ERR_SUP, "Unsupported InsertMode %d in VecScatterBegin/End", addv);

  if (mode & SCATTER_REVERSE) { /* reverse scatter sends leaves to roots. Note that x and y are swapped in input */
    PetscCall(PetscSFReduceEnd(wsf, sf->vscat.unit, sf->vscat.xdata, sf->vscat.ydata, mop));
  } else { /* forward scatter sends roots to leaves, i.e., x to y */
    PetscCall(PetscSFBcastEnd(wsf, sf->vscat.unit, sf->vscat.xdata, sf->vscat.ydata, mop));
  }

  PetscCall(VecRestoreArrayReadAndMemType(x, &sf->vscat.xdata));
  if (x != y) PetscCall(VecLockReadPop(x));
  PetscCall(VecRestoreArrayAndMemType(y, &sf->vscat.ydata));
  PetscCall(VecLockWriteSet(y, PETSC_FALSE));
  PetscFunctionReturn(PETSC_SUCCESS);
}
/* VecScatterRemap provides a lightweight way to slightly modify a VecScatter. Suppose the input sf scatters
   x[i] to y[j]; tomap gives a plan to change vscat to scatter x[tomap[i]] to y[j]. Note that in SF,
   x is the roots. That means we need to change incoming data such as bas->irootloc[].
*/
static PetscErrorCode VecScatterRemap_Internal(VecScatter sf, const PetscInt *tomap, const PetscInt *frommap)
{
  PetscInt       i, bs = sf->vscat.bs;
  PetscMPIInt    size;
  PetscBool      ident = PETSC_TRUE, isbasic, isneighbor;
  PetscSFType    type;
  PetscSF_Basic *bas = NULL;

  PetscFunctionBegin;
  /* check if it is an identity map. If it is, do nothing */
  if (tomap) {
    for (i = 0; i < sf->nroots * bs; i++) {
      if (i != tomap[i]) {
        ident = PETSC_FALSE;
        break;
      }
    }
    if (ident) PetscFunctionReturn(PETSC_SUCCESS);
  }
  PetscCheck(!frommap, PETSC_COMM_SELF, PETSC_ERR_SUP, "Unable to remap the FROM in scatters yet");
  if (!tomap) PetscFunctionReturn(PETSC_SUCCESS);

  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)sf), &size));

  /* Since the indices changed, we must also update the local SF. But we do not do that here, since
     lsf is rarely used. We just destroy lsf and rebuild it on demand from the updated sf.
  */
  if (sf->vscat.lsf) PetscCall(PetscSFDestroy(&sf->vscat.lsf));

  PetscCall(PetscSFGetType(sf, &type));
  PetscCall(PetscObjectTypeCompare((PetscObject)sf, PETSCSFBASIC, &isbasic));
  PetscCall(PetscObjectTypeCompare((PetscObject)sf, PETSCSFNEIGHBOR, &isneighbor));
  PetscCheck(isbasic || isneighbor, PetscObjectComm((PetscObject)sf), PETSC_ERR_SUP, "VecScatterRemap on SF type %s is not supported", type);

  PetscCall(PetscSFSetUp(sf)); /* to build sf->irootloc if SetUp has not yet been called */

  /* Root indices are going to be remapped. This is tricky for SF. Root indices are used in sf->rremote,
     sf->remote and bas->irootloc. The latter is cheap to remap, but the former two are not.
     To remap them, we have to do a bcast from roots to leaves, to let leaves know their updated roots.
     Since VecScatterRemap is supposed to be a cheap routine that adapts a vecscatter by only changing where
     x[] data is taken from, we do not remap sf->rremote and sf->remote. The consequence is that operations
     accessing them (such as PetscSFCompose) may get stale info. Considering VecScatter does not need
     such complicated SF operations, we do not remap sf->rremote and sf->remote; instead we destroy them
     so that code accessing them (if any) will crash (instead of getting silent errors). Note that BcastAndOp/Reduce,
     which are used by VecScatter and only rely on bas->irootloc, are updated and correct.
  */
  sf->remote = NULL;
  PetscCall(PetscFree(sf->remote_alloc));
  /* Not easy to free sf->rremote since it was allocated with PetscMalloc4(), so just give it crazy values */
  for (i = 0; i < sf->roffset[sf->nranks]; i++) sf->rremote[i] = PETSC_MIN_INT;

  /* Indices in tomap[] are for each individual vector entry. But indices in sf are for each
     block in the vector. So before the remapping, we have to expand indices in sf by bs, and
     after the remapping, we have to shrink them back.
  */
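  /* For illustration (the values are hypothetical, not taken from any particular run): with bs = 2,
     a block index bas->irootloc[i] = 3 covers vector entries {6,7}. The statement below looks up
     tomap[3*2] = tomap[6], the new location of the block's first entry, then divides by bs to
     shrink it back to a block index; e.g. tomap[6] = 10 would remap block 3 to block 5. */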
  bas = (PetscSF_Basic *)sf->data;
  for (i = 0; i < bas->ioffset[bas->niranks]; i++) bas->irootloc[i] = tomap[bas->irootloc[i] * bs] / bs;
#if defined(PETSC_HAVE_DEVICE)
  /* Free the irootloc copy on device. We allocate a new copy and get the updated value on demand. See PetscSFLinkGetRootPackOptAndIndices() */
  for (i = 0; i < 2; i++) PetscCall(PetscSFFree(sf, PETSC_MEMTYPE_DEVICE, bas->irootloc_d[i]));
#endif
  /* Destroy and then rebuild root packing optimizations since indices are changed */
  PetscCall(PetscSFResetPackFields(sf));
  PetscCall(PetscSFSetUpPackFields(sf));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  Given a parallel VecScatter context, return number of procs and vector entries involved in remote (i.e., off-process) communication

  Input Parameters:
+ sf   - the context (must be a parallel vecscatter)
- send - true to select the send info (i.e., todata), otherwise to select the recv info (i.e., fromdata)

  Output Parameters:
+ num_procs   - number of remote processors
- num_entries - number of vector entries to send or recv

  Notes:
  Sometimes PETSc internally needs to use the matrix-vector-multiply vecscatter context for other purposes. The client code
  usually only uses MPI_Send/Recv. This group of subroutines provides info needed for such uses.
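
  A minimal sketch of such a use (the buffer and variable names here are illustrative, not part of any API):
.vb
  PetscInt     nsend, nentries;
  PetscScalar *sendbuf;
  PetscCall(VecScatterGetRemoteCount_Private(sf, PETSC_TRUE, &nsend, &nentries));
  PetscCall(PetscMalloc1(nentries, &sendbuf)); // one send buffer covering all outgoing entries
.ve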

.seealso: [](sec_scatter), `VecScatterGetRemote_Private()`, `VecScatterGetRemoteOrdered_Private()`
*/
PetscErrorCode VecScatterGetRemoteCount_Private(VecScatter sf, PetscBool send, PetscInt *num_procs, PetscInt *num_entries)
{
  PetscInt           nranks, remote_start;
  PetscMPIInt        rank;
  const PetscInt    *offset;
  const PetscMPIInt *ranks;

  PetscFunctionBegin;
  PetscCall(PetscSFSetUp(sf));
  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sf), &rank));

  /* This routine is mainly used for MatMult's Mvctx. In Mvctx, we scatter an MPI vector x to a sequential vector lvec.
     Remember x is roots and lvec is leaves. 'send' means roots-to-leaves communication. If 'send' is true, we need to
     get info about which ranks this processor needs to send to. In other words, we need to call PetscSFGetLeafRanks().
     If send is false, we do the opposite, calling PetscSFGetRootRanks().
  */
  if (send) PetscCall(PetscSFGetLeafRanks(sf, &nranks, &ranks, &offset, NULL));
  else PetscCall(PetscSFGetRootRanks(sf, &nranks, &ranks, &offset, NULL, NULL));
  if (nranks) {
    remote_start = (rank == ranks[0]) ? 1 : 0;
    if (num_procs) *num_procs = nranks - remote_start;
    if (num_entries) *num_entries = offset[nranks] - offset[remote_start];
  } else {
    if (num_procs) *num_procs = 0;
    if (num_entries) *num_entries = 0;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Given a parallel VecScatter context, return a plan that represents the remote communication.
   Any output parameter can be NULL.

  Input Parameters:
+ sf   - the context
- send - true to select the send info (i.e., todata), otherwise to select the recv info (i.e., fromdata)

  Output Parameters:
+ n       - number of remote processors
. starts  - starting point in indices for each proc. ATTENTION: starts[0] is not necessarily zero.
            Therefore, expressions like starts[i+1]-starts[i] and indices[starts[i]+j] work as
            expected for a CSR structure, but buf[starts[i]+j] may be out of range if buf was allocated
            with length starts[n]-starts[0]. One should use buf[starts[i]-starts[0]+j] instead.
. indices - indices of entries to send/recv
. procs   - ranks of remote processors
- bs      - block size
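
  A minimal sketch of the indexing convention described above (buf and xarray are illustrative placeholders):
.vb
  PetscInt           n, bs;
  const PetscInt    *starts, *indices;
  const PetscMPIInt *procs;
  PetscScalar       *buf; // packed send buffer; xarray is an illustrative source array
  PetscCall(VecScatterGetRemote_Private(sf, PETSC_TRUE, &n, &starts, &indices, &procs, &bs));
  PetscCall(PetscMalloc1(n ? starts[n] - starts[0] : 0, &buf)); // allocated without the leading offset
  for (PetscInt i = 0; i < n; i++) {
    for (PetscInt j = 0; j < starts[i + 1] - starts[i]; j++) buf[starts[i] - starts[0] + j] = xarray[indices[starts[i] + j]]; // note the -starts[0] shift into buf
  }
.ve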

.seealso: `VecScatterRestoreRemote_Private()`, `VecScatterGetRemoteOrdered_Private()`
*/
PetscErrorCode VecScatterGetRemote_Private(VecScatter sf, PetscBool send, PetscInt *n, const PetscInt **starts, const PetscInt **indices, const PetscMPIInt **procs, PetscInt *bs)
{
  PetscInt           nranks, remote_start;
  PetscMPIInt        rank;
  const PetscInt    *offset, *location;
  const PetscMPIInt *ranks;

  PetscFunctionBegin;
  PetscCall(PetscSFSetUp(sf));
  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sf), &rank));

  if (send) PetscCall(PetscSFGetLeafRanks(sf, &nranks, &ranks, &offset, &location));
  else PetscCall(PetscSFGetRootRanks(sf, &nranks, &ranks, &offset, &location, NULL));

  if (nranks) {
    remote_start = (rank == ranks[0]) ? 1 : 0;
    if (n) *n = nranks - remote_start;
    if (starts) *starts = &offset[remote_start];
    if (indices) *indices = location; /* not &location[offset[remote_start]]. starts[0] may point to the middle of indices[] */
    if (procs) *procs = &ranks[remote_start];
  } else {
    if (n) *n = 0;
    if (starts) *starts = NULL;
    if (indices) *indices = NULL;
    if (procs) *procs = NULL;
  }

  if (bs) *bs = 1;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Given a parallel VecScatter context, return a plan that represents the remote communication. Ranks of remote
   processors returned in procs must be sorted in ascending order. Any output parameter can be NULL.

  Input Parameters:
+ sf   - the context
- send - true to select the send info (i.e., todata), otherwise to select the recv info (i.e., fromdata)

  Output Parameters:
+ n       - number of remote processors
. starts  - starting point in indices for each proc. ATTENTION: starts[0] is not necessarily zero.
            Therefore, expressions like starts[i+1]-starts[i] and indices[starts[i]+j] work as
            expected for a CSR structure, but buf[starts[i]+j] may be out of range if buf was allocated
            with length starts[n]-starts[0]. One should use buf[starts[i]-starts[0]+j] instead.
. indices - indices of entries to send/recv
. procs   - ranks of remote processors
- bs      - block size

  Notes:
  Output parameters like starts and indices must also be adapted according to the sorted ranks.

.seealso: `VecScatterRestoreRemoteOrdered_Private()`, `VecScatterGetRemote_Private()`
*/
PetscErrorCode VecScatterGetRemoteOrdered_Private(VecScatter sf, PetscBool send, PetscInt *n, const PetscInt **starts, const PetscInt **indices, const PetscMPIInt **procs, PetscInt *bs)
{
  PetscFunctionBegin;
  PetscCall(VecScatterGetRemote_Private(sf, send, n, starts, indices, procs, bs));
  if (PetscUnlikelyDebug(n && procs)) {
    PetscInt i;
    /* loop from back to front to also handle the case *n = 0 */
    for (i = *n - 1; i > 0; i--) PetscCheck((*procs)[i - 1] <= (*procs)[i], PETSC_COMM_SELF, PETSC_ERR_PLIB, "procs[] are not ordered");
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Given a parallel VecScatter context, restore the plan returned by VecScatterGetRemote_Private. This gives a chance for
   an implementation to free memory allocated in the VecScatterGetRemote_Private call.

  Input Parameters:
+ sf   - the context
- send - true to select the send info (i.e., todata), otherwise to select the recv info (i.e., fromdata)

  Output Parameters:
+ n       - number of remote processors
. starts  - starting point in indices for each proc
. indices - indices of entries to send/recv
. procs   - ranks of remote processors
- bs      - block size

.seealso: `VecScatterGetRemote_Private()`
*/
PetscErrorCode VecScatterRestoreRemote_Private(VecScatter sf, PetscBool send, PetscInt *n, const PetscInt **starts, const PetscInt **indices, const PetscMPIInt **procs, PetscInt *bs)
{
  PetscFunctionBegin;
  if (starts) *starts = NULL;
  if (indices) *indices = NULL;
  if (procs) *procs = NULL;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Given a parallel VecScatter context, restore the plan returned by VecScatterGetRemoteOrdered_Private. This gives a chance for
   an implementation to free memory allocated in the VecScatterGetRemoteOrdered_Private call.

  Input Parameters:
+ sf   - the context
- send - true to select the send info (i.e., todata), otherwise to select the recv info (i.e., fromdata)

  Output Parameters:
+ n       - number of remote processors
. starts  - starting point in indices for each proc
. indices - indices of entries to send/recv
. procs   - ranks of remote processors
- bs      - block size

.seealso: `VecScatterGetRemoteOrdered_Private()`
*/
PetscErrorCode VecScatterRestoreRemoteOrdered_Private(VecScatter sf, PetscBool send, PetscInt *n, const PetscInt **starts, const PetscInt **indices, const PetscMPIInt **procs, PetscInt *bs)
{
  PetscFunctionBegin;
  PetscCall(VecScatterRestoreRemote_Private(sf, send, n, starts, indices, procs, bs));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  VecScatterSetUp - Sets up the `VecScatter` to be able to actually scatter information between vectors

  Collective

  Input Parameter:
. sf - the scatter context

  Level: intermediate

.seealso: [](sec_scatter), `VecScatter`, `VecScatterCreate()`, `VecScatterCopy()`
@*/
PetscErrorCode VecScatterSetUp(VecScatter sf)
{
  PetscFunctionBegin;
  PetscCall(PetscSFSetUp(sf));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  VecScatterSetType - Builds a vector scatter for a particular vector scatter implementation.

  Collective

  Input Parameters:
+ sf   - The `VecScatter` object
- type - The name of the vector scatter type

  Options Database Key:
. -sf_type <type> - Sets the `VecScatterType`

  Level: intermediate

  Note:
  Use `VecScatterDuplicate()` to form additional vector scatters of the same type as an existing vector scatter.

.seealso: [](sec_scatter), `VecScatter`, `VecScatterType`, `VecScatterGetType()`, `VecScatterCreate()`
@*/
PetscErrorCode VecScatterSetType(VecScatter sf, VecScatterType type)
{
  PetscFunctionBegin;
  PetscCall(PetscSFSetType(sf, type));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  VecScatterGetType - Gets the vector scatter type name (as a string) from the `VecScatter`.

  Not Collective

  Input Parameter:
. sf - The vector scatter

  Output Parameter:
. type - The vector scatter type name

  Level: intermediate

.seealso: [](sec_scatter), `VecScatter`, `VecScatterType`, `VecScatterSetType()`, `VecScatterCreate()`
@*/
PetscErrorCode VecScatterGetType(VecScatter sf, VecScatterType *type)
{
  PetscFunctionBegin;
  PetscCall(PetscSFGetType(sf, type));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  VecScatterRegister - Adds a new vector scatter component implementation

  Not Collective

  Input Parameters:
+ sname    - The name of a new user-defined creation routine
- function - The creation routine

  Level: advanced

.seealso: [](sec_scatter), `VecScatter`, `VecScatterType`, `VecRegister()`
@*/
PetscErrorCode VecScatterRegister(const char sname[], PetscErrorCode (*function)(VecScatter))
{
  PetscFunctionBegin;
  PetscCall(PetscSFRegister(sname, function));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* ------------------------------------------------------------------*/
/*@
  VecScatterGetMerged - Returns true if the scatter is completed in `VecScatterBegin()`
  and `VecScatterEnd()` does nothing

  Not Collective

  Input Parameter:
. sf - scatter context created with `VecScatterCreate()`

  Output Parameter:
. flg - `PETSC_TRUE` if the `VecScatterBegin()`/`VecScatterEnd()` are all done during the `VecScatterBegin()`

  Level: developer
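
  Example Usage:
  A sketch of how the flag might be used; whether any communication remains in flight after
  `VecScatterBegin()` depends on this flag (the local-work placeholder is illustrative):
.vb
  PetscBool merged;
  VecScatterGetMerged(sf, &merged);
  VecScatterBegin(sf, x, y, INSERT_VALUES, SCATTER_FORWARD);
  if (!merged) { /* communication still in progress; do unrelated local work here */ }
  VecScatterEnd(sf, x, y, INSERT_VALUES, SCATTER_FORWARD);
.ve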

.seealso: [](sec_scatter), `VecScatter`, `VecScatterCreate()`, `VecScatterEnd()`, `VecScatterBegin()`
@*/
PetscErrorCode VecScatterGetMerged(VecScatter sf, PetscBool *flg)
{
  PetscFunctionBegin;

  if (flg) *flg = sf->vscat.beginandendtogether;
  PetscFunctionReturn(PETSC_SUCCESS);
}
/*@C
  VecScatterDestroy - Destroys a scatter context created by `VecScatterCreate()`

  Collective

  Input Parameter:
. sf - the scatter context

  Level: intermediate

.seealso: [](sec_scatter), `VecScatter`, `VecScatterCreate()`, `VecScatterCopy()`
@*/
PetscErrorCode VecScatterDestroy(VecScatter *sf)
{
  PetscFunctionBegin;
  PetscCall(PetscSFDestroy(sf));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  VecScatterCopy - Makes a copy of a scatter context.

  Collective

  Input Parameter:
. sf - the scatter context

  Output Parameter:
. newsf - the context copy

  Level: advanced

.seealso: [](sec_scatter), `VecScatter`, `VecScatterType`, `VecScatterCreate()`, `VecScatterDestroy()`
@*/
PetscErrorCode VecScatterCopy(VecScatter sf, VecScatter *newsf)
{
  PetscFunctionBegin;
  PetscAssertPointer(newsf, 2);
  PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_GRAPH, newsf));
  PetscCall(PetscSFSetUp(*newsf));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  VecScatterViewFromOptions - View a `VecScatter` object based on values in the options database

  Collective

  Input Parameters:
+ sf   - the scatter context
. obj  - Optional object
- name - command line option

  Level: intermediate

  Note:
  See `PetscObjectViewFromOptions()` for available `PetscViewer` and `PetscViewerFormat` values

.seealso: [](sec_scatter), `VecScatter`, `VecScatterView()`, `PetscObjectViewFromOptions()`, `VecScatterCreate()`
@*/
PetscErrorCode VecScatterViewFromOptions(VecScatter sf, PetscObject obj, const char name[])
{
  PetscFunctionBegin;

  PetscCall(PetscObjectViewFromOptions((PetscObject)sf, obj, name));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* ------------------------------------------------------------------*/
/*@C
  VecScatterView - Views a vector scatter context.

  Collective

  Input Parameters:
+ sf     - the scatter context
- viewer - the viewer for displaying the context

  Level: intermediate
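
  Example Usage:
  A minimal sketch, assuming `sf` came from `VecScatterCreate()`:
.vb
  VecScatterView(sf, PETSC_VIEWER_STDOUT_WORLD);
.ve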

.seealso: [](sec_scatter), `VecScatter`, `PetscViewer`, `VecScatterViewFromOptions()`, `PetscObjectViewFromOptions()`, `VecScatterCreate()`
@*/
PetscErrorCode VecScatterView(VecScatter sf, PetscViewer viewer)
{
  PetscFunctionBegin;
  PetscCall(PetscSFView(sf, viewer));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  VecScatterRemap - Remaps the "from" and "to" indices in a
  vector scatter context.

  Collective

  Input Parameters:
+ sf      - vector scatter context
. tomap   - remapping plan for "to" indices (may be `NULL`)
- frommap - remapping plan for "from" indices (may be `NULL`)

  Level: developer

  Notes:
  In the parallel case the todata contains indices from where the data is taken
  (and then sent to others)! The fromdata contains indices from where the received
  data is finally put locally.

  In the sequential case the todata contains indices from where the data is put
  and the fromdata contains indices from where the data is taken from.
  This is backwards from the parallel case!
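
  Example Usage:
  A sketch remapping the "to" indices with a permutation; the values are illustrative, and tomap
  must cover every local entry of the scatter's source vector:
.vb
  PetscInt tomap[] = {1, 0, 3, 2}; // swap adjacent pairs; length equals the local source size
  VecScatterRemap(sf, tomap, NULL); // frommap must be NULL; remapping "from" is unsupported
.ve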

.seealso: [](sec_scatter), `VecScatter`, `VecScatterCreate()`
@*/
PetscErrorCode VecScatterRemap(VecScatter sf, PetscInt tomap[], PetscInt frommap[])
{
  PetscFunctionBegin;
  if (tomap) PetscAssertPointer(tomap, 2);
  if (frommap) PetscAssertPointer(frommap, 3);
  PetscCall(VecScatterRemap_Internal(sf, tomap, frommap));
  PetscCheck(!frommap, PETSC_COMM_SELF, PETSC_ERR_SUP, "Unable to remap the FROM in scatters yet");
  /* Mark the vector lengths as unknown because we do not know the lengths of the remapped vectors */
  sf->vscat.from_n = -1;
  sf->vscat.to_n   = -1;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  VecScatterSetFromOptions - Configures the vector scatter from values in the options database.

  Collective

  Input Parameter:
. sf - The vector scatter

  Notes:
  To see all options, run your program with the -help option, or consult the users manual.

  Must be called before `VecScatterSetUp()` and before the vector scatter is used.

  Level: beginner
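
  Example Usage:
  A minimal sketch, assuming `sf` was just created; the -vecscatter_merge flag is then honored at runtime:
.vb
  VecScatterCreate(x, ix, y, iy, &sf);
  VecScatterSetFromOptions(sf); // e.g. run the program with -vecscatter_merge
.ve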

.seealso: [](sec_scatter), `VecScatter`, `VecScatterCreate()`, `VecScatterDestroy()`, `VecScatterSetUp()`
@*/
PetscErrorCode VecScatterSetFromOptions(VecScatter sf)
{
  PetscFunctionBegin;

  PetscObjectOptionsBegin((PetscObject)sf);

  sf->vscat.beginandendtogether = PETSC_FALSE;
  PetscCall(PetscOptionsBool("-vecscatter_merge", "Use combined (merged) vector scatter begin and end", "VecScatterCreate", sf->vscat.beginandendtogether, &sf->vscat.beginandendtogether, NULL));
  if (sf->vscat.beginandendtogether) PetscCall(PetscInfo(sf, "Using combined (merged) vector scatter begin and end\n"));
  PetscOptionsEnd();
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  VecScatterCreate - Creates a vector scatter context.

  Collective

  Input Parameters:
+ x  - a vector that defines the shape (parallel data layout of the vector) of vectors from
       which we scatter
. y  - a vector that defines the shape (parallel data layout of the vector) of vectors to which
       we scatter
. ix - the indices of `x` to scatter (if `NULL`, scatters all values)
- iy - the indices of `y` to hold results (if `NULL`, fills the entire vector `y` in order)

  Output Parameter:
. newsf - location to store the new scatter context

  Options Database Keys:
+ -vecscatter_view              - Prints details of communications
. -vecscatter_view ::ascii_info - Prints fewer details about communication
- -vecscatter_merge             - `VecScatterBegin()` handles all of the communication, `VecScatterEnd()` is a nop;
                                  this eliminates the chance for overlap of computation and communication

  Level: intermediate

  Notes:
  If both `x` and `y` are parallel, their communicators must be on the same
  set of processes, but their process order can be different.
  In calls to the scatter operations you can use different vectors than the `x` and
  `y` you used above; BUT they must have the same parallel data layout, for example,
  they could be obtained from `VecDuplicate()`.
  A `VecScatter` context CANNOT be used in two or more simultaneous scatters;
  that is, you cannot call a second `VecScatterBegin()` with the same scatter
  context until the `VecScatterEnd()` has been called on the first `VecScatterBegin()`.
  In this case a separate `VecScatter` is needed for each concurrent scatter.

  `ix` and `iy` cannot both be `NULL` at the same time.

  Use `VecScatterCreateToAll()` to create a vecscatter that copies an MPI vector to sequential vectors on all MPI ranks.
  Use `VecScatterCreateToZero()` to create a vecscatter that copies an MPI vector to a sequential vector on MPI rank 0.
  These special vecscatters have better performance than general ones.
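
  Example Usage:
  A sketch gathering the first two entries of a parallel vector `x` into the start of a sequential
  vector `y` on every rank, assuming `x` and `y` already exist (the sizes and index values are illustrative):
.vb
  IS         ix, iy;
  VecScatter sf;
  ISCreateStride(PETSC_COMM_SELF, 2, 0, 1, &ix); // global indices 0,1 of parallel x
  ISCreateStride(PETSC_COMM_SELF, 2, 0, 1, &iy); // local indices 0,1 of sequential y
  VecScatterCreate(x, ix, y, iy, &sf);
  VecScatterBegin(sf, x, y, INSERT_VALUES, SCATTER_FORWARD);
  VecScatterEnd(sf, x, y, INSERT_VALUES, SCATTER_FORWARD);
  VecScatterDestroy(&sf);
  ISDestroy(&ix);
  ISDestroy(&iy);
.ve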

.seealso: [](sec_scatter), `VecScatter`, `VecScatterDestroy()`, `VecScatterCreateToAll()`, `VecScatterCreateToZero()`, `PetscSFCreate()`
@*/
PetscErrorCode VecScatterCreate(Vec x, IS ix, Vec y, IS iy, VecScatter *newsf)
{
  MPI_Comm        xcomm, ycomm, bigcomm;
  Vec             xx, yy;
  IS              ix_old = ix, iy_old = iy, ixx, iyy;
  PetscMPIInt     xcommsize, ycommsize, rank, result;
  PetscInt        i, n, N, nroots, nleaves, *ilocal, xstart, ystart, ixsize, iysize, xlen, ylen;
  const PetscInt *xindices, *yindices;
  PetscSFNode    *iremote;
  PetscLayout     xlayout, ylayout;
  ISTypeID        ixid, iyid;
  PetscInt        bs, bsx, bsy, min, max, m[2], mg[2], ixfirst, ixstep, iyfirst, iystep;
  PetscBool       can_do_block_opt = PETSC_FALSE;
  PetscSF         sf;

  PetscFunctionBegin;
  PetscAssertPointer(newsf, 5);
  PetscCheck(ix || iy, PetscObjectComm((PetscObject)x), PETSC_ERR_SUP, "Cannot pass default in for both input and output indices");

  /* Get comm from x and y */
  PetscCall(PetscObjectGetComm((PetscObject)x, &xcomm));
  PetscCallMPI(MPI_Comm_size(xcomm, &xcommsize));
  PetscCall(PetscObjectGetComm((PetscObject)y, &ycomm));
  PetscCallMPI(MPI_Comm_size(ycomm, &ycommsize));
  if (xcommsize > 1 && ycommsize > 1) {
    PetscCallMPI(MPI_Comm_compare(xcomm, ycomm, &result));
    PetscCheck(result != MPI_UNEQUAL, PETSC_COMM_SELF, PETSC_ERR_ARG_NOTSAMECOMM, "VecScatterCreate: parallel vectors x and y must have identical/congruent/similar communicators");
  }
  bs = 1; /* default, no blocking */

  /*
    Let P and S stand for parallel and sequential vectors respectively. There are four combinations of vecscatters: PtoP, PtoS,
    StoP and StoS. The assumption of VecScatterCreate(Vec x,IS ix,Vec y,IS iy,VecScatter *newctx) is: if x is parallel, then ix
    contains global indices of x. If x is sequential, ix contains local indices of x. Similarly for y and iy.

    SF builds around the concepts of local leaves and remote roots. We treat the source vector x as roots and the destination
    vector y as leaves. A PtoS scatter can be naturally mapped to SF. We transform PtoP and StoP to PtoS, and treat StoS as a trivial PtoS.
  */

  /* NULL ix or iy in VecScatterCreate(x,ix,y,iy,newctx) has special meaning. Recover them for these cases */
  if (!ix) {
    if (xcommsize > 1 && ycommsize == 1) { /* PtoS: null ix means the whole x will be scattered to each seq y */
      PetscCall(VecGetSize(x, &N));
      PetscCall(ISCreateStride(PETSC_COMM_SELF, N, 0, 1, &ix));
    } else { /* PtoP, StoP or StoS: null ix means the whole local part of x will be scattered */
      PetscCall(VecGetLocalSize(x, &n));
      PetscCall(VecGetOwnershipRange(x, &xstart, NULL));
      PetscCall(ISCreateStride(PETSC_COMM_SELF, n, xstart, 1, &ix));
    }
  }

  if (!iy) {
    if (xcommsize == 1 && ycommsize > 1) { /* StoP: null iy means the whole y will be scattered to, from each seq x */
      PetscCall(VecGetSize(y, &N));
      PetscCall(ISCreateStride(PETSC_COMM_SELF, N, 0, 1, &iy));
    } else { /* PtoP, StoP or StoS: null iy means the whole local part of y will be scattered to */
      PetscCall(VecGetLocalSize(y, &n));
      PetscCall(VecGetOwnershipRange(y, &ystart, NULL));
      PetscCall(ISCreateStride(PETSC_COMM_SELF, n, ystart, 1, &iy));
    }
  }

  /* Do error checking immediately after we have non-empty ix, iy */
  PetscCall(ISGetLocalSize(ix, &ixsize));
  PetscCall(ISGetLocalSize(iy, &iysize));
  PetscCall(VecGetSize(x, &xlen));
  PetscCall(VecGetSize(y, &ylen));
  PetscCheck(ixsize == iysize, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Scatter sizes of ix and iy don't match locally ix=%" PetscInt_FMT " iy=%" PetscInt_FMT, ixsize, iysize);
  PetscCall(ISGetMinMax(ix, &min, &max));
  PetscCheck(min >= 0 && max < xlen, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Scatter indices in ix are out of range: found [%" PetscInt_FMT ",%" PetscInt_FMT "), expected in [0,%" PetscInt_FMT ")", min, max, xlen);
  PetscCall(ISGetMinMax(iy, &min, &max));
  PetscCheck(min >= 0 && max < ylen, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Scatter indices in iy are out of range: found [%" PetscInt_FMT ",%" PetscInt_FMT "), expected in [0,%" PetscInt_FMT ")", min, max, ylen);

  /* Extract info about ix, iy for further tests */
  PetscCall(ISGetTypeID_Private(ix, &ixid));
  PetscCall(ISGetTypeID_Private(iy, &iyid));
  if (ixid == IS_BLOCK) PetscCall(ISGetBlockSize(ix, &bsx));
  else if (ixid == IS_STRIDE) PetscCall(ISStrideGetInfo(ix, &ixfirst, &ixstep));

  if (iyid == IS_BLOCK) PetscCall(ISGetBlockSize(iy, &bsy));
  else if (iyid == IS_STRIDE) PetscCall(ISStrideGetInfo(iy, &iyfirst, &iystep));

  /* Check if the PtoS is a special ToAll/ToZero scatter, which can be the result of VecScatterCreateToAll/Zero.
     ToAll means a whole MPI vector is copied to a seq vector on every process. ToZero means a whole MPI
     vector is copied to a seq vector on rank 0 and other processes do nothing (i.e., they input empty ix, iy).

     We can optimize these scatters with MPI collectives. We can also avoid the costly analysis used for general scatters.
  */
  if (xcommsize > 1 && ycommsize == 1) { /* Ranks do not diverge at this if-test */
    PetscInt    pattern[2] = {0, 0}; /* A boolean array with pattern[0] for allgather-like (ToAll) and pattern[1] for gather-like (ToZero) */
    PetscLayout map;

    PetscCallMPI(MPI_Comm_rank(xcomm, &rank));
    PetscCall(VecGetLayout(x, &map));
    if (rank == 0) {
      if (ixid == IS_STRIDE && iyid == IS_STRIDE && ixsize == xlen && ixfirst == 0 && ixstep == 1 && iyfirst == 0 && iystep == 1) {
        /* Rank 0 scatters the whole mpi x to seq y, so it is either a ToAll or a ToZero candidate in its view */
        pattern[0] = pattern[1] = 1;
      }
    } else {
      if (ixid == IS_STRIDE && iyid == IS_STRIDE && ixsize == xlen && ixfirst == 0 && ixstep == 1 && iyfirst == 0 && iystep == 1) {
        /* Other ranks also scatter the whole mpi x to seq y, so it is a ToAll candidate in their view */
        pattern[0] = 1;
      } else if (ixsize == 0) {
        /* Other ranks do nothing, so it is a ToZero candidate */
        pattern[1] = 1;
      }
    }

    /* One stone (the expensive allreduce), two birds: pattern[] tells if it is ToAll or ToZero */
    PetscCall(MPIU_Allreduce(MPI_IN_PLACE, pattern, 2, MPIU_INT, MPI_LAND, xcomm));

    if (pattern[0] || pattern[1]) {
      PetscCall(PetscSFCreate(xcomm, &sf));
      PetscCall(PetscSFSetFromOptions(sf));
      PetscCall(PetscSFSetGraphWithPattern(sf, map, pattern[0] ? PETSCSF_PATTERN_ALLGATHER : PETSCSF_PATTERN_GATHER));
      goto functionend; /* No further analysis needed. What a big win! */
    }
  }

  /* Continue ...
     Do block optimization by taking advantage of high-level info available in ix, iy.
     The block optimization is valid when all of the following conditions are met:
     1) ix, iy are blocked or can be blocked (i.e., strided with step=1);
     2) ix, iy have the same block size;
     3) all processors agree on one block size;
     4) no blocks span more than one process;
  */
  bigcomm = (xcommsize == 1) ? ycomm : xcomm;

  /* Processors could go through different paths in this if-else test */
  m[0] = PETSC_MAX_INT;
  m[1] = PETSC_MIN_INT;
  if (ixid == IS_BLOCK && iyid == IS_BLOCK) {
    m[0] = PetscMin(bsx, bsy);
    m[1] = PetscMax(bsx, bsy);
  } else if (ixid == IS_BLOCK && iyid == IS_STRIDE && iystep == 1 && iyfirst % bsx == 0) {
    m[0] = bsx;
    m[1] = bsx;
  } else if (ixid == IS_STRIDE && iyid == IS_BLOCK && ixstep == 1 && ixfirst % bsy == 0) {
    m[0] = bsy;
    m[1] = bsy;
  }
  /* Get max and min of bsx,bsy over all processes in one allreduce */
  PetscCall(PetscGlobalMinMaxInt(bigcomm, m, mg));

  /* Since we used allreduce above, all ranks will have the same min and max. min==max
     implies all ranks have the same bs. Do a further test to see if local vectors are divisible
     by bs on ALL ranks. If they are, we are ensured that no blocks span more than one processor.
  */
  if (mg[0] == mg[1] && mg[0] > 1) {
    PetscCall(VecGetLocalSize(x, &xlen));
    PetscCall(VecGetLocalSize(y, &ylen));
    m[0] = xlen % mg[0];
    m[1] = ylen % mg[0];
    PetscCall(MPIU_Allreduce(MPI_IN_PLACE, m, 2, MPIU_INT, MPI_LOR, bigcomm));
    if (!m[0] && !m[1]) can_do_block_opt = PETSC_TRUE;
  }

  /* If can_do_block_opt, then shrink x, y, ix and iy by bs to get xx, yy, ixx and iyy, whose indices
     and layout are actually used in building the SF. Suppose blocked ix representing {0,1,2,6,7,8} has
     indices {0,2} and bs=3, then ixx = {0,2}; suppose strided iy={3,4,5,6,7,8}, then iyy={1,2}.

     xx is a little special. If x is seq, then xx is the concatenation of seq x's on ycomm. In this way,
     we can treat PtoP and StoP uniformly as PtoS.
  */
  if (can_do_block_opt) {
    const PetscInt *indices;

    /* Shrink x and ix */
    bs = mg[0];
    PetscCall(VecCreateMPIWithArray(bigcomm, 1, xlen / bs, PETSC_DECIDE, NULL, &xx)); /* We only care about xx's layout */
    if (ixid == IS_BLOCK) {
      PetscCall(ISBlockGetIndices(ix, &indices));
      PetscCall(ISBlockGetLocalSize(ix, &ixsize));
      PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ixsize, indices, PETSC_COPY_VALUES, &ixx));
      PetscCall(ISBlockRestoreIndices(ix, &indices));
    } else { /* ixid == IS_STRIDE */
      PetscCall(ISGetLocalSize(ix, &ixsize));
      PetscCall(ISCreateStride(PETSC_COMM_SELF, ixsize / bs, ixfirst / bs, 1, &ixx));
    }

    /* Shrink y and iy */
    PetscCall(VecCreateMPIWithArray(ycomm, 1, ylen / bs, PETSC_DECIDE, NULL, &yy));
    if (iyid == IS_BLOCK) {
      PetscCall(ISBlockGetIndices(iy, &indices));
      PetscCall(ISBlockGetLocalSize(iy, &iysize));
      PetscCall(ISCreateGeneral(PETSC_COMM_SELF, iysize, indices, PETSC_COPY_VALUES, &iyy));
      PetscCall(ISBlockRestoreIndices(iy, &indices));
    } else { /* iyid == IS_STRIDE */
      PetscCall(ISGetLocalSize(iy, &iysize));
      PetscCall(ISCreateStride(PETSC_COMM_SELF, iysize / bs, iyfirst / bs, 1, &iyy));
    }
  } else {
    ixx = ix;
    iyy = iy;
    yy  = y;
    if (xcommsize == 1) PetscCall(VecCreateMPIWithArray(bigcomm, 1, xlen, PETSC_DECIDE, NULL, &xx));
    else xx = x;
  }

  /* Now it is ready to build SF with preprocessed (xx, yy) and (ixx, iyy) */
  PetscCall(ISGetIndices(ixx, &xindices));
  PetscCall(ISGetIndices(iyy, &yindices));
  PetscCall(VecGetLayout(xx, &xlayout));

  if (ycommsize > 1) {
    /* PtoP or StoP */

    /* Below is a piece of complex code with a very simple goal: move global index pairs (xindices[i], yindices[i]),
       to the owner process of yindices[i] according to ylayout, i = 0..n.

       I did it through a temp sf, but later I thought the old design was inefficient and also distorted log view.
       We want to map one VecScatterCreate() call to one PetscSFCreate() call. The old design mapped to three
       PetscSFCreate() calls. This code is on the critical path of VecScatterSetUp and is used by every VecScatterCreate.
       So I commented it out and did another optimized implementation. The commented code is left here for reference.
    */
#if 0
    const PetscInt *degree;
    PetscSF        tmpsf;
    PetscInt       inedges=0,*leafdata,*rootdata;

    PetscCall(VecGetOwnershipRange(xx,&xstart,NULL));
    PetscCall(VecGetLayout(yy,&ylayout));
    PetscCall(VecGetOwnershipRange(yy,&ystart,NULL));

    PetscCall(VecGetLocalSize(yy,&nroots));
    PetscCall(ISGetLocalSize(iyy,&nleaves));
    PetscCall(PetscMalloc2(nleaves,&iremote,nleaves*2,&leafdata));

    for (i=0; i<nleaves; i++) {
      PetscCall(PetscLayoutFindOwnerIndex(ylayout,yindices[i],&iremote[i].rank,&iremote[i].index));
      leafdata[2*i]   = yindices[i];
      leafdata[2*i+1] = (xcommsize > 1)? xindices[i] : xindices[i] + xstart;
    }

    PetscCall(PetscSFCreate(ycomm,&tmpsf));
    PetscCall(PetscSFSetGraph(tmpsf,nroots,nleaves,NULL,PETSC_USE_POINTER,iremote,PETSC_USE_POINTER));

    PetscCall(PetscSFComputeDegreeBegin(tmpsf,&degree));
    PetscCall(PetscSFComputeDegreeEnd(tmpsf,&degree));

    for (i=0; i<nroots; i++) inedges += degree[i];
    PetscCall(PetscMalloc1(inedges*2,&rootdata));
    PetscCall(PetscSFGatherBegin(tmpsf,MPIU_2INT,leafdata,rootdata));
    PetscCall(PetscSFGatherEnd(tmpsf,MPIU_2INT,leafdata,rootdata));

    PetscCall(PetscFree2(iremote,leafdata));
    PetscCall(PetscSFDestroy(&tmpsf));

    /* rootdata contains global index pairs (i, j). j's are owned by the current process, but i's can point to anywhere.
       We convert j to local, and convert i to (rank, index). In the end, we get a PtoS suitable for building SF.
    */
    nleaves = inedges;
    PetscCall(VecGetLocalSize(xx,&nroots));
    PetscCall(PetscMalloc1(nleaves,&ilocal));
    PetscCall(PetscMalloc1(nleaves,&iremote));

    for (i=0; i<inedges; i++) {
      ilocal[i] = rootdata[2*i] - ystart; /* convert y's global index to local index */
      PetscCall(PetscLayoutFindOwnerIndex(xlayout,rootdata[2*i+1],&iremote[i].rank,&iremote[i].index)); /* convert x's global index to (rank, index) */
    }
    PetscCall(PetscFree(rootdata));
#else
    PetscInt        j, k, n, disp, rlentotal, *sstart, *xindices_sorted, *yindices_sorted;
    const PetscInt *yrange;
    PetscMPIInt     nsend, nrecv, nreq, yrank, *sendto, *recvfrom, tag1, tag2;
    PetscInt       *slens, *rlens, count;
    PetscInt       *rxindices, *ryindices;
    MPI_Request    *reqs, *sreqs, *rreqs = NULL;

    /* Sorting makes the code simpler and faster, and also helps get rid of many O(P) arrays, which hurt scalability at large scale.
       yindices_sorted - sorted yindices
       xindices_sorted - xindices sorted along with yindices
    */
    PetscCall(ISGetLocalSize(ixx, &n)); /* ixx and iyy have the same local size */
    PetscCall(PetscMalloc2(n, &xindices_sorted, n, &yindices_sorted));
    PetscCall(PetscArraycpy(xindices_sorted, xindices, n));
    PetscCall(PetscArraycpy(yindices_sorted, yindices, n));
    PetscCall(PetscSortIntWithArray(n, yindices_sorted, xindices_sorted));
    PetscCall(VecGetOwnershipRange(xx, &xstart, NULL));
    if (xcommsize == 1) {
      for (i = 0; i < n; i++) xindices_sorted[i] += xstart;
    } /* Convert to global indices */

    /*
      Calculate info about the messages I need to send:
      nsend  - number of non-empty messages to send
      sendto - [nsend] ranks I will send messages to
      sstart - [nsend+1] sstart[i] is the start index in xindices_sorted[] I send to rank sendto[i]
      slens  - [ycommsize] I want to send slens[i] entries to rank i.
    */
    PetscCall(VecGetLayout(yy, &ylayout));
    PetscCall(PetscLayoutGetRanges(ylayout, &yrange));
    PetscCall(PetscCalloc1(ycommsize, &slens)); /* The only O(P) array in this algorithm */

    i = j = nsend = 0;
    while (i < n) {
      if (yindices_sorted[i] >= yrange[j + 1]) { /* If i-th index is out of rank j's bound */
        do {
          j++;
        } while (yindices_sorted[i] >= yrange[j + 1] && j < ycommsize); /* Increase j until i-th index falls in rank j's bound */
        PetscCheck(j != ycommsize, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Index %" PetscInt_FMT " not owned by any process, upper bound %" PetscInt_FMT, yindices_sorted[i], yrange[ycommsize]);
      }
      i++;
      if (!slens[j]++) nsend++;
    }

    PetscCall(PetscMalloc2(nsend + 1, &sstart, nsend, &sendto));

    sstart[0] = 0;
    for (i = j = 0; i < ycommsize; i++) {
      if (slens[i]) {
        sendto[j]     = (PetscMPIInt)i;
        sstart[j + 1] = sstart[j] + slens[i];
        j++;
      }
    }

    /*
      Calculate the reverse info about the messages I will recv:
      nrecv     - number of messages I will recv
      recvfrom  - [nrecv] ranks I recv from
      rlens     - [nrecv] I will recv rlens[i] entries from rank recvfrom[i]
      rlentotal - sum of rlens[]
      rxindices - [rlentotal] recv buffer for xindices_sorted
      ryindices - [rlentotal] recv buffer for yindices_sorted
    */
    PetscCall(PetscGatherNumberOfMessages_Private(ycomm, NULL, slens, &nrecv));
    PetscCall(PetscGatherMessageLengths_Private(ycomm, nsend, nrecv, slens, &recvfrom, &rlens));
    PetscCall(PetscFree(slens)); /* Free the O(P) array ASAP */
    rlentotal = 0;
    for (i = 0; i < nrecv; i++) rlentotal += rlens[i];

    /*
      Communicate with processors in recvfrom[] to populate rxindices and ryindices
    */
    PetscCall(PetscCommGetNewTag(ycomm, &tag1));
    PetscCall(PetscCommGetNewTag(ycomm, &tag2));
    PetscCall(PetscMalloc2(rlentotal, &rxindices, rlentotal, &ryindices));
    PetscCall(PetscMPIIntCast((nsend + nrecv) * 2, &nreq));
    PetscCall(PetscMalloc1(nreq, &reqs));
    sreqs = reqs;
    if (reqs) rreqs = reqs + nsend * 2;

    for (i = disp = 0; i < nrecv; i++) {
      count = rlens[i];
      PetscCallMPI(MPIU_Irecv(rxindices + disp, count, MPIU_INT, recvfrom[i], tag1, ycomm, rreqs + i));
      PetscCallMPI(MPIU_Irecv(ryindices + disp, count, MPIU_INT, recvfrom[i], tag2, ycomm, rreqs + nrecv + i));
      disp += rlens[i];
    }

    for (i = 0; i < nsend; i++) {
      count = sstart[i + 1] - sstart[i];
      PetscCallMPI(MPIU_Isend(xindices_sorted + sstart[i], count, MPIU_INT, sendto[i], tag1, ycomm, sreqs + i));
      PetscCallMPI(MPIU_Isend(yindices_sorted + sstart[i], count, MPIU_INT, sendto[i], tag2, ycomm, sreqs + nsend + i));
    }
    PetscCallMPI(MPI_Waitall(nreq, reqs, MPI_STATUSES_IGNORE));

    /* Transform VecScatter into SF */
    nleaves = rlentotal;
    PetscCall(PetscMalloc1(nleaves, &ilocal));
    PetscCall(PetscMalloc1(nleaves, &iremote));
    PetscCallMPI(MPI_Comm_rank(ycomm, &yrank));
    for (i = disp = 0; i < nrecv; i++) {
      for (j = 0; j < rlens[i]; j++) {
        k         = disp + j;                     /* k-th index pair */
        ilocal[k] = ryindices[k] - yrange[yrank]; /* Convert y's global index to local index */
        PetscCall(PetscLayoutFindOwnerIndex(xlayout, rxindices[k], &rank, &iremote[k].index)); /* Convert x's global index to (rank, index) */
        iremote[k].rank = rank;
      }
      disp += rlens[i];
    }

    PetscCall(PetscFree2(sstart, sendto));
    PetscCall(PetscFree(rlens));
    PetscCall(PetscFree(recvfrom));
    PetscCall(PetscFree(reqs));
    PetscCall(PetscFree2(rxindices, ryindices));
    PetscCall(PetscFree2(xindices_sorted, yindices_sorted));
#endif
  } else {
    /* PtoS or StoS */
    PetscCall(ISGetLocalSize(iyy, &nleaves));
    PetscCall(PetscMalloc1(nleaves, &ilocal));
    PetscCall(PetscMalloc1(nleaves, &iremote));
    PetscCall(PetscArraycpy(ilocal, yindices, nleaves));
    for (i = 0; i < nleaves; i++) {
      PetscCall(PetscLayoutFindOwnerIndex(xlayout, xindices[i], &rank, &iremote[i].index));
      iremote[i].rank = rank;
    }
  }

  /* MUST build the SF on xx's comm, which is not necessarily identical to yy's comm.
     In SF's view, xx contains the roots (i.e., the remote) and iremote[].rank are ranks in xx's comm.
     yy contains leaves, which are local and can be thought of as part of PETSC_COMM_SELF. */
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)xx), &sf));
  sf->allow_multi_leaves = PETSC_TRUE;
  PetscCall(PetscSFSetFromOptions(sf));
  PetscCall(VecGetLocalSize(xx, &nroots));
  PetscCall(PetscSFSetGraph(sf, nroots, nleaves, ilocal, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER)); /* Give ilocal/iremote to petsc and no need to free them here */

  /* Free memory no longer needed */
  PetscCall(ISRestoreIndices(ixx, &xindices));
  PetscCall(ISRestoreIndices(iyy, &yindices));
  if (can_do_block_opt) {
    PetscCall(VecDestroy(&xx));
    PetscCall(VecDestroy(&yy));
    PetscCall(ISDestroy(&ixx));
    PetscCall(ISDestroy(&iyy));
  } else if (xcommsize == 1) {
    PetscCall(VecDestroy(&xx));
  }

functionend:
  sf->vscat.bs = bs;
  if (sf->vscat.bs > 1) {
    PetscCallMPI(MPI_Type_contiguous(sf->vscat.bs, MPIU_SCALAR, &sf->vscat.unit));
    PetscCallMPI(MPI_Type_commit(&sf->vscat.unit));
  } else {
    sf->vscat.unit = MPIU_SCALAR;
  }
  PetscCall(VecGetLocalSize(x, &sf->vscat.from_n));
  PetscCall(VecGetLocalSize(y, &sf->vscat.to_n));
  if (!ix_old) PetscCall(ISDestroy(&ix)); /* We created helper ix, iy. Free them */
  if (!iy_old) PetscCall(ISDestroy(&iy));

  /* Set default */
  PetscCall(VecScatterSetFromOptions(sf));
  PetscCall(PetscSFSetUp(sf));

  *newsf = sf;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  VecScatterCreateToAll - Creates a vector and a scatter context that copies all
  vector values to each processor

  Collective

  Input Parameter:
. vin - an `MPIVEC`

  Output Parameters:
+ ctx  - scatter context
- vout - output `SEQVEC` that is large enough to scatter into

  Level: intermediate

  Example Usage:
.vb
  VecScatterCreateToAll(vin, &ctx, &vout);

  // scatter as many times as you need
  VecScatterBegin(ctx, vin, vout, INSERT_VALUES, SCATTER_FORWARD);
  VecScatterEnd(ctx, vin, vout, INSERT_VALUES, SCATTER_FORWARD);

  // destroy scatter context and local vector when no longer needed
  VecScatterDestroy(&ctx);
  VecDestroy(&vout);
.ve

  Notes:
  `vout` may be `NULL` [`PETSC_NULL_VEC` from Fortran] if you do not
  need to have it created

  Do NOT create a vector and then pass it in as the final argument `vout`! `vout` is created by this routine
  automatically (unless you pass `NULL` in for that argument if you do not need it).

.seealso: [](sec_scatter), `VecScatter`, `VecScatterCreate()`, `VecScatterCreateToZero()`, `VecScatterBegin()`, `VecScatterEnd()`
@*/
PetscErrorCode VecScatterCreateToAll(Vec vin, VecScatter *ctx, Vec *vout)
{
  PetscInt  N;
  IS        is;
  Vec       tmp;
  Vec      *tmpv;
  PetscBool tmpvout = PETSC_FALSE;
  VecType   roottype;

  PetscFunctionBegin;

  PetscAssertPointer(ctx, 2);
  if (vout) {
    PetscAssertPointer(vout, 3);
    tmpv = vout;
  } else {
    tmpvout = PETSC_TRUE;
    tmpv    = &tmp;
  }

  /* Create a seq vec on each proc, with the same size as the original vec */
  PetscCall(VecGetSize(vin, &N));
  PetscCall(VecGetRootType_Private(vin, &roottype));
  PetscCall(VecCreate(PETSC_COMM_SELF, tmpv));
  PetscCall(VecSetSizes(*tmpv, N, PETSC_DECIDE));
  PetscCall(VecSetType(*tmpv, roottype));
  /* Create the VecScatter ctx with the communication info */
  PetscCall(ISCreateStride(PETSC_COMM_SELF, N, 0, 1, &is));
  PetscCall(VecScatterCreate(vin, is, *tmpv, is, ctx));
  PetscCall(ISDestroy(&is));
  if (tmpvout) PetscCall(VecDestroy(tmpv));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  VecScatterCreateToZero - Creates an output vector and a scatter context used to
  copy all vector values into the output vector on the zeroth processor

  Collective

  Input Parameter:
. vin - `Vec` of type `MPIVEC`

  Output Parameters:
+ ctx  - scatter context
- vout - output `SEQVEC` that is large enough to scatter into on processor 0 and
         of length zero on all other processors

  Level: intermediate

  Example Usage:
.vb
  VecScatterCreateToZero(vin, &ctx, &vout);

  // scatter as many times as you need
  VecScatterBegin(ctx, vin, vout, INSERT_VALUES, SCATTER_FORWARD);
  VecScatterEnd(ctx, vin, vout, INSERT_VALUES, SCATTER_FORWARD);

  // destroy scatter context and local vector when no longer needed
  VecScatterDestroy(&ctx);
  VecDestroy(&vout);
.ve

  Notes:
  `vout` may be `NULL` [`PETSC_NULL_VEC` from Fortran] if you do not
  need to have it created

  Do NOT create a vector and then pass it in as the final argument `vout`! `vout` is created by this routine
  automatically (unless you pass `NULL` in for that argument if you do not need it).

.seealso: [](sec_scatter), `VecScatter`, `VecScatterCreate()`, `VecScatterCreateToAll()`, `VecScatterBegin()`, `VecScatterEnd()`
@*/
PetscErrorCode VecScatterCreateToZero(Vec vin, VecScatter *ctx, Vec *vout)
{
  PetscInt    N;
  PetscMPIInt rank;
  IS          is;
  Vec         tmp;
  Vec        *tmpv;
  PetscBool   tmpvout = PETSC_FALSE;
  VecType     roottype;

  PetscFunctionBegin;

  PetscAssertPointer(ctx, 2);
  if (vout) {
    PetscAssertPointer(vout, 3);
    tmpv = vout;
  } else {
    tmpvout = PETSC_TRUE;
    tmpv    = &tmp;
  }

  /* Create a seq vec on each proc; it has the same size as the original vec on process 0, and size 0 elsewhere */
  PetscCall(VecGetSize(vin, &N));
  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)vin), &rank));
  if (rank) N = 0;
  PetscCall(VecGetRootType_Private(vin, &roottype));
  PetscCall(VecCreate(PETSC_COMM_SELF, tmpv));
  PetscCall(VecSetSizes(*tmpv, N, PETSC_DECIDE));
  PetscCall(VecSetType(*tmpv, roottype));
  /* Create the VecScatter ctx with the communication info */
  PetscCall(ISCreateStride(PETSC_COMM_SELF, N, 0, 1, &is));
  PetscCall(VecScatterCreate(vin, is, *tmpv, is, ctx));
  PetscCall(ISDestroy(&is));
  if (tmpvout) PetscCall(VecDestroy(tmpv));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  VecScatterBegin - Begins a generalized scatter from one vector to
  another. Complete the scattering phase with `VecScatterEnd()`.

  Neighbor-wise Collective

  Input Parameters:
+ sf   - scatter context generated by `VecScatterCreate()`
. x    - the vector from which we scatter
. y    - the vector to which we scatter
. addv - either `ADD_VALUES`, `MAX_VALUES`, `MIN_VALUES` or `INSERT_VALUES`; with `INSERT_VALUES` mode, any location
         not scattered to retains its old value, i.e. the vector is NOT first zeroed.
- mode - the scattering mode, usually `SCATTER_FORWARD`. The available modes are: `SCATTER_FORWARD` or `SCATTER_REVERSE`

  Level: intermediate

  Notes:
  The vectors `x` and `y` need not be the same vectors used in the call
  to `VecScatterCreate()`, but `x` must have the same parallel data layout
  as the `x` passed to `VecScatterCreate()`; similarly for `y`.
  Most likely they have been obtained from `VecDuplicate()`.

  You cannot change the values in the input vector between the calls to `VecScatterBegin()`
  and `VecScatterEnd()`.

  If you use `SCATTER_REVERSE`, the two arguments `x` and `y` should be reversed relative to
  the `SCATTER_FORWARD` case.

  y[iy[i]] = x[ix[i]], for i=0,...,ni-1

  This scatter is far more general than the conventional
  scatter, since it can be a gather or a scatter or a combination,
  depending on the indices ix and iy. If x is a parallel vector and y
  is sequential, `VecScatterBegin()` can serve to gather values to a
  single processor. Similarly, if `y` is parallel and `x` sequential, the
  routine can scatter from one processor to many processors.
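
  Example Usage:
  A sketch of a forward scatter followed by a reverse scatter with the same context; note the
  swapped vector arguments in the reverse calls:
.vb
  VecScatterBegin(sf, x, y, INSERT_VALUES, SCATTER_FORWARD);
  VecScatterEnd(sf, x, y, INSERT_VALUES, SCATTER_FORWARD);
  // ... use y ...
  VecScatterBegin(sf, y, x, ADD_VALUES, SCATTER_REVERSE); // x and y change places
  VecScatterEnd(sf, y, x, ADD_VALUES, SCATTER_REVERSE);
.ve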

.seealso: [](sec_scatter), `VecScatter`, `VecScatterCreate()`, `VecScatterEnd()`
@*/
PetscErrorCode VecScatterBegin(VecScatter sf, Vec x, Vec y, InsertMode addv, ScatterMode mode)
{
  PetscInt to_n, from_n;

  PetscFunctionBegin;

  if (PetscDefined(USE_DEBUG)) {
    /*
      Error checking to make sure these vectors match the vectors used
      to create the vector scatter context. -1 in from_n and to_n indicates the
      vector lengths are unknown (for example with mapped scatters) and thus
      no error checking is performed.
    */
    if (sf->vscat.from_n >= 0 && sf->vscat.to_n >= 0) {
      PetscCall(VecGetLocalSize(x, &from_n));
      PetscCall(VecGetLocalSize(y, &to_n));
      if (mode & SCATTER_REVERSE) {
        PetscCheck(to_n == sf->vscat.from_n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Vector wrong size %" PetscInt_FMT " for scatter %" PetscInt_FMT " (scatter reverse and vector to != sf from size)", to_n, sf->vscat.from_n);
        PetscCheck(from_n == sf->vscat.to_n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Vector wrong size %" PetscInt_FMT " for scatter %" PetscInt_FMT " (scatter reverse and vector from != sf to size)", from_n, sf->vscat.to_n);
      } else {
        PetscCheck(to_n == sf->vscat.to_n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Vector wrong size %" PetscInt_FMT " for scatter %" PetscInt_FMT " (scatter forward and vector to != sf to size)", to_n, sf->vscat.to_n);
        PetscCheck(from_n == sf->vscat.from_n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Vector wrong size %" PetscInt_FMT " for scatter %" PetscInt_FMT " (scatter forward and vector from != sf from size)", from_n, sf->vscat.from_n);
      }
    }
  }

  sf->vscat.logging = PETSC_TRUE;
  PetscCall(PetscLogEventBegin(VEC_ScatterBegin, sf, x, y, 0));
  PetscCall(VecScatterBegin_Internal(sf, x, y, addv, mode));
  if (sf->vscat.beginandendtogether) PetscCall(VecScatterEnd_Internal(sf, x, y, addv, mode));
  PetscCall(PetscLogEventEnd(VEC_ScatterBegin, sf, x, y, 0));
  sf->vscat.logging = PETSC_FALSE;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  VecScatterEnd - Ends a generalized scatter from one vector to another. Call
  after first calling `VecScatterBegin()`.

  Neighbor-wise Collective

  Input Parameters:
+ sf   - scatter context generated by `VecScatterCreate()`
. x    - the vector from which we scatter
. y    - the vector to which we scatter
. addv - one of `ADD_VALUES`, `MAX_VALUES`, `MIN_VALUES` or `INSERT_VALUES`
- mode - the scattering mode, usually `SCATTER_FORWARD`. The available modes are: `SCATTER_FORWARD`, `SCATTER_REVERSE`

  Level: intermediate

  Notes:
  If you use `SCATTER_REVERSE`, the arguments `x` and `y` should be reversed relative to the `SCATTER_FORWARD` case.

  y[iy[i]] = x[ix[i]], for i=0,...,ni-1

.seealso: [](sec_scatter), `VecScatter`, `VecScatterBegin()`, `VecScatterCreate()`
@*/
PetscErrorCode VecScatterEnd(VecScatter sf, Vec x, Vec y, InsertMode addv, ScatterMode mode)
{
  PetscFunctionBegin;

  if (!sf->vscat.beginandendtogether) {
    sf->vscat.logging = PETSC_TRUE;
    PetscCall(PetscLogEventBegin(VEC_ScatterEnd, sf, x, y, 0));
    PetscCall(VecScatterEnd_Internal(sf, x, y, addv, mode));
    PetscCall(PetscLogEventEnd(VEC_ScatterEnd, sf, x, y, 0));
    sf->vscat.logging = PETSC_FALSE;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}