Actual source code: mpits.c
#include <petscsys.h>
#include <petsc/private/petscimpl.h>

PetscLogEvent PETSC_BuildTwoSided;
PetscLogEvent PETSC_BuildTwoSidedF;
const char *const PetscBuildTwoSidedTypes[] = {
  "ALLREDUCE",
  "IBARRIER",
  "REDSCATTER",
  "PetscBuildTwoSidedType",
  "PETSC_BUILDTWOSIDED_",
  NULL
};

static PetscBuildTwoSidedType _twosided_type = PETSC_BUILDTWOSIDED_NOTSET;
/*@
   PetscCommBuildTwoSidedSetType - set the algorithm to use when building two-sided communication

   Logically Collective

   Input Parameters:
+  comm - communicator (the setting is currently global, so PETSC_COMM_WORLD is typical)
-  twosided - algorithm to use in subsequent calls to PetscCommBuildTwoSided()

   Level: developer

   Note:
   This option is currently global, but could be made per-communicator.

.seealso: PetscCommBuildTwoSided(), PetscCommBuildTwoSidedGetType()
@*/
PetscErrorCode PetscCommBuildTwoSidedSetType(MPI_Comm comm,PetscBuildTwoSidedType twosided)
{
  if (PetscDefined(USE_DEBUG)) { /* verify that all ranks were passed the same value */
    PetscMPIInt b1[2],b2[2];
    b1[0] = -(PetscMPIInt)twosided;
    b1[1] = (PetscMPIInt)twosided;
    MPIU_Allreduce(b1,b2,2,MPI_INT,MPI_MAX,comm);
    if (-b2[0] != b2[1]) SETERRQ(comm,PETSC_ERR_ARG_WRONG,"Enum value must be same on all processes");
  }
  _twosided_type = twosided;
  return 0;
}
/*@
   PetscCommBuildTwoSidedGetType - get the algorithm to use when building two-sided communication

   Logically Collective

   Input Parameter:
.  comm - communicator on which to query the algorithm

   Output Parameter:
.  twosided - algorithm used by PetscCommBuildTwoSided()

   Level: developer

.seealso: PetscCommBuildTwoSided(), PetscCommBuildTwoSidedSetType()
@*/
PetscErrorCode PetscCommBuildTwoSidedGetType(MPI_Comm comm,PetscBuildTwoSidedType *twosided)
{
  PetscMPIInt size;

  *twosided = PETSC_BUILDTWOSIDED_NOTSET;
  if (_twosided_type == PETSC_BUILDTWOSIDED_NOTSET) {
    MPI_Comm_size(comm,&size);
    _twosided_type = PETSC_BUILDTWOSIDED_ALLREDUCE; /* default for small comms, see https://gitlab.com/petsc/petsc/-/merge_requests/2611 */
#if defined(PETSC_HAVE_MPI_NONBLOCKING_COLLECTIVES)
    if (size > 1024) _twosided_type = PETSC_BUILDTWOSIDED_IBARRIER;
#endif
    PetscOptionsGetEnum(NULL,NULL,"-build_twosided",PetscBuildTwoSidedTypes,(PetscEnum*)&_twosided_type,NULL);
  }
  *twosided = _twosided_type;
  return 0;
}
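
/* Editor's sketch (not part of mpits.c): selecting the algorithm programmatically, equivalent
 * to the command-line option -build_twosided ibarrier.  ExampleSelectIbarrier is illustrative
 * only and is guarded out of compilation. */
#if 0
static PetscErrorCode ExampleSelectIbarrier(void)
{
  PetscBuildTwoSidedType t;

  PetscCommBuildTwoSidedSetType(PETSC_COMM_WORLD,PETSC_BUILDTWOSIDED_IBARRIER);
  PetscCommBuildTwoSidedGetType(PETSC_COMM_WORLD,&t); /* t is now PETSC_BUILDTWOSIDED_IBARRIER */
  return 0;
}
#endif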
#if defined(PETSC_HAVE_MPI_NONBLOCKING_COLLECTIVES)
static PetscErrorCode PetscCommBuildTwoSided_Ibarrier(MPI_Comm comm,PetscMPIInt count,MPI_Datatype dtype,PetscMPIInt nto,const PetscMPIInt *toranks,const void *todata,PetscMPIInt *nfrom,PetscMPIInt **fromranks,void *fromdata)
{
  PetscMPIInt    nrecvs,tag,done,i;
  MPI_Aint       lb,unitbytes;
  char           *tdata;
  MPI_Request    *sendreqs,barrier;
  PetscSegBuffer segrank,segdata;
  PetscBool      barrier_started;

  PetscCommDuplicate(comm,&comm,&tag);
  MPI_Type_get_extent(dtype,&lb,&unitbytes);

  tdata = (char*)todata;
  PetscMalloc1(nto,&sendreqs);
  /* Synchronous sends: completion implies the matching receive has started */
  for (i=0; i<nto; i++) {
    MPI_Issend((void*)(tdata+count*unitbytes*i),count,dtype,toranks[i],tag,comm,sendreqs+i);
  }
  PetscSegBufferCreate(sizeof(PetscMPIInt),4,&segrank);
  PetscSegBufferCreate(unitbytes,4*count,&segdata);

  nrecvs  = 0;
  barrier = MPI_REQUEST_NULL;
  /* MPICH-3.2 sometimes does not create a request in some "optimized" cases. This is arguably a standard violation,
   * but we need to work around it. */
  barrier_started = PETSC_FALSE;
  for (done=0; !done;) {
    PetscMPIInt flag;
    MPI_Status  status;
    MPI_Iprobe(MPI_ANY_SOURCE,tag,comm,&flag,&status);
    if (flag) { /* incoming message */
      PetscMPIInt *recvrank;
      void        *buf;
      PetscSegBufferGet(segrank,1,&recvrank);
      PetscSegBufferGet(segdata,count,&buf);
      *recvrank = status.MPI_SOURCE;
      MPI_Recv(buf,count,dtype,status.MPI_SOURCE,tag,comm,MPI_STATUS_IGNORE);
      nrecvs++;
    }
    if (!barrier_started) {
      PetscMPIInt sent,nsends;
      PetscMPIIntCast(nto,&nsends);
      MPI_Testall(nsends,sendreqs,&sent,MPI_STATUSES_IGNORE);
      if (sent) {
        /* All of our sends have been received; enter the nonblocking barrier */
        MPI_Ibarrier(comm,&barrier);
        barrier_started = PETSC_TRUE;
        PetscFree(sendreqs);
      }
    } else {
      MPI_Test(&barrier,&done,MPI_STATUS_IGNORE);
    }
  }
  *nfrom = nrecvs;
  PetscSegBufferExtractAlloc(segrank,fromranks);
  PetscSegBufferDestroy(&segrank);
  PetscSegBufferExtractAlloc(segdata,fromdata);
  PetscSegBufferDestroy(&segdata);
  PetscCommDestroy(&comm);
  return 0;
}
#endif
static PetscErrorCode PetscCommBuildTwoSided_Allreduce(MPI_Comm comm,PetscMPIInt count,MPI_Datatype dtype,PetscMPIInt nto,const PetscMPIInt *toranks,const void *todata,PetscMPIInt *nfrom,PetscMPIInt **fromranks,void *fromdata)
{
  PetscMPIInt      size,rank,*iflags,nrecvs,tag,*franks,i,flg;
  MPI_Aint         lb,unitbytes;
  char             *tdata,*fdata;
  MPI_Request      *reqs,*sendreqs;
  MPI_Status       *statuses;
  PetscCommCounter *counter;

  MPI_Comm_size(comm,&size);
  MPI_Comm_rank(comm,&rank);
  PetscCommDuplicate(comm,&comm,&tag);
  MPI_Comm_get_attr(comm,Petsc_Counter_keyval,&counter,&flg);
  if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_CORRUPT,"Inner MPI_Comm does not have expected tag/name counter, problem with corrupted memory");

  if (!counter->iflags) {
    PetscCalloc1(size,&counter->iflags);
    iflags = counter->iflags;
  } else {
    iflags = counter->iflags;
    PetscArrayzero(iflags,size);
  }
  for (i=0; i<nto; i++) iflags[toranks[i]] = 1;
  /* Summing the send flags over all ranks tells each rank how many messages it will receive */
  MPIU_Allreduce(MPI_IN_PLACE,iflags,size,MPI_INT,MPI_SUM,comm);
  nrecvs = iflags[rank];
  MPI_Type_get_extent(dtype,&lb,&unitbytes);

  PetscMalloc(nrecvs*count*unitbytes,&fdata);
  tdata = (char*)todata;
  PetscMalloc2(nto+nrecvs,&reqs,nto+nrecvs,&statuses);
  sendreqs = reqs + nrecvs;
  for (i=0; i<nrecvs; i++) {
    MPI_Irecv((void*)(fdata+count*unitbytes*i),count,dtype,MPI_ANY_SOURCE,tag,comm,reqs+i);
  }
  for (i=0; i<nto; i++) {
    MPI_Isend((void*)(tdata+count*unitbytes*i),count,dtype,toranks[i],tag,comm,sendreqs+i);
  }
  MPI_Waitall(nto+nrecvs,reqs,statuses);
  PetscMalloc1(nrecvs,&franks);
  for (i=0; i<nrecvs; i++) franks[i] = statuses[i].MPI_SOURCE;
  PetscFree2(reqs,statuses);
  PetscCommDestroy(&comm);

  *nfrom            = nrecvs;
  *fromranks        = franks;
  *(void**)fromdata = fdata;
  return 0;
}
#if defined(PETSC_HAVE_MPI_REDUCE_SCATTER_BLOCK)
static PetscErrorCode PetscCommBuildTwoSided_RedScatter(MPI_Comm comm,PetscMPIInt count,MPI_Datatype dtype,PetscMPIInt nto,const PetscMPIInt *toranks,const void *todata,PetscMPIInt *nfrom,PetscMPIInt **fromranks,void *fromdata)
{
  PetscMPIInt      size,*iflags,nrecvs,tag,*franks,i,flg;
  MPI_Aint         lb,unitbytes;
  char             *tdata,*fdata;
  MPI_Request      *reqs,*sendreqs;
  MPI_Status       *statuses;
  PetscCommCounter *counter;

  MPI_Comm_size(comm,&size);
  PetscCommDuplicate(comm,&comm,&tag);
  MPI_Comm_get_attr(comm,Petsc_Counter_keyval,&counter,&flg);
  if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_CORRUPT,"Inner MPI_Comm does not have expected tag/name counter, problem with corrupted memory");

  if (!counter->iflags) {
    PetscCalloc1(size,&counter->iflags);
    iflags = counter->iflags;
  } else {
    iflags = counter->iflags;
    PetscArrayzero(iflags,size);
  }
  for (i=0; i<nto; i++) iflags[toranks[i]] = 1;
  /* Each rank receives the sum over its column of the flag matrix: the number of messages destined for it */
  MPI_Reduce_scatter_block(iflags,&nrecvs,1,MPI_INT,MPI_SUM,comm);
  MPI_Type_get_extent(dtype,&lb,&unitbytes);

  PetscMalloc(nrecvs*count*unitbytes,&fdata);
  tdata = (char*)todata;
  PetscMalloc2(nto+nrecvs,&reqs,nto+nrecvs,&statuses);
  sendreqs = reqs + nrecvs;
  for (i=0; i<nrecvs; i++) {
    MPI_Irecv((void*)(fdata+count*unitbytes*i),count,dtype,MPI_ANY_SOURCE,tag,comm,reqs+i);
  }
  for (i=0; i<nto; i++) {
    MPI_Isend((void*)(tdata+count*unitbytes*i),count,dtype,toranks[i],tag,comm,sendreqs+i);
  }
  MPI_Waitall(nto+nrecvs,reqs,statuses);
  PetscMalloc1(nrecvs,&franks);
  for (i=0; i<nrecvs; i++) franks[i] = statuses[i].MPI_SOURCE;
  PetscFree2(reqs,statuses);
  PetscCommDestroy(&comm);

  *nfrom            = nrecvs;
  *fromranks        = franks;
  *(void**)fromdata = fdata;
  return 0;
}
#endif
/*@C
   PetscCommBuildTwoSided - discovers communicating ranks given one-sided information, moving constant-sized data in the process (often message lengths)

   Collective

   Input Parameters:
+  comm - communicator
.  count - number of entries to send/receive (must match on all ranks)
.  dtype - datatype to send/receive from each rank (must match on all ranks)
.  nto - number of ranks to send data to
.  toranks - ranks to send to (array of length nto)
-  todata - data to send to each rank (packed)

   Output Parameters:
+  nfrom - number of ranks from which messages are received
.  fromranks - ranks from which messages are received (length nfrom; caller should PetscFree())
-  fromdata - packed data received from each rank, each with count entries of type dtype (length nfrom; caller responsible for PetscFree())

   Level: developer

   Options Database Key:
.  -build_twosided <allreduce|ibarrier|redscatter> - algorithm to set up two-sided communication. Default is allreduce for communicators with <= 1024 ranks, otherwise ibarrier.

   Notes:
   This memory-scalable interface is an alternative to calling PetscGatherNumberOfMessages() and
   PetscGatherMessageLengths(), possibly with a subsequent round of communication to send other constant-size data.

   Basic data types as well as contiguous types are supported, but non-contiguous (e.g., strided) types are not.

   References:
.  * - Hoefler, Siebert and Lumsdaine, Scalable communication protocols for dynamic sparse data exchange, 2010.
   The MPI_Ibarrier implementation uses the algorithm from this paper.

.seealso: PetscGatherNumberOfMessages(), PetscGatherMessageLengths()
@*/
PetscErrorCode PetscCommBuildTwoSided(MPI_Comm comm,PetscMPIInt count,MPI_Datatype dtype,PetscMPIInt nto,const PetscMPIInt *toranks,const void *todata,PetscMPIInt *nfrom,PetscMPIInt **fromranks,void *fromdata)
{
  PetscBuildTwoSidedType buildtype = PETSC_BUILDTWOSIDED_NOTSET;

  PetscSysInitializePackage();
  PetscLogEventSync(PETSC_BuildTwoSided,comm);
  PetscLogEventBegin(PETSC_BuildTwoSided,0,0,0,0);
  PetscCommBuildTwoSidedGetType(comm,&buildtype);
  switch (buildtype) {
  case PETSC_BUILDTWOSIDED_IBARRIER:
#if defined(PETSC_HAVE_MPI_NONBLOCKING_COLLECTIVES)
    PetscCommBuildTwoSided_Ibarrier(comm,count,dtype,nto,toranks,todata,nfrom,fromranks,fromdata);
    break;
#else
    SETERRQ(comm,PETSC_ERR_PLIB,"MPI implementation does not provide MPI_Ibarrier (part of MPI-3)");
#endif
  case PETSC_BUILDTWOSIDED_ALLREDUCE:
    PetscCommBuildTwoSided_Allreduce(comm,count,dtype,nto,toranks,todata,nfrom,fromranks,fromdata);
    break;
  case PETSC_BUILDTWOSIDED_REDSCATTER:
#if defined(PETSC_HAVE_MPI_REDUCE_SCATTER_BLOCK)
    PetscCommBuildTwoSided_RedScatter(comm,count,dtype,nto,toranks,todata,nfrom,fromranks,fromdata);
    break;
#else
    SETERRQ(comm,PETSC_ERR_PLIB,"MPI implementation does not provide MPI_Reduce_scatter_block (part of MPI-2.2)");
#endif
  default: SETERRQ(comm,PETSC_ERR_PLIB,"Unknown method for building two-sided communication");
  }
  PetscLogEventEnd(PETSC_BuildTwoSided,0,0,0,0);
  return 0;
}
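
/* Editor's sketch (not part of mpits.c): the classic use of PetscCommBuildTwoSided() is to
 * exchange message lengths before a larger payload exchange.  Each rank sends one PetscMPIInt to
 * every rank in toranks[]; afterward fromranks[]/fromlengths[] describe the discovered senders.
 * ExampleExchangeLengths and its arguments are illustrative, not part of the PETSc API. */
#if 0
static PetscErrorCode ExampleExchangeLengths(MPI_Comm comm,PetscMPIInt nto,const PetscMPIInt *toranks,const PetscMPIInt *tolengths)
{
  PetscMPIInt nfrom,*fromranks,*fromlengths,i;

  PetscCommBuildTwoSided(comm,1,MPI_INT,nto,toranks,tolengths,&nfrom,&fromranks,&fromlengths);
  for (i=0; i<nfrom; i++) {
    /* rank fromranks[i] will send us a message with fromlengths[i] entries */
  }
  PetscFree(fromranks);
  PetscFree(fromlengths);
  return 0;
}
#endif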
static PetscErrorCode PetscCommBuildTwoSidedFReq_Reference(MPI_Comm comm,PetscMPIInt count,MPI_Datatype dtype,PetscMPIInt nto,const PetscMPIInt *toranks,const void *todata,
                                                           PetscMPIInt *nfrom,PetscMPIInt **fromranks,void *fromdata,PetscMPIInt ntags,MPI_Request **toreqs,MPI_Request **fromreqs,
                                                           PetscErrorCode (*send)(MPI_Comm,const PetscMPIInt[],PetscMPIInt,PetscMPIInt,void*,MPI_Request[],void*),
                                                           PetscErrorCode (*recv)(MPI_Comm,const PetscMPIInt[],PetscMPIInt,void*,MPI_Request[],void*),void *ctx)
{
  PetscMPIInt i,*tag;
  MPI_Aint    lb,unitbytes;
  MPI_Request *sendreq,*recvreq;

  PetscMalloc1(ntags,&tag);
  if (ntags > 0) {
    PetscCommDuplicate(comm,&comm,&tag[0]);
  }
  for (i=1; i<ntags; i++) {
    PetscCommGetNewTag(comm,&tag[i]);
  }

  /* Perform complete initial rendezvous */
  PetscCommBuildTwoSided(comm,count,dtype,nto,toranks,todata,nfrom,fromranks,fromdata);

  PetscMalloc1(nto*ntags,&sendreq);
  PetscMalloc1(*nfrom*ntags,&recvreq);

  MPI_Type_get_extent(dtype,&lb,&unitbytes);

  for (i=0; i<nto; i++) {
    PetscMPIInt k;
    for (k=0; k<ntags; k++) sendreq[i*ntags+k] = MPI_REQUEST_NULL;
    (*send)(comm,tag,i,toranks[i],((char*)todata)+count*unitbytes*i,sendreq+i*ntags,ctx);
  }
  for (i=0; i<*nfrom; i++) {
    void        *header = (*(char**)fromdata) + count*unitbytes*i;
    PetscMPIInt k;
    for (k=0; k<ntags; k++) recvreq[i*ntags+k] = MPI_REQUEST_NULL;
    (*recv)(comm,tag,(*fromranks)[i],header,recvreq+i*ntags,ctx);
  }
  PetscFree(tag);
  PetscCommDestroy(&comm);
  *toreqs   = sendreq;
  *fromreqs = recvreq;
  return 0;
}
#if defined(PETSC_HAVE_MPI_NONBLOCKING_COLLECTIVES)
static PetscErrorCode PetscCommBuildTwoSidedFReq_Ibarrier(MPI_Comm comm,PetscMPIInt count,MPI_Datatype dtype,PetscMPIInt nto,const PetscMPIInt *toranks,const void *todata,
                                                          PetscMPIInt *nfrom,PetscMPIInt **fromranks,void *fromdata,PetscMPIInt ntags,MPI_Request **toreqs,MPI_Request **fromreqs,
                                                          PetscErrorCode (*send)(MPI_Comm,const PetscMPIInt[],PetscMPIInt,PetscMPIInt,void*,MPI_Request[],void*),
                                                          PetscErrorCode (*recv)(MPI_Comm,const PetscMPIInt[],PetscMPIInt,void*,MPI_Request[],void*),void *ctx)
{
  PetscMPIInt    nrecvs,tag,*tags,done,i;
  MPI_Aint       lb,unitbytes;
  char           *tdata;
  MPI_Request    *sendreqs,*usendreqs,*req,barrier;
  PetscSegBuffer segrank,segdata,segreq;
  PetscBool      barrier_started;

  PetscCommDuplicate(comm,&comm,&tag);
  PetscMalloc1(ntags,&tags);
  for (i=0; i<ntags; i++) {
    PetscCommGetNewTag(comm,&tags[i]);
  }
  MPI_Type_get_extent(dtype,&lb,&unitbytes);

  tdata = (char*)todata;
  PetscMalloc1(nto,&sendreqs);
  PetscMalloc1(nto*ntags,&usendreqs);
  /* Post synchronous sends */
  for (i=0; i<nto; i++) {
    MPI_Issend((void*)(tdata+count*unitbytes*i),count,dtype,toranks[i],tag,comm,sendreqs+i);
  }
  /* Post actual payloads. These are typically larger messages. Hopefully sending these later does not slow down the
   * synchronous messages above. */
  for (i=0; i<nto; i++) {
    PetscMPIInt k;
    for (k=0; k<ntags; k++) usendreqs[i*ntags+k] = MPI_REQUEST_NULL;
    (*send)(comm,tags,i,toranks[i],tdata+count*unitbytes*i,usendreqs+i*ntags,ctx);
  }

  PetscSegBufferCreate(sizeof(PetscMPIInt),4,&segrank);
  PetscSegBufferCreate(unitbytes,4*count,&segdata);
  PetscSegBufferCreate(sizeof(MPI_Request),4,&segreq);

  nrecvs  = 0;
  barrier = MPI_REQUEST_NULL;
  /* MPICH-3.2 sometimes does not create a request in some "optimized" cases. This is arguably a standard violation,
   * but we need to work around it. */
  barrier_started = PETSC_FALSE;
  for (done=0; !done;) {
    PetscMPIInt flag;
    MPI_Status  status;
    MPI_Iprobe(MPI_ANY_SOURCE,tag,comm,&flag,&status);
    if (flag) { /* incoming message */
      PetscMPIInt *recvrank,k;
      void        *buf;
      PetscSegBufferGet(segrank,1,&recvrank);
      PetscSegBufferGet(segdata,count,&buf);
      *recvrank = status.MPI_SOURCE;
      MPI_Recv(buf,count,dtype,status.MPI_SOURCE,tag,comm,MPI_STATUS_IGNORE);
      PetscSegBufferGet(segreq,ntags,&req);
      for (k=0; k<ntags; k++) req[k] = MPI_REQUEST_NULL;
      (*recv)(comm,tags,status.MPI_SOURCE,buf,req,ctx);
      nrecvs++;
    }
    if (!barrier_started) {
      PetscMPIInt sent,nsends;
      PetscMPIIntCast(nto,&nsends);
      MPI_Testall(nsends,sendreqs,&sent,MPI_STATUSES_IGNORE);
      if (sent) {
        MPI_Ibarrier(comm,&barrier);
        barrier_started = PETSC_TRUE;
      }
    } else {
      MPI_Test(&barrier,&done,MPI_STATUS_IGNORE);
    }
  }
  *nfrom = nrecvs;
  PetscSegBufferExtractAlloc(segrank,fromranks);
  PetscSegBufferDestroy(&segrank);
  PetscSegBufferExtractAlloc(segdata,fromdata);
  PetscSegBufferDestroy(&segdata);
  *toreqs = usendreqs;
  PetscSegBufferExtractAlloc(segreq,fromreqs);
  PetscSegBufferDestroy(&segreq);
  PetscFree(sendreqs);
  PetscFree(tags);
  PetscCommDestroy(&comm);
  return 0;
}
#endif
/*@C
   PetscCommBuildTwoSidedF - discovers communicating ranks given one-sided information, calling user-defined functions during rendezvous

   Collective

   Input Parameters:
+  comm - communicator
.  count - number of entries to send/receive in initial rendezvous (must match on all ranks)
.  dtype - datatype to send/receive from each rank (must match on all ranks)
.  nto - number of ranks to send data to
.  toranks - ranks to send to (array of length nto)
.  todata - data to send to each rank (packed)
.  ntags - number of tags needed by send/recv callbacks
.  send - callback invoked on sending process when ready to send primary payload
.  recv - callback invoked on receiving process after delivery of rendezvous message
-  ctx - context for callbacks

   Output Parameters:
+  nfrom - number of ranks from which messages are received
.  fromranks - ranks from which messages are received (length nfrom; caller should PetscFree())
-  fromdata - packed data received from each rank, each with count entries of type dtype (length nfrom; caller responsible for PetscFree())

   Level: developer

   Notes:
   This memory-scalable interface is an alternative to calling PetscGatherNumberOfMessages() and
   PetscGatherMessageLengths(), possibly with a subsequent round of communication to send other data.

   Basic data types as well as contiguous types are supported, but non-contiguous (e.g., strided) types are not.

   References:
.  * - Hoefler, Siebert and Lumsdaine, Scalable communication protocols for dynamic sparse data exchange, 2010.
   The MPI_Ibarrier implementation uses the algorithm from this paper.

.seealso: PetscCommBuildTwoSided(), PetscCommBuildTwoSidedFReq(), PetscGatherNumberOfMessages(), PetscGatherMessageLengths()
@*/
PetscErrorCode PetscCommBuildTwoSidedF(MPI_Comm comm,PetscMPIInt count,MPI_Datatype dtype,PetscMPIInt nto,const PetscMPIInt *toranks,const void *todata,PetscMPIInt *nfrom,PetscMPIInt **fromranks,void *fromdata,PetscMPIInt ntags,
                                       PetscErrorCode (*send)(MPI_Comm,const PetscMPIInt[],PetscMPIInt,PetscMPIInt,void*,MPI_Request[],void*),
                                       PetscErrorCode (*recv)(MPI_Comm,const PetscMPIInt[],PetscMPIInt,void*,MPI_Request[],void*),void *ctx)
{
  MPI_Request *toreqs,*fromreqs;

  PetscCommBuildTwoSidedFReq(comm,count,dtype,nto,toranks,todata,nfrom,fromranks,fromdata,ntags,&toreqs,&fromreqs,send,recv,ctx);
  MPI_Waitall(nto*ntags,toreqs,MPI_STATUSES_IGNORE);
  MPI_Waitall(*nfrom*ntags,fromreqs,MPI_STATUSES_IGNORE);
  PetscFree(toreqs);
  PetscFree(fromreqs);
  return 0;
}
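
/* Editor's sketch (not part of mpits.c): callbacks for PetscCommBuildTwoSidedF() with ntags==1.
 * The rendezvous message carries the payload length; send() posts the payload with MPI_Isend and
 * recv() allocates a buffer and posts the matching MPI_Irecv.  ExampleCtx and both callbacks are
 * illustrative only. */
#if 0
typedef struct {
  double      **sendbufs; /* sendbufs[i] holds the payload for toranks[i] */
  double      **recvbufs; /* filled in by ExampleRecv, one buffer per sender */
  PetscMPIInt nposted;    /* number of receives posted so far */
} ExampleCtx;

static PetscErrorCode ExampleSend(MPI_Comm comm,const PetscMPIInt tag[],PetscMPIInt i,PetscMPIInt rank,void *header,MPI_Request req[],void *ctx)
{
  ExampleCtx *ec = (ExampleCtx*)ctx;

  /* header points at this rank's rendezvous entry: the payload length we advertised */
  MPI_Isend(ec->sendbufs[i],*(PetscMPIInt*)header,MPI_DOUBLE,rank,tag[0],comm,&req[0]);
  return 0;
}

static PetscErrorCode ExampleRecv(MPI_Comm comm,const PetscMPIInt tag[],PetscMPIInt rank,void *header,MPI_Request req[],void *ctx)
{
  ExampleCtx  *ec = (ExampleCtx*)ctx;
  PetscMPIInt len = *(PetscMPIInt*)header; /* length delivered by the rendezvous message */

  PetscMalloc1(len,&ec->recvbufs[ec->nposted]);
  MPI_Irecv(ec->recvbufs[ec->nposted],len,MPI_DOUBLE,rank,tag[0],comm,&req[0]);
  ec->nposted++;
  return 0;
}
#endif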
/*@C
   PetscCommBuildTwoSidedFReq - discovers communicating ranks given one-sided information, calling user-defined functions during rendezvous, returns requests

   Collective

   Input Parameters:
+  comm - communicator
.  count - number of entries to send/receive in initial rendezvous (must match on all ranks)
.  dtype - datatype to send/receive from each rank (must match on all ranks)
.  nto - number of ranks to send data to
.  toranks - ranks to send to (array of length nto)
.  todata - data to send to each rank (packed)
.  ntags - number of tags needed by send/recv callbacks
.  send - callback invoked on sending process when ready to send primary payload
.  recv - callback invoked on receiving process after delivery of rendezvous message
-  ctx - context for callbacks

   Output Parameters:
+  nfrom - number of ranks from which messages are received
.  fromranks - ranks from which messages are received (length nfrom; caller should PetscFree())
.  fromdata - packed data received from each rank, each with count entries of type dtype (length nfrom; caller responsible for PetscFree())
.  toreqs - array of nto*ntags sender requests (caller must wait on these, then PetscFree())
-  fromreqs - array of nfrom*ntags receiver requests (caller must wait on these, then PetscFree())

   Level: developer

   Notes:
   This memory-scalable interface is an alternative to calling PetscGatherNumberOfMessages() and
   PetscGatherMessageLengths(), possibly with a subsequent round of communication to send other data.

   Basic data types as well as contiguous types are supported, but non-contiguous (e.g., strided) types are not.

   References:
.  * - Hoefler, Siebert and Lumsdaine, Scalable communication protocols for dynamic sparse data exchange, 2010.
   The MPI_Ibarrier implementation uses the algorithm from this paper.

.seealso: PetscCommBuildTwoSided(), PetscCommBuildTwoSidedF(), PetscGatherNumberOfMessages(), PetscGatherMessageLengths()
@*/
PetscErrorCode PetscCommBuildTwoSidedFReq(MPI_Comm comm,PetscMPIInt count,MPI_Datatype dtype,PetscMPIInt nto,const PetscMPIInt *toranks,const void *todata,
                                          PetscMPIInt *nfrom,PetscMPIInt **fromranks,void *fromdata,PetscMPIInt ntags,MPI_Request **toreqs,MPI_Request **fromreqs,
                                          PetscErrorCode (*send)(MPI_Comm,const PetscMPIInt[],PetscMPIInt,PetscMPIInt,void*,MPI_Request[],void*),
                                          PetscErrorCode (*recv)(MPI_Comm,const PetscMPIInt[],PetscMPIInt,void*,MPI_Request[],void*),void *ctx)
{
  PetscErrorCode (*f)(MPI_Comm,PetscMPIInt,MPI_Datatype,PetscMPIInt,const PetscMPIInt[],const void*,
                      PetscMPIInt*,PetscMPIInt**,void*,PetscMPIInt,MPI_Request**,MPI_Request**,
                      PetscErrorCode (*send)(MPI_Comm,const PetscMPIInt[],PetscMPIInt,PetscMPIInt,void*,MPI_Request[],void*),
                      PetscErrorCode (*recv)(MPI_Comm,const PetscMPIInt[],PetscMPIInt,void*,MPI_Request[],void*),void *ctx);
  PetscBuildTwoSidedType buildtype = PETSC_BUILDTWOSIDED_NOTSET;
  PetscMPIInt i,size;

  PetscSysInitializePackage();
  MPI_Comm_size(comm,&size);
  for (i=0; i<nto; i++) { /* check that target ranks are valid for this communicator */
    if (toranks[i] < 0 || size <= toranks[i]) SETERRQ(comm,PETSC_ERR_ARG_OUTOFRANGE,"toranks[i] not in communicator size range");
  }
  PetscLogEventSync(PETSC_BuildTwoSidedF,comm);
  PetscLogEventBegin(PETSC_BuildTwoSidedF,0,0,0,0);
  PetscCommBuildTwoSidedGetType(comm,&buildtype);
  switch (buildtype) {
  case PETSC_BUILDTWOSIDED_IBARRIER:
#if defined(PETSC_HAVE_MPI_NONBLOCKING_COLLECTIVES)
    f = PetscCommBuildTwoSidedFReq_Ibarrier;
    break;
#else
    SETERRQ(comm,PETSC_ERR_PLIB,"MPI implementation does not provide MPI_Ibarrier (part of MPI-3)");
#endif
  case PETSC_BUILDTWOSIDED_ALLREDUCE:
  case PETSC_BUILDTWOSIDED_REDSCATTER:
    f = PetscCommBuildTwoSidedFReq_Reference;
    break;
  default: SETERRQ(comm,PETSC_ERR_PLIB,"Unknown method for building two-sided communication");
  }
  (*f)(comm,count,dtype,nto,toranks,todata,nfrom,fromranks,fromdata,ntags,toreqs,fromreqs,send,recv,ctx);
  PetscLogEventEnd(PETSC_BuildTwoSidedF,0,0,0,0);
  return 0;
}
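
/* Editor's sketch (not part of mpits.c): unlike PetscCommBuildTwoSidedF(), the caller of
 * PetscCommBuildTwoSidedFReq() must complete the returned requests itself, which allows other
 * work to overlap the payload exchange.  Uses the illustrative ExampleCtx/ExampleSend/ExampleRecv
 * sketched above; all names here are hypothetical. */
#if 0
static PetscErrorCode ExampleExchangeOverlap(MPI_Comm comm,PetscMPIInt nto,const PetscMPIInt *toranks,const PetscMPIInt *tolengths,ExampleCtx *ec)
{
  MPI_Request *toreqs,*fromreqs;
  PetscMPIInt nfrom,*fromranks,*fromlengths;

  PetscCommBuildTwoSidedFReq(comm,1,MPI_INT,nto,toranks,tolengths,&nfrom,&fromranks,&fromlengths,1,&toreqs,&fromreqs,ExampleSend,ExampleRecv,ec);
  /* ... unrelated computation can overlap the payload exchange here ... */
  MPI_Waitall(nto*1,toreqs,MPI_STATUSES_IGNORE);
  MPI_Waitall(nfrom*1,fromreqs,MPI_STATUSES_IGNORE);
  PetscFree(toreqs);
  PetscFree(fromreqs);
  PetscFree(fromranks);
  PetscFree(fromlengths);
  return 0;
}
#endif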