Actual source code: mpits.c
petsc-3.11.4 2019-09-28
1: #include <petscsys.h>
2: #include <petsc/private/petscimpl.h>
4: PetscLogEvent PETSC_BuildTwoSided;
5: PetscLogEvent PETSC_BuildTwoSidedF;
7: const char *const PetscBuildTwoSidedTypes[] = {
8: "ALLREDUCE",
9: "IBARRIER",
10: "REDSCATTER",
11: "PetscBuildTwoSidedType",
12: "PETSC_BUILDTWOSIDED_",
13: 0
14: };
16: static PetscBuildTwoSidedType _twosided_type = PETSC_BUILDTWOSIDED_NOTSET;
18: /*@
19: PetscCommBuildTwoSidedSetType - set algorithm to use when building two-sided communication
21: Logically Collective
23: Input Arguments:
24: + comm - communicator (the setting is currently global, so PETSC_COMM_WORLD is typical; see Notes)
25: - twosided - algorithm to use in subsequent calls to PetscCommBuildTwoSided()
27: Level: developer
29: Note:
30: This option is currently global, but could be made per-communicator.
32: .seealso: PetscCommBuildTwoSided(), PetscCommBuildTwoSidedGetType()
33: @*/
34: PetscErrorCode PetscCommBuildTwoSidedSetType(MPI_Comm comm,PetscBuildTwoSidedType twosided)
35: {
37: #if defined(PETSC_USE_DEBUG)
38: {
39: PetscMPIInt ierr;
40: PetscMPIInt b1[2],b2[2];
41: b1[0] = -(PetscMPIInt)twosided;
42: b1[1] = (PetscMPIInt)twosided;
43: ierr = MPIU_Allreduce(b1,b2,2,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr);
44: if (-b2[0] != b2[1]) SETERRQ(comm,PETSC_ERR_ARG_WRONG,"Enum value must be same on all processes");
45: }
46: #endif
47: _twosided_type = twosided;
48: return(0);
49: }
51: /*@
52: PetscCommBuildTwoSidedGetType - get the algorithm that will be used when building two-sided communication
54: Logically Collective
56: Input/Output Arguments:
57: + comm - communicator on which to query the algorithm (input)
58: - twosided - algorithm that will be used by PetscCommBuildTwoSided() (output)
60: Level: developer
62: .seealso: PetscCommBuildTwoSided(), PetscCommBuildTwoSidedSetType()
63: @*/
64: PetscErrorCode PetscCommBuildTwoSidedGetType(MPI_Comm comm,PetscBuildTwoSidedType *twosided)
65: {
69: *twosided = PETSC_BUILDTWOSIDED_NOTSET;
70: if (_twosided_type == PETSC_BUILDTWOSIDED_NOTSET) {
71: #if defined(PETSC_HAVE_MPI_IBARRIER)
72: # if defined(PETSC_HAVE_MPICH_CH3_SOCK) && !defined(PETSC_HAVE_MPICH_CH3_SOCK_FIXED_NBC_PROGRESS)
73: /* Deadlock in Ibarrier: http://trac.mpich.org/projects/mpich/ticket/1785 */
74: _twosided_type = PETSC_BUILDTWOSIDED_ALLREDUCE;
75: # else
76: _twosided_type = PETSC_BUILDTWOSIDED_IBARRIER;
77: # endif
78: #else
79: _twosided_type = PETSC_BUILDTWOSIDED_ALLREDUCE;
80: #endif
81: PetscOptionsGetEnum(NULL,NULL,"-build_twosided",PetscBuildTwoSidedTypes,(PetscEnum*)&_twosided_type,NULL);
82: }
83: *twosided = _twosided_type;
84: return(0);
85: }
87: #if defined(PETSC_HAVE_MPI_IBARRIER) || defined(PETSC_HAVE_MPIX_IBARRIER)
89: static PetscErrorCode PetscCommBuildTwoSided_Ibarrier(MPI_Comm comm,PetscMPIInt count,MPI_Datatype dtype,PetscMPIInt nto,const PetscMPIInt *toranks,const void *todata,PetscMPIInt *nfrom,PetscMPIInt **fromranks,void *fromdata)
90: {
92: PetscMPIInt nrecvs,tag,done,i;
93: MPI_Aint lb,unitbytes;
94: char *tdata;
95: MPI_Request *sendreqs,barrier;
96: PetscSegBuffer segrank,segdata;
97: PetscBool barrier_started;
100: PetscCommDuplicate(comm,&comm,&tag);
101: MPI_Type_get_extent(dtype,&lb,&unitbytes);
102: if (lb != 0) SETERRQ1(comm,PETSC_ERR_SUP,"Datatype with nonzero lower bound %ld\n",(long)lb);
103: tdata = (char*)todata;
104: PetscMalloc1(nto,&sendreqs);
105: for (i=0; i<nto; i++) {
106: MPI_Issend((void*)(tdata+count*unitbytes*i),count,dtype,toranks[i],tag,comm,sendreqs+i);
107: }
108: PetscSegBufferCreate(sizeof(PetscMPIInt),4,&segrank);
109: PetscSegBufferCreate(unitbytes,4*count,&segdata);
111: nrecvs = 0;
112: barrier = MPI_REQUEST_NULL;
113: /* MPICH-3.2 sometimes does not create a request in some "optimized" cases. This is arguably a standard violation,
114: * but we need to work around it. */
115: barrier_started = PETSC_FALSE;
116: for (done=0; !done; ) {
117: PetscMPIInt flag;
118: MPI_Status status;
119: MPI_Iprobe(MPI_ANY_SOURCE,tag,comm,&flag,&status);
120: if (flag) { /* incoming message */
121: PetscMPIInt *recvrank;
122: void *buf;
123: PetscSegBufferGet(segrank,1,&recvrank);
124: PetscSegBufferGet(segdata,count,&buf);
125: *recvrank = status.MPI_SOURCE;
126: MPI_Recv(buf,count,dtype,status.MPI_SOURCE,tag,comm,MPI_STATUS_IGNORE);
127: nrecvs++;
128: }
129: if (!barrier_started) {
130: PetscMPIInt sent,nsends;
131: PetscMPIIntCast(nto,&nsends);
132: MPI_Testall(nsends,sendreqs,&sent,MPI_STATUSES_IGNORE);
133: if (sent) {
134: #if defined(PETSC_HAVE_MPI_IBARRIER)
135: MPI_Ibarrier(comm,&barrier);
136: #elif defined(PETSC_HAVE_MPIX_IBARRIER)
137: MPIX_Ibarrier(comm,&barrier);
138: #endif
139: barrier_started = PETSC_TRUE;
140: PetscFree(sendreqs);
141: }
142: } else {
143: MPI_Test(&barrier,&done,MPI_STATUS_IGNORE);
144: }
145: }
146: *nfrom = nrecvs;
147: PetscSegBufferExtractAlloc(segrank,fromranks);
148: PetscSegBufferDestroy(&segrank);
149: PetscSegBufferExtractAlloc(segdata,fromdata);
150: PetscSegBufferDestroy(&segdata);
151: PetscCommDestroy(&comm);
152: return(0);
153: }
154: #endif
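For readers following the routine above, here is a self-contained MPI sketch of the same rendezvous pattern (synchronous sends, probing for unexpected messages, and a nonblocking barrier once the local sends complete), stripped of the PETSc machinery and of the MPICH workaround discussed in the comment above. It assumes an MPI-3 implementation; every name in it is local to the sketch.

#include <mpi.h>
#include <stdio.h>

int main(int argc,char **argv)
{
  int         rank,size,dest,payload,done = 0,sent = 0,started = 0;
  MPI_Request sendreq,barrier = MPI_REQUEST_NULL;

  MPI_Init(&argc,&argv);
  MPI_Comm_rank(MPI_COMM_WORLD,&rank);
  MPI_Comm_size(MPI_COMM_WORLD,&size);
  dest    = (rank+1)%size;
  payload = 100+rank;
  /* Synchronous send: completion implies the receiver has matched the message */
  MPI_Issend(&payload,1,MPI_INT,dest,0,MPI_COMM_WORLD,&sendreq);
  while (!done) {
    int        flag;
    MPI_Status status;
    MPI_Iprobe(MPI_ANY_SOURCE,0,MPI_COMM_WORLD,&flag,&status);
    if (flag) {                 /* incoming message from an a-priori unknown sender */
      int buf;
      MPI_Recv(&buf,1,MPI_INT,status.MPI_SOURCE,0,MPI_COMM_WORLD,MPI_STATUS_IGNORE);
      printf("[%d] received %d from rank %d\n",rank,buf,status.MPI_SOURCE);
    }
    if (!started) {             /* once my sends are matched, join the nonblocking barrier */
      MPI_Test(&sendreq,&sent,MPI_STATUS_IGNORE);
      if (sent) { MPI_Ibarrier(MPI_COMM_WORLD,&barrier); started = 1; }
    } else {                    /* barrier complete => all sends everywhere were matched */
      MPI_Test(&barrier,&done,MPI_STATUS_IGNORE);
    }
  }
  MPI_Finalize();
  return 0;
}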
156: static PetscErrorCode PetscCommBuildTwoSided_Allreduce(MPI_Comm comm,PetscMPIInt count,MPI_Datatype dtype,PetscMPIInt nto,const PetscMPIInt *toranks,const void *todata,PetscMPIInt *nfrom,PetscMPIInt **fromranks,void *fromdata)
157: {
159: PetscMPIInt size,*iflags,nrecvs,tag,*franks,i;
160: MPI_Aint lb,unitbytes;
161: char *tdata,*fdata;
162: MPI_Request *reqs,*sendreqs;
163: MPI_Status *statuses;
166: MPI_Comm_size(comm,&size);
167: PetscCalloc1(size,&iflags);
168: for (i=0; i<nto; i++) iflags[toranks[i]] = 1;
169: PetscGatherNumberOfMessages(comm,iflags,NULL,&nrecvs);
170: PetscFree(iflags);
172: PetscCommDuplicate(comm,&comm,&tag);
173: MPI_Type_get_extent(dtype,&lb,&unitbytes);
174: if (lb != 0) SETERRQ1(comm,PETSC_ERR_SUP,"Datatype with nonzero lower bound %ld\n",(long)lb);
175: PetscMalloc(nrecvs*count*unitbytes,&fdata);
176: tdata = (char*)todata;
177: PetscMalloc2(nto+nrecvs,&reqs,nto+nrecvs,&statuses);
178: sendreqs = reqs + nrecvs;
179: for (i=0; i<nrecvs; i++) {
180: MPI_Irecv((void*)(fdata+count*unitbytes*i),count,dtype,MPI_ANY_SOURCE,tag,comm,reqs+i);
181: }
182: for (i=0; i<nto; i++) {
183: MPI_Isend((void*)(tdata+count*unitbytes*i),count,dtype,toranks[i],tag,comm,sendreqs+i);
184: }
185: MPI_Waitall(nto+nrecvs,reqs,statuses);
186: PetscMalloc1(nrecvs,&franks);
187: for (i=0; i<nrecvs; i++) franks[i] = statuses[i].MPI_SOURCE;
188: PetscFree2(reqs,statuses);
189: PetscCommDestroy(&comm);
191: *nfrom = nrecvs;
192: *fromranks = franks;
193: *(void**)fromdata = fdata;
194: return(0);
195: }
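The variant above delegates the receive count to PetscGatherNumberOfMessages(). As a standalone illustration of the underlying counting idea (not necessarily how that routine is implemented internally), summing a 0/1 destination-flag vector over all ranks lets each rank read, from its own entry, how many processes will send to it. The helper name below is hypothetical.

#include <stdlib.h>
#include <mpi.h>

/* Hypothetical helper: return the number of ranks that will send to this process,
 * given that this process sends to the nto ranks listed in toranks. */
static int CountIncoming(MPI_Comm comm,int nto,const int *toranks)
{
  int rank,size,nrecvs,i,*iflags,*counts;

  MPI_Comm_rank(comm,&rank);
  MPI_Comm_size(comm,&size);
  iflags = (int*)calloc(size,sizeof(int));
  counts = (int*)malloc(size*sizeof(int));
  for (i=0; i<nto; i++) iflags[toranks[i]] = 1;       /* mark my destinations */
  MPI_Allreduce(iflags,counts,size,MPI_INT,MPI_SUM,comm);
  nrecvs = counts[rank];                              /* my entry = number of senders targeting me */
  free(iflags); free(counts);
  return nrecvs;
}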
197: #if defined(PETSC_HAVE_MPI_REDUCE_SCATTER_BLOCK)
198: static PetscErrorCode PetscCommBuildTwoSided_RedScatter(MPI_Comm comm,PetscMPIInt count,MPI_Datatype dtype,PetscMPIInt nto,const PetscMPIInt *toranks,const void *todata,PetscMPIInt *nfrom,PetscMPIInt **fromranks,void *fromdata)
199: {
201: PetscMPIInt size,*iflags,nrecvs,tag,*franks,i;
202: MPI_Aint lb,unitbytes;
203: char *tdata,*fdata;
204: MPI_Request *reqs,*sendreqs;
205: MPI_Status *statuses;
208: MPI_Comm_size(comm,&size);
209: PetscMalloc1(size,&iflags);
210: PetscMemzero(iflags,size*sizeof(*iflags));
211: for (i=0; i<nto; i++) iflags[toranks[i]] = 1;
212: MPI_Reduce_scatter_block(iflags,&nrecvs,1,MPI_INT,MPI_SUM,comm);
213: PetscFree(iflags);
215: PetscCommDuplicate(comm,&comm,&tag);
216: MPI_Type_get_extent(dtype,&lb,&unitbytes);
217: if (lb != 0) SETERRQ1(comm,PETSC_ERR_SUP,"Datatype with nonzero lower bound %ld\n",(long)lb);
218: PetscMalloc(nrecvs*count*unitbytes,&fdata);
219: tdata = (char*)todata;
220: PetscMalloc2(nto+nrecvs,&reqs,nto+nrecvs,&statuses);
221: sendreqs = reqs + nrecvs;
222: for (i=0; i<nrecvs; i++) {
223: MPI_Irecv((void*)(fdata+count*unitbytes*i),count,dtype,MPI_ANY_SOURCE,tag,comm,reqs+i);
224: }
225: for (i=0; i<nto; i++) {
226: MPI_Isend((void*)(tdata+count*unitbytes*i),count,dtype,toranks[i],tag,comm,sendreqs+i);
227: }
228: MPI_Waitall(nto+nrecvs,reqs,statuses);
229: PetscMalloc1(nrecvs,&franks);
230: for (i=0; i<nrecvs; i++) franks[i] = statuses[i].MPI_SOURCE;
231: PetscFree2(reqs,statuses);
232: PetscCommDestroy(&comm);
234: *nfrom = nrecvs;
235: *fromranks = franks;
236: *(void**)fromdata = fdata;
237: return(0);
238: }
239: #endif
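The reduce-scatter variant obtains the same count with a single MPI_Reduce_scatter_block, which sums the flag vectors elementwise and hands each rank only its own entry, so no rank ever holds the full summed vector. A minimal standalone sketch (the helper name is hypothetical):

#include <stdlib.h>
#include <mpi.h>

/* Hypothetical helper: same result as CountIncoming() above, but each rank receives
 * only its own entry of the elementwise sum. */
static int CountIncomingRS(MPI_Comm comm,int nto,const int *toranks)
{
  int size,nrecvs = 0,i,*iflags;

  MPI_Comm_size(comm,&size);
  iflags = (int*)calloc(size,sizeof(int));
  for (i=0; i<nto; i++) iflags[toranks[i]] = 1;       /* mark my destinations */
  MPI_Reduce_scatter_block(iflags,&nrecvs,1,MPI_INT,MPI_SUM,comm);
  free(iflags);
  return nrecvs;
}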
241: /*@C
242: PetscCommBuildTwoSided - discovers communicating ranks given one-sided information, moving constant-sized data in the process (often message lengths)
244: Collective on MPI_Comm
246: Input Arguments:
247: + comm - communicator
248: . count - number of entries to send/receive (must match on all ranks)
249: . dtype - datatype to send/receive from each rank (must match on all ranks)
250: . nto - number of ranks to send data to
251: . toranks - ranks to send to (array of length nto)
252: - todata - data to send to each rank (packed)
254: Output Arguments:
255: + nfrom - number of ranks from which messages will be received
256: . fromranks - ranks from which messages will be received (length nfrom; caller should PetscFree())
257: - fromdata - packed data from each rank, each with count entries of type dtype (length nfrom; caller responsible for PetscFree())
259: Level: developer
261: Options Database Keys:
262: . -build_twosided <allreduce|ibarrier|redscatter> - algorithm to set up two-sided communication
264: Notes:
265: This memory-scalable interface is an alternative to calling PetscGatherNumberOfMessages() and
266: PetscGatherMessageLengths(), possibly with a subsequent round of communication to send other constant-size data.
268: Basic data types as well as contiguous types are supported, but non-contiguous (e.g., strided) types are not.
270: References:
271: . 1. - Hoefler, Siebert and Lumsdaine, Scalable communication protocols for dynamic sparse data exchange, 2010.
272: The PETSC_BUILDTWOSIDED_IBARRIER implementation uses the algorithm from this paper.
274: .seealso: PetscGatherNumberOfMessages(), PetscGatherMessageLengths()
275: @*/
276: PetscErrorCode PetscCommBuildTwoSided(MPI_Comm comm,PetscMPIInt count,MPI_Datatype dtype,PetscMPIInt nto,const PetscMPIInt *toranks,const void *todata,PetscMPIInt *nfrom,PetscMPIInt **fromranks,void *fromdata)
277: {
278: PetscErrorCode ierr;
279: PetscBuildTwoSidedType buildtype = PETSC_BUILDTWOSIDED_NOTSET;
282: PetscSysInitializePackage();
283: PetscLogEventSync(PETSC_BuildTwoSided,comm);
284: PetscLogEventBegin(PETSC_BuildTwoSided,0,0,0,0);
285: PetscCommBuildTwoSidedGetType(comm,&buildtype);
286: switch (buildtype) {
287: case PETSC_BUILDTWOSIDED_IBARRIER:
288: #if defined(PETSC_HAVE_MPI_IBARRIER) || defined(PETSC_HAVE_MPIX_IBARRIER)
289: PetscCommBuildTwoSided_Ibarrier(comm,count,dtype,nto,toranks,todata,nfrom,fromranks,fromdata);
290: #else
291: SETERRQ(comm,PETSC_ERR_PLIB,"MPI implementation does not provide MPI_Ibarrier (part of MPI-3)");
292: #endif
293: break;
294: case PETSC_BUILDTWOSIDED_ALLREDUCE:
295: PetscCommBuildTwoSided_Allreduce(comm,count,dtype,nto,toranks,todata,nfrom,fromranks,fromdata);
296: break;
297: case PETSC_BUILDTWOSIDED_REDSCATTER:
298: #if defined(PETSC_HAVE_MPI_REDUCE_SCATTER_BLOCK)
299: PetscCommBuildTwoSided_RedScatter(comm,count,dtype,nto,toranks,todata,nfrom,fromranks,fromdata);
300: #else
301: SETERRQ(comm,PETSC_ERR_PLIB,"MPI implementation does not provide MPI_Reduce_scatter_block (part of MPI-2.2)");
302: #endif
303: break;
304: default: SETERRQ(comm,PETSC_ERR_PLIB,"Unknown method for building two-sided communication");
305: }
306: PetscLogEventEnd(PETSC_BuildTwoSided,0,0,0,0);
307: return(0);
308: }
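A usage sketch of PetscCommBuildTwoSided(), not taken from the PETSc source tree: every rank announces one PetscMPIInt (for example, a message length) to its two ring neighbors and discovers, without prior knowledge, which ranks targeted it and what they sent. The function name TwoSidedExample is hypothetical; it assumes PetscInitialize() has been called.

#include <petscsys.h>

static PetscErrorCode TwoSidedExample(MPI_Comm comm)
{
  PetscErrorCode ierr;
  PetscMPIInt    rank,size,i,nfrom,*fromranks,*fromdata;
  PetscMPIInt    nto = 2,toranks[2],todata[2];

  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  toranks[0] = (rank+1)%size;                  /* right neighbor */
  toranks[1] = (rank+size-1)%size;             /* left neighbor  */
  todata[0]  = 10*rank;                        /* one entry per destination (count = 1) */
  todata[1]  = 10*rank+1;
  ierr = PetscCommBuildTwoSided(comm,1,MPI_INT,nto,toranks,todata,&nfrom,&fromranks,&fromdata);CHKERRQ(ierr);
  for (i=0; i<nfrom; i++) {
    ierr = PetscSynchronizedPrintf(comm,"[%d] got %d from rank %d\n",rank,fromdata[i],fromranks[i]);CHKERRQ(ierr);
  }
  ierr = PetscSynchronizedFlush(comm,PETSC_STDOUT);CHKERRQ(ierr);
  ierr = PetscFree(fromranks);CHKERRQ(ierr);   /* caller owns the output arrays */
  ierr = PetscFree(fromdata);CHKERRQ(ierr);
  return 0;
}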
310: static PetscErrorCode PetscCommBuildTwoSidedFReq_Reference(MPI_Comm comm,PetscMPIInt count,MPI_Datatype dtype,PetscMPIInt nto,const PetscMPIInt *toranks,const void *todata,
311: PetscMPIInt *nfrom,PetscMPIInt **fromranks,void *fromdata,PetscMPIInt ntags,MPI_Request **toreqs,MPI_Request **fromreqs,
312: PetscErrorCode (*send)(MPI_Comm,const PetscMPIInt[],PetscMPIInt,PetscMPIInt,void*,MPI_Request[],void*),
313: PetscErrorCode (*recv)(MPI_Comm,const PetscMPIInt[],PetscMPIInt,void*,MPI_Request[],void*),void *ctx)
314: {
316: PetscMPIInt i,*tag;
317: MPI_Aint lb,unitbytes;
318: MPI_Request *sendreq,*recvreq;
321: PetscMalloc1(ntags,&tag);
322: if (ntags > 0) {
323: PetscCommDuplicate(comm,&comm,&tag[0]);
324: }
325: for (i=1; i<ntags; i++) {
326: PetscCommGetNewTag(comm,&tag[i]);
327: }
329: /* Perform complete initial rendezvous */
330: PetscCommBuildTwoSided(comm,count,dtype,nto,toranks,todata,nfrom,fromranks,fromdata);
332: PetscMalloc1(nto*ntags,&sendreq);
333: PetscMalloc1(*nfrom*ntags,&recvreq);
335: MPI_Type_get_extent(dtype,&lb,&unitbytes);
336: if (lb != 0) SETERRQ1(comm,PETSC_ERR_SUP,"Datatype with nonzero lower bound %ld\n",(long)lb);
337: for (i=0; i<nto; i++) {
338: PetscMPIInt k;
339: for (k=0; k<ntags; k++) sendreq[i*ntags+k] = MPI_REQUEST_NULL;
340: (*send)(comm,tag,i,toranks[i],((char*)todata)+count*unitbytes*i,sendreq+i*ntags,ctx);
341: }
342: for (i=0; i<*nfrom; i++) {
343: void *header = (*(char**)fromdata) + count*unitbytes*i;
344: PetscMPIInt k;
345: for (k=0; k<ntags; k++) recvreq[i*ntags+k] = MPI_REQUEST_NULL;
346: (*recv)(comm,tag,(*fromranks)[i],header,recvreq+i*ntags,ctx);
347: }
348: PetscFree(tag);
349: PetscCommDestroy(&comm);
350: *toreqs = sendreq;
351: *fromreqs = recvreq;
352: return(0);
353: }
355: #if defined(PETSC_HAVE_MPI_IBARRIER) || defined(PETSC_HAVE_MPIX_IBARRIER)
357: static PetscErrorCode PetscCommBuildTwoSidedFReq_Ibarrier(MPI_Comm comm,PetscMPIInt count,MPI_Datatype dtype,PetscMPIInt nto,const PetscMPIInt *toranks,const void *todata,
358: PetscMPIInt *nfrom,PetscMPIInt **fromranks,void *fromdata,PetscMPIInt ntags,MPI_Request **toreqs,MPI_Request **fromreqs,
359: PetscErrorCode (*send)(MPI_Comm,const PetscMPIInt[],PetscMPIInt,PetscMPIInt,void*,MPI_Request[],void*),
360: PetscErrorCode (*recv)(MPI_Comm,const PetscMPIInt[],PetscMPIInt,void*,MPI_Request[],void*),void *ctx)
361: {
363: PetscMPIInt nrecvs,tag,*tags,done,i;
364: MPI_Aint lb,unitbytes;
365: char *tdata;
366: MPI_Request *sendreqs,*usendreqs,*req,barrier;
367: PetscSegBuffer segrank,segdata,segreq;
368: PetscBool barrier_started;
371: PetscCommDuplicate(comm,&comm,&tag);
372: PetscMalloc1(ntags,&tags);
373: for (i=0; i<ntags; i++) {
374: PetscCommGetNewTag(comm,&tags[i]);
375: }
376: MPI_Type_get_extent(dtype,&lb,&unitbytes);
377: if (lb != 0) SETERRQ1(comm,PETSC_ERR_SUP,"Datatype with nonzero lower bound %ld\n",(long)lb);
378: tdata = (char*)todata;
379: PetscMalloc1(nto,&sendreqs);
380: PetscMalloc1(nto*ntags,&usendreqs);
381: /* Post synchronous sends */
382: for (i=0; i<nto; i++) {
383: MPI_Issend((void*)(tdata+count*unitbytes*i),count,dtype,toranks[i],tag,comm,sendreqs+i);
384: }
385: /* Post actual payloads. These are typically larger messages. Hopefully sending these later does not slow down the
386: * synchronous messages above. */
387: for (i=0; i<nto; i++) {
388: PetscMPIInt k;
389: for (k=0; k<ntags; k++) usendreqs[i*ntags+k] = MPI_REQUEST_NULL;
390: (*send)(comm,tags,i,toranks[i],tdata+count*unitbytes*i,usendreqs+i*ntags,ctx);
391: }
393: PetscSegBufferCreate(sizeof(PetscMPIInt),4,&segrank);
394: PetscSegBufferCreate(unitbytes,4*count,&segdata);
395: PetscSegBufferCreate(sizeof(MPI_Request),4,&segreq);
397: nrecvs = 0;
398: barrier = MPI_REQUEST_NULL;
399: /* MPICH-3.2 sometimes does not create a request in some "optimized" cases. This is arguably a standard violation,
400: * but we need to work around it. */
401: barrier_started = PETSC_FALSE;
402: for (done=0; !done; ) {
403: PetscMPIInt flag;
404: MPI_Status status;
405: MPI_Iprobe(MPI_ANY_SOURCE,tag,comm,&flag,&status);
406: if (flag) { /* incoming message */
407: PetscMPIInt *recvrank,k;
408: void *buf;
409: PetscSegBufferGet(segrank,1,&recvrank);
410: PetscSegBufferGet(segdata,count,&buf);
411: *recvrank = status.MPI_SOURCE;
412: MPI_Recv(buf,count,dtype,status.MPI_SOURCE,tag,comm,MPI_STATUS_IGNORE);
413: PetscSegBufferGet(segreq,ntags,&req);
414: for (k=0; k<ntags; k++) req[k] = MPI_REQUEST_NULL;
415: (*recv)(comm,tags,status.MPI_SOURCE,buf,req,ctx);
416: nrecvs++;
417: }
418: if (!barrier_started) {
419: PetscMPIInt sent,nsends;
420: PetscMPIIntCast(nto,&nsends);
421: MPI_Testall(nsends,sendreqs,&sent,MPI_STATUSES_IGNORE);
422: if (sent) {
423: #if defined(PETSC_HAVE_MPI_IBARRIER)
424: MPI_Ibarrier(comm,&barrier);
425: #elif defined(PETSC_HAVE_MPIX_IBARRIER)
426: MPIX_Ibarrier(comm,&barrier);
427: #endif
428: barrier_started = PETSC_TRUE;
429: }
430: } else {
431: MPI_Test(&barrier,&done,MPI_STATUS_IGNORE);
432: }
433: }
434: *nfrom = nrecvs;
435: PetscSegBufferExtractAlloc(segrank,fromranks);
436: PetscSegBufferDestroy(&segrank);
437: PetscSegBufferExtractAlloc(segdata,fromdata);
438: PetscSegBufferDestroy(&segdata);
439: *toreqs = usendreqs;
440: PetscSegBufferExtractAlloc(segreq,fromreqs);
441: PetscSegBufferDestroy(&segreq);
442: PetscFree(sendreqs);
443: PetscFree(tags);
444: PetscCommDestroy(&comm);
445: return(0);
446: }
447: #endif
449: /*@C
450: PetscCommBuildTwoSidedF - discovers communicating ranks given one-sided information, calling user-defined functions during rendezvous
452: Collective on MPI_Comm
454: Input Arguments:
455: + comm - communicator
456: . count - number of entries to send/receive in initial rendezvous (must match on all ranks)
457: . dtype - datatype to send/receive from each rank (must match on all ranks)
458: . nto - number of ranks to send data to
459: . toranks - ranks to send to (array of length nto)
460: . todata - data to send to each rank (packed)
461: . ntags - number of tags needed by send/recv callbacks
462: . send - callback invoked on sending process when ready to send primary payload
463: . recv - callback invoked on receiving process after delivery of rendezvous message
464: - ctx - context for callbacks
466: Output Arguments:
467: + nfrom - number of ranks from which messages will be received
468: . fromranks - ranks from which messages will be received (length nfrom; caller should PetscFree())
469: - fromdata - packed data from each rank, each with count entries of type dtype (length nfrom; caller responsible for PetscFree())
471: Level: developer
473: Notes:
474: This memory-scalable interface is an alternative to calling PetscGatherNumberOfMessages() and
475: PetscGatherMessageLengths(), possibly with a subsequent round of communication to send other data.
477: Basic data types as well as contiguous types are supported, but non-contiguous (e.g., strided) types are not.
479: References:
480: . 1. - Hoefler, Siebert and Lumsdaine, Scalable communication protocols for dynamic sparse data exchange, 2010.
481: The PETSC_BUILDTWOSIDED_IBARRIER implementation uses the algorithm from this paper.
483: .seealso: PetscCommBuildTwoSided(), PetscCommBuildTwoSidedFReq(), PetscGatherNumberOfMessages(), PetscGatherMessageLengths()
484: @*/
485: PetscErrorCode PetscCommBuildTwoSidedF(MPI_Comm comm,PetscMPIInt count,MPI_Datatype dtype,PetscMPIInt nto,const PetscMPIInt *toranks,const void *todata,PetscMPIInt *nfrom,PetscMPIInt **fromranks,void *fromdata,PetscMPIInt ntags,
486: PetscErrorCode (*send)(MPI_Comm,const PetscMPIInt[],PetscMPIInt,PetscMPIInt,void*,MPI_Request[],void*),
487: PetscErrorCode (*recv)(MPI_Comm,const PetscMPIInt[],PetscMPIInt,void*,MPI_Request[],void*),void *ctx)
488: {
490: MPI_Request *toreqs,*fromreqs;
493: PetscCommBuildTwoSidedFReq(comm,count,dtype,nto,toranks,todata,nfrom,fromranks,fromdata,ntags,&toreqs,&fromreqs,send,recv,ctx);
494: MPI_Waitall(nto*ntags,toreqs,MPI_STATUSES_IGNORE);
495: MPI_Waitall(*nfrom*ntags,fromreqs,MPI_STATUSES_IGNORE);
496: PetscFree(toreqs);
497: PetscFree(fromreqs);
498: return(0);
499: }
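A hedged sketch of the callback side of PetscCommBuildTwoSidedF(), not part of mpits.c: the rendezvous message carries a single PetscMPIInt with the length of a variable-size payload, the send callback posts an MPI_Isend of that payload on tags[0], and the recv callback allocates a buffer of the announced length and posts the matching MPI_Irecv. The context layout and all Example* names are illustrative assumptions; recvbufs is pre-allocated by the caller with one slot per possible sender (for example, the communicator size).

#include <petscsys.h>

typedef struct {
  PetscScalar **sendbufs;   /* sendbufs[i]: payload destined for toranks[i]    */
  PetscScalar **recvbufs;   /* filled by ExampleRecv, indexed in arrival order */
  PetscMPIInt   nrecv;      /* arrival counter, starts at 0                    */
} ExampleCtx;

static PetscErrorCode ExampleSend(MPI_Comm comm,const PetscMPIInt tags[],PetscMPIInt tonum,PetscMPIInt torank,void *header,MPI_Request reqs[],void *ctx)
{
  ExampleCtx     *ec  = (ExampleCtx*)ctx;
  PetscMPIInt     len = *(PetscMPIInt*)header;   /* the length we announced in todata[tonum] */
  PetscErrorCode  ierr;

  ierr = MPI_Isend(ec->sendbufs[tonum],len,MPIU_SCALAR,torank,tags[0],comm,&reqs[0]);CHKERRQ(ierr);
  return 0;
}

static PetscErrorCode ExampleRecv(MPI_Comm comm,const PetscMPIInt tags[],PetscMPIInt fromrank,void *header,MPI_Request reqs[],void *ctx)
{
  ExampleCtx     *ec  = (ExampleCtx*)ctx;
  PetscMPIInt     len = *(PetscMPIInt*)header;   /* the length announced by the sender */
  PetscErrorCode  ierr;

  ierr = PetscMalloc1(len,&ec->recvbufs[ec->nrecv]);CHKERRQ(ierr);
  ierr = MPI_Irecv(ec->recvbufs[ec->nrecv],len,MPIU_SCALAR,fromrank,tags[0],comm,&reqs[0]);CHKERRQ(ierr);
  ec->nrecv++;
  return 0;
}

With count = 1, dtype = MPI_INT and ntags = 1, a call would then look like

  PetscCommBuildTwoSidedF(comm,1,MPI_INT,nto,toranks,lengths,&nfrom,&fromranks,&fromlengths,1,ExampleSend,ExampleRecv,&ec);

where lengths[i] is the payload length destined for toranks[i] and ec is an initialized ExampleCtx.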
501: /*@C
502: PetscCommBuildTwoSidedFReq - discovers communicating ranks given one-sided information, calling user-defined functions during rendezvous, returns requests
504: Collective on MPI_Comm
506: Input Arguments:
507: + comm - communicator
508: . count - number of entries to send/receive in initial rendezvous (must match on all ranks)
509: . dtype - datatype to send/receive from each rank (must match on all ranks)
510: . nto - number of ranks to send data to
511: . toranks - ranks to send to (array of length nto)
512: . todata - data to send to each rank (packed)
513: . ntags - number of tags needed by send/recv callbacks
514: . send - callback invoked on sending process when ready to send primary payload
515: . recv - callback invoked on receiving process after delivery of rendezvous message
516: - ctx - context for callbacks
518: Output Arguments:
519: + nfrom - number of ranks from which messages will be received
520: . fromranks - ranks from which messages will be received (length nfrom; caller should PetscFree())
521: . fromdata - packed data from each rank, each with count entries of type dtype (length nfrom; caller responsible for PetscFree())
522: . toreqs - array of nto*ntags sender requests (caller must wait on these, then PetscFree())
523: - fromreqs - array of nfrom*ntags receiver requests (caller must wait on these, then PetscFree())
525: Level: developer
527: Notes:
528: This memory-scalable interface is an alternative to calling PetscGatherNumberOfMessages() and
529: PetscGatherMessageLengths(), possibly with a subsequent round of communication to send other data.
531: Basic data types as well as contiguous types are supported, but non-contiguous (e.g., strided) types are not.
533: References:
534: . 1. - Hoefler, Siebert and Lumsdaine, Scalable communication protocols for dynamic sparse data exchange, 2010.
535: The PETSC_BUILDTWOSIDED_IBARRIER implementation uses the algorithm from this paper.
537: .seealso: PetscCommBuildTwoSided(), PetscCommBuildTwoSidedF(), PetscGatherNumberOfMessages(), PetscGatherMessageLengths()
538: @*/
539: PetscErrorCode PetscCommBuildTwoSidedFReq(MPI_Comm comm,PetscMPIInt count,MPI_Datatype dtype,PetscMPIInt nto,const PetscMPIInt *toranks,const void *todata,
540: PetscMPIInt *nfrom,PetscMPIInt **fromranks,void *fromdata,PetscMPIInt ntags,MPI_Request **toreqs,MPI_Request **fromreqs,
541: PetscErrorCode (*send)(MPI_Comm,const PetscMPIInt[],PetscMPIInt,PetscMPIInt,void*,MPI_Request[],void*),
542: PetscErrorCode (*recv)(MPI_Comm,const PetscMPIInt[],PetscMPIInt,void*,MPI_Request[],void*),void *ctx)
543: {
544: PetscErrorCode ierr,(*f)(MPI_Comm,PetscMPIInt,MPI_Datatype,PetscMPIInt,const PetscMPIInt[],const void*,
545: PetscMPIInt*,PetscMPIInt**,void*,PetscMPIInt,MPI_Request**,MPI_Request**,
546: PetscErrorCode (*send)(MPI_Comm,const PetscMPIInt[],PetscMPIInt,PetscMPIInt,void*,MPI_Request[],void*),
547: PetscErrorCode (*recv)(MPI_Comm,const PetscMPIInt[],PetscMPIInt,void*,MPI_Request[],void*),void *ctx);
548: PetscBuildTwoSidedType buildtype = PETSC_BUILDTWOSIDED_NOTSET;
549: PetscMPIInt i,size;
552: PetscSysInitializePackage();
553: MPI_Comm_size(comm,&size);
554: for (i=0; i<nto; i++) {
555: if (toranks[i] < 0 || size <= toranks[i]) SETERRQ3(comm,PETSC_ERR_ARG_OUTOFRANGE,"toranks[%d] %d not in comm size %d",i,toranks[i],size);
556: }
557: PetscLogEventSync(PETSC_BuildTwoSidedF,comm);
558: PetscLogEventBegin(PETSC_BuildTwoSidedF,0,0,0,0);
559: PetscCommBuildTwoSidedGetType(comm,&buildtype);
560: switch (buildtype) {
561: case PETSC_BUILDTWOSIDED_IBARRIER:
562: #if defined(PETSC_HAVE_MPI_IBARRIER) || defined(PETSC_HAVE_MPIX_IBARRIER)
563: f = PetscCommBuildTwoSidedFReq_Ibarrier;
564: #else
565: SETERRQ(comm,PETSC_ERR_PLIB,"MPI implementation does not provide MPI_Ibarrier (part of MPI-3)");
566: #endif
567: break;
568: case PETSC_BUILDTWOSIDED_ALLREDUCE:
569: case PETSC_BUILDTWOSIDED_REDSCATTER:
570: f = PetscCommBuildTwoSidedFReq_Reference;
571: break;
572: default: SETERRQ(comm,PETSC_ERR_PLIB,"Unknown method for building two-sided communication");
573: }
574: (*f)(comm,count,dtype,nto,toranks,todata,nfrom,fromranks,fromdata,ntags,toreqs,fromreqs,send,recv,ctx);
575: PetscLogEventEnd(PETSC_BuildTwoSidedF,0,0,0,0);
576: return(0);
577: }
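Completion pattern for PetscCommBuildTwoSidedFReq() (a sketch; nto, ntags, nfrom, toreqs and fromreqs come from a preceding call such as the ExampleSend/ExampleRecv sketch above). PetscCommBuildTwoSidedF() performs exactly these waits and frees internally, as shown in its body above.

ierr = MPI_Waitall(nto*ntags,toreqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);      /* payload sends complete    */
ierr = MPI_Waitall(nfrom*ntags,fromreqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);  /* payload receives complete */
ierr = PetscFree(toreqs);CHKERRQ(ierr);
ierr = PetscFree(fromreqs);CHKERRQ(ierr);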