Actual source code: mpits.c
petsc-3.8.4 2018-03-24
1: #include <petscsys.h>
3: PetscLogEvent PETSC_BuildTwoSided,PETSC_BuildTwoSidedF;
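/* The strings below follow the usual PETSc convention for enum string tables: the value names come
 * first, followed by the enum type name, the common prefix of the value names, and a terminating 0,
 * which is the layout expected by PetscOptionsGetEnum(). */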
5: const char *const PetscBuildTwoSidedTypes[] = {
6: "ALLREDUCE",
7: "IBARRIER",
8: "REDSCATTER",
9: "PetscBuildTwoSidedType",
10: "PETSC_BUILDTWOSIDED_",
11: 0
12: };
14: static PetscBuildTwoSidedType _twosided_type = PETSC_BUILDTWOSIDED_NOTSET;
16: /*@
17: PetscCommBuildTwoSidedSetType - set algorithm to use when building two-sided communication
19: Logically Collective
21: Input Arguments:
22: +  comm - communicator on which to set the algorithm (the setting is currently global; see Note below)
23: - twosided - algorithm to use in subsequent calls to PetscCommBuildTwoSided()
25: Level: developer
27: Note:
28: This option is currently global, but could be made per-communicator.
30: .seealso: PetscCommBuildTwoSided(), PetscCommBuildTwoSidedGetType()
31: @*/
32: PetscErrorCode PetscCommBuildTwoSidedSetType(MPI_Comm comm,PetscBuildTwoSidedType twosided)
33: {
35: #if defined(PETSC_USE_DEBUG)
36: {
38: PetscMPIInt b1[2],b2[2];
39: b1[0] = -(PetscMPIInt)twosided;
40: b1[1] = (PetscMPIInt)twosided;
41: MPIU_Allreduce(b1,b2,2,MPI_INT,MPI_MAX,comm);
42: if (-b2[0] != b2[1]) SETERRQ(comm,PETSC_ERR_ARG_WRONG,"Enum value must be same on all processes");
43: }
44: #endif
45: _twosided_type = twosided;
46: return(0);
47: }
49: /*@
50: PetscCommBuildTwoSidedGetType - get the algorithm that will be used when building two-sided communication
52: Logically Collective
54: Input Argument:
55: .  comm - communicator on which to query the algorithm
56: Output Argument:
57: .  twosided - algorithm that will be used by PetscCommBuildTwoSided()
58: Level: developer
60: .seealso: PetscCommBuildTwoSided(), PetscCommBuildTwoSidedSetType()
61: @*/
62: PetscErrorCode PetscCommBuildTwoSidedGetType(MPI_Comm comm,PetscBuildTwoSidedType *twosided)
63: {
67: *twosided = PETSC_BUILDTWOSIDED_NOTSET;
68: if (_twosided_type == PETSC_BUILDTWOSIDED_NOTSET) {
69: #if defined(PETSC_HAVE_MPI_IBARRIER)
70: # if defined(PETSC_HAVE_MPICH_CH3_SOCK) && !defined(PETSC_HAVE_MPICH_CH3_SOCK_FIXED_NBC_PROGRESS)
71: /* Deadlock in Ibarrier: http://trac.mpich.org/projects/mpich/ticket/1785 */
72: _twosided_type = PETSC_BUILDTWOSIDED_ALLREDUCE;
73: # else
74: _twosided_type = PETSC_BUILDTWOSIDED_IBARRIER;
75: # endif
76: #else
77: _twosided_type = PETSC_BUILDTWOSIDED_ALLREDUCE;
78: #endif
79: PetscOptionsGetEnum(NULL,NULL,"-build_twosided",PetscBuildTwoSidedTypes,(PetscEnum*)&_twosided_type,NULL);
80: }
81: *twosided = _twosided_type;
82: return(0);
83: }
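/* A minimal usage sketch of the two routines above (illustration only, not part of mpits.c):
 * select the Ibarrier-based algorithm up front, then query which algorithm will actually be used.
 * The helper name ExampleSelectTwoSided is hypothetical, the call assumes the MPI implementation
 * provides MPI_Ibarrier, and error checking is omitted to match the listing above. */
#if 0
static PetscErrorCode ExampleSelectTwoSided(void)
{
  PetscBuildTwoSidedType type;

  PetscCommBuildTwoSidedSetType(PETSC_COMM_WORLD,PETSC_BUILDTWOSIDED_IBARRIER);
  /* Since a type was set explicitly, GetType() simply returns it; had it still been NOTSET,
   * GetType() would pick a default and honor the -build_twosided option. */
  PetscCommBuildTwoSidedGetType(PETSC_COMM_WORLD,&type);
  return 0;
}
#endif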
85: #if defined(PETSC_HAVE_MPI_IBARRIER) || defined(PETSC_HAVE_MPIX_IBARRIER)
87: static PetscErrorCode PetscCommBuildTwoSided_Ibarrier(MPI_Comm comm,PetscMPIInt count,MPI_Datatype dtype,PetscMPIInt nto,const PetscMPIInt *toranks,const void *todata,PetscMPIInt *nfrom,PetscMPIInt **fromranks,void *fromdata)
88: {
90: PetscMPIInt nrecvs,tag,done,i;
91: MPI_Aint lb,unitbytes;
92: char *tdata;
93: MPI_Request *sendreqs,barrier;
94: PetscSegBuffer segrank,segdata;
95: PetscBool barrier_started;
98: PetscCommDuplicate(comm,&comm,&tag);
99: MPI_Type_get_extent(dtype,&lb,&unitbytes);
100: if (lb != 0) SETERRQ1(comm,PETSC_ERR_SUP,"Datatype with nonzero lower bound %ld\n",(long)lb);
101: tdata = (char*)todata;
102: PetscMalloc1(nto,&sendreqs);
103: for (i=0; i<nto; i++) {
104: MPI_Issend((void*)(tdata+count*unitbytes*i),count,dtype,toranks[i],tag,comm,sendreqs+i);
105: }
106: PetscSegBufferCreate(sizeof(PetscMPIInt),4,&segrank);
107: PetscSegBufferCreate(unitbytes,4*count,&segdata);
109: nrecvs = 0;
110: barrier = MPI_REQUEST_NULL;
111: /* MPICH-3.2 sometimes does not create a request in some "optimized" cases. This is arguably a standard violation,
112: * but we need to work around it. */
113: barrier_started = PETSC_FALSE;
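/* Dynamic sparse data exchange (see the Hoefler et al. reference in the PetscCommBuildTwoSided()
 * manual page below): keep probing for and receiving rendezvous messages; once all of this rank's
 * synchronous sends have completed (each has been matched by a receive somewhere), start a
 * nonblocking barrier; when that barrier completes, every rank has finished sending and receiving,
 * so the loop terminates. */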
114: for (done=0; !done; ) {
115: PetscMPIInt flag;
116: MPI_Status status;
117: MPI_Iprobe(MPI_ANY_SOURCE,tag,comm,&flag,&status);
118: if (flag) { /* incoming message */
119: PetscMPIInt *recvrank;
120: void *buf;
121: PetscSegBufferGet(segrank,1,&recvrank);
122: PetscSegBufferGet(segdata,count,&buf);
123: *recvrank = status.MPI_SOURCE;
124: MPI_Recv(buf,count,dtype,status.MPI_SOURCE,tag,comm,MPI_STATUS_IGNORE);
125: nrecvs++;
126: }
127: if (!barrier_started) {
128: PetscMPIInt sent,nsends;
129: PetscMPIIntCast(nto,&nsends);
130: MPI_Testall(nsends,sendreqs,&sent,MPI_STATUSES_IGNORE);
131: if (sent) {
132: #if defined(PETSC_HAVE_MPI_IBARRIER)
133: MPI_Ibarrier(comm,&barrier);
134: #elif defined(PETSC_HAVE_MPIX_IBARRIER)
135: MPIX_Ibarrier(comm,&barrier);
136: #endif
137: barrier_started = PETSC_TRUE;
138: PetscFree(sendreqs);
139: }
140: } else {
141: MPI_Test(&barrier,&done,MPI_STATUS_IGNORE);
142: }
143: }
144: *nfrom = nrecvs;
145: PetscSegBufferExtractAlloc(segrank,fromranks);
146: PetscSegBufferDestroy(&segrank);
147: PetscSegBufferExtractAlloc(segdata,fromdata);
148: PetscSegBufferDestroy(&segdata);
149: PetscCommDestroy(&comm);
150: return(0);
151: }
152: #endif
154: static PetscErrorCode PetscCommBuildTwoSided_Allreduce(MPI_Comm comm,PetscMPIInt count,MPI_Datatype dtype,PetscMPIInt nto,const PetscMPIInt *toranks,const void *todata,PetscMPIInt *nfrom,PetscMPIInt **fromranks,void *fromdata)
155: {
157: PetscMPIInt size,*iflags,nrecvs,tag,*franks,i;
158: MPI_Aint lb,unitbytes;
159: char *tdata,*fdata;
160: MPI_Request *reqs,*sendreqs;
161: MPI_Status *statuses;
164: MPI_Comm_size(comm,&size);
165: PetscCalloc1(size,&iflags);
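/* Flag the ranks this process sends to; PetscGatherNumberOfMessages() reduces these flags over the
 * communicator so that each rank learns how many messages it will receive (nrecvs). */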
166: for (i=0; i<nto; i++) iflags[toranks[i]] = 1;
167: PetscGatherNumberOfMessages(comm,iflags,NULL,&nrecvs);
168: PetscFree(iflags);
170: PetscCommDuplicate(comm,&comm,&tag);
171: MPI_Type_get_extent(dtype,&lb,&unitbytes);
172: if (lb != 0) SETERRQ1(comm,PETSC_ERR_SUP,"Datatype with nonzero lower bound %ld\n",(long)lb);
173: PetscMalloc(nrecvs*count*unitbytes,&fdata);
174: tdata = (char*)todata;
175: PetscMalloc2(nto+nrecvs,&reqs,nto+nrecvs,&statuses);
176: sendreqs = reqs + nrecvs;
177: for (i=0; i<nrecvs; i++) {
178: MPI_Irecv((void*)(fdata+count*unitbytes*i),count,dtype,MPI_ANY_SOURCE,tag,comm,reqs+i);
179: }
180: for (i=0; i<nto; i++) {
181: MPI_Isend((void*)(tdata+count*unitbytes*i),count,dtype,toranks[i],tag,comm,sendreqs+i);
182: }
183: MPI_Waitall(nto+nrecvs,reqs,statuses);
184: PetscMalloc1(nrecvs,&franks);
185: for (i=0; i<nrecvs; i++) franks[i] = statuses[i].MPI_SOURCE;
186: PetscFree2(reqs,statuses);
187: PetscCommDestroy(&comm);
189: *nfrom = nrecvs;
190: *fromranks = franks;
191: *(void**)fromdata = fdata;
192: return(0);
193: }
195: #if defined(PETSC_HAVE_MPI_REDUCE_SCATTER_BLOCK)
196: static PetscErrorCode PetscCommBuildTwoSided_RedScatter(MPI_Comm comm,PetscMPIInt count,MPI_Datatype dtype,PetscMPIInt nto,const PetscMPIInt *toranks,const void *todata,PetscMPIInt *nfrom,PetscMPIInt **fromranks,void *fromdata)
197: {
199: PetscMPIInt size,*iflags,nrecvs,tag,*franks,i;
200: MPI_Aint lb,unitbytes;
201: char *tdata,*fdata;
202: MPI_Request *reqs,*sendreqs;
203: MPI_Status *statuses;
206: MPI_Comm_size(comm,&size);
207: PetscMalloc1(size,&iflags);
208: PetscMemzero(iflags,size*sizeof(*iflags));
209: for (i=0; i<nto; i++) iflags[toranks[i]] = 1;
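/* Summing the flag arrays across all ranks and scattering one entry back to each rank gives every
 * rank the number of messages it will receive. */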
210: MPI_Reduce_scatter_block(iflags,&nrecvs,1,MPI_INT,MPI_SUM,comm);
211: PetscFree(iflags);
213: PetscCommDuplicate(comm,&comm,&tag);
214: MPI_Type_get_extent(dtype,&lb,&unitbytes);
215: if (lb != 0) SETERRQ1(comm,PETSC_ERR_SUP,"Datatype with nonzero lower bound %ld\n",(long)lb);
216: PetscMalloc(nrecvs*count*unitbytes,&fdata);
217: tdata = (char*)todata;
218: PetscMalloc2(nto+nrecvs,&reqs,nto+nrecvs,&statuses);
219: sendreqs = reqs + nrecvs;
220: for (i=0; i<nrecvs; i++) {
221: MPI_Irecv((void*)(fdata+count*unitbytes*i),count,dtype,MPI_ANY_SOURCE,tag,comm,reqs+i);
222: }
223: for (i=0; i<nto; i++) {
224: MPI_Isend((void*)(tdata+count*unitbytes*i),count,dtype,toranks[i],tag,comm,sendreqs+i);
225: }
226: MPI_Waitall(nto+nrecvs,reqs,statuses);
227: PetscMalloc1(nrecvs,&franks);
228: for (i=0; i<nrecvs; i++) franks[i] = statuses[i].MPI_SOURCE;
229: PetscFree2(reqs,statuses);
230: PetscCommDestroy(&comm);
232: *nfrom = nrecvs;
233: *fromranks = franks;
234: *(void**)fromdata = fdata;
235: return(0);
236: }
237: #endif
239: /*@C
240: PetscCommBuildTwoSided - discovers communicating ranks given one-sided information, moving constant-sized data in the process (often message lengths)
242: Collective on MPI_Comm
244: Input Arguments:
245: + comm - communicator
246: . count - number of entries to send/receive (must match on all ranks)
247: . dtype - datatype to send/receive from each rank (must match on all ranks)
248: . nto - number of ranks to send data to
249: . toranks - ranks to send to (array of length nto)
250: - todata - data to send to each rank (packed)
252: Output Arguments:
253: + nfrom - number of ranks from which messages will be received
254: . fromranks - ranks from which messages will be received (length nfrom; caller should PetscFree())
255: - fromdata - packed data from each rank, each with count entries of type dtype (length nfrom; caller is responsible for PetscFree())
257: Level: developer
259: Options Database Keys:
260: . -build_twosided <allreduce|ibarrier|redscatter> - algorithm to set up two-sided communication
262: Notes:
263: This memory-scalable interface is an alternative to calling PetscGatherNumberOfMessages() and
264: PetscGatherMessageLengths(), possibly with a subsequent round of communication to send other constant-size data.
266: Basic data types as well as contiguous types are supported, but non-contiguous (e.g., strided) types are not.
268: References:
269: . 1. - Hoefler, Siebert and Lumsdaine, Scalable communication protocols for dynamic sparse data exchange, 2010
270: (the MPI_Ibarrier-based implementation uses the algorithm from this reference).
272: .seealso: PetscGatherNumberOfMessages(), PetscGatherMessageLengths()
273: @*/
274: PetscErrorCode PetscCommBuildTwoSided(MPI_Comm comm,PetscMPIInt count,MPI_Datatype dtype,PetscMPIInt nto,const PetscMPIInt *toranks,const void *todata,PetscMPIInt *nfrom,PetscMPIInt **fromranks,void *fromdata)
275: {
277: PetscBuildTwoSidedType buildtype = PETSC_BUILDTWOSIDED_NOTSET;
280: PetscSysInitializePackage();
281: PetscLogEventBegin(PETSC_BuildTwoSided,0,0,0,0);
282: PetscCommBuildTwoSidedGetType(comm,&buildtype);
283: switch (buildtype) {
284: case PETSC_BUILDTWOSIDED_IBARRIER:
285: #if defined(PETSC_HAVE_MPI_IBARRIER) || defined(PETSC_HAVE_MPIX_IBARRIER)
286: PetscCommBuildTwoSided_Ibarrier(comm,count,dtype,nto,toranks,todata,nfrom,fromranks,fromdata);
287: #else
288: SETERRQ(comm,PETSC_ERR_PLIB,"MPI implementation does not provide MPI_Ibarrier (part of MPI-3)");
289: #endif
290: break;
291: case PETSC_BUILDTWOSIDED_ALLREDUCE:
292: PetscCommBuildTwoSided_Allreduce(comm,count,dtype,nto,toranks,todata,nfrom,fromranks,fromdata);
293: break;
294: case PETSC_BUILDTWOSIDED_REDSCATTER:
295: #if defined(PETSC_HAVE_MPI_REDUCE_SCATTER_BLOCK)
296: PetscCommBuildTwoSided_RedScatter(comm,count,dtype,nto,toranks,todata,nfrom,fromranks,fromdata);
297: #else
298: SETERRQ(comm,PETSC_ERR_PLIB,"MPI implementation does not provide MPI_Reduce_scatter_block (part of MPI-2.2)");
299: #endif
300: break;
301: default: SETERRQ(comm,PETSC_ERR_PLIB,"Unknown method for building two-sided communication");
302: }
303: PetscLogEventEnd(PETSC_BuildTwoSided,0,0,0,0);
304: return(0);
305: }
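/* Illustrative usage sketch of PetscCommBuildTwoSided() (not part of mpits.c): each rank announces
 * one PetscMPIInt (here interpreted as a message length) to the ranks in toranks[] and discovers
 * which ranks will be sending to it. The helper name ExampleTwoSided and the surrounding setup are
 * hypothetical; error checking is omitted to match the listing above. */
#if 0
static PetscErrorCode ExampleTwoSided(MPI_Comm comm,PetscMPIInt nto,const PetscMPIInt *toranks,const PetscMPIInt *lengths)
{
  PetscMPIInt nfrom,*fromranks,*fromlengths,i;

  PetscCommBuildTwoSided(comm,1,MPI_INT,nto,toranks,lengths,&nfrom,&fromranks,&fromlengths);
  for (i=0; i<nfrom; i++) {
    /* fromranks[i] will later send a message of fromlengths[i] entries to this rank */
  }
  PetscFree(fromranks);
  PetscFree(fromlengths);
  return 0;
}
#endif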
307: static PetscErrorCode PetscCommBuildTwoSidedFReq_Reference(MPI_Comm comm,PetscMPIInt count,MPI_Datatype dtype,PetscMPIInt nto,const PetscMPIInt *toranks,const void *todata,
308: PetscMPIInt *nfrom,PetscMPIInt **fromranks,void *fromdata,PetscMPIInt ntags,MPI_Request **toreqs,MPI_Request **fromreqs,
309: PetscErrorCode (*send)(MPI_Comm,const PetscMPIInt[],PetscMPIInt,PetscMPIInt,void*,MPI_Request[],void*),
310: PetscErrorCode (*recv)(MPI_Comm,const PetscMPIInt[],PetscMPIInt,void*,MPI_Request[],void*),void *ctx)
311: {
313: PetscMPIInt i,*tag;
314: MPI_Aint lb,unitbytes;
315: MPI_Request *sendreq,*recvreq;
318: PetscMalloc1(ntags,&tag);
319: if (ntags > 0) {
320: PetscCommDuplicate(comm,&comm,&tag[0]);
321: }
322: for (i=1; i<ntags; i++) {
323: PetscCommGetNewTag(comm,&tag[i]);
324: }
326: /* Perform complete initial rendezvous */
327: PetscCommBuildTwoSided(comm,count,dtype,nto,toranks,todata,nfrom,fromranks,fromdata);
329: PetscMalloc1(nto*ntags,&sendreq);
330: PetscMalloc1(*nfrom*ntags,&recvreq);
332: MPI_Type_get_extent(dtype,&lb,&unitbytes);
333: if (lb != 0) SETERRQ1(comm,PETSC_ERR_SUP,"Datatype with nonzero lower bound %ld\n",(long)lb);
334: for (i=0; i<nto; i++) {
335: PetscMPIInt k;
336: for (k=0; k<ntags; k++) sendreq[i*ntags+k] = MPI_REQUEST_NULL;
337: (*send)(comm,tag,i,toranks[i],((char*)todata)+count*unitbytes*i,sendreq+i*ntags,ctx);
338: }
339: for (i=0; i<*nfrom; i++) {
340: void *header = (*(char**)fromdata) + count*unitbytes*i;
341: PetscMPIInt k;
342: for (k=0; k<ntags; k++) recvreq[i*ntags+k] = MPI_REQUEST_NULL;
343: (*recv)(comm,tag,(*fromranks)[i],header,recvreq+i*ntags,ctx);
344: }
345: PetscFree(tag);
346: PetscCommDestroy(&comm);
347: *toreqs = sendreq;
348: *fromreqs = recvreq;
349: return(0);
350: }
352: #if defined(PETSC_HAVE_MPI_IBARRIER) || defined(PETSC_HAVE_MPIX_IBARRIER)
354: static PetscErrorCode PetscCommBuildTwoSidedFReq_Ibarrier(MPI_Comm comm,PetscMPIInt count,MPI_Datatype dtype,PetscMPIInt nto,const PetscMPIInt *toranks,const void *todata,
355: PetscMPIInt *nfrom,PetscMPIInt **fromranks,void *fromdata,PetscMPIInt ntags,MPI_Request **toreqs,MPI_Request **fromreqs,
356: PetscErrorCode (*send)(MPI_Comm,const PetscMPIInt[],PetscMPIInt,PetscMPIInt,void*,MPI_Request[],void*),
357: PetscErrorCode (*recv)(MPI_Comm,const PetscMPIInt[],PetscMPIInt,void*,MPI_Request[],void*),void *ctx)
358: {
360: PetscMPIInt nrecvs,tag,*tags,done,i;
361: MPI_Aint lb,unitbytes;
362: char *tdata;
363: MPI_Request *sendreqs,*usendreqs,*req,barrier;
364: PetscSegBuffer segrank,segdata,segreq;
365: PetscBool barrier_started;
368: PetscCommDuplicate(comm,&comm,&tag);
369: PetscMalloc1(ntags,&tags);
370: for (i=0; i<ntags; i++) {
371: PetscCommGetNewTag(comm,&tags[i]);
372: }
373: MPI_Type_get_extent(dtype,&lb,&unitbytes);
374: if (lb != 0) SETERRQ1(comm,PETSC_ERR_SUP,"Datatype with nonzero lower bound %ld\n",(long)lb);
375: tdata = (char*)todata;
376: PetscMalloc1(nto,&sendreqs);
377: PetscMalloc1(nto*ntags,&usendreqs);
378: /* Post synchronous sends */
379: for (i=0; i<nto; i++) {
380: MPI_Issend((void*)(tdata+count*unitbytes*i),count,dtype,toranks[i],tag,comm,sendreqs+i);
381: }
382: /* Post actual payloads. These are typically larger messages. Hopefully sending these later does not slow down the
383: * synchronous messages above. */
384: for (i=0; i<nto; i++) {
385: PetscMPIInt k;
386: for (k=0; k<ntags; k++) usendreqs[i*ntags+k] = MPI_REQUEST_NULL;
387: (*send)(comm,tags,i,toranks[i],tdata+count*unitbytes*i,usendreqs+i*ntags,ctx);
388: }
390: PetscSegBufferCreate(sizeof(PetscMPIInt),4,&segrank);
391: PetscSegBufferCreate(unitbytes,4*count,&segdata);
392: PetscSegBufferCreate(sizeof(MPI_Request),4,&segreq);
394: nrecvs = 0;
395: barrier = MPI_REQUEST_NULL;
396: /* MPICH-3.2 sometimes does not create a request in some "optimized" cases. This is arguably a standard violation,
397: * but we need to work around it. */
398: barrier_started = PETSC_FALSE;
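/* Same dynamic sparse data exchange loop as in PetscCommBuildTwoSided_Ibarrier() above, except that
 * the payload receives are posted from the recv callback as each rendezvous message arrives. */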
399: for (done=0; !done; ) {
400: PetscMPIInt flag;
401: MPI_Status status;
402: MPI_Iprobe(MPI_ANY_SOURCE,tag,comm,&flag,&status);
403: if (flag) { /* incoming message */
404: PetscMPIInt *recvrank,k;
405: void *buf;
406: PetscSegBufferGet(segrank,1,&recvrank);
407: PetscSegBufferGet(segdata,count,&buf);
408: *recvrank = status.MPI_SOURCE;
409: MPI_Recv(buf,count,dtype,status.MPI_SOURCE,tag,comm,MPI_STATUS_IGNORE);
410: PetscSegBufferGet(segreq,ntags,&req);
411: for (k=0; k<ntags; k++) req[k] = MPI_REQUEST_NULL;
412: (*recv)(comm,tags,status.MPI_SOURCE,buf,req,ctx);
413: nrecvs++;
414: }
415: if (!barrier_started) {
416: PetscMPIInt sent,nsends;
417: PetscMPIIntCast(nto,&nsends);
418: MPI_Testall(nsends,sendreqs,&sent,MPI_STATUSES_IGNORE);
419: if (sent) {
420: #if defined(PETSC_HAVE_MPI_IBARRIER)
421: MPI_Ibarrier(comm,&barrier);
422: #elif defined(PETSC_HAVE_MPIX_IBARRIER)
423: MPIX_Ibarrier(comm,&barrier);
424: #endif
425: barrier_started = PETSC_TRUE;
426: }
427: } else {
428: MPI_Test(&barrier,&done,MPI_STATUS_IGNORE);
429: }
430: }
431: *nfrom = nrecvs;
432: PetscSegBufferExtractAlloc(segrank,fromranks);
433: PetscSegBufferDestroy(&segrank);
434: PetscSegBufferExtractAlloc(segdata,fromdata);
435: PetscSegBufferDestroy(&segdata);
436: *toreqs = usendreqs;
437: PetscSegBufferExtractAlloc(segreq,fromreqs);
438: PetscSegBufferDestroy(&segreq);
439: PetscFree(sendreqs);
440: PetscFree(tags);
441: PetscCommDestroy(&comm);
442: return(0);
443: }
444: #endif
446: /*@C
447: PetscCommBuildTwoSidedF - discovers communicating ranks given one-sided information, calling user-defined functions during rendezvous
449: Collective on MPI_Comm
451: Input Arguments:
452: + comm - communicator
453: . count - number of entries to send/receive in initial rendezvous (must match on all ranks)
454: . dtype - datatype to send/receive from each rank (must match on all ranks)
455: . nto - number of ranks to send data to
456: . toranks - ranks to send to (array of length nto)
457: . todata - data to send to each rank (packed)
458: . ntags - number of tags needed by send/recv callbacks
459: . send - callback invoked on sending process when ready to send primary payload
460: . recv - callback invoked on receiving process after delivery of rendezvous message
461: - ctx - context for callbacks
463: Output Arguments:
464: + nfrom - number of ranks from which messages will be received
465: . fromranks - ranks from which messages will be received (length nfrom; caller should PetscFree())
466: - fromdata - packed data from each rank, each with count entries of type dtype (length nfrom; caller is responsible for PetscFree())
468: Level: developer
470: Notes:
471: This memory-scalable interface is an alternative to calling PetscGatherNumberOfMessages() and
472: PetscGatherMessageLengths(), possibly with a subsequent round of communication to send other data.
474: Basic data types as well as contiguous types are supported, but non-contiguous (e.g., strided) types are not.
476: References:
477: . 1. - Hoefler, Siebert and Lumsdaine, Scalable communication protocols for dynamic sparse data exchange, 2010
478: (the MPI_Ibarrier-based implementation uses the algorithm from this reference).
480: .seealso: PetscCommBuildTwoSided(), PetscCommBuildTwoSidedFReq(), PetscGatherNumberOfMessages(), PetscGatherMessageLengths()
481: @*/
482: PetscErrorCode PetscCommBuildTwoSidedF(MPI_Comm comm,PetscMPIInt count,MPI_Datatype dtype,PetscMPIInt nto,const PetscMPIInt *toranks,const void *todata,PetscMPIInt *nfrom,PetscMPIInt **fromranks,void *fromdata,PetscMPIInt ntags,
483: PetscErrorCode (*send)(MPI_Comm,const PetscMPIInt[],PetscMPIInt,PetscMPIInt,void*,MPI_Request[],void*),
484: PetscErrorCode (*recv)(MPI_Comm,const PetscMPIInt[],PetscMPIInt,void*,MPI_Request[],void*),void *ctx)
485: {
487: MPI_Request *toreqs,*fromreqs;
490: PetscCommBuildTwoSidedFReq(comm,count,dtype,nto,toranks,todata,nfrom,fromranks,fromdata,ntags,&toreqs,&fromreqs,send,recv,ctx);
491: MPI_Waitall(nto*ntags,toreqs,MPI_STATUSES_IGNORE);
492: MPI_Waitall(*nfrom*ntags,fromreqs,MPI_STATUSES_IGNORE);
493: PetscFree(toreqs);
494: PetscFree(fromreqs);
495: return(0);
496: }
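/* Illustrative callback sketch for PetscCommBuildTwoSidedF()/PetscCommBuildTwoSidedFReq() (not part
 * of mpits.c), assuming ntags == 1 and a rendezvous message that carries the payload length as a
 * single MPI_INT: the send callback posts the payload with the first provided tag and the recv
 * callback allocates space and posts the matching receive. ExampleCtx, ExampleSend and ExampleRecv
 * are hypothetical names; recvbufs is assumed to be preallocated with enough slots (e.g., the
 * communicator size), and a real application would keep the buffers in ctx so it can use and free
 * them after waiting on the returned requests. */
#if 0
typedef struct {
  PetscScalar **sendbufs;   /* sendbufs[i] is the payload destined for toranks[i] */
  PetscScalar **recvbufs;   /* filled by the recv callback, indexed by arrival order */
  PetscMPIInt   nrecv;
} ExampleCtx;

static PetscErrorCode ExampleSend(MPI_Comm comm,const PetscMPIInt tags[],PetscMPIInt i,PetscMPIInt rank,void *header,MPI_Request req[],void *ctx)
{
  ExampleCtx  *ex  = (ExampleCtx*)ctx;
  PetscMPIInt  len = *(PetscMPIInt*)header;   /* rendezvous data for this destination = payload length */

  MPI_Isend(ex->sendbufs[i],len,MPIU_SCALAR,rank,tags[0],comm,&req[0]);
  return 0;
}

static PetscErrorCode ExampleRecv(MPI_Comm comm,const PetscMPIInt tags[],PetscMPIInt rank,void *header,MPI_Request req[],void *ctx)
{
  ExampleCtx  *ex  = (ExampleCtx*)ctx;
  PetscMPIInt  len = *(PetscMPIInt*)header;   /* payload length announced by the sending rank */

  PetscMalloc1(len,&ex->recvbufs[ex->nrecv]);
  MPI_Irecv(ex->recvbufs[ex->nrecv],len,MPIU_SCALAR,rank,tags[0],comm,&req[0]);
  ex->nrecv++;
  return 0;
}
#endif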
498: /*@C
499: PetscCommBuildTwoSidedFReq - discovers communicating ranks given one-sided information, calling user-defined functions during rendezvous, returns requests
501: Collective on MPI_Comm
503: Input Arguments:
504: + comm - communicator
505: . count - number of entries to send/receive in initial rendezvous (must match on all ranks)
506: . dtype - datatype to send/receive from each rank (must match on all ranks)
507: . nto - number of ranks to send data to
508: . toranks - ranks to send to (array of length nto)
509: . todata - data to send to each rank (packed)
510: . ntags - number of tags needed by send/recv callbacks
511: . send - callback invoked on sending process when ready to send primary payload
512: . recv - callback invoked on receiving process after delivery of rendezvous message
513: - ctx - context for callbacks
515: Output Arguments:
516: + nfrom - number of ranks from which messages will be received
517: . fromranks - ranks from which messages will be received (length nfrom; caller should PetscFree())
518: . fromdata - packed data from each rank, each with count entries of type dtype (length nfrom; caller is responsible for PetscFree())
519: . toreqs - array of nto*ntags sender requests (caller must wait on these, then PetscFree())
520: - fromreqs - array of nfrom*ntags receiver requests (caller must wait on these, then PetscFree())
522: Level: developer
524: Notes:
525: This memory-scalable interface is an alternative to calling PetscGatherNumberOfMessages() and
526: PetscGatherMessageLengths(), possibly with a subsequent round of communication to send other data.
528: Basic data types as well as contiguous types are supported, but non-contiguous (e.g., strided) types are not.
530: References:
531: . 1. - Hoefler, Siebert and Lumsdaine, Scalable communication protocols for dynamic sparse data exchange, 2010
532: (the MPI_Ibarrier-based implementation uses the algorithm from this reference).
534: .seealso: PetscCommBuildTwoSided(), PetscCommBuildTwoSidedF(), PetscGatherNumberOfMessages(), PetscGatherMessageLengths()
535: @*/
536: PetscErrorCode PetscCommBuildTwoSidedFReq(MPI_Comm comm,PetscMPIInt count,MPI_Datatype dtype,PetscMPIInt nto,const PetscMPIInt *toranks,const void *todata,
537: PetscMPIInt *nfrom,PetscMPIInt **fromranks,void *fromdata,PetscMPIInt ntags,MPI_Request **toreqs,MPI_Request **fromreqs,
538: PetscErrorCode (*send)(MPI_Comm,const PetscMPIInt[],PetscMPIInt,PetscMPIInt,void*,MPI_Request[],void*),
539: PetscErrorCode (*recv)(MPI_Comm,const PetscMPIInt[],PetscMPIInt,void*,MPI_Request[],void*),void *ctx)
540: {
541: PetscErrorCode (*f)(MPI_Comm,PetscMPIInt,MPI_Datatype,PetscMPIInt,const PetscMPIInt[],const void*,
542: PetscMPIInt*,PetscMPIInt**,void*,PetscMPIInt,MPI_Request**,MPI_Request**,
543: PetscErrorCode (*send)(MPI_Comm,const PetscMPIInt[],PetscMPIInt,PetscMPIInt,void*,MPI_Request[],void*),
544: PetscErrorCode (*recv)(MPI_Comm,const PetscMPIInt[],PetscMPIInt,void*,MPI_Request[],void*),void *ctx);
545: PetscBuildTwoSidedType buildtype = PETSC_BUILDTWOSIDED_NOTSET;
546: PetscMPIInt i,size;
549: PetscSysInitializePackage();
550: MPI_Comm_size(comm,&size);
551: for (i=0; i<nto; i++) {
552: if (toranks[i] < 0 || size <= toranks[i]) SETERRQ3(comm,PETSC_ERR_ARG_OUTOFRANGE,"toranks[%d] %d not in comm size %d",i,toranks[i],size);
553: }
554: PetscLogEventBegin(PETSC_BuildTwoSidedF,0,0,0,0);
555: PetscCommBuildTwoSidedGetType(comm,&buildtype);
556: switch (buildtype) {
557: case PETSC_BUILDTWOSIDED_IBARRIER:
558: #if defined(PETSC_HAVE_MPI_IBARRIER) || defined(PETSC_HAVE_MPIX_IBARRIER)
559: f = PetscCommBuildTwoSidedFReq_Ibarrier;
560: #else
561: SETERRQ(comm,PETSC_ERR_PLIB,"MPI implementation does not provide MPI_Ibarrier (part of MPI-3)");
562: #endif
563: break;
564: case PETSC_BUILDTWOSIDED_ALLREDUCE:
565: case PETSC_BUILDTWOSIDED_REDSCATTER:
566: f = PetscCommBuildTwoSidedFReq_Reference;
567: break;
568: default: SETERRQ(comm,PETSC_ERR_PLIB,"Unknown method for building two-sided communication");
569: }
570: (*f)(comm,count,dtype,nto,toranks,todata,nfrom,fromranks,fromdata,ntags,toreqs,fromreqs,send,recv,ctx);
571: PetscLogEventEnd(PETSC_BuildTwoSidedF,0,0,0,0);
572: return(0);
573: }