Actual source code: mpits.c
petsc-3.7.7 2017-09-25
1: #include <petscsys.h> /*I "petscsys.h" I*/
3: PetscLogEvent PETSC_BuildTwoSided,PETSC_BuildTwoSidedF;
5: const char *const PetscBuildTwoSidedTypes[] = {
6: "ALLREDUCE",
7: "IBARRIER",
8: "REDSCATTER",
9: "PetscBuildTwoSidedType",
10: "PETSC_BUILDTWOSIDED_",
11: 0
12: };
14: static PetscBuildTwoSidedType _twosided_type = PETSC_BUILDTWOSIDED_NOTSET;
18: /*@
19: PetscCommBuildTwoSidedSetType - set algorithm to use when building two-sided communication
21: Logically Collective
23: Input Arguments:
24: + comm - communicator (the setting is currently global, so PETSC_COMM_WORLD is typical; see Note below)
25: - twosided - algorithm to use in subsequent calls to PetscCommBuildTwoSided()
27: Level: developer
29: Note:
30: This option is currently global, but could be made per-communicator.
32: .seealso: PetscCommBuildTwoSided(), PetscCommBuildTwoSidedGetType()
33: @*/
34: PetscErrorCode PetscCommBuildTwoSidedSetType(MPI_Comm comm,PetscBuildTwoSidedType twosided)
35: {
37: #if defined(PETSC_USE_DEBUG)
38: { /* no PetscObject is available here, so PetscValidLogicalCollectiveEnum() cannot be used */
39: PetscMPIInt ierr;
40: PetscMPIInt b1[2],b2[2];
41: b1[0] = -(PetscMPIInt)twosided;
42: b1[1] = (PetscMPIInt)twosided;
43: ierr = MPIU_Allreduce(b1,b2,2,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr);
44: if (-b2[0] != b2[1]) SETERRQ(comm,PETSC_ERR_ARG_WRONG,"Enum value must be same on all processes");
45: }
46: #endif
47: _twosided_type = twosided;
48: return(0);
49: }
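/*
   Usage sketch (illustrative only, not part of mpits.c): select the Ibarrier algorithm
   programmatically; equivalent to running with -build_twosided ibarrier.

     ierr = PetscCommBuildTwoSidedSetType(PETSC_COMM_WORLD,PETSC_BUILDTWOSIDED_IBARRIER);CHKERRQ(ierr);
*/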
53: /*@
54: PetscCommBuildTwoSidedGetType - get the algorithm that will be used when building two-sided communication
56: Logically Collective
58: Input Argument:
59: . comm - communicator on which to query the algorithm
60: Output Argument:
61: . twosided - algorithm that will be used by PetscCommBuildTwoSided()
62: Level: developer
64: .seealso: PetscCommBuildTwoSided(), PetscCommBuildTwoSidedSetType()
65: @*/
66: PetscErrorCode PetscCommBuildTwoSidedGetType(MPI_Comm comm,PetscBuildTwoSidedType *twosided)
67: {
71: *twosided = PETSC_BUILDTWOSIDED_NOTSET;
72: if (_twosided_type == PETSC_BUILDTWOSIDED_NOTSET) {
73: #if defined(PETSC_HAVE_MPI_IBARRIER)
74: # if defined(PETSC_HAVE_MPICH_CH3_SOCK) && !defined(PETSC_HAVE_MPICH_CH3_SOCK_FIXED_NBC_PROGRESS)
75: /* Deadlock in Ibarrier: http://trac.mpich.org/projects/mpich/ticket/1785 */
76: _twosided_type = PETSC_BUILDTWOSIDED_ALLREDUCE;
77: # else
78: _twosided_type = PETSC_BUILDTWOSIDED_IBARRIER;
79: # endif
80: #else
81: _twosided_type = PETSC_BUILDTWOSIDED_ALLREDUCE;
82: #endif
83: PetscOptionsGetEnum(NULL,NULL,"-build_twosided",PetscBuildTwoSidedTypes,(PetscEnum*)&_twosided_type,NULL);
84: }
85: *twosided = _twosided_type;
86: return(0);
87: }
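/*
   Usage sketch (illustrative only, not part of mpits.c): query which algorithm will be used.
   Indexing PetscBuildTwoSidedTypes[] by the returned enum value to print its name is an
   assumption of this sketch.

     PetscBuildTwoSidedType t;

     ierr = PetscCommBuildTwoSidedGetType(PETSC_COMM_WORLD,&t);CHKERRQ(ierr);
     ierr = PetscPrintf(PETSC_COMM_WORLD,"two-sided algorithm: %s\n",PetscBuildTwoSidedTypes[t]);CHKERRQ(ierr);
*/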
89: #if defined(PETSC_HAVE_MPI_IBARRIER) || defined(PETSC_HAVE_MPIX_IBARRIER)
93: static PetscErrorCode PetscCommBuildTwoSided_Ibarrier(MPI_Comm comm,PetscMPIInt count,MPI_Datatype dtype,PetscMPIInt nto,const PetscMPIInt *toranks,const void *todata,PetscMPIInt *nfrom,PetscMPIInt **fromranks,void *fromdata)
94: {
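/* Algorithm sketch (NBX-style dynamic sparse data exchange, cf. the Hoefler, Siebert and
   Lumsdaine reference in the PetscCommBuildTwoSided() man page):
   1) post a synchronous MPI_Issend() of the header data to every destination rank;
   2) loop: MPI_Iprobe() for incoming headers and MPI_Recv() them into segmented buffers;
   3) once all local synchronous sends have completed (MPI_Testall()), start a nonblocking barrier;
   4) keep probing until the barrier completes, at which point every message has been received. */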
96: PetscMPIInt nrecvs,tag,done,i;
97: MPI_Aint lb,unitbytes;
98: char *tdata;
99: MPI_Request *sendreqs,barrier;
100: PetscSegBuffer segrank,segdata;
101: PetscBool barrier_started;
104: PetscCommDuplicate(comm,&comm,&tag);
105: MPI_Type_get_extent(dtype,&lb,&unitbytes);
106: if (lb != 0) SETERRQ1(comm,PETSC_ERR_SUP,"Datatype with nonzero lower bound %ld\n",(long)lb);
107: tdata = (char*)todata;
108: PetscMalloc1(nto,&sendreqs);
109: for (i=0; i<nto; i++) {
110: MPI_Issend((void*)(tdata+count*unitbytes*i),count,dtype,toranks[i],tag,comm,sendreqs+i);
111: }
112: PetscSegBufferCreate(sizeof(PetscMPIInt),4,&segrank);
113: PetscSegBufferCreate(unitbytes,4*count,&segdata);
115: nrecvs = 0;
116: barrier = MPI_REQUEST_NULL;
117: /* MPICH-3.2 sometimes does not create a request in some "optimized" cases. This is arguably a standard violation,
118: * but we need to work around it. */
119: barrier_started = PETSC_FALSE;
120: for (done=0; !done; ) {
121: PetscMPIInt flag;
122: MPI_Status status;
123: MPI_Iprobe(MPI_ANY_SOURCE,tag,comm,&flag,&status);
124: if (flag) { /* incoming message */
125: PetscMPIInt *recvrank;
126: void *buf;
127: PetscSegBufferGet(segrank,1,&recvrank);
128: PetscSegBufferGet(segdata,count,&buf);
129: *recvrank = status.MPI_SOURCE;
130: MPI_Recv(buf,count,dtype,status.MPI_SOURCE,tag,comm,MPI_STATUS_IGNORE);
131: nrecvs++;
132: }
133: if (!barrier_started) {
134: PetscMPIInt sent,nsends;
135: PetscMPIIntCast(nto,&nsends);
136: MPI_Testall(nsends,sendreqs,&sent,MPI_STATUSES_IGNORE);
137: if (sent) {
138: #if defined(PETSC_HAVE_MPI_IBARRIER)
139: MPI_Ibarrier(comm,&barrier);
140: #elif defined(PETSC_HAVE_MPIX_IBARRIER)
141: MPIX_Ibarrier(comm,&barrier);
142: #endif
143: barrier_started = PETSC_TRUE;
144: PetscFree(sendreqs);
145: }
146: } else {
147: MPI_Test(&barrier,&done,MPI_STATUS_IGNORE);
148: }
149: }
150: *nfrom = nrecvs;
151: PetscSegBufferExtractAlloc(segrank,fromranks);
152: PetscSegBufferDestroy(&segrank);
153: PetscSegBufferExtractAlloc(segdata,fromdata);
154: PetscSegBufferDestroy(&segdata);
155: PetscCommDestroy(&comm);
156: return(0);
157: }
158: #endif
162: static PetscErrorCode PetscCommBuildTwoSided_Allreduce(MPI_Comm comm,PetscMPIInt count,MPI_Datatype dtype,PetscMPIInt nto,const PetscMPIInt *toranks,const void *todata,PetscMPIInt *nfrom,PetscMPIInt **fromranks,void *fromdata)
163: {
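/* Algorithm sketch: determine how many messages this rank will receive via
   PetscGatherNumberOfMessages() (a reduction over per-destination flags), then post
   MPI_Irecv() with MPI_ANY_SOURCE for each expected message and MPI_Isend() to each
   destination; the sender ranks are read back from the receive statuses. */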
165: PetscMPIInt size,*iflags,nrecvs,tag,*franks,i;
166: MPI_Aint lb,unitbytes;
167: char *tdata,*fdata;
168: MPI_Request *reqs,*sendreqs;
169: MPI_Status *statuses;
172: MPI_Comm_size(comm,&size);
173: PetscCalloc1(size,&iflags);
174: for (i=0; i<nto; i++) iflags[toranks[i]] = 1;
175: PetscGatherNumberOfMessages(comm,iflags,NULL,&nrecvs);
176: PetscFree(iflags);
178: PetscCommDuplicate(comm,&comm,&tag);
179: MPI_Type_get_extent(dtype,&lb,&unitbytes);
180: if (lb != 0) SETERRQ1(comm,PETSC_ERR_SUP,"Datatype with nonzero lower bound %ld\n",(long)lb);
181: PetscMalloc(nrecvs*count*unitbytes,&fdata);
182: tdata = (char*)todata;
183: PetscMalloc2(nto+nrecvs,&reqs,nto+nrecvs,&statuses);
184: sendreqs = reqs + nrecvs;
185: for (i=0; i<nrecvs; i++) {
186: MPI_Irecv((void*)(fdata+count*unitbytes*i),count,dtype,MPI_ANY_SOURCE,tag,comm,reqs+i);
187: }
188: for (i=0; i<nto; i++) {
189: MPI_Isend((void*)(tdata+count*unitbytes*i),count,dtype,toranks[i],tag,comm,sendreqs+i);
190: }
191: MPI_Waitall(nto+nrecvs,reqs,statuses);
192: PetscMalloc1(nrecvs,&franks);
193: for (i=0; i<nrecvs; i++) franks[i] = statuses[i].MPI_SOURCE;
194: PetscFree2(reqs,statuses);
195: PetscCommDestroy(&comm);
197: *nfrom = nrecvs;
198: *fromranks = franks;
199: *(void**)fromdata = fdata;
200: return(0);
201: }
203: #if defined(PETSC_HAVE_MPI_REDUCE_SCATTER_BLOCK)
206: static PetscErrorCode PetscCommBuildTwoSided_RedScatter(MPI_Comm comm,PetscMPIInt count,MPI_Datatype dtype,PetscMPIInt nto,const PetscMPIInt *toranks,const void *todata,PetscMPIInt *nfrom,PetscMPIInt **fromranks,void *fromdata)
207: {
209: PetscMPIInt size,*iflags,nrecvs,tag,*franks,i;
210: MPI_Aint lb,unitbytes;
211: char *tdata,*fdata;
212: MPI_Request *reqs,*sendreqs;
213: MPI_Status *statuses;
216: MPI_Comm_size(comm,&size);
217: PetscMalloc1(size,&iflags);
218: PetscMemzero(iflags,size*sizeof(*iflags));
219: for (i=0; i<nto; i++) iflags[toranks[i]] = 1;
220: MPI_Reduce_scatter_block(iflags,&nrecvs,1,MPI_INT,MPI_SUM,comm);
221: PetscFree(iflags);
223: PetscCommDuplicate(comm,&comm,&tag);
224: MPI_Type_get_extent(dtype,&lb,&unitbytes);
225: if (lb != 0) SETERRQ1(comm,PETSC_ERR_SUP,"Datatype with nonzero lower bound %ld\n",(long)lb);
226: PetscMalloc(nrecvs*count*unitbytes,&fdata);
227: tdata = (char*)todata;
228: PetscMalloc2(nto+nrecvs,&reqs,nto+nrecvs,&statuses);
229: sendreqs = reqs + nrecvs;
230: for (i=0; i<nrecvs; i++) {
231: MPI_Irecv((void*)(fdata+count*unitbytes*i),count,dtype,MPI_ANY_SOURCE,tag,comm,reqs+i);
232: }
233: for (i=0; i<nto; i++) {
234: MPI_Isend((void*)(tdata+count*unitbytes*i),count,dtype,toranks[i],tag,comm,sendreqs+i);
235: }
236: MPI_Waitall(nto+nrecvs,reqs,statuses);
237: PetscMalloc1(nrecvs,&franks);
238: for (i=0; i<nrecvs; i++) franks[i] = statuses[i].MPI_SOURCE;
239: PetscFree2(reqs,statuses);
240: PetscCommDestroy(&comm);
242: *nfrom = nrecvs;
243: *fromranks = franks;
244: *(void**)fromdata = fdata;
245: return(0);
246: }
247: #endif
251: /*@C
252: PetscCommBuildTwoSided - discovers communicating ranks given one-sided information, moving constant-sized data in the process (often message lengths)
254: Collective on MPI_Comm
256: Input Arguments:
257: + comm - communicator
258: . count - number of entries to send/receive (must match on all ranks)
259: . dtype - datatype to send/receive from each rank (must match on all ranks)
260: . nto - number of ranks to send data to
261: . toranks - ranks to send to (array of length nto)
262: - todata - data to send to each rank (packed)
264: Output Arguments:
265: + nfrom - number of ranks from which messages will be received
266: . fromranks - ranks from which messages will be received (length nfrom; caller should PetscFree())
267: - fromdata - packed data received from each rank, each with count entries of type dtype (length nfrom; caller responsible for PetscFree())
269: Level: developer
271: Options Database Keys:
272: . -build_twosided <allreduce|ibarrier|redscatter> - algorithm to set up two-sided communication
274: Notes:
275: This memory-scalable interface is an alternative to calling PetscGatherNumberOfMessages() and
276: PetscGatherMessageLengths(), possibly with a subsequent round of communication to send other constant-size data.
278: Basic data types as well as contiguous types are supported, but non-contiguous (e.g., strided) types are not.
280: References:
281: . 1. - Hoefler, Siebert and Lumsdaine, Scalable communication protocols for dynamic sparse data exchange, 2010.
282: (The MPI_Ibarrier-based implementation uses the algorithm from this paper.)
284: .seealso: PetscGatherNumberOfMessages(), PetscGatherMessageLengths()
285: @*/
286: PetscErrorCode PetscCommBuildTwoSided(MPI_Comm comm,PetscMPIInt count,MPI_Datatype dtype,PetscMPIInt nto,const PetscMPIInt *toranks,const void *todata,PetscMPIInt *nfrom,PetscMPIInt **fromranks,void *fromdata)
287: {
288: PetscErrorCode ierr;
289: PetscBuildTwoSidedType buildtype = PETSC_BUILDTWOSIDED_NOTSET;
292: PetscSysInitializePackage();
293: PetscLogEventBegin(PETSC_BuildTwoSided,0,0,0,0);
294: PetscCommBuildTwoSidedGetType(comm,&buildtype);
295: switch (buildtype) {
296: case PETSC_BUILDTWOSIDED_IBARRIER:
297: #if defined(PETSC_HAVE_MPI_IBARRIER) || defined(PETSC_HAVE_MPIX_IBARRIER)
298: PetscCommBuildTwoSided_Ibarrier(comm,count,dtype,nto,toranks,todata,nfrom,fromranks,fromdata);
299: #else
300: SETERRQ(comm,PETSC_ERR_PLIB,"MPI implementation does not provide MPI_Ibarrier (part of MPI-3)");
301: #endif
302: break;
303: case PETSC_BUILDTWOSIDED_ALLREDUCE:
304: PetscCommBuildTwoSided_Allreduce(comm,count,dtype,nto,toranks,todata,nfrom,fromranks,fromdata);
305: break;
306: case PETSC_BUILDTWOSIDED_REDSCATTER:
307: #if defined(PETSC_HAVE_MPI_REDUCE_SCATTER_BLOCK)
308: PetscCommBuildTwoSided_RedScatter(comm,count,dtype,nto,toranks,todata,nfrom,fromranks,fromdata);
309: #else
310: SETERRQ(comm,PETSC_ERR_PLIB,"MPI implementation does not provide MPI_Reduce_scatter_block (part of MPI-2.2)");
311: #endif
312: break;
313: default: SETERRQ(comm,PETSC_ERR_PLIB,"Unknown method for building two-sided communication");
314: }
315: PetscLogEventEnd(PETSC_BuildTwoSided,0,0,0,0);
316: return(0);
317: }
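/*
   Usage sketch (illustrative only, not part of mpits.c): each rank knows which ranks it
   will send to and how long each message will be, and uses PetscCommBuildTwoSided() to
   discover its senders and receive the lengths. The arrays tolens/fromlens and the
   fill-in step are hypothetical caller code.

     PetscErrorCode ierr;
     PetscMPIInt    nto,*toranks,*tolens,nfrom,*fromranks,*fromlens;

     ... set nto, allocate and fill toranks[] and tolens[] ...

     ierr = PetscCommBuildTwoSided(comm,1,MPI_INT,nto,toranks,tolens,
                                   &nfrom,&fromranks,&fromlens);CHKERRQ(ierr);

     ... fromranks[i] will send this rank a message of length fromlens[i] ...

     ierr = PetscFree(fromranks);CHKERRQ(ierr);
     ierr = PetscFree(fromlens);CHKERRQ(ierr);
*/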
321: static PetscErrorCode PetscCommBuildTwoSidedFReq_Reference(MPI_Comm comm,PetscMPIInt count,MPI_Datatype dtype,PetscMPIInt nto,const PetscMPIInt *toranks,const void *todata,
322: PetscMPIInt *nfrom,PetscMPIInt **fromranks,void *fromdata,PetscMPIInt ntags,MPI_Request **toreqs,MPI_Request **fromreqs,
323: PetscErrorCode (*send)(MPI_Comm,const PetscMPIInt[],PetscMPIInt,PetscMPIInt,void*,MPI_Request[],void*),
324: PetscErrorCode (*recv)(MPI_Comm,const PetscMPIInt[],PetscMPIInt,void*,MPI_Request[],void*),void *ctx)
325: {
327: PetscMPIInt i,*tag;
328: MPI_Aint lb,unitbytes;
329: MPI_Request *sendreq,*recvreq;
332: PetscMalloc1(ntags,&tag);
333: if (ntags > 0) {
334: PetscCommDuplicate(comm,&comm,&tag[0]);
335: }
336: for (i=1; i<ntags; i++) {
337: PetscCommGetNewTag(comm,&tag[i]);
338: }
340: /* Perform complete initial rendezvous */
341: PetscCommBuildTwoSided(comm,count,dtype,nto,toranks,todata,nfrom,fromranks,fromdata);
343: PetscMalloc1(nto*ntags,&sendreq);
344: PetscMalloc1(*nfrom*ntags,&recvreq);
346: MPI_Type_get_extent(dtype,&lb,&unitbytes);
347: if (lb != 0) SETERRQ1(comm,PETSC_ERR_SUP,"Datatype with nonzero lower bound %ld\n",(long)lb);
348: for (i=0; i<nto; i++) {
349: PetscMPIInt k;
350: for (k=0; k<ntags; k++) sendreq[i*ntags+k] = MPI_REQUEST_NULL;
351: (*send)(comm,tag,i,toranks[i],((char*)todata)+count*unitbytes*i,sendreq+i*ntags,ctx);
352: }
353: for (i=0; i<*nfrom; i++) {
354: void *header = (*(char**)fromdata) + count*unitbytes*i;
355: PetscMPIInt k;
356: for (k=0; k<ntags; k++) recvreq[i*ntags+k] = MPI_REQUEST_NULL;
357: (*recv)(comm,tag,(*fromranks)[i],header,recvreq+i*ntags,ctx);
358: }
359: PetscFree(tag);
360: PetscCommDestroy(&comm);
361: *toreqs = sendreq;
362: *fromreqs = recvreq;
363: return(0);
364: }
366: #if defined(PETSC_HAVE_MPI_IBARRIER) || defined(PETSC_HAVE_MPIX_IBARRIER)
370: static PetscErrorCode PetscCommBuildTwoSidedFReq_Ibarrier(MPI_Comm comm,PetscMPIInt count,MPI_Datatype dtype,PetscMPIInt nto,const PetscMPIInt *toranks,const void *todata,
371: PetscMPIInt *nfrom,PetscMPIInt **fromranks,void *fromdata,PetscMPIInt ntags,MPI_Request **toreqs,MPI_Request **fromreqs,
372: PetscErrorCode (*send)(MPI_Comm,const PetscMPIInt[],PetscMPIInt,PetscMPIInt,void*,MPI_Request[],void*),
373: PetscErrorCode (*recv)(MPI_Comm,const PetscMPIInt[],PetscMPIInt,void*,MPI_Request[],void*),void *ctx)
374: {
376: PetscMPIInt nrecvs,tag,*tags,done,i;
377: MPI_Aint lb,unitbytes;
378: char *tdata;
379: MPI_Request *sendreqs,*usendreqs,*req,barrier;
380: PetscSegBuffer segrank,segdata,segreq;
381: PetscBool barrier_started;
384: PetscCommDuplicate(comm,&comm,&tag);
385: PetscMalloc1(ntags,&tags);
386: for (i=0; i<ntags; i++) {
387: PetscCommGetNewTag(comm,&tags[i]);
388: }
389: MPI_Type_get_extent(dtype,&lb,&unitbytes);
390: if (lb != 0) SETERRQ1(comm,PETSC_ERR_SUP,"Datatype with nonzero lower bound %ld\n",(long)lb);
391: tdata = (char*)todata;
392: PetscMalloc1(nto,&sendreqs);
393: PetscMalloc1(nto*ntags,&usendreqs);
394: /* Post synchronous sends */
395: for (i=0; i<nto; i++) {
396: MPI_Issend((void*)(tdata+count*unitbytes*i),count,dtype,toranks[i],tag,comm,sendreqs+i);
397: }
398: /* Post actual payloads. These are typically larger messages. Hopefully sending these later does not slow down the
399: * synchronous messages above. */
400: for (i=0; i<nto; i++) {
401: PetscMPIInt k;
402: for (k=0; k<ntags; k++) usendreqs[i*ntags+k] = MPI_REQUEST_NULL;
403: (*send)(comm,tags,i,toranks[i],tdata+count*unitbytes*i,usendreqs+i*ntags,ctx);
404: }
406: PetscSegBufferCreate(sizeof(PetscMPIInt),4,&segrank);
407: PetscSegBufferCreate(unitbytes,4*count,&segdata);
408: PetscSegBufferCreate(sizeof(MPI_Request),4,&segreq);
410: nrecvs = 0;
411: barrier = MPI_REQUEST_NULL;
412: /* MPICH-3.2 sometimes does not create a request in some "optimized" cases. This is arguably a standard violation,
413: * but we need to work around it. */
414: barrier_started = PETSC_FALSE;
415: for (done=0; !done; ) {
416: PetscMPIInt flag;
417: MPI_Status status;
418: MPI_Iprobe(MPI_ANY_SOURCE,tag,comm,&flag,&status);
419: if (flag) { /* incoming message */
420: PetscMPIInt *recvrank,k;
421: void *buf;
422: PetscSegBufferGet(segrank,1,&recvrank);
423: PetscSegBufferGet(segdata,count,&buf);
424: *recvrank = status.MPI_SOURCE;
425: MPI_Recv(buf,count,dtype,status.MPI_SOURCE,tag,comm,MPI_STATUS_IGNORE);
426: PetscSegBufferGet(segreq,ntags,&req);
427: for (k=0; k<ntags; k++) req[k] = MPI_REQUEST_NULL;
428: (*recv)(comm,tags,status.MPI_SOURCE,buf,req,ctx);
429: nrecvs++;
430: }
431: if (!barrier_started) {
432: PetscMPIInt sent,nsends;
433: PetscMPIIntCast(nto,&nsends);
434: MPI_Testall(nsends,sendreqs,&sent,MPI_STATUSES_IGNORE);
435: if (sent) {
436: #if defined(PETSC_HAVE_MPI_IBARRIER)
437: MPI_Ibarrier(comm,&barrier);
438: #elif defined(PETSC_HAVE_MPIX_IBARRIER)
439: MPIX_Ibarrier(comm,&barrier);
440: #endif
441: barrier_started = PETSC_TRUE;
442: }
443: } else {
444: MPI_Test(&barrier,&done,MPI_STATUS_IGNORE);
445: }
446: }
447: *nfrom = nrecvs;
448: PetscSegBufferExtractAlloc(segrank,fromranks);
449: PetscSegBufferDestroy(&segrank);
450: PetscSegBufferExtractAlloc(segdata,fromdata);
451: PetscSegBufferDestroy(&segdata);
452: *toreqs = usendreqs;
453: PetscSegBufferExtractAlloc(segreq,fromreqs);
454: PetscSegBufferDestroy(&segreq);
455: PetscFree(sendreqs);
456: PetscFree(tags);
457: PetscCommDestroy(&comm);
458: return(0);
459: }
460: #endif
464: /*@C
465: PetscCommBuildTwoSidedF - discovers communicating ranks given one-sided information, calling user-defined functions during rendezvous
467: Collective on MPI_Comm
469: Input Arguments:
470: + comm - communicator
471: . count - number of entries to send/receive in initial rendezvous (must match on all ranks)
472: . dtype - datatype to send/receive from each rank (must match on all ranks)
473: . nto - number of ranks to send data to
474: . toranks - ranks to send to (array of length nto)
475: . todata - data to send to each rank (packed)
476: . ntags - number of tags needed by send/recv callbacks
477: . send - callback invoked on sending process when ready to send primary payload
478: . recv - callback invoked on receiving process after delivery of rendezvous message
479: - ctx - context for callbacks
481: Output Arguments:
482: + nfrom - number of ranks from which messages will be received
483: . fromranks - ranks from which messages will be received (length nfrom; caller should PetscFree())
484: - fromdata - packed data received from each rank, each with count entries of type dtype (length nfrom; caller responsible for PetscFree())
486: Level: developer
488: Notes:
489: This memory-scalable interface is an alternative to calling PetscGatherNumberOfMessages() and
490: PetscGatherMessageLengths(), possibly with a subsequent round of communication to send other data.
492: Basic data types as well as contiguous types are supported, but non-contiguous (e.g., strided) types are not.
494: References:
495: . 1. - Hoefler, Siebert and Lumsdaine, Scalable communication protocols for dynamic sparse data exchange, 2010.
496: (The MPI_Ibarrier-based implementation uses the algorithm from this paper.)
498: .seealso: PetscCommBuildTwoSided(), PetscCommBuildTwoSidedFReq(), PetscGatherNumberOfMessages(), PetscGatherMessageLengths()
499: @*/
500: PetscErrorCode PetscCommBuildTwoSidedF(MPI_Comm comm,PetscMPIInt count,MPI_Datatype dtype,PetscMPIInt nto,const PetscMPIInt *toranks,const void *todata,PetscMPIInt *nfrom,PetscMPIInt **fromranks,void *fromdata,PetscMPIInt ntags,
501: PetscErrorCode (*send)(MPI_Comm,const PetscMPIInt[],PetscMPIInt,PetscMPIInt,void*,MPI_Request[],void*),
502: PetscErrorCode (*recv)(MPI_Comm,const PetscMPIInt[],PetscMPIInt,void*,MPI_Request[],void*),void *ctx)
503: {
505: MPI_Request *toreqs,*fromreqs;
508: PetscCommBuildTwoSidedFReq(comm,count,dtype,nto,toranks,todata,nfrom,fromranks,fromdata,ntags,&toreqs,&fromreqs,send,recv,ctx);
509: MPI_Waitall(nto*ntags,toreqs,MPI_STATUSES_IGNORE);
510: MPI_Waitall(*nfrom*ntags,fromreqs,MPI_STATUSES_IGNORE);
511: PetscFree(toreqs);
512: PetscFree(fromreqs);
513: return(0);
514: }
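/*
   Callback sketch for PetscCommBuildTwoSidedF() (illustrative only, not part of mpits.c).
   Assumptions made for this sketch: ntags == 1, the rendezvous header is a single
   PetscMPIInt holding the payload length (i.e. count == 1 and dtype == MPI_INT), and the
   hypothetical context MyCtx holds one send buffer per destination and one receive
   buffer slot per rank in the communicator.

     typedef struct {
       PetscScalar **sendbufs;
       PetscScalar **recvbufs;
     } MyCtx;

     static PetscErrorCode MySend(MPI_Comm comm,const PetscMPIInt tags[],PetscMPIInt i,PetscMPIInt rank,void *header,MPI_Request req[],void *ctx)
     {
       MyCtx          *c   = (MyCtx*)ctx;
       PetscMPIInt     len = *(PetscMPIInt*)header;
       PetscErrorCode  ierr;

       ierr = MPI_Isend(c->sendbufs[i],len,MPIU_SCALAR,rank,tags[0],comm,&req[0]);CHKERRQ(ierr);
       return 0;
     }

     static PetscErrorCode MyRecv(MPI_Comm comm,const PetscMPIInt tags[],PetscMPIInt rank,void *header,MPI_Request req[],void *ctx)
     {
       MyCtx          *c   = (MyCtx*)ctx;
       PetscMPIInt     len = *(PetscMPIInt*)header;
       PetscErrorCode  ierr;

       ierr = PetscMalloc1(len,&c->recvbufs[rank]);CHKERRQ(ierr);
       ierr = MPI_Irecv(c->recvbufs[rank],len,MPIU_SCALAR,rank,tags[0],comm,&req[0]);CHKERRQ(ierr);
       return 0;
     }

   A call would then look like

     ierr = PetscCommBuildTwoSidedF(comm,1,MPI_INT,nto,toranks,tolens,&nfrom,&fromranks,&fromlens,1,MySend,MyRecv,&myctx);CHKERRQ(ierr);
*/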
518: /*@C
519: PetscCommBuildTwoSidedFReq - discovers communicating ranks given one-sided information, calling user-defined functions during rendezvous, returns requests
521: Collective on MPI_Comm
523: Input Arguments:
524: + comm - communicator
525: . count - number of entries to send/receive in initial rendezvous (must match on all ranks)
526: . dtype - datatype to send/receive from each rank (must match on all ranks)
527: . nto - number of ranks to send data to
528: . toranks - ranks to send to (array of length nto)
529: . todata - data to send to each rank (packed)
530: . ntags - number of tags needed by send/recv callbacks
531: . send - callback invoked on sending process when ready to send primary payload
532: . recv - callback invoked on receiving process after delivery of rendezvous message
533: - ctx - context for callbacks
535: Output Arguments:
536: + nfrom - number of ranks from which messages will be received
537: . fromranks - ranks from which messages will be received (length nfrom; caller should PetscFree())
538: . fromdata - packed data received from each rank, each with count entries of type dtype (length nfrom; caller responsible for PetscFree())
539: . toreqs - array of nto*ntags sender requests (caller must wait on these, then PetscFree())
540: - fromreqs - array of nfrom*ntags receiver requests (caller must wait on these, then PetscFree())
542: Level: developer
544: Notes:
545: This memory-scalable interface is an alternative to calling PetscGatherNumberOfMessages() and
546: PetscGatherMessageLengths(), possibly with a subsequent round of communication to send other data.
548: Basic data types as well as contiguous types are supported, but non-contiguous (e.g., strided) types are not.
550: References:
551: . 1. - Hoefler, Siebert and Lumsdaine, Scalable communication protocols for dynamic sparse data exchange, 2010.
552: (The MPI_Ibarrier-based implementation uses the algorithm from this paper.)
554: .seealso: PetscCommBuildTwoSided(), PetscCommBuildTwoSidedF(), PetscGatherNumberOfMessages(), PetscGatherMessageLengths()
555: @*/
556: PetscErrorCode PetscCommBuildTwoSidedFReq(MPI_Comm comm,PetscMPIInt count,MPI_Datatype dtype,PetscMPIInt nto,const PetscMPIInt *toranks,const void *todata,
557: PetscMPIInt *nfrom,PetscMPIInt **fromranks,void *fromdata,PetscMPIInt ntags,MPI_Request **toreqs,MPI_Request **fromreqs,
558: PetscErrorCode (*send)(MPI_Comm,const PetscMPIInt[],PetscMPIInt,PetscMPIInt,void*,MPI_Request[],void*),
559: PetscErrorCode (*recv)(MPI_Comm,const PetscMPIInt[],PetscMPIInt,void*,MPI_Request[],void*),void *ctx)
560: {
561: PetscErrorCode ierr,(*f)(MPI_Comm,PetscMPIInt,MPI_Datatype,PetscMPIInt,const PetscMPIInt[],const void*,
562: PetscMPIInt*,PetscMPIInt**,void*,PetscMPIInt,MPI_Request**,MPI_Request**,
563: PetscErrorCode (*send)(MPI_Comm,const PetscMPIInt[],PetscMPIInt,PetscMPIInt,void*,MPI_Request[],void*),
564: PetscErrorCode (*recv)(MPI_Comm,const PetscMPIInt[],PetscMPIInt,void*,MPI_Request[],void*),void *ctx);
565: PetscBuildTwoSidedType buildtype = PETSC_BUILDTWOSIDED_NOTSET;
566: PetscMPIInt i,size;
569: PetscSysInitializePackage();
570: MPI_Comm_size(comm,&size);
571: for (i=0; i<nto; i++) {
572: if (toranks[i] < 0 || size <= toranks[i]) SETERRQ3(comm,PETSC_ERR_ARG_OUTOFRANGE,"toranks[%d] %d not in comm size %d",i,toranks[i],size);
573: }
574: PetscLogEventBegin(PETSC_BuildTwoSidedF,0,0,0,0);
575: PetscCommBuildTwoSidedGetType(comm,&buildtype);
576: switch (buildtype) {
577: case PETSC_BUILDTWOSIDED_IBARRIER:
578: #if defined(PETSC_HAVE_MPI_IBARRIER) || defined(PETSC_HAVE_MPIX_IBARRIER)
579: f = PetscCommBuildTwoSidedFReq_Ibarrier;
580: #else
581: SETERRQ(comm,PETSC_ERR_PLIB,"MPI implementation does not provide MPI_Ibarrier (part of MPI-3)");
582: #endif
583: break;
584: case PETSC_BUILDTWOSIDED_ALLREDUCE:
585: case PETSC_BUILDTWOSIDED_REDSCATTER:
586: f = PetscCommBuildTwoSidedFReq_Reference;
587: break;
588: default: SETERRQ(comm,PETSC_ERR_PLIB,"Unknown method for building two-sided communication");
589: }
590: (*f)(comm,count,dtype,nto,toranks,todata,nfrom,fromranks,fromdata,ntags,toreqs,fromreqs,send,recv,ctx);
591: PetscLogEventEnd(PETSC_BuildTwoSidedF,0,0,0,0);
592: return(0);
593: }
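/*
   Usage sketch for PetscCommBuildTwoSidedFReq() (illustrative only, not part of mpits.c),
   reusing the hypothetical MySend/MyRecv callbacks and tolens/fromlens/myctx names from the
   sketch after PetscCommBuildTwoSidedF() above. The caller owns the returned request arrays
   and must complete and free them, exactly as PetscCommBuildTwoSidedF() does internally:

     MPI_Request *toreqs,*fromreqs;

     ierr = PetscCommBuildTwoSidedFReq(comm,1,MPI_INT,nto,toranks,tolens,
                                       &nfrom,&fromranks,&fromlens,1,&toreqs,&fromreqs,
                                       MySend,MyRecv,&myctx);CHKERRQ(ierr);

     ... other work can be overlapped here ...

     ierr = MPI_Waitall(nto*1,toreqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
     ierr = MPI_Waitall(nfrom*1,fromreqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
     ierr = PetscFree(toreqs);CHKERRQ(ierr);
     ierr = PetscFree(fromreqs);CHKERRQ(ierr);
*/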