Actual source code: mpits.c
petsc-3.10.5 2019-03-28
1: #include <petscsys.h>
2: #include <petsc/private/petscimpl.h>
4: PetscLogEvent PETSC_BuildTwoSided;
5: PetscLogEvent PETSC_BuildTwoSidedF;
7: const char *const PetscBuildTwoSidedTypes[] = {
8: "ALLREDUCE",
9: "IBARRIER",
10: "REDSCATTER",
11: "PetscBuildTwoSidedType",
12: "PETSC_BUILDTWOSIDED_",
13: 0
14: };
16: static PetscBuildTwoSidedType _twosided_type = PETSC_BUILDTWOSIDED_NOTSET;
18: /*@
19: PetscCommBuildTwoSidedSetType - set algorithm to use when building two-sided communication
21: Logically Collective
23: Input Arguments:
24: + comm - PETSC_COMM_WORLD
25: - twosided - algorithm to use in subsequent calls to PetscCommBuildTwoSided()
27: Level: developer
29: Note:
30: This option is currently global, but could be made per-communicator.
32: .seealso: PetscCommBuildTwoSided(), PetscCommBuildTwoSidedGetType()
33: @*/
34: PetscErrorCode PetscCommBuildTwoSidedSetType(MPI_Comm comm,PetscBuildTwoSidedType twosided)
35: {
37: #if defined(PETSC_USE_DEBUG)
39: PetscMPIInt ierr;
40: PetscMPIInt b1[2],b2[2];
41: b1[0] = -(PetscMPIInt)twosided;
42: b1[1] = (PetscMPIInt)twosided;
43: MPIU_Allreduce(b1,b2,2,MPI_INT,MPI_MAX,comm);
44: if (-b2[0] != b2[1]) SETERRQ(comm,PETSC_ERR_ARG_WRONG,"Enum value must be same on all processes");
45: }
46: #endif
47: _twosided_type = twosided;
48: return(0);
49: }
51: /*@
52: PetscCommBuildTwoSidedGetType - set algorithm to use when building two-sided communication
54: Logically Collective
56: Output Arguments:
57: + comm - communicator on which to query algorithm
58: - twosided - algorithm to use for PetscCommBuildTwoSided()
60: Level: developer
62: .seealso: PetscCommBuildTwoSided(), PetscCommBuildTwoSidedSetType()
63: @*/
64: PetscErrorCode PetscCommBuildTwoSidedGetType(MPI_Comm comm,PetscBuildTwoSidedType *twosided)
65: {
69: *twosided = PETSC_BUILDTWOSIDED_NOTSET;
70: if (_twosided_type == PETSC_BUILDTWOSIDED_NOTSET) {
71: #if defined(PETSC_HAVE_MPI_IBARRIER)
72: # if defined(PETSC_HAVE_MPICH_CH3_SOCK) && !defined(PETSC_HAVE_MPICH_CH3_SOCK_FIXED_NBC_PROGRESS)
73: /* Deadlock in Ibarrier: http://trac.mpich.org/projects/mpich/ticket/1785 */
74: _twosided_type = PETSC_BUILDTWOSIDED_ALLREDUCE;
75: # else
76: _twosided_type = PETSC_BUILDTWOSIDED_IBARRIER;
77: # endif
78: #else
79: _twosided_type = PETSC_BUILDTWOSIDED_ALLREDUCE;
80: #endif
81: PetscOptionsGetEnum(NULL,NULL,"-build_twosided",PetscBuildTwoSidedTypes,(PetscEnum*)&_twosided_type,NULL);
82: }
83: *twosided = _twosided_type;
84: return(0);
85: }
87: #if defined(PETSC_HAVE_MPI_IBARRIER) || defined(PETSC_HAVE_MPIX_IBARRIER)
89: static PetscErrorCode PetscCommBuildTwoSided_Ibarrier(MPI_Comm comm,PetscMPIInt count,MPI_Datatype dtype,PetscMPIInt nto,const PetscMPIInt *toranks,const void *todata,PetscMPIInt *nfrom,PetscMPIInt **fromranks,void *fromdata)
90: {
92: PetscMPIInt nrecvs,tag,done,i;
93: MPI_Aint lb,unitbytes;
94: char *tdata;
95: MPI_Request *sendreqs,barrier;
96: PetscSegBuffer segrank,segdata;
97: PetscBool barrier_started;
100: PetscCommDuplicate(comm,&comm,&tag);
101: MPI_Type_get_extent(dtype,&lb,&unitbytes);
102: if (lb != 0) SETERRQ1(comm,PETSC_ERR_SUP,"Datatype with nonzero lower bound %ld\n",(long)lb);
103: tdata = (char*)todata;
104: PetscMalloc1(nto,&sendreqs);
105: for (i=0; i<nto; i++) {
106: MPI_Issend((void*)(tdata+count*unitbytes*i),count,dtype,toranks[i],tag,comm,sendreqs+i);
107: }
108: PetscSegBufferCreate(sizeof(PetscMPIInt),4,&segrank);
109: PetscSegBufferCreate(unitbytes,4*count,&segdata);
111: nrecvs = 0;
112: barrier = MPI_REQUEST_NULL;
113: /* MPICH-3.2 sometimes does not create a request in some "optimized" cases. This is arguably a standard violation,
114: * but we need to work around it. */
115: barrier_started = PETSC_FALSE;
116: for (done=0; !done; ) {
117: PetscMPIInt flag;
118: MPI_Status status;
119: MPI_Iprobe(MPI_ANY_SOURCE,tag,comm,&flag,&status);
120: if (flag) { /* incoming message */
121: PetscMPIInt *recvrank;
122: void *buf;
123: PetscSegBufferGet(segrank,1,&recvrank);
124: PetscSegBufferGet(segdata,count,&buf);
125: *recvrank = status.MPI_SOURCE;
126: MPI_Recv(buf,count,dtype,status.MPI_SOURCE,tag,comm,MPI_STATUS_IGNORE);
127: nrecvs++;
128: }
129: if (!barrier_started) {
130: PetscMPIInt sent,nsends;
131: PetscMPIIntCast(nto,&nsends);
132: MPI_Testall(nsends,sendreqs,&sent,MPI_STATUSES_IGNORE);
133: if (sent) {
134: #if defined(PETSC_HAVE_MPI_IBARRIER)
135: MPI_Ibarrier(comm,&barrier);
136: #elif defined(PETSC_HAVE_MPIX_IBARRIER)
137: MPIX_Ibarrier(comm,&barrier);
138: #endif
139: barrier_started = PETSC_TRUE;
140: PetscFree(sendreqs);
141: }
142: } else {
143: MPI_Test(&barrier,&done,MPI_STATUS_IGNORE);
144: }
145: }
146: *nfrom = nrecvs;
147: PetscSegBufferExtractAlloc(segrank,fromranks);
148: PetscSegBufferDestroy(&segrank);
149: PetscSegBufferExtractAlloc(segdata,fromdata);
150: PetscSegBufferDestroy(&segdata);
151: PetscCommDestroy(&comm);
152: return(0);
153: }
154: #endif
156: static PetscErrorCode PetscCommBuildTwoSided_Allreduce(MPI_Comm comm,PetscMPIInt count,MPI_Datatype dtype,PetscMPIInt nto,const PetscMPIInt *toranks,const void *todata,PetscMPIInt *nfrom,PetscMPIInt **fromranks,void *fromdata)
157: {
159: PetscMPIInt size,*iflags,nrecvs,tag,*franks,i;
160: MPI_Aint lb,unitbytes;
161: char *tdata,*fdata;
162: MPI_Request *reqs,*sendreqs;
163: MPI_Status *statuses;
166: MPI_Comm_size(comm,&size);
167: PetscCalloc1(size,&iflags);
168: for (i=0; i<nto; i++) iflags[toranks[i]] = 1;
169: PetscGatherNumberOfMessages(comm,iflags,NULL,&nrecvs);
170: PetscFree(iflags);
172: PetscCommDuplicate(comm,&comm,&tag);
173: MPI_Type_get_extent(dtype,&lb,&unitbytes);
174: if (lb != 0) SETERRQ1(comm,PETSC_ERR_SUP,"Datatype with nonzero lower bound %ld\n",(long)lb);
175: PetscMalloc(nrecvs*count*unitbytes,&fdata);
176: tdata = (char*)todata;
177: PetscMalloc2(nto+nrecvs,&reqs,nto+nrecvs,&statuses);
178: sendreqs = reqs + nrecvs;
179: for (i=0; i<nrecvs; i++) {
180: MPI_Irecv((void*)(fdata+count*unitbytes*i),count,dtype,MPI_ANY_SOURCE,tag,comm,reqs+i);
181: }
182: for (i=0; i<nto; i++) {
183: MPI_Isend((void*)(tdata+count*unitbytes*i),count,dtype,toranks[i],tag,comm,sendreqs+i);
184: }
185: MPI_Waitall(nto+nrecvs,reqs,statuses);
186: PetscMalloc1(nrecvs,&franks);
187: for (i=0; i<nrecvs; i++) franks[i] = statuses[i].MPI_SOURCE;
188: PetscFree2(reqs,statuses);
189: PetscCommDestroy(&comm);
191: *nfrom = nrecvs;
192: *fromranks = franks;
193: *(void**)fromdata = fdata;
194: return(0);
195: }
197: #if defined(PETSC_HAVE_MPI_REDUCE_SCATTER_BLOCK)
198: static PetscErrorCode PetscCommBuildTwoSided_RedScatter(MPI_Comm comm,PetscMPIInt count,MPI_Datatype dtype,PetscMPIInt nto,const PetscMPIInt *toranks,const void *todata,PetscMPIInt *nfrom,PetscMPIInt **fromranks,void *fromdata)
199: {
201: PetscMPIInt size,*iflags,nrecvs,tag,*franks,i;
202: MPI_Aint lb,unitbytes;
203: char *tdata,*fdata;
204: MPI_Request *reqs,*sendreqs;
205: MPI_Status *statuses;
208: MPI_Comm_size(comm,&size);
209: PetscMalloc1(size,&iflags);
210: PetscMemzero(iflags,size*sizeof(*iflags));
211: for (i=0; i<nto; i++) iflags[toranks[i]] = 1;
212: MPI_Reduce_scatter_block(iflags,&nrecvs,1,MPI_INT,MPI_SUM,comm);
213: PetscFree(iflags);
215: PetscCommDuplicate(comm,&comm,&tag);
216: MPI_Type_get_extent(dtype,&lb,&unitbytes);
217: if (lb != 0) SETERRQ1(comm,PETSC_ERR_SUP,"Datatype with nonzero lower bound %ld\n",(long)lb);
218: PetscMalloc(nrecvs*count*unitbytes,&fdata);
219: tdata = (char*)todata;
220: PetscMalloc2(nto+nrecvs,&reqs,nto+nrecvs,&statuses);
221: sendreqs = reqs + nrecvs;
222: for (i=0; i<nrecvs; i++) {
223: MPI_Irecv((void*)(fdata+count*unitbytes*i),count,dtype,MPI_ANY_SOURCE,tag,comm,reqs+i);
224: }
225: for (i=0; i<nto; i++) {
226: MPI_Isend((void*)(tdata+count*unitbytes*i),count,dtype,toranks[i],tag,comm,sendreqs+i);
227: }
228: MPI_Waitall(nto+nrecvs,reqs,statuses);
229: PetscMalloc1(nrecvs,&franks);
230: for (i=0; i<nrecvs; i++) franks[i] = statuses[i].MPI_SOURCE;
231: PetscFree2(reqs,statuses);
232: PetscCommDestroy(&comm);
234: *nfrom = nrecvs;
235: *fromranks = franks;
236: *(void**)fromdata = fdata;
237: return(0);
238: }
239: #endif
241: /*@C
242: PetscCommBuildTwoSided - discovers communicating ranks given one-sided information, moving constant-sized data in the process (often message lengths)
244: Collective on MPI_Comm
246: Input Arguments:
247: + comm - communicator
248: . count - number of entries to send/receive (must match on all ranks)
249: . dtype - datatype to send/receive from each rank (must match on all ranks)
250: . nto - number of ranks to send data to
251: . toranks - ranks to send to (array of length nto)
252: - todata - data to send to each rank (packed)
254: Output Arguments:
255: + nfrom - number of ranks receiving messages from
256: . fromranks - ranks receiving messages from (length nfrom; caller should PetscFree())
257: - fromdata - packed data from each rank, each with count entries of type dtype (length nfrom, caller responsible for PetscFree())
259: Level: developer
261: Options Database Keys:
262: . -build_twosided <allreduce|ibarrier|redscatter> - algorithm to set up two-sided communication
264: Notes:
265: This memory-scalable interface is an alternative to calling PetscGatherNumberOfMessages() and
266: PetscGatherMessageLengths(), possibly with a subsequent round of communication to send other constant-size data.
268: Basic data types as well as contiguous types are supported, but non-contiguous (e.g., strided) types are not.
270: References:
271: . 1. - Hoefler, Siebert and Lumsdaine, The MPI_Ibarrier implementation uses the algorithm in
272: Scalable communication protocols for dynamic sparse data exchange, 2010.
274: .seealso: PetscGatherNumberOfMessages(), PetscGatherMessageLengths()
275: @*/
276: PetscErrorCode PetscCommBuildTwoSided(MPI_Comm comm,PetscMPIInt count,MPI_Datatype dtype,PetscMPIInt nto,const PetscMPIInt *toranks,const void *todata,PetscMPIInt *nfrom,PetscMPIInt **fromranks,void *fromdata)
277: {
278: PetscErrorCode ierr;
279: PetscBuildTwoSidedType buildtype = PETSC_BUILDTWOSIDED_NOTSET;
282: PetscSysInitializePackage();
283: PetscLogEventBegin(PETSC_BuildTwoSided,0,0,0,0);
284: PetscCommBuildTwoSidedGetType(comm,&buildtype);
285: switch (buildtype) {
286: case PETSC_BUILDTWOSIDED_IBARRIER:
287: #if defined(PETSC_HAVE_MPI_IBARRIER) || defined(PETSC_HAVE_MPIX_IBARRIER)
288: PetscCommBuildTwoSided_Ibarrier(comm,count,dtype,nto,toranks,todata,nfrom,fromranks,fromdata);
289: #else
290: SETERRQ(comm,PETSC_ERR_PLIB,"MPI implementation does not provide MPI_Ibarrier (part of MPI-3)");
291: #endif
292: break;
293: case PETSC_BUILDTWOSIDED_ALLREDUCE:
294: PetscCommBuildTwoSided_Allreduce(comm,count,dtype,nto,toranks,todata,nfrom,fromranks,fromdata);
295: break;
296: case PETSC_BUILDTWOSIDED_REDSCATTER:
297: #if defined(PETSC_HAVE_MPI_REDUCE_SCATTER_BLOCK)
298: PetscCommBuildTwoSided_RedScatter(comm,count,dtype,nto,toranks,todata,nfrom,fromranks,fromdata);
299: #else
300: SETERRQ(comm,PETSC_ERR_PLIB,"MPI implementation does not provide MPI_Reduce_scatter_block (part of MPI-2.2)");
301: #endif
302: break;
303: default: SETERRQ(comm,PETSC_ERR_PLIB,"Unknown method for building two-sided communication");
304: }
305: PetscLogEventEnd(PETSC_BuildTwoSided,0,0,0,0);
306: return(0);
307: }
309: static PetscErrorCode PetscCommBuildTwoSidedFReq_Reference(MPI_Comm comm,PetscMPIInt count,MPI_Datatype dtype,PetscMPIInt nto,const PetscMPIInt *toranks,const void *todata,
310: PetscMPIInt *nfrom,PetscMPIInt **fromranks,void *fromdata,PetscMPIInt ntags,MPI_Request **toreqs,MPI_Request **fromreqs,
311: PetscErrorCode (*send)(MPI_Comm,const PetscMPIInt[],PetscMPIInt,PetscMPIInt,void*,MPI_Request[],void*),
312: PetscErrorCode (*recv)(MPI_Comm,const PetscMPIInt[],PetscMPIInt,void*,MPI_Request[],void*),void *ctx)
313: {
315: PetscMPIInt i,*tag;
316: MPI_Aint lb,unitbytes;
317: MPI_Request *sendreq,*recvreq;
320: PetscMalloc1(ntags,&tag);
321: if (ntags > 0) {
322: PetscCommDuplicate(comm,&comm,&tag[0]);
323: }
324: for (i=1; i<ntags; i++) {
325: PetscCommGetNewTag(comm,&tag[i]);
326: }
328: /* Perform complete initial rendezvous */
329: PetscCommBuildTwoSided(comm,count,dtype,nto,toranks,todata,nfrom,fromranks,fromdata);
331: PetscMalloc1(nto*ntags,&sendreq);
332: PetscMalloc1(*nfrom*ntags,&recvreq);
334: MPI_Type_get_extent(dtype,&lb,&unitbytes);
335: if (lb != 0) SETERRQ1(comm,PETSC_ERR_SUP,"Datatype with nonzero lower bound %ld\n",(long)lb);
336: for (i=0; i<nto; i++) {
337: PetscMPIInt k;
338: for (k=0; k<ntags; k++) sendreq[i*ntags+k] = MPI_REQUEST_NULL;
339: (*send)(comm,tag,i,toranks[i],((char*)todata)+count*unitbytes*i,sendreq+i*ntags,ctx);
340: }
341: for (i=0; i<*nfrom; i++) {
342: void *header = (*(char**)fromdata) + count*unitbytes*i;
343: PetscMPIInt k;
344: for (k=0; k<ntags; k++) recvreq[i*ntags+k] = MPI_REQUEST_NULL;
345: (*recv)(comm,tag,(*fromranks)[i],header,recvreq+i*ntags,ctx);
346: }
347: PetscFree(tag);
348: PetscCommDestroy(&comm);
349: *toreqs = sendreq;
350: *fromreqs = recvreq;
351: return(0);
352: }
354: #if defined(PETSC_HAVE_MPI_IBARRIER) || defined(PETSC_HAVE_MPIX_IBARRIER)
356: static PetscErrorCode PetscCommBuildTwoSidedFReq_Ibarrier(MPI_Comm comm,PetscMPIInt count,MPI_Datatype dtype,PetscMPIInt nto,const PetscMPIInt *toranks,const void *todata,
357: PetscMPIInt *nfrom,PetscMPIInt **fromranks,void *fromdata,PetscMPIInt ntags,MPI_Request **toreqs,MPI_Request **fromreqs,
358: PetscErrorCode (*send)(MPI_Comm,const PetscMPIInt[],PetscMPIInt,PetscMPIInt,void*,MPI_Request[],void*),
359: PetscErrorCode (*recv)(MPI_Comm,const PetscMPIInt[],PetscMPIInt,void*,MPI_Request[],void*),void *ctx)
360: {
362: PetscMPIInt nrecvs,tag,*tags,done,i;
363: MPI_Aint lb,unitbytes;
364: char *tdata;
365: MPI_Request *sendreqs,*usendreqs,*req,barrier;
366: PetscSegBuffer segrank,segdata,segreq;
367: PetscBool barrier_started;
370: PetscCommDuplicate(comm,&comm,&tag);
371: PetscMalloc1(ntags,&tags);
372: for (i=0; i<ntags; i++) {
373: PetscCommGetNewTag(comm,&tags[i]);
374: }
375: MPI_Type_get_extent(dtype,&lb,&unitbytes);
376: if (lb != 0) SETERRQ1(comm,PETSC_ERR_SUP,"Datatype with nonzero lower bound %ld\n",(long)lb);
377: tdata = (char*)todata;
378: PetscMalloc1(nto,&sendreqs);
379: PetscMalloc1(nto*ntags,&usendreqs);
380: /* Post synchronous sends */
381: for (i=0; i<nto; i++) {
382: MPI_Issend((void*)(tdata+count*unitbytes*i),count,dtype,toranks[i],tag,comm,sendreqs+i);
383: }
384: /* Post actual payloads. These are typically larger messages. Hopefully sending these later does not slow down the
385: * synchronous messages above. */
386: for (i=0; i<nto; i++) {
387: PetscMPIInt k;
388: for (k=0; k<ntags; k++) usendreqs[i*ntags+k] = MPI_REQUEST_NULL;
389: (*send)(comm,tags,i,toranks[i],tdata+count*unitbytes*i,usendreqs+i*ntags,ctx);
390: }
392: PetscSegBufferCreate(sizeof(PetscMPIInt),4,&segrank);
393: PetscSegBufferCreate(unitbytes,4*count,&segdata);
394: PetscSegBufferCreate(sizeof(MPI_Request),4,&segreq);
396: nrecvs = 0;
397: barrier = MPI_REQUEST_NULL;
398: /* MPICH-3.2 sometimes does not create a request in some "optimized" cases. This is arguably a standard violation,
399: * but we need to work around it. */
400: barrier_started = PETSC_FALSE;
401: for (done=0; !done; ) {
402: PetscMPIInt flag;
403: MPI_Status status;
404: MPI_Iprobe(MPI_ANY_SOURCE,tag,comm,&flag,&status);
405: if (flag) { /* incoming message */
406: PetscMPIInt *recvrank,k;
407: void *buf;
408: PetscSegBufferGet(segrank,1,&recvrank);
409: PetscSegBufferGet(segdata,count,&buf);
410: *recvrank = status.MPI_SOURCE;
411: MPI_Recv(buf,count,dtype,status.MPI_SOURCE,tag,comm,MPI_STATUS_IGNORE);
412: PetscSegBufferGet(segreq,ntags,&req);
413: for (k=0; k<ntags; k++) req[k] = MPI_REQUEST_NULL;
414: (*recv)(comm,tags,status.MPI_SOURCE,buf,req,ctx);
415: nrecvs++;
416: }
417: if (!barrier_started) {
418: PetscMPIInt sent,nsends;
419: PetscMPIIntCast(nto,&nsends);
420: MPI_Testall(nsends,sendreqs,&sent,MPI_STATUSES_IGNORE);
421: if (sent) {
422: #if defined(PETSC_HAVE_MPI_IBARRIER)
423: MPI_Ibarrier(comm,&barrier);
424: #elif defined(PETSC_HAVE_MPIX_IBARRIER)
425: MPIX_Ibarrier(comm,&barrier);
426: #endif
427: barrier_started = PETSC_TRUE;
428: }
429: } else {
430: MPI_Test(&barrier,&done,MPI_STATUS_IGNORE);
431: }
432: }
433: *nfrom = nrecvs;
434: PetscSegBufferExtractAlloc(segrank,fromranks);
435: PetscSegBufferDestroy(&segrank);
436: PetscSegBufferExtractAlloc(segdata,fromdata);
437: PetscSegBufferDestroy(&segdata);
438: *toreqs = usendreqs;
439: PetscSegBufferExtractAlloc(segreq,fromreqs);
440: PetscSegBufferDestroy(&segreq);
441: PetscFree(sendreqs);
442: PetscFree(tags);
443: PetscCommDestroy(&comm);
444: return(0);
445: }
446: #endif
448: /*@C
449: PetscCommBuildTwoSidedF - discovers communicating ranks given one-sided information, calling user-defined functions during rendezvous
451: Collective on MPI_Comm
453: Input Arguments:
454: + comm - communicator
455: . count - number of entries to send/receive in initial rendezvous (must match on all ranks)
456: . dtype - datatype to send/receive from each rank (must match on all ranks)
457: . nto - number of ranks to send data to
458: . toranks - ranks to send to (array of length nto)
459: . todata - data to send to each rank (packed)
460: . ntags - number of tags needed by send/recv callbacks
461: . send - callback invoked on sending process when ready to send primary payload
462: . recv - callback invoked on receiving process after delivery of rendezvous message
463: - ctx - context for callbacks
465: Output Arguments:
466: + nfrom - number of ranks receiving messages from
467: . fromranks - ranks receiving messages from (length nfrom; caller should PetscFree())
468: - fromdata - packed data from each rank, each with count entries of type dtype (length nfrom, caller responsible for PetscFree())
470: Level: developer
472: Notes:
473: This memory-scalable interface is an alternative to calling PetscGatherNumberOfMessages() and
474: PetscGatherMessageLengths(), possibly with a subsequent round of communication to send other data.
476: Basic data types as well as contiguous types are supported, but non-contiguous (e.g., strided) types are not.
478: References:
479: . 1. - Hoefler, Siebert and Lumsdaine, The MPI_Ibarrier implementation uses the algorithm in
480: Scalable communication protocols for dynamic sparse data exchange, 2010.
482: .seealso: PetscCommBuildTwoSided(), PetscCommBuildTwoSidedFReq(), PetscGatherNumberOfMessages(), PetscGatherMessageLengths()
483: @*/
484: PetscErrorCode PetscCommBuildTwoSidedF(MPI_Comm comm,PetscMPIInt count,MPI_Datatype dtype,PetscMPIInt nto,const PetscMPIInt *toranks,const void *todata,PetscMPIInt *nfrom,PetscMPIInt **fromranks,void *fromdata,PetscMPIInt ntags,
485: PetscErrorCode (*send)(MPI_Comm,const PetscMPIInt[],PetscMPIInt,PetscMPIInt,void*,MPI_Request[],void*),
486: PetscErrorCode (*recv)(MPI_Comm,const PetscMPIInt[],PetscMPIInt,void*,MPI_Request[],void*),void *ctx)
487: {
489: MPI_Request *toreqs,*fromreqs;
492: PetscCommBuildTwoSidedFReq(comm,count,dtype,nto,toranks,todata,nfrom,fromranks,fromdata,ntags,&toreqs,&fromreqs,send,recv,ctx);
493: MPI_Waitall(nto*ntags,toreqs,MPI_STATUSES_IGNORE);
494: MPI_Waitall(*nfrom*ntags,fromreqs,MPI_STATUSES_IGNORE);
495: PetscFree(toreqs);
496: PetscFree(fromreqs);
497: return(0);
498: }
500: /*@C
501: PetscCommBuildTwoSidedFReq - discovers communicating ranks given one-sided information, calling user-defined functions during rendezvous, returns requests
503: Collective on MPI_Comm
505: Input Arguments:
506: + comm - communicator
507: . count - number of entries to send/receive in initial rendezvous (must match on all ranks)
508: . dtype - datatype to send/receive from each rank (must match on all ranks)
509: . nto - number of ranks to send data to
510: . toranks - ranks to send to (array of length nto)
511: . todata - data to send to each rank (packed)
512: . ntags - number of tags needed by send/recv callbacks
513: . send - callback invoked on sending process when ready to send primary payload
514: . recv - callback invoked on receiving process after delivery of rendezvous message
515: - ctx - context for callbacks
517: Output Arguments:
518: + nfrom - number of ranks receiving messages from
519: . fromranks - ranks receiving messages from (length nfrom; caller should PetscFree())
520: . fromdata - packed data from each rank, each with count entries of type dtype (length nfrom, caller responsible for PetscFree())
521: . toreqs - array of nto*ntags sender requests (caller must wait on these, then PetscFree())
522: - fromreqs - array of nfrom*ntags receiver requests (caller must wait on these, then PetscFree())
524: Level: developer
526: Notes:
527: This memory-scalable interface is an alternative to calling PetscGatherNumberOfMessages() and
528: PetscGatherMessageLengths(), possibly with a subsequent round of communication to send other data.
530: Basic data types as well as contiguous types are supported, but non-contiguous (e.g., strided) types are not.
532: References:
533: . 1. - Hoefler, Siebert and Lumsdaine, The MPI_Ibarrier implementation uses the algorithm in
534: Scalable communication protocols for dynamic sparse data exchange, 2010.
536: .seealso: PetscCommBuildTwoSided(), PetscCommBuildTwoSidedF(), PetscGatherNumberOfMessages(), PetscGatherMessageLengths()
537: @*/
538: PetscErrorCode PetscCommBuildTwoSidedFReq(MPI_Comm comm,PetscMPIInt count,MPI_Datatype dtype,PetscMPIInt nto,const PetscMPIInt *toranks,const void *todata,
539: PetscMPIInt *nfrom,PetscMPIInt **fromranks,void *fromdata,PetscMPIInt ntags,MPI_Request **toreqs,MPI_Request **fromreqs,
540: PetscErrorCode (*send)(MPI_Comm,const PetscMPIInt[],PetscMPIInt,PetscMPIInt,void*,MPI_Request[],void*),
541: PetscErrorCode (*recv)(MPI_Comm,const PetscMPIInt[],PetscMPIInt,void*,MPI_Request[],void*),void *ctx)
542: {
543: PetscErrorCode ierr,(*f)(MPI_Comm,PetscMPIInt,MPI_Datatype,PetscMPIInt,const PetscMPIInt[],const void*,
544: PetscMPIInt*,PetscMPIInt**,void*,PetscMPIInt,MPI_Request**,MPI_Request**,
545: PetscErrorCode (*send)(MPI_Comm,const PetscMPIInt[],PetscMPIInt,PetscMPIInt,void*,MPI_Request[],void*),
546: PetscErrorCode (*recv)(MPI_Comm,const PetscMPIInt[],PetscMPIInt,void*,MPI_Request[],void*),void *ctx);
547: PetscBuildTwoSidedType buildtype = PETSC_BUILDTWOSIDED_NOTSET;
548: PetscMPIInt i,size;
551: PetscSysInitializePackage();
552: MPI_Comm_size(comm,&size);
553: for (i=0; i<nto; i++) {
554: if (toranks[i] < 0 || size <= toranks[i]) SETERRQ3(comm,PETSC_ERR_ARG_OUTOFRANGE,"toranks[%d] %d not in comm size %d",i,toranks[i],size);
555: }
556: PetscLogEventBegin(PETSC_BuildTwoSidedF,0,0,0,0);
557: PetscCommBuildTwoSidedGetType(comm,&buildtype);
558: switch (buildtype) {
559: case PETSC_BUILDTWOSIDED_IBARRIER:
560: #if defined(PETSC_HAVE_MPI_IBARRIER) || defined(PETSC_HAVE_MPIX_IBARRIER)
561: f = PetscCommBuildTwoSidedFReq_Ibarrier;
562: #else
563: SETERRQ(comm,PETSC_ERR_PLIB,"MPI implementation does not provide MPI_Ibarrier (part of MPI-3)");
564: #endif
565: break;
566: case PETSC_BUILDTWOSIDED_ALLREDUCE:
567: case PETSC_BUILDTWOSIDED_REDSCATTER:
568: f = PetscCommBuildTwoSidedFReq_Reference;
569: break;
570: default: SETERRQ(comm,PETSC_ERR_PLIB,"Unknown method for building two-sided communication");
571: }
572: (*f)(comm,count,dtype,nto,toranks,todata,nfrom,fromranks,fromdata,ntags,toreqs,fromreqs,send,recv,ctx);
573: PetscLogEventEnd(PETSC_BuildTwoSidedF,0,0,0,0);
574: return(0);
575: }