Actual source code: mpits.c
petsc-3.14.6 2021-03-30
1: #include <petscsys.h>
2: #include <petsc/private/petscimpl.h>
4: PetscLogEvent PETSC_BuildTwoSided;
5: PetscLogEvent PETSC_BuildTwoSidedF;
7: const char *const PetscBuildTwoSidedTypes[] = {
8: "ALLREDUCE",
9: "IBARRIER",
10: "REDSCATTER",
11: "PetscBuildTwoSidedType",
12: "PETSC_BUILDTWOSIDED_",
13: NULL
14: };
16: static PetscBuildTwoSidedType _twosided_type = PETSC_BUILDTWOSIDED_NOTSET;
18: /*@
19: PetscCommBuildTwoSidedSetType - set algorithm to use when building two-sided communication
21: Logically Collective
23: Input Arguments:
24: + comm - communicator, typically PETSC_COMM_WORLD (the setting is currently global; see the Note below)
25: - twosided - algorithm to use in subsequent calls to PetscCommBuildTwoSided()
27: Level: developer
29: Note:
30: This option is currently global, but could be made per-communicator.
32: .seealso: PetscCommBuildTwoSided(), PetscCommBuildTwoSidedGetType()
33: @*/
34: PetscErrorCode PetscCommBuildTwoSidedSetType(MPI_Comm comm,PetscBuildTwoSidedType twosided)
35: {
37: if (PetscDefined(USE_DEBUG)) { /* consistency check: every process must pass the same value */
39: PetscMPIInt b1[2],b2[2];
40: b1[0] = -(PetscMPIInt)twosided;
41: b1[1] = (PetscMPIInt)twosided;
42: MPIU_Allreduce(b1,b2,2,MPI_INT,MPI_MAX,comm);
43: if (-b2[0] != b2[1]) SETERRQ(comm,PETSC_ERR_ARG_WRONG,"Enum value must be same on all processes");
44: }
45: _twosided_type = twosided;
46: return(0);
47: }
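For illustration, a minimal fragment (not part of mpits.c) that forces the ibarrier algorithm from application code; it assumes PetscInitialize() has already been called, and error checking is omitted as in the listing above.

    PetscBuildTwoSidedType type;
    PetscCommBuildTwoSidedSetType(PETSC_COMM_WORLD,PETSC_BUILDTWOSIDED_IBARRIER);
    PetscCommBuildTwoSidedGetType(PETSC_COMM_WORLD,&type); /* type is now PETSC_BUILDTWOSIDED_IBARRIER */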
49: /*@
50: PetscCommBuildTwoSidedGetType - get the algorithm to be used when building two-sided communication
52: Logically Collective
54: Input Argument:
55: . comm - communicator on which to query the algorithm
56: Output Argument:
57: . twosided - algorithm that will be used by PetscCommBuildTwoSided()
58: Level: developer
60: .seealso: PetscCommBuildTwoSided(), PetscCommBuildTwoSidedSetType()
61: @*/
62: PetscErrorCode PetscCommBuildTwoSidedGetType(MPI_Comm comm,PetscBuildTwoSidedType *twosided)
63: {
65: PetscMPIInt size;
68: *twosided = PETSC_BUILDTWOSIDED_NOTSET;
69: if (_twosided_type == PETSC_BUILDTWOSIDED_NOTSET) {
70: MPI_Comm_size(comm,&size);
71: _twosided_type = PETSC_BUILDTWOSIDED_ALLREDUCE; /* default for small comms, see https://gitlab.com/petsc/petsc/-/merge_requests/2611 */
72: #if defined(PETSC_HAVE_MPI_IBARRIER)
73: if (size > 1024) _twosided_type = PETSC_BUILDTWOSIDED_IBARRIER;
74: #endif
75: PetscOptionsGetEnum(NULL,NULL,"-build_twosided",PetscBuildTwoSidedTypes,(PetscEnum*)&_twosided_type,NULL);
76: }
77: *twosided = _twosided_type;
78: return(0);
79: }
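As the logic above shows, the default is PETSC_BUILDTWOSIDED_ALLREDUCE, switching to PETSC_BUILDTWOSIDED_IBARRIER on communicators with more than 1024 ranks when MPI_Ibarrier is available; either choice can be overridden at runtime with the -build_twosided option. For illustration (the executable name is hypothetical):

    mpiexec -n 4096 ./app -build_twosided redscatter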
81: #if defined(PETSC_HAVE_MPI_IBARRIER) || defined(PETSC_HAVE_MPIX_IBARRIER)
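The routine below implements the nonblocking-consensus (NBX) scheme of Hoefler, Siebert and Lumsdaine (see the References further down): each rank posts synchronous nonblocking sends (MPI_Issend) of its rendezvous data, then loops probing for and receiving incoming messages; once all of its own sends have completed it starts a nonblocking barrier, and the loop ends when that barrier completes, at which point every message is guaranteed to have been received.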
83: static PetscErrorCode PetscCommBuildTwoSided_Ibarrier(MPI_Comm comm,PetscMPIInt count,MPI_Datatype dtype,PetscMPIInt nto,const PetscMPIInt *toranks,const void *todata,PetscMPIInt *nfrom,PetscMPIInt **fromranks,void *fromdata)
84: {
86: PetscMPIInt nrecvs,tag,done,i;
87: MPI_Aint lb,unitbytes;
88: char *tdata;
89: MPI_Request *sendreqs,barrier;
90: PetscSegBuffer segrank,segdata;
91: PetscBool barrier_started;
94: PetscCommDuplicate(comm,&comm,&tag);
95: MPI_Type_get_extent(dtype,&lb,&unitbytes);
96: if (lb != 0) SETERRQ1(comm,PETSC_ERR_SUP,"Datatype with nonzero lower bound %ld\n",(long)lb);
97: tdata = (char*)todata;
98: PetscMalloc1(nto,&sendreqs);
99: for (i=0; i<nto; i++) {
100: MPI_Issend((void*)(tdata+count*unitbytes*i),count,dtype,toranks[i],tag,comm,sendreqs+i);
101: }
102: PetscSegBufferCreate(sizeof(PetscMPIInt),4,&segrank);
103: PetscSegBufferCreate(unitbytes,4*count,&segdata);
105: nrecvs = 0;
106: barrier = MPI_REQUEST_NULL;
107: /* MPICH-3.2 sometimes does not create a request in some "optimized" cases. This is arguably a standard violation,
108: * but we need to work around it. */
109: barrier_started = PETSC_FALSE;
110: for (done=0; !done;) {
111: PetscMPIInt flag;
112: MPI_Status status;
113: MPI_Iprobe(MPI_ANY_SOURCE,tag,comm,&flag,&status);
114: if (flag) { /* incoming message */
115: PetscMPIInt *recvrank;
116: void *buf;
117: PetscSegBufferGet(segrank,1,&recvrank);
118: PetscSegBufferGet(segdata,count,&buf);
119: *recvrank = status.MPI_SOURCE;
120: MPI_Recv(buf,count,dtype,status.MPI_SOURCE,tag,comm,MPI_STATUS_IGNORE);
121: nrecvs++;
122: }
123: if (!barrier_started) {
124: PetscMPIInt sent,nsends;
125: PetscMPIIntCast(nto,&nsends);
126: MPI_Testall(nsends,sendreqs,&sent,MPI_STATUSES_IGNORE);
127: if (sent) {
128: #if defined(PETSC_HAVE_MPI_IBARRIER)
129: MPI_Ibarrier(comm,&barrier);
130: #elif defined(PETSC_HAVE_MPIX_IBARRIER)
131: MPIX_Ibarrier(comm,&barrier);
132: #endif
133: barrier_started = PETSC_TRUE;
134: PetscFree(sendreqs);
135: }
136: } else {
137: MPI_Test(&barrier,&done,MPI_STATUS_IGNORE);
138: }
139: }
140: *nfrom = nrecvs;
141: PetscSegBufferExtractAlloc(segrank,fromranks);
142: PetscSegBufferDestroy(&segrank);
143: PetscSegBufferExtractAlloc(segdata,fromdata);
144: PetscSegBufferDestroy(&segdata);
145: PetscCommDestroy(&comm);
146: return(0);
147: }
148: #endif
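The variant below is simpler: each rank sets a flag for every destination it will send to, an MPIU_Allreduce with MPI_SUM then tells every rank (in entry iflags[rank]) how many messages it will receive, and the data itself moves through ordinary MPI_Isend/MPI_Irecv pairs with MPI_ANY_SOURCE. The per-rank flag array is O(size) and is cached on the inner communicator as counter->iflags, which is why this approach is preferred only for smaller communicators.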
150: static PetscErrorCode PetscCommBuildTwoSided_Allreduce(MPI_Comm comm,PetscMPIInt count,MPI_Datatype dtype,PetscMPIInt nto,const PetscMPIInt *toranks,const void *todata,PetscMPIInt *nfrom,PetscMPIInt **fromranks,void *fromdata)
151: {
152: PetscErrorCode ierr;
153: PetscMPIInt size,rank,*iflags,nrecvs,tag,*franks,i,flg;
154: MPI_Aint lb,unitbytes;
155: char *tdata,*fdata;
156: MPI_Request *reqs,*sendreqs;
157: MPI_Status *statuses;
158: PetscCommCounter *counter;
161: MPI_Comm_size(comm,&size);
162: MPI_Comm_rank(comm,&rank);
163: PetscCommDuplicate(comm,&comm,&tag);
164: MPI_Comm_get_attr(comm,Petsc_Counter_keyval,&counter,&flg);
165: if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Inner PETSc communicator does not have its tag/name counter attribute set");
166: if (!counter->iflags) {
167: PetscCalloc1(size,&counter->iflags);
168: iflags = counter->iflags;
169: } else {
170: iflags = counter->iflags;
171: PetscArrayzero(iflags,size);
172: }
173: for (i=0; i<nto; i++) iflags[toranks[i]] = 1;
174: MPIU_Allreduce(MPI_IN_PLACE,iflags,size,MPI_INT,MPI_SUM,comm);
175: nrecvs = iflags[rank];
176: MPI_Type_get_extent(dtype,&lb,&unitbytes);
177: if (lb != 0) SETERRQ1(comm,PETSC_ERR_SUP,"Datatype with nonzero lower bound %ld\n",(long)lb);
178: PetscMalloc(nrecvs*count*unitbytes,&fdata);
179: tdata = (char*)todata;
180: PetscMalloc2(nto+nrecvs,&reqs,nto+nrecvs,&statuses);
181: sendreqs = reqs + nrecvs;
182: for (i=0; i<nrecvs; i++) {
183: MPI_Irecv((void*)(fdata+count*unitbytes*i),count,dtype,MPI_ANY_SOURCE,tag,comm,reqs+i);
184: }
185: for (i=0; i<nto; i++) {
186: MPI_Isend((void*)(tdata+count*unitbytes*i),count,dtype,toranks[i],tag,comm,sendreqs+i);
187: }
188: MPI_Waitall(nto+nrecvs,reqs,statuses);
189: PetscMalloc1(nrecvs,&franks);
190: for (i=0; i<nrecvs; i++) franks[i] = statuses[i].MPI_SOURCE;
191: PetscFree2(reqs,statuses);
192: PetscCommDestroy(&comm);
194: *nfrom = nrecvs;
195: *fromranks = franks;
196: *(void**)fromdata = fdata;
197: return(0);
198: }
200: #if defined(PETSC_HAVE_MPI_REDUCE_SCATTER_BLOCK)
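The reduce-scatter variant below follows the same flag-counting idea, but MPI_Reduce_scatter_block sums the flag arrays and delivers to each rank only its own receive count, so no rank ever holds the full summed vector.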
201: static PetscErrorCode PetscCommBuildTwoSided_RedScatter(MPI_Comm comm,PetscMPIInt count,MPI_Datatype dtype,PetscMPIInt nto,const PetscMPIInt *toranks,const void *todata,PetscMPIInt *nfrom,PetscMPIInt **fromranks,void *fromdata)
202: {
204: PetscMPIInt size,*iflags,nrecvs,tag,*franks,i,flg;
205: MPI_Aint lb,unitbytes;
206: char *tdata,*fdata;
207: MPI_Request *reqs,*sendreqs;
208: MPI_Status *statuses;
209: PetscCommCounter *counter;
212: MPI_Comm_size(comm,&size);
213: PetscCommDuplicate(comm,&comm,&tag);
214: MPI_Comm_get_attr(comm,Petsc_Counter_keyval,&counter,&flg);
215: if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Inner PETSc communicator does not have its tag/name counter attribute set");
216: if (!counter->iflags) {
217: PetscCalloc1(size,&counter->iflags);
218: iflags = counter->iflags;
219: } else {
220: iflags = counter->iflags;
221: PetscArrayzero(iflags,size);
222: }
223: for (i=0; i<nto; i++) iflags[toranks[i]] = 1;
224: MPI_Reduce_scatter_block(iflags,&nrecvs,1,MPI_INT,MPI_SUM,comm);
225: MPI_Type_get_extent(dtype,&lb,&unitbytes);
226: if (lb != 0) SETERRQ1(comm,PETSC_ERR_SUP,"Datatype with nonzero lower bound %ld\n",(long)lb);
227: PetscMalloc(nrecvs*count*unitbytes,&fdata);
228: tdata = (char*)todata;
229: PetscMalloc2(nto+nrecvs,&reqs,nto+nrecvs,&statuses);
230: sendreqs = reqs + nrecvs;
231: for (i=0; i<nrecvs; i++) {
232: MPI_Irecv((void*)(fdata+count*unitbytes*i),count,dtype,MPI_ANY_SOURCE,tag,comm,reqs+i);
233: }
234: for (i=0; i<nto; i++) {
235: MPI_Isend((void*)(tdata+count*unitbytes*i),count,dtype,toranks[i],tag,comm,sendreqs+i);
236: }
237: MPI_Waitall(nto+nrecvs,reqs,statuses);
238: PetscMalloc1(nrecvs,&franks);
239: for (i=0; i<nrecvs; i++) franks[i] = statuses[i].MPI_SOURCE;
240: PetscFree2(reqs,statuses);
241: PetscCommDestroy(&comm);
243: *nfrom = nrecvs;
244: *fromranks = franks;
245: *(void**)fromdata = fdata;
246: return(0);
247: }
248: #endif
250: /*@C
251: PetscCommBuildTwoSided - discovers communicating ranks given one-sided information, moving constant-sized data in the process (often message lengths)
253: Collective
255: Input Arguments:
256: + comm - communicator
257: . count - number of entries to send/receive (must match on all ranks)
258: . dtype - datatype to send/receive from each rank (must match on all ranks)
259: . nto - number of ranks to send data to
260: . toranks - ranks to send to (array of length nto)
261: - todata - data to send to each rank (packed)
263: Output Arguments:
264: + nfrom - number of ranks from which messages will be received
265: . fromranks - ranks from which messages will be received (length nfrom; caller should PetscFree())
266: - fromdata - packed data from each rank, each with count entries of type dtype (length nfrom; caller should PetscFree())
268: Level: developer
270: Options Database Keys:
271: . -build_twosided <allreduce|ibarrier|redscatter> - algorithm used to set up two-sided communication. The default is allreduce for communicators with <= 1024 ranks, otherwise ibarrier (when the MPI implementation provides MPI_Ibarrier).
273: Notes:
274: This memory-scalable interface is an alternative to calling PetscGatherNumberOfMessages() and
275: PetscGatherMessageLengths(), possibly with a subsequent round of communication to send other constant-size data.
277: Basic data types as well as contiguous types are supported, but non-contiguous (e.g., strided) types are not.
279: References:
280: . 1. - Hoefler, Siebert and Lumsdaine, "Scalable communication protocols for dynamic sparse data exchange", 2010
281: (the MPI_Ibarrier-based implementation uses the algorithm from this paper)
283: .seealso: PetscGatherNumberOfMessages(), PetscGatherMessageLengths()
284: @*/
285: PetscErrorCode PetscCommBuildTwoSided(MPI_Comm comm,PetscMPIInt count,MPI_Datatype dtype,PetscMPIInt nto,const PetscMPIInt *toranks,const void *todata,PetscMPIInt *nfrom,PetscMPIInt **fromranks,void *fromdata)
286: {
287: PetscErrorCode ierr;
288: PetscBuildTwoSidedType buildtype = PETSC_BUILDTWOSIDED_NOTSET;
291: PetscSysInitializePackage();
292: PetscLogEventSync(PETSC_BuildTwoSided,comm);
293: PetscLogEventBegin(PETSC_BuildTwoSided,0,0,0,0);
294: PetscCommBuildTwoSidedGetType(comm,&buildtype);
295: switch (buildtype) {
296: case PETSC_BUILDTWOSIDED_IBARRIER:
297: #if defined(PETSC_HAVE_MPI_IBARRIER) || defined(PETSC_HAVE_MPIX_IBARRIER)
298: PetscCommBuildTwoSided_Ibarrier(comm,count,dtype,nto,toranks,todata,nfrom,fromranks,fromdata);
299: break;
300: #else
301: SETERRQ(comm,PETSC_ERR_PLIB,"MPI implementation does not provide MPI_Ibarrier (part of MPI-3)");
302: #endif
303: case PETSC_BUILDTWOSIDED_ALLREDUCE:
304: PetscCommBuildTwoSided_Allreduce(comm,count,dtype,nto,toranks,todata,nfrom,fromranks,fromdata);
305: break;
306: case PETSC_BUILDTWOSIDED_REDSCATTER:
307: #if defined(PETSC_HAVE_MPI_REDUCE_SCATTER_BLOCK)
308: PetscCommBuildTwoSided_RedScatter(comm,count,dtype,nto,toranks,todata,nfrom,fromranks,fromdata);
309: break;
310: #else
311: SETERRQ(comm,PETSC_ERR_PLIB,"MPI implementation does not provide MPI_Reduce_scatter_block (part of MPI-2.2)");
312: #endif
313: default: SETERRQ(comm,PETSC_ERR_PLIB,"Unknown method for building two-sided communication");
314: }
315: PetscLogEventEnd(PETSC_BuildTwoSided,0,0,0,0);
316: return(0);
317: }
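For illustration, a minimal sketch (not part of mpits.c) of the routine above: each rank announces one integer, for instance a message length, to its successor in a ring. The ring pattern and main() are purely illustrative, and error checking is omitted as in the listing.

    #include <petscsys.h>

    int main(int argc,char **argv)
    {
      PetscMPIInt rank,size,toranks[1],todata[1],nfrom,*fromranks,*fromdata,i;

      PetscInitialize(&argc,&argv,NULL,NULL);
      MPI_Comm_rank(PETSC_COMM_WORLD,&rank);
      MPI_Comm_size(PETSC_COMM_WORLD,&size);
      toranks[0] = (rank+1)%size;   /* send to the next rank in a ring */
      todata[0]  = 100+rank;        /* e.g. the length of a message to follow */
      PetscCommBuildTwoSided(PETSC_COMM_WORLD,1,MPI_INT,1,toranks,todata,&nfrom,&fromranks,&fromdata);
      for (i=0; i<nfrom; i++) {
        PetscSynchronizedPrintf(PETSC_COMM_WORLD,"[%d] received %d from rank %d\n",rank,fromdata[i],fromranks[i]);
      }
      PetscSynchronizedFlush(PETSC_COMM_WORLD,PETSC_STDOUT);
      PetscFree(fromranks);
      PetscFree(fromdata);
      PetscFinalize();
      return 0;
    }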
319: static PetscErrorCode PetscCommBuildTwoSidedFReq_Reference(MPI_Comm comm,PetscMPIInt count,MPI_Datatype dtype,PetscMPIInt nto,const PetscMPIInt *toranks,const void *todata,
320: PetscMPIInt *nfrom,PetscMPIInt **fromranks,void *fromdata,PetscMPIInt ntags,MPI_Request **toreqs,MPI_Request **fromreqs,
321: PetscErrorCode (*send)(MPI_Comm,const PetscMPIInt[],PetscMPIInt,PetscMPIInt,void*,MPI_Request[],void*),
322: PetscErrorCode (*recv)(MPI_Comm,const PetscMPIInt[],PetscMPIInt,void*,MPI_Request[],void*),void *ctx)
323: {
325: PetscMPIInt i,*tag;
326: MPI_Aint lb,unitbytes;
327: MPI_Request *sendreq,*recvreq;
330: PetscMalloc1(ntags,&tag);
331: if (ntags > 0) {
332: PetscCommDuplicate(comm,&comm,&tag[0]);
333: }
334: for (i=1; i<ntags; i++) {
335: PetscCommGetNewTag(comm,&tag[i]);
336: }
338: /* Perform complete initial rendezvous */
339: PetscCommBuildTwoSided(comm,count,dtype,nto,toranks,todata,nfrom,fromranks,fromdata);
341: PetscMalloc1(nto*ntags,&sendreq);
342: PetscMalloc1(*nfrom*ntags,&recvreq);
344: MPI_Type_get_extent(dtype,&lb,&unitbytes);
345: if (lb != 0) SETERRQ1(comm,PETSC_ERR_SUP,"Datatype with nonzero lower bound %ld\n",(long)lb);
346: for (i=0; i<nto; i++) {
347: PetscMPIInt k;
348: for (k=0; k<ntags; k++) sendreq[i*ntags+k] = MPI_REQUEST_NULL;
349: (*send)(comm,tag,i,toranks[i],((char*)todata)+count*unitbytes*i,sendreq+i*ntags,ctx);
350: }
351: for (i=0; i<*nfrom; i++) {
352: void *header = (*(char**)fromdata) + count*unitbytes*i;
353: PetscMPIInt k;
354: for (k=0; k<ntags; k++) recvreq[i*ntags+k] = MPI_REQUEST_NULL;
355: (*recv)(comm,tag,(*fromranks)[i],header,recvreq+i*ntags,ctx);
356: }
357: PetscFree(tag);
358: PetscCommDestroy(&comm);
359: *toreqs = sendreq;
360: *fromreqs = recvreq;
361: return(0);
362: }
364: #if defined(PETSC_HAVE_MPI_IBARRIER) || defined(PETSC_HAVE_MPIX_IBARRIER)
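The routine below combines the NBX rendezvous of PetscCommBuildTwoSided_Ibarrier() with the user callbacks: payload sends are posted through (*send)() right after the synchronous rendezvous sends, and whenever a rendezvous message is probed and received the (*recv)() callback posts the matching payload receives; the resulting requests are collected and handed back to the caller through toreqs and fromreqs.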
366: static PetscErrorCode PetscCommBuildTwoSidedFReq_Ibarrier(MPI_Comm comm,PetscMPIInt count,MPI_Datatype dtype,PetscMPIInt nto,const PetscMPIInt *toranks,const void *todata,
367: PetscMPIInt *nfrom,PetscMPIInt **fromranks,void *fromdata,PetscMPIInt ntags,MPI_Request **toreqs,MPI_Request **fromreqs,
368: PetscErrorCode (*send)(MPI_Comm,const PetscMPIInt[],PetscMPIInt,PetscMPIInt,void*,MPI_Request[],void*),
369: PetscErrorCode (*recv)(MPI_Comm,const PetscMPIInt[],PetscMPIInt,void*,MPI_Request[],void*),void *ctx)
370: {
372: PetscMPIInt nrecvs,tag,*tags,done,i;
373: MPI_Aint lb,unitbytes;
374: char *tdata;
375: MPI_Request *sendreqs,*usendreqs,*req,barrier;
376: PetscSegBuffer segrank,segdata,segreq;
377: PetscBool barrier_started;
380: PetscCommDuplicate(comm,&comm,&tag);
381: PetscMalloc1(ntags,&tags);
382: for (i=0; i<ntags; i++) {
383: PetscCommGetNewTag(comm,&tags[i]);
384: }
385: MPI_Type_get_extent(dtype,&lb,&unitbytes);
386: if (lb != 0) SETERRQ1(comm,PETSC_ERR_SUP,"Datatype with nonzero lower bound %ld\n",(long)lb);
387: tdata = (char*)todata;
388: PetscMalloc1(nto,&sendreqs);
389: PetscMalloc1(nto*ntags,&usendreqs);
390: /* Post synchronous sends */
391: for (i=0; i<nto; i++) {
392: MPI_Issend((void*)(tdata+count*unitbytes*i),count,dtype,toranks[i],tag,comm,sendreqs+i);
393: }
394: /* Post actual payloads. These are typically larger messages. Hopefully sending these later does not slow down the
395: * synchronous messages above. */
396: for (i=0; i<nto; i++) {
397: PetscMPIInt k;
398: for (k=0; k<ntags; k++) usendreqs[i*ntags+k] = MPI_REQUEST_NULL;
399: (*send)(comm,tags,i,toranks[i],tdata+count*unitbytes*i,usendreqs+i*ntags,ctx);
400: }
402: PetscSegBufferCreate(sizeof(PetscMPIInt),4,&segrank);
403: PetscSegBufferCreate(unitbytes,4*count,&segdata);
404: PetscSegBufferCreate(sizeof(MPI_Request),4,&segreq);
406: nrecvs = 0;
407: barrier = MPI_REQUEST_NULL;
408: /* MPICH-3.2 sometimes does not create a request in some "optimized" cases. This is arguably a standard violation,
409: * but we need to work around it. */
410: barrier_started = PETSC_FALSE;
411: for (done=0; !done;) {
412: PetscMPIInt flag;
413: MPI_Status status;
414: MPI_Iprobe(MPI_ANY_SOURCE,tag,comm,&flag,&status);
415: if (flag) { /* incoming message */
416: PetscMPIInt *recvrank,k;
417: void *buf;
418: PetscSegBufferGet(segrank,1,&recvrank);
419: PetscSegBufferGet(segdata,count,&buf);
420: *recvrank = status.MPI_SOURCE;
421: MPI_Recv(buf,count,dtype,status.MPI_SOURCE,tag,comm,MPI_STATUS_IGNORE);
422: PetscSegBufferGet(segreq,ntags,&req);
423: for (k=0; k<ntags; k++) req[k] = MPI_REQUEST_NULL;
424: (*recv)(comm,tags,status.MPI_SOURCE,buf,req,ctx);
425: nrecvs++;
426: }
427: if (!barrier_started) {
428: PetscMPIInt sent,nsends;
429: PetscMPIIntCast(nto,&nsends);
430: MPI_Testall(nsends,sendreqs,&sent,MPI_STATUSES_IGNORE);
431: if (sent) {
432: #if defined(PETSC_HAVE_MPI_IBARRIER)
433: MPI_Ibarrier(comm,&barrier);
434: #elif defined(PETSC_HAVE_MPIX_IBARRIER)
435: MPIX_Ibarrier(comm,&barrier);
436: #endif
437: barrier_started = PETSC_TRUE;
438: }
439: } else {
440: MPI_Test(&barrier,&done,MPI_STATUS_IGNORE);
441: }
442: }
443: *nfrom = nrecvs;
444: PetscSegBufferExtractAlloc(segrank,fromranks);
445: PetscSegBufferDestroy(&segrank);
446: PetscSegBufferExtractAlloc(segdata,fromdata);
447: PetscSegBufferDestroy(&segdata);
448: *toreqs = usendreqs;
449: PetscSegBufferExtractAlloc(segreq,fromreqs);
450: PetscSegBufferDestroy(&segreq);
451: PetscFree(sendreqs);
452: PetscFree(tags);
453: PetscCommDestroy(&comm);
454: return(0);
455: }
456: #endif
458: /*@C
459: PetscCommBuildTwoSidedF - discovers communicating ranks given one-sided information, calling user-defined functions during rendezvous
461: Collective
463: Input Arguments:
464: + comm - communicator
465: . count - number of entries to send/receive in initial rendezvous (must match on all ranks)
466: . dtype - datatype to send/receive from each rank (must match on all ranks)
467: . nto - number of ranks to send data to
468: . toranks - ranks to send to (array of length nto)
469: . todata - data to send to each rank (packed)
470: . ntags - number of tags needed by send/recv callbacks
471: . send - callback invoked on sending process when ready to send primary payload
472: . recv - callback invoked on receiving process after delivery of rendezvous message
473: - ctx - context for callbacks
475: Output Arguments:
476: + nfrom - number of ranks from which messages will be received
477: . fromranks - ranks from which messages will be received (length nfrom; caller should PetscFree())
478: - fromdata - packed data from each rank, each with count entries of type dtype (length nfrom; caller should PetscFree())
480: Level: developer
482: Notes:
483: This memory-scalable interface is an alternative to calling PetscGatherNumberOfMessages() and
484: PetscGatherMessageLengths(), possibly with a subsequent round of communication to send other data.
486: Basic data types as well as contiguous types are supported, but non-contiguous (e.g., strided) types are not.
488: References:
489: . 1. - Hoefler, Siebert and Lumsdaine, "Scalable communication protocols for dynamic sparse data exchange", 2010
490: (the MPI_Ibarrier-based implementation uses the algorithm from this paper)
492: .seealso: PetscCommBuildTwoSided(), PetscCommBuildTwoSidedFReq(), PetscGatherNumberOfMessages(), PetscGatherMessageLengths()
493: @*/
494: PetscErrorCode PetscCommBuildTwoSidedF(MPI_Comm comm,PetscMPIInt count,MPI_Datatype dtype,PetscMPIInt nto,const PetscMPIInt *toranks,const void *todata,PetscMPIInt *nfrom,PetscMPIInt **fromranks,void *fromdata,PetscMPIInt ntags,
495: PetscErrorCode (*send)(MPI_Comm,const PetscMPIInt[],PetscMPIInt,PetscMPIInt,void*,MPI_Request[],void*),
496: PetscErrorCode (*recv)(MPI_Comm,const PetscMPIInt[],PetscMPIInt,void*,MPI_Request[],void*),void *ctx)
497: {
499: MPI_Request *toreqs,*fromreqs;
502: PetscCommBuildTwoSidedFReq(comm,count,dtype,nto,toranks,todata,nfrom,fromranks,fromdata,ntags,&toreqs,&fromreqs,send,recv,ctx);
503: MPI_Waitall(nto*ntags,toreqs,MPI_STATUSES_IGNORE);
504: MPI_Waitall(*nfrom*ntags,fromreqs,MPI_STATUSES_IGNORE);
505: PetscFree(toreqs);
506: PetscFree(fromreqs);
507: return(0);
508: }
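For illustration, a hypothetical callback pair (not part of mpits.c) for the routine above with ntags == 1, where the rendezvous header passed through todata/fromdata is a payload length counted in PetscScalar entries; MyCtx, MySend and MyRecv are invented names, and error checking is omitted as in the listing.

    #include <petscsys.h>

    typedef struct {
      PetscScalar **sendbufs;   /* sendbufs[i] is the payload destined for toranks[i]   */
      PetscScalar  *recvbuf;    /* single buffer large enough for all incoming payloads */
      PetscMPIInt   recvoffset; /* next free position in recvbuf                        */
    } MyCtx;

    static PetscErrorCode MySend(MPI_Comm comm,const PetscMPIInt tags[],PetscMPIInt i,PetscMPIInt rank,void *header,MPI_Request req[],void *ctx)
    {
      MyCtx       *c   = (MyCtx*)ctx;
      PetscMPIInt  len = *(PetscMPIInt*)header; /* the length this rank announced in todata[i] */

      MPI_Isend(c->sendbufs[i],len,MPIU_SCALAR,rank,tags[0],comm,&req[0]);
      return 0;
    }

    static PetscErrorCode MyRecv(MPI_Comm comm,const PetscMPIInt tags[],PetscMPIInt rank,void *header,MPI_Request req[],void *ctx)
    {
      MyCtx       *c   = (MyCtx*)ctx;
      PetscMPIInt  len = *(PetscMPIInt*)header; /* the length announced by the sending rank */

      MPI_Irecv(c->recvbuf+c->recvoffset,len,MPIU_SCALAR,rank,tags[0],comm,&req[0]);
      c->recvoffset += len;
      return 0;
    }

A caller would then announce the lengths and drive the exchange with, e.g., PetscCommBuildTwoSidedF(comm,1,MPI_INT,nto,toranks,lengths,&nfrom,&fromranks,&fromlengths,1,MySend,MyRecv,&ctx).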
510: /*@C
511: PetscCommBuildTwoSidedFReq - discovers communicating ranks given one-sided information, calling user-defined functions during rendezvous, returns requests
513: Collective
515: Input Arguments:
516: + comm - communicator
517: . count - number of entries to send/receive in initial rendezvous (must match on all ranks)
518: . dtype - datatype to send/receive from each rank (must match on all ranks)
519: . nto - number of ranks to send data to
520: . toranks - ranks to send to (array of length nto)
521: . todata - data to send to each rank (packed)
522: . ntags - number of tags needed by send/recv callbacks
523: . send - callback invoked on sending process when ready to send primary payload
524: . recv - callback invoked on receiving process after delivery of rendezvous message
525: - ctx - context for callbacks
527: Output Arguments:
528: + nfrom - number of ranks from which messages will be received
529: . fromranks - ranks from which messages will be received (length nfrom; caller should PetscFree())
530: . fromdata - packed data from each rank, each with count entries of type dtype (length nfrom; caller should PetscFree())
531: . toreqs - array of nto*ntags sender requests (caller must wait on these, then PetscFree())
532: - fromreqs - array of nfrom*ntags receiver requests (caller must wait on these, then PetscFree())
534: Level: developer
536: Notes:
537: This memory-scalable interface is an alternative to calling PetscGatherNumberOfMessages() and
538: PetscGatherMessageLengths(), possibly with a subsequent round of communication to send other data.
540: Basic data types as well as contiguous types are supported, but non-contiguous (e.g., strided) types are not.
542: References:
543: . 1. - Hoefler, Siebert and Lumsdaine, "Scalable communication protocols for dynamic sparse data exchange", 2010
544: (the MPI_Ibarrier-based implementation uses the algorithm from this paper)
546: .seealso: PetscCommBuildTwoSided(), PetscCommBuildTwoSidedF(), PetscGatherNumberOfMessages(), PetscGatherMessageLengths()
547: @*/
548: PetscErrorCode PetscCommBuildTwoSidedFReq(MPI_Comm comm,PetscMPIInt count,MPI_Datatype dtype,PetscMPIInt nto,const PetscMPIInt *toranks,const void *todata,
549: PetscMPIInt *nfrom,PetscMPIInt **fromranks,void *fromdata,PetscMPIInt ntags,MPI_Request **toreqs,MPI_Request **fromreqs,
550: PetscErrorCode (*send)(MPI_Comm,const PetscMPIInt[],PetscMPIInt,PetscMPIInt,void*,MPI_Request[],void*),
551: PetscErrorCode (*recv)(MPI_Comm,const PetscMPIInt[],PetscMPIInt,void*,MPI_Request[],void*),void *ctx)
552: {
553: PetscErrorCode ierr,(*f)(MPI_Comm,PetscMPIInt,MPI_Datatype,PetscMPIInt,const PetscMPIInt[],const void*,
554: PetscMPIInt*,PetscMPIInt**,void*,PetscMPIInt,MPI_Request**,MPI_Request**,
555: PetscErrorCode (*send)(MPI_Comm,const PetscMPIInt[],PetscMPIInt,PetscMPIInt,void*,MPI_Request[],void*),
556: PetscErrorCode (*recv)(MPI_Comm,const PetscMPIInt[],PetscMPIInt,void*,MPI_Request[],void*),void *ctx);
557: PetscBuildTwoSidedType buildtype = PETSC_BUILDTWOSIDED_NOTSET;
558: PetscMPIInt i,size;
561: PetscSysInitializePackage();
562: MPI_Comm_size(comm,&size);
563: for (i=0; i<nto; i++) {
564: if (toranks[i] < 0 || size <= toranks[i]) SETERRQ3(comm,PETSC_ERR_ARG_OUTOFRANGE,"toranks[%d] %d not in comm size %d",i,toranks[i],size);
565: }
566: PetscLogEventSync(PETSC_BuildTwoSidedF,comm);
567: PetscLogEventBegin(PETSC_BuildTwoSidedF,0,0,0,0);
568: PetscCommBuildTwoSidedGetType(comm,&buildtype);
569: switch (buildtype) {
570: case PETSC_BUILDTWOSIDED_IBARRIER:
571: #if defined(PETSC_HAVE_MPI_IBARRIER) || defined(PETSC_HAVE_MPIX_IBARRIER)
572: f = PetscCommBuildTwoSidedFReq_Ibarrier;
573: break;
574: #else
575: SETERRQ(comm,PETSC_ERR_PLIB,"MPI implementation does not provide MPI_Ibarrier (part of MPI-3)");
576: #endif
577: case PETSC_BUILDTWOSIDED_ALLREDUCE:
578: case PETSC_BUILDTWOSIDED_REDSCATTER:
579: f = PetscCommBuildTwoSidedFReq_Reference;
580: break;
581: default: SETERRQ(comm,PETSC_ERR_PLIB,"Unknown method for building two-sided communication");
582: }
583: (*f)(comm,count,dtype,nto,toranks,todata,nfrom,fromranks,fromdata,ntags,toreqs,fromreqs,send,recv,ctx);
584: PetscLogEventEnd(PETSC_BuildTwoSidedF,0,0,0,0);
585: return(0);
586: }
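PetscCommBuildTwoSidedF() above is itself the reference usage pattern for this routine: call PetscCommBuildTwoSidedFReq(), MPI_Waitall() on the nto*ntags send requests and the (*nfrom)*ntags receive requests, and then PetscFree() both request arrays.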