Actual source code: mpits.c
petsc-3.13.6 2020-09-29
1: #include <petscsys.h>
2: #include <petsc/private/petscimpl.h>
4: PetscLogEvent PETSC_BuildTwoSided;
5: PetscLogEvent PETSC_BuildTwoSidedF;
7: const char *const PetscBuildTwoSidedTypes[] = {
8: "ALLREDUCE",
9: "IBARRIER",
10: "REDSCATTER",
11: "PetscBuildTwoSidedType",
12: "PETSC_BUILDTWOSIDED_",
13: NULL
14: };
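The string array above follows the layout PetscOptionsGetEnum() expects for a PetscEnum: the names of the enum values in order ("ALLREDUCE", "IBARRIER", "REDSCATTER"), then the enum type name, then the common prefix of the C identifiers, then a terminating NULL. This is what lets the -build_twosided option used later in this file map an option value back to a PetscBuildTwoSidedType.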
16: static PetscBuildTwoSidedType _twosided_type = PETSC_BUILDTWOSIDED_NOTSET;
18: /*@
19: PetscCommBuildTwoSidedSetType - set algorithm to use when building two-sided communication
21: Logically Collective
23: Input Arguments:
24: + comm - communicator on which to set the algorithm (the setting is currently global, so this is typically PETSC_COMM_WORLD)
25: - twosided - algorithm to use in subsequent calls to PetscCommBuildTwoSided()
27: Level: developer
29: Note:
30: This option is currently global, but could be made per-communicator.
32: .seealso: PetscCommBuildTwoSided(), PetscCommBuildTwoSidedGetType()
33: @*/
34: PetscErrorCode PetscCommBuildTwoSidedSetType(MPI_Comm comm,PetscBuildTwoSidedType twosided)
35: {
37: #if defined(PETSC_USE_DEBUG)
38: { /* verify that every rank passed the same enum value */
39: PetscMPIInt ierr;
40: PetscMPIInt b1[2],b2[2];
41: b1[0] = -(PetscMPIInt)twosided;
42: b1[1] = (PetscMPIInt)twosided;
43: ierr = MPIU_Allreduce(b1,b2,2,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr);
44: if (-b2[0] != b2[1]) SETERRQ(comm,PETSC_ERR_ARG_WRONG,"Enum value must be same on all processes");
45: }
46: #endif
47: _twosided_type = twosided;
48: return(0);
49: }
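A minimal caller-side sketch (not part of mpits.c) of selecting the algorithm programmatically; the choice of PETSC_BUILDTWOSIDED_IBARRIER is illustrative and assumes an MPI-3 build, and the same effect is available at run time with -build_twosided ibarrier:

#include <petscsys.h>

int main(int argc,char **argv)
{
  PetscErrorCode ierr;

  ierr = PetscInitialize(&argc,&argv,NULL,NULL);if (ierr) return ierr;
  /* Request the non-blocking consensus (Ibarrier) rendezvous for all subsequent
     PetscCommBuildTwoSided() calls; equivalent to the -build_twosided ibarrier option. */
  ierr = PetscCommBuildTwoSidedSetType(PETSC_COMM_WORLD,PETSC_BUILDTWOSIDED_IBARRIER);CHKERRQ(ierr);
  ierr = PetscFinalize();
  return ierr;
}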
51: /*@
52: PetscCommBuildTwoSidedGetType - get the algorithm to be used when building two-sided communication
54: Logically Collective
56: Input/Output Arguments:
57: + comm - communicator on which to query the algorithm (input)
58: - twosided - algorithm that will be used by PetscCommBuildTwoSided() (output)
60: Level: developer
62: .seealso: PetscCommBuildTwoSided(), PetscCommBuildTwoSidedSetType()
63: @*/
64: PetscErrorCode PetscCommBuildTwoSidedGetType(MPI_Comm comm,PetscBuildTwoSidedType *twosided)
65: {
67: PetscMPIInt size;
70: *twosided = PETSC_BUILDTWOSIDED_NOTSET;
71: if (_twosided_type == PETSC_BUILDTWOSIDED_NOTSET) {
72: MPI_Comm_size(comm,&size);
73: _twosided_type = PETSC_BUILDTWOSIDED_ALLREDUCE; /* default for small comms, see https://gitlab.com/petsc/petsc/-/merge_requests/2611 */
74: #if defined(PETSC_HAVE_MPI_IBARRIER)
75: if (size > 1024) _twosided_type = PETSC_BUILDTWOSIDED_IBARRIER;
76: #endif
77: PetscOptionsGetEnum(NULL,NULL,"-build_twosided",PetscBuildTwoSidedTypes,(PetscEnum*)&_twosided_type,NULL);
78: }
79: *twosided = _twosided_type;
80: return(0);
81: }
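A companion sketch for the query side (a code fragment, assuming <petscsys.h> is included and PETSc is initialized). It assumes the usual PetscEnum numbering in which the ALLREDUCE, IBARRIER and REDSCATTER values index the name strings in PetscBuildTwoSidedTypes[]; NOTSET is never returned here because the routine above always resolves a default first:

/* fragment: query which rendezvous algorithm will be used and print its name */
PetscBuildTwoSidedType t;
PetscErrorCode         ierr;

ierr = PetscCommBuildTwoSidedGetType(PETSC_COMM_WORLD,&t);CHKERRQ(ierr);
ierr = PetscPrintf(PETSC_COMM_WORLD,"two-sided build algorithm: %s\n",PetscBuildTwoSidedTypes[t]);CHKERRQ(ierr);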
83: #if defined(PETSC_HAVE_MPI_IBARRIER) || defined(PETSC_HAVE_MPIX_IBARRIER)
85: static PetscErrorCode PetscCommBuildTwoSided_Ibarrier(MPI_Comm comm,PetscMPIInt count,MPI_Datatype dtype,PetscMPIInt nto,const PetscMPIInt *toranks,const void *todata,PetscMPIInt *nfrom,PetscMPIInt **fromranks,void *fromdata)
86: {
88: PetscMPIInt nrecvs,tag,done,i;
89: MPI_Aint lb,unitbytes;
90: char *tdata;
91: MPI_Request *sendreqs,barrier;
92: PetscSegBuffer segrank,segdata;
93: PetscBool barrier_started;
96: PetscCommDuplicate(comm,&comm,&tag);
97: MPI_Type_get_extent(dtype,&lb,&unitbytes);
98: if (lb != 0) SETERRQ1(comm,PETSC_ERR_SUP,"Datatype with nonzero lower bound %ld\n",(long)lb);
99: tdata = (char*)todata;
100: PetscMalloc1(nto,&sendreqs);
101: for (i=0; i<nto; i++) {
102: MPI_Issend((void*)(tdata+count*unitbytes*i),count,dtype,toranks[i],tag,comm,sendreqs+i);
103: }
104: PetscSegBufferCreate(sizeof(PetscMPIInt),4,&segrank);
105: PetscSegBufferCreate(unitbytes,4*count,&segdata);
107: nrecvs = 0;
108: barrier = MPI_REQUEST_NULL;
109: /* MPICH-3.2 sometimes does not create a request in some "optimized" cases. This is arguably a standard violation,
110: * but we need to work around it. */
111: barrier_started = PETSC_FALSE;
112: for (done=0; !done; ) {
113: PetscMPIInt flag;
114: MPI_Status status;
115: MPI_Iprobe(MPI_ANY_SOURCE,tag,comm,&flag,&status);
116: if (flag) { /* incoming message */
117: PetscMPIInt *recvrank;
118: void *buf;
119: PetscSegBufferGet(segrank,1,&recvrank);
120: PetscSegBufferGet(segdata,count,&buf);
121: *recvrank = status.MPI_SOURCE;
122: MPI_Recv(buf,count,dtype,status.MPI_SOURCE,tag,comm,MPI_STATUS_IGNORE);
123: nrecvs++;
124: }
125: if (!barrier_started) {
126: PetscMPIInt sent,nsends;
127: PetscMPIIntCast(nto,&nsends);
128: MPI_Testall(nsends,sendreqs,&sent,MPI_STATUSES_IGNORE);
129: if (sent) {
130: #if defined(PETSC_HAVE_MPI_IBARRIER)
131: MPI_Ibarrier(comm,&barrier);
132: #elif defined(PETSC_HAVE_MPIX_IBARRIER)
133: MPIX_Ibarrier(comm,&barrier);
134: #endif
135: barrier_started = PETSC_TRUE;
136: PetscFree(sendreqs);
137: }
138: } else {
139: MPI_Test(&barrier,&done,MPI_STATUS_IGNORE);
140: }
141: }
142: *nfrom = nrecvs;
143: PetscSegBufferExtractAlloc(segrank,fromranks);
144: PetscSegBufferDestroy(&segrank);
145: PetscSegBufferExtractAlloc(segdata,fromdata);
146: PetscSegBufferDestroy(&segdata);
147: PetscCommDestroy(&comm);
148: return(0);
149: }
150: #endif
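PetscCommBuildTwoSided_Ibarrier() above follows the NBX (non-blocking consensus) algorithm of Hoefler, Siebert and Lumsdaine cited in the docstring below: synchronous sends guarantee that a completed send has been matched, and an MPI_Ibarrier entered only after all local sends complete signals that no further messages can be in flight. The following stripped-down sketch in plain MPI (hypothetical function name, fixed one-int payloads, no error checking, MPI-3 required) shows the same control flow without the PETSc infrastructure:

#include <mpi.h>
#include <stdlib.h>

/* Sketch of the NBX rendezvous: each rank knows only whom it sends to,
 * yet every rank learns whom it receives from. */
static void nbx_exchange(MPI_Comm comm,int nto,const int *toranks,const int *todata,int tag)
{
  MPI_Request *sendreqs,barrier = MPI_REQUEST_NULL;
  int          i,sent = 0,done = 0;

  sendreqs = (MPI_Request*)malloc((size_t)nto*sizeof(MPI_Request));
  for (i=0; i<nto; i++) MPI_Issend((void*)&todata[i],1,MPI_INT,toranks[i],tag,comm,&sendreqs[i]);
  while (!done) {
    int        flag;
    MPI_Status status;
    MPI_Iprobe(MPI_ANY_SOURCE,tag,comm,&flag,&status);
    if (flag) {            /* a message we did not know was coming */
      int payload;
      MPI_Recv(&payload,1,MPI_INT,status.MPI_SOURCE,tag,comm,MPI_STATUS_IGNORE);
      /* record status.MPI_SOURCE and payload here */
    }
    if (!sent) {           /* all synchronous sends matched? then join the consensus barrier */
      MPI_Testall(nto,sendreqs,&sent,MPI_STATUSES_IGNORE);
      if (sent) MPI_Ibarrier(comm,&barrier);
    } else {
      MPI_Test(&barrier,&done,MPI_STATUS_IGNORE);  /* done once every rank has joined */
    }
  }
  free(sendreqs);
}

PETSc's version additionally stores the unknown number of incoming (rank, data) pairs in PetscSegBuffers and works around MPICH-3.2 sometimes returning a null barrier request, as the comments above note.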
152: static PetscErrorCode PetscCommBuildTwoSided_Allreduce(MPI_Comm comm,PetscMPIInt count,MPI_Datatype dtype,PetscMPIInt nto,const PetscMPIInt *toranks,const void *todata,PetscMPIInt *nfrom,PetscMPIInt **fromranks,void *fromdata)
153: {
154: PetscErrorCode ierr;
155: PetscMPIInt size,rank,*iflags,nrecvs,tag,*franks,i,flg;
156: MPI_Aint lb,unitbytes;
157: char *tdata,*fdata;
158: MPI_Request *reqs,*sendreqs;
159: MPI_Status *statuses;
160: PetscCommCounter *counter;
163: MPI_Comm_size(comm,&size);
164: MPI_Comm_rank(comm,&rank);
165: PetscCommDuplicate(comm,&comm,&tag);
166: MPI_Comm_get_attr(comm,Petsc_Counter_keyval,&counter,&flg);
167: if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Inner PETSc communicator does not have its tag/name counter attribute set");
168: if (!counter->iflags) {
169: PetscCalloc1(size,&counter->iflags);
170: iflags = counter->iflags;
171: } else {
172: iflags = counter->iflags;
173: PetscArrayzero(iflags,size);
174: }
175: for (i=0; i<nto; i++) iflags[toranks[i]] = 1;
176: MPIU_Allreduce(MPI_IN_PLACE,iflags,size,MPI_INT,MPI_SUM,comm);
177: nrecvs = iflags[rank];
178: MPI_Type_get_extent(dtype,&lb,&unitbytes);
179: if (lb != 0) SETERRQ1(comm,PETSC_ERR_SUP,"Datatype with nonzero lower bound %ld\n",(long)lb);
180: PetscMalloc(nrecvs*count*unitbytes,&fdata);
181: tdata = (char*)todata;
182: PetscMalloc2(nto+nrecvs,&reqs,nto+nrecvs,&statuses);
183: sendreqs = reqs + nrecvs;
184: for (i=0; i<nrecvs; i++) {
185: MPI_Irecv((void*)(fdata+count*unitbytes*i),count,dtype,MPI_ANY_SOURCE,tag,comm,reqs+i);
186: }
187: for (i=0; i<nto; i++) {
188: MPI_Isend((void*)(tdata+count*unitbytes*i),count,dtype,toranks[i],tag,comm,sendreqs+i);
189: }
190: MPI_Waitall(nto+nrecvs,reqs,statuses);
191: PetscMalloc1(nrecvs,&franks);
192: for (i=0; i<nrecvs; i++) franks[i] = statuses[i].MPI_SOURCE;
193: PetscFree2(reqs,statuses);
194: PetscCommDestroy(&comm);
196: *nfrom = nrecvs;
197: *fromranks = franks;
198: *(void**)fromdata = fdata;
199: return(0);
200: }
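To make the counting step concrete: with three ranks where rank 0 sends to {1}, rank 1 sends to {0,2} and rank 2 sends to {1}, the local iflags vectors are (0,1,0), (1,0,1) and (0,1,0). The elementwise MPI_SUM allreduce yields (1,2,1) on every rank, so rank 0 posts 1 receive, rank 1 posts 2 and rank 2 posts 1; the MPI_ANY_SOURCE receives and the MPI_SOURCE fields of the resulting statuses then recover the sender ranks.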
202: #if defined(PETSC_HAVE_MPI_REDUCE_SCATTER_BLOCK)
203: static PetscErrorCode PetscCommBuildTwoSided_RedScatter(MPI_Comm comm,PetscMPIInt count,MPI_Datatype dtype,PetscMPIInt nto,const PetscMPIInt *toranks,const void *todata,PetscMPIInt *nfrom,PetscMPIInt **fromranks,void *fromdata)
204: {
206: PetscMPIInt size,*iflags,nrecvs,tag,*franks,i,flg;
207: MPI_Aint lb,unitbytes;
208: char *tdata,*fdata;
209: MPI_Request *reqs,*sendreqs;
210: MPI_Status *statuses;
211: PetscCommCounter *counter;
214: MPI_Comm_size(comm,&size);
215: PetscCommDuplicate(comm,&comm,&tag);
216: MPI_Comm_get_attr(comm,Petsc_Counter_keyval,&counter,&flg);
217: if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Inner PETSc communicator does not have its tag/name counter attribute set");
218: if (!counter->iflags) {
219: PetscCalloc1(size,&counter->iflags);
220: iflags = counter->iflags;
221: } else {
222: iflags = counter->iflags;
223: PetscArrayzero(iflags,size);
224: }
225: for (i=0; i<nto; i++) iflags[toranks[i]] = 1;
226: MPI_Reduce_scatter_block(iflags,&nrecvs,1,MPI_INT,MPI_SUM,comm);
227: MPI_Type_get_extent(dtype,&lb,&unitbytes);
228: if (lb != 0) SETERRQ1(comm,PETSC_ERR_SUP,"Datatype with nonzero lower bound %ld\n",(long)lb);
229: PetscMalloc(nrecvs*count*unitbytes,&fdata);
230: tdata = (char*)todata;
231: PetscMalloc2(nto+nrecvs,&reqs,nto+nrecvs,&statuses);
232: sendreqs = reqs + nrecvs;
233: for (i=0; i<nrecvs; i++) {
234: MPI_Irecv((void*)(fdata+count*unitbytes*i),count,dtype,MPI_ANY_SOURCE,tag,comm,reqs+i);
235: }
236: for (i=0; i<nto; i++) {
237: MPI_Isend((void*)(tdata+count*unitbytes*i),count,dtype,toranks[i],tag,comm,sendreqs+i);
238: }
239: MPI_Waitall(nto+nrecvs,reqs,statuses);
240: PetscMalloc1(nrecvs,&franks);
241: for (i=0; i<nrecvs; i++) franks[i] = statuses[i].MPI_SOURCE;
242: PetscFree2(reqs,statuses);
243: PetscCommDestroy(&comm);
245: *nfrom = nrecvs;
246: *fromranks = franks;
247: *(void**)fromdata = fdata;
248: return(0);
249: }
250: #endif
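The only structural difference from the allreduce variant above is the counting call: MPI_Reduce_scatter_block(iflags,&nrecvs,1,MPI_INT,MPI_SUM,comm) sums the full iflags vectors but delivers each rank only its own entry, so nrecvs is obtained directly instead of reading iflags[rank] from a fully replicated result.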
252: /*@C
253: PetscCommBuildTwoSided - discovers communicating ranks given one-sided information, moving constant-sized data in the process (often message lengths)
255: Collective
257: Input Arguments:
258: + comm - communicator
259: . count - number of entries to send/receive (must match on all ranks)
260: . dtype - datatype to send/receive from each rank (must match on all ranks)
261: . nto - number of ranks to send data to
262: . toranks - ranks to send to (array of length nto)
263: - todata - data to send to each rank (packed)
265: Output Arguments:
266: + nfrom - number of ranks from which messages will be received
267: . fromranks - ranks from which messages will be received (length nfrom; caller should PetscFree())
268: - fromdata - packed data from each rank, each with count entries of type dtype (length nfrom, caller responsible for PetscFree())
270: Level: developer
272: Options Database Keys:
273: . -build_twosided <allreduce|ibarrier|redscatter> - algorithm to set up two-sided communication. Default is allreduce for communicators with <= 1024 ranks, otherwise ibarrier.
275: Notes:
276: This memory-scalable interface is an alternative to calling PetscGatherNumberOfMessages() and
277: PetscGatherMessageLengths(), possibly with a subsequent round of communication to send other constant-size data.
279: Basic data types as well as contiguous types are supported, but non-contiguous (e.g., strided) types are not.
281: References:
282: . 1. - Hoefler, Siebert and Lumsdaine, "Scalable communication protocols for dynamic sparse data exchange", 2010.
283: The MPI_Ibarrier-based implementation uses the algorithm from this reference.
285: .seealso: PetscGatherNumberOfMessages(), PetscGatherMessageLengths()
286: @*/
287: PetscErrorCode PetscCommBuildTwoSided(MPI_Comm comm,PetscMPIInt count,MPI_Datatype dtype,PetscMPIInt nto,const PetscMPIInt *toranks,const void *todata,PetscMPIInt *nfrom,PetscMPIInt **fromranks,void *fromdata)
288: {
289: PetscErrorCode ierr;
290: PetscBuildTwoSidedType buildtype = PETSC_BUILDTWOSIDED_NOTSET;
293: PetscSysInitializePackage();
294: PetscLogEventSync(PETSC_BuildTwoSided,comm);
295: PetscLogEventBegin(PETSC_BuildTwoSided,0,0,0,0);
296: PetscCommBuildTwoSidedGetType(comm,&buildtype);
297: switch (buildtype) {
298: case PETSC_BUILDTWOSIDED_IBARRIER:
299: #if defined(PETSC_HAVE_MPI_IBARRIER) || defined(PETSC_HAVE_MPIX_IBARRIER)
300: PetscCommBuildTwoSided_Ibarrier(comm,count,dtype,nto,toranks,todata,nfrom,fromranks,fromdata);
301: #else
302: SETERRQ(comm,PETSC_ERR_PLIB,"MPI implementation does not provide MPI_Ibarrier (part of MPI-3)");
303: #endif
304: break;
305: case PETSC_BUILDTWOSIDED_ALLREDUCE:
306: PetscCommBuildTwoSided_Allreduce(comm,count,dtype,nto,toranks,todata,nfrom,fromranks,fromdata);
307: break;
308: case PETSC_BUILDTWOSIDED_REDSCATTER:
309: #if defined(PETSC_HAVE_MPI_REDUCE_SCATTER_BLOCK)
310: PetscCommBuildTwoSided_RedScatter(comm,count,dtype,nto,toranks,todata,nfrom,fromranks,fromdata);
311: #else
312: SETERRQ(comm,PETSC_ERR_PLIB,"MPI implementation does not provide MPI_Reduce_scatter_block (part of MPI-2.2)");
313: #endif
314: break;
315: default: SETERRQ(comm,PETSC_ERR_PLIB,"Unknown method for building two-sided communication");
316: }
317: PetscLogEventEnd(PETSC_BuildTwoSided,0,0,0,0);
318: return(0);
319: }
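A caller-side sketch (hypothetical helper name ExchangeLengths, not part of mpits.c) of the most common use of PetscCommBuildTwoSided(): each rank advertises how many entries it will later send to each destination and learns the corresponding counts for the messages it will receive:

#include <petscsys.h>

static PetscErrorCode ExchangeLengths(MPI_Comm comm,PetscMPIInt nto,const PetscMPIInt *toranks,const PetscMPIInt *tolengths)
{
  PetscErrorCode ierr;
  PetscMPIInt    nfrom,*fromranks,*fromlengths,i;

  PetscFunctionBegin;
  /* One PetscMPIInt (the future message length) is moved to each destination rank. */
  ierr = PetscCommBuildTwoSided(comm,1,MPI_INT,nto,toranks,tolengths,&nfrom,&fromranks,&fromlengths);CHKERRQ(ierr);
  for (i=0; i<nfrom; i++) {
    ierr = PetscSynchronizedPrintf(comm,"expecting %d entries from rank %d\n",fromlengths[i],fromranks[i]);CHKERRQ(ierr);
  }
  ierr = PetscSynchronizedFlush(comm,PETSC_STDOUT);CHKERRQ(ierr);
  ierr = PetscFree(fromranks);CHKERRQ(ierr);    /* documented caller responsibility */
  ierr = PetscFree(fromlengths);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}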
321: static PetscErrorCode PetscCommBuildTwoSidedFReq_Reference(MPI_Comm comm,PetscMPIInt count,MPI_Datatype dtype,PetscMPIInt nto,const PetscMPIInt *toranks,const void *todata,
322: PetscMPIInt *nfrom,PetscMPIInt **fromranks,void *fromdata,PetscMPIInt ntags,MPI_Request **toreqs,MPI_Request **fromreqs,
323: PetscErrorCode (*send)(MPI_Comm,const PetscMPIInt[],PetscMPIInt,PetscMPIInt,void*,MPI_Request[],void*),
324: PetscErrorCode (*recv)(MPI_Comm,const PetscMPIInt[],PetscMPIInt,void*,MPI_Request[],void*),void *ctx)
325: {
327: PetscMPIInt i,*tag;
328: MPI_Aint lb,unitbytes;
329: MPI_Request *sendreq,*recvreq;
332: PetscMalloc1(ntags,&tag);
333: if (ntags > 0) {
334: PetscCommDuplicate(comm,&comm,&tag[0]);
335: }
336: for (i=1; i<ntags; i++) {
337: PetscCommGetNewTag(comm,&tag[i]);
338: }
340: /* Perform complete initial rendezvous */
341: PetscCommBuildTwoSided(comm,count,dtype,nto,toranks,todata,nfrom,fromranks,fromdata);
343: PetscMalloc1(nto*ntags,&sendreq);
344: PetscMalloc1(*nfrom*ntags,&recvreq);
346: MPI_Type_get_extent(dtype,&lb,&unitbytes);
347: if (lb != 0) SETERRQ1(comm,PETSC_ERR_SUP,"Datatype with nonzero lower bound %ld\n",(long)lb);
348: for (i=0; i<nto; i++) {
349: PetscMPIInt k;
350: for (k=0; k<ntags; k++) sendreq[i*ntags+k] = MPI_REQUEST_NULL;
351: (*send)(comm,tag,i,toranks[i],((char*)todata)+count*unitbytes*i,sendreq+i*ntags,ctx);
352: }
353: for (i=0; i<*nfrom; i++) {
354: void *header = (*(char**)fromdata) + count*unitbytes*i;
355: PetscMPIInt k;
356: for (k=0; k<ntags; k++) recvreq[i*ntags+k] = MPI_REQUEST_NULL;
357: (*recv)(comm,tag,(*fromranks)[i],header,recvreq+i*ntags,ctx);
358: }
359: PetscFree(tag);
360: PetscCommDestroy(&comm);
361: *toreqs = sendreq;
362: *fromreqs = recvreq;
363: return(0);
364: }
366: #if defined(PETSC_HAVE_MPI_IBARRIER) || defined(PETSC_HAVE_MPIX_IBARRIER)
368: static PetscErrorCode PetscCommBuildTwoSidedFReq_Ibarrier(MPI_Comm comm,PetscMPIInt count,MPI_Datatype dtype,PetscMPIInt nto,const PetscMPIInt *toranks,const void *todata,
369: PetscMPIInt *nfrom,PetscMPIInt **fromranks,void *fromdata,PetscMPIInt ntags,MPI_Request **toreqs,MPI_Request **fromreqs,
370: PetscErrorCode (*send)(MPI_Comm,const PetscMPIInt[],PetscMPIInt,PetscMPIInt,void*,MPI_Request[],void*),
371: PetscErrorCode (*recv)(MPI_Comm,const PetscMPIInt[],PetscMPIInt,void*,MPI_Request[],void*),void *ctx)
372: {
374: PetscMPIInt nrecvs,tag,*tags,done,i;
375: MPI_Aint lb,unitbytes;
376: char *tdata;
377: MPI_Request *sendreqs,*usendreqs,*req,barrier;
378: PetscSegBuffer segrank,segdata,segreq;
379: PetscBool barrier_started;
382: PetscCommDuplicate(comm,&comm,&tag);
383: PetscMalloc1(ntags,&tags);
384: for (i=0; i<ntags; i++) {
385: PetscCommGetNewTag(comm,&tags[i]);
386: }
387: MPI_Type_get_extent(dtype,&lb,&unitbytes);
388: if (lb != 0) SETERRQ1(comm,PETSC_ERR_SUP,"Datatype with nonzero lower bound %ld\n",(long)lb);
389: tdata = (char*)todata;
390: PetscMalloc1(nto,&sendreqs);
391: PetscMalloc1(nto*ntags,&usendreqs);
392: /* Post synchronous sends */
393: for (i=0; i<nto; i++) {
394: MPI_Issend((void*)(tdata+count*unitbytes*i),count,dtype,toranks[i],tag,comm,sendreqs+i);
395: }
396: /* Post actual payloads. These are typically larger messages. Hopefully sending these later does not slow down the
397: * synchronous messages above. */
398: for (i=0; i<nto; i++) {
399: PetscMPIInt k;
400: for (k=0; k<ntags; k++) usendreqs[i*ntags+k] = MPI_REQUEST_NULL;
401: (*send)(comm,tags,i,toranks[i],tdata+count*unitbytes*i,usendreqs+i*ntags,ctx);
402: }
404: PetscSegBufferCreate(sizeof(PetscMPIInt),4,&segrank);
405: PetscSegBufferCreate(unitbytes,4*count,&segdata);
406: PetscSegBufferCreate(sizeof(MPI_Request),4,&segreq);
408: nrecvs = 0;
409: barrier = MPI_REQUEST_NULL;
410: /* MPICH-3.2 sometimes does not create a request in some "optimized" cases. This is arguably a standard violation,
411: * but we need to work around it. */
412: barrier_started = PETSC_FALSE;
413: for (done=0; !done; ) {
414: PetscMPIInt flag;
415: MPI_Status status;
416: MPI_Iprobe(MPI_ANY_SOURCE,tag,comm,&flag,&status);
417: if (flag) { /* incoming message */
418: PetscMPIInt *recvrank,k;
419: void *buf;
420: PetscSegBufferGet(segrank,1,&recvrank);
421: PetscSegBufferGet(segdata,count,&buf);
422: *recvrank = status.MPI_SOURCE;
423: MPI_Recv(buf,count,dtype,status.MPI_SOURCE,tag,comm,MPI_STATUS_IGNORE);
424: PetscSegBufferGet(segreq,ntags,&req);
425: for (k=0; k<ntags; k++) req[k] = MPI_REQUEST_NULL;
426: (*recv)(comm,tags,status.MPI_SOURCE,buf,req,ctx);
427: nrecvs++;
428: }
429: if (!barrier_started) {
430: PetscMPIInt sent,nsends;
431: PetscMPIIntCast(nto,&nsends);
432: MPI_Testall(nsends,sendreqs,&sent,MPI_STATUSES_IGNORE);
433: if (sent) {
434: #if defined(PETSC_HAVE_MPI_IBARRIER)
435: MPI_Ibarrier(comm,&barrier);
436: #elif defined(PETSC_HAVE_MPIX_IBARRIER)
437: MPIX_Ibarrier(comm,&barrier);
438: #endif
439: barrier_started = PETSC_TRUE;
440: }
441: } else {
442: MPI_Test(&barrier,&done,MPI_STATUS_IGNORE);
443: }
444: }
445: *nfrom = nrecvs;
446: PetscSegBufferExtractAlloc(segrank,fromranks);
447: PetscSegBufferDestroy(&segrank);
448: PetscSegBufferExtractAlloc(segdata,fromdata);
449: PetscSegBufferDestroy(&segdata);
450: *toreqs = usendreqs;
451: PetscSegBufferExtractAlloc(segreq,fromreqs);
452: PetscSegBufferDestroy(&segreq);
453: PetscFree(sendreqs);
454: PetscFree(tags);
455: PetscCommDestroy(&comm);
456: return(0);
457: }
458: #endif
460: /*@C
461: PetscCommBuildTwoSidedF - discovers communicating ranks given one-sided information, calling user-defined functions during rendezvous
463: Collective
465: Input Arguments:
466: + comm - communicator
467: . count - number of entries to send/receive in initial rendezvous (must match on all ranks)
468: . dtype - datatype to send/receive from each rank (must match on all ranks)
469: . nto - number of ranks to send data to
470: . toranks - ranks to send to (array of length nto)
471: . todata - data to send to each rank (packed)
472: . ntags - number of tags needed by send/recv callbacks
473: . send - callback invoked on sending process when ready to send primary payload
474: . recv - callback invoked on receiving process after delivery of rendezvous message
475: - ctx - context for callbacks
477: Output Arguments:
478: + nfrom - number of ranks from which messages will be received
479: . fromranks - ranks from which messages will be received (length nfrom; caller should PetscFree())
480: - fromdata - packed data from each rank, each with count entries of type dtype (length nfrom, caller responsible for PetscFree())
482: Level: developer
484: Notes:
485: This memory-scalable interface is an alternative to calling PetscGatherNumberOfMessages() and
486: PetscGatherMessageLengths(), possibly with a subsequent round of communication to send other data.
488: Basic data types as well as contiguous types are supported, but non-contiguous (e.g., strided) types are not.
490: References:
491: . 1. - Hoefler, Siebert and Lumsdaine, "Scalable communication protocols for dynamic sparse data exchange", 2010.
492: The MPI_Ibarrier-based implementation uses the algorithm from this reference.
494: .seealso: PetscCommBuildTwoSided(), PetscCommBuildTwoSidedFReq(), PetscGatherNumberOfMessages(), PetscGatherMessageLengths()
495: @*/
496: PetscErrorCode PetscCommBuildTwoSidedF(MPI_Comm comm,PetscMPIInt count,MPI_Datatype dtype,PetscMPIInt nto,const PetscMPIInt *toranks,const void *todata,PetscMPIInt *nfrom,PetscMPIInt **fromranks,void *fromdata,PetscMPIInt ntags,
497: PetscErrorCode (*send)(MPI_Comm,const PetscMPIInt[],PetscMPIInt,PetscMPIInt,void*,MPI_Request[],void*),
498: PetscErrorCode (*recv)(MPI_Comm,const PetscMPIInt[],PetscMPIInt,void*,MPI_Request[],void*),void *ctx)
499: {
501: MPI_Request *toreqs,*fromreqs;
504: PetscCommBuildTwoSidedFReq(comm,count,dtype,nto,toranks,todata,nfrom,fromranks,fromdata,ntags,&toreqs,&fromreqs,send,recv,ctx);
505: MPI_Waitall(nto*ntags,toreqs,MPI_STATUSES_IGNORE);
506: MPI_Waitall(*nfrom*ntags,fromreqs,MPI_STATUSES_IGNORE);
507: PetscFree(toreqs);
508: PetscFree(fromreqs);
509: return(0);
510: }
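A sketch of what the user callbacks might look like (the names ExchangeCtx, SendPayload and RecvPayload are hypothetical, and for simplicity the context is assumed to preallocate recvbufs with one slot per rank rather than growing it on demand). The rendezvous datum is a single PetscMPIInt payload length, so count is 1 and dtype is MPI_INT, and the callbacks post the variable-length payload on the first (and only) user tag, so ntags is 1:

#include <petscsys.h>

typedef struct {
  PetscScalar **sendbufs;   /* sendbufs[i] is the payload destined for toranks[i] */
  PetscScalar **recvbufs;   /* preallocated with one slot per rank in comm (sketch simplification) */
  PetscMPIInt   nrecvd;
} ExchangeCtx;

static PetscErrorCode SendPayload(MPI_Comm comm,const PetscMPIInt tags[],PetscMPIInt i,PetscMPIInt rank,void *slice,MPI_Request req[],void *vctx)
{
  ExchangeCtx   *ctx = (ExchangeCtx*)vctx;
  PetscMPIInt    len = *(PetscMPIInt*)slice;   /* the length we advertised in the rendezvous */
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MPI_Isend(ctx->sendbufs[i],len,MPIU_SCALAR,rank,tags[0],comm,&req[0]);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode RecvPayload(MPI_Comm comm,const PetscMPIInt tags[],PetscMPIInt rank,void *slice,MPI_Request req[],void *vctx)
{
  ExchangeCtx   *ctx = (ExchangeCtx*)vctx;
  PetscMPIInt    len = *(PetscMPIInt*)slice;   /* the length the sender advertised */
  PetscScalar   *buf;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscMalloc1(len,&buf);CHKERRQ(ierr);
  ctx->recvbufs[ctx->nrecvd++] = buf;
  ierr = MPI_Irecv(buf,len,MPIU_SCALAR,rank,tags[0],comm,&req[0]);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

With the lengths packed in a PetscMPIInt array tolengths, the call would then be PetscCommBuildTwoSidedF(comm,1,MPI_INT,nto,toranks,tolengths,&nfrom,&fromranks,&fromlengths,1,SendPayload,RecvPayload,&ctx); because the routine waits on all callback requests before returning, the payload sends and receives have completed by the time it returns.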
512: /*@C
513: PetscCommBuildTwoSidedFReq - discovers communicating ranks given one-sided information, calling user-defined functions during rendezvous, returns requests
515: Collective
517: Input Arguments:
518: + comm - communicator
519: . count - number of entries to send/receive in initial rendezvous (must match on all ranks)
520: . dtype - datatype to send/receive from each rank (must match on all ranks)
521: . nto - number of ranks to send data to
522: . toranks - ranks to send to (array of length nto)
523: . todata - data to send to each rank (packed)
524: . ntags - number of tags needed by send/recv callbacks
525: . send - callback invoked on sending process when ready to send primary payload
526: . recv - callback invoked on receiving process after delivery of rendezvous message
527: - ctx - context for callbacks
529: Output Arguments:
530: + nfrom - number of ranks from which messages will be received
531: . fromranks - ranks from which messages will be received (length nfrom; caller should PetscFree())
532: . fromdata - packed data from each rank, each with count entries of type dtype (length nfrom, caller responsible for PetscFree())
533: . toreqs - array of nto*ntags sender requests (caller must wait on these, then PetscFree())
534: - fromreqs - array of nfrom*ntags receiver requests (caller must wait on these, then PetscFree())
536: Level: developer
538: Notes:
539: This memory-scalable interface is an alternative to calling PetscGatherNumberOfMessages() and
540: PetscGatherMessageLengths(), possibly with a subsequent round of communication to send other data.
542: Basic data types as well as contiguous types are supported, but non-contiguous (e.g., strided) types are not.
544: References:
545: . 1. - Hoefler, Siebert and Lumsdaine, "Scalable communication protocols for dynamic sparse data exchange", 2010.
546: The MPI_Ibarrier-based implementation uses the algorithm from this reference.
548: .seealso: PetscCommBuildTwoSided(), PetscCommBuildTwoSidedF(), PetscGatherNumberOfMessages(), PetscGatherMessageLengths()
549: @*/
550: PetscErrorCode PetscCommBuildTwoSidedFReq(MPI_Comm comm,PetscMPIInt count,MPI_Datatype dtype,PetscMPIInt nto,const PetscMPIInt *toranks,const void *todata,
551: PetscMPIInt *nfrom,PetscMPIInt **fromranks,void *fromdata,PetscMPIInt ntags,MPI_Request **toreqs,MPI_Request **fromreqs,
552: PetscErrorCode (*send)(MPI_Comm,const PetscMPIInt[],PetscMPIInt,PetscMPIInt,void*,MPI_Request[],void*),
553: PetscErrorCode (*recv)(MPI_Comm,const PetscMPIInt[],PetscMPIInt,void*,MPI_Request[],void*),void *ctx)
554: {
555: PetscErrorCode ierr,(*f)(MPI_Comm,PetscMPIInt,MPI_Datatype,PetscMPIInt,const PetscMPIInt[],const void*,
556: PetscMPIInt*,PetscMPIInt**,void*,PetscMPIInt,MPI_Request**,MPI_Request**,
557: PetscErrorCode (*send)(MPI_Comm,const PetscMPIInt[],PetscMPIInt,PetscMPIInt,void*,MPI_Request[],void*),
558: PetscErrorCode (*recv)(MPI_Comm,const PetscMPIInt[],PetscMPIInt,void*,MPI_Request[],void*),void *ctx);
559: PetscBuildTwoSidedType buildtype = PETSC_BUILDTWOSIDED_NOTSET;
560: PetscMPIInt i,size;
563: PetscSysInitializePackage();
564: MPI_Comm_size(comm,&size);
565: for (i=0; i<nto; i++) {
566: if (toranks[i] < 0 || size <= toranks[i]) SETERRQ3(comm,PETSC_ERR_ARG_OUTOFRANGE,"toranks[%d] %d not in comm size %d",i,toranks[i],size);
567: }
568: PetscLogEventSync(PETSC_BuildTwoSidedF,comm);
569: PetscLogEventBegin(PETSC_BuildTwoSidedF,0,0,0,0);
570: PetscCommBuildTwoSidedGetType(comm,&buildtype);
571: switch (buildtype) {
572: case PETSC_BUILDTWOSIDED_IBARRIER:
573: #if defined(PETSC_HAVE_MPI_IBARRIER) || defined(PETSC_HAVE_MPIX_IBARRIER)
574: f = PetscCommBuildTwoSidedFReq_Ibarrier;
575: #else
576: SETERRQ(comm,PETSC_ERR_PLIB,"MPI implementation does not provide MPI_Ibarrier (part of MPI-3)");
577: #endif
578: break;
579: case PETSC_BUILDTWOSIDED_ALLREDUCE:
580: case PETSC_BUILDTWOSIDED_REDSCATTER:
581: f = PetscCommBuildTwoSidedFReq_Reference;
582: break;
583: default: SETERRQ(comm,PETSC_ERR_PLIB,"Unknown method for building two-sided communication");
584: }
585: (*f)(comm,count,dtype,nto,toranks,todata,nfrom,fromranks,fromdata,ntags,toreqs,fromreqs,send,recv,ctx);
586: PetscLogEventEnd(PETSC_BuildTwoSidedF,0,0,0,0);
587: return(0);
588: }