Actual source code: mpits.c

  1: #include <petscsys.h>
  2: #include <petsc/private/petscimpl.h>

  4: PetscLogEvent PETSC_BuildTwoSided;
  5: PetscLogEvent PETSC_BuildTwoSidedF;

  7: const char *const PetscBuildTwoSidedTypes[] = {
  8:   "ALLREDUCE",
  9:   "IBARRIER",
 10:   "REDSCATTER",
 11:   "PetscBuildTwoSidedType",
 12:   "PETSC_BUILDTWOSIDED_",
 13:   NULL
 14: };

 16: static PetscBuildTwoSidedType _twosided_type = PETSC_BUILDTWOSIDED_NOTSET;

 18: /*@
 19:    PetscCommBuildTwoSidedSetType - set algorithm to use when building two-sided communication

 21:    Logically Collective

 23:    Input Parameters:
 24: +  comm - communicator (the setting is currently global, so PETSC_COMM_WORLD is typical)
 25: -  twosided - algorithm to use in subsequent calls to PetscCommBuildTwoSided()

 27:    Level: developer

 29:    Note:
 30:    This option is currently global, but could be made per-communicator.

 32: .seealso: PetscCommBuildTwoSided(), PetscCommBuildTwoSidedGetType()
 33: @*/
 34: PetscErrorCode PetscCommBuildTwoSidedSetType(MPI_Comm comm,PetscBuildTwoSidedType twosided)
 35: {
 36:   if (PetscDefined(USE_DEBUG)) { /* check that every rank passed the same value */
 37:     PetscMPIInt b1[2],b2[2];
 38:     b1[0] = -(PetscMPIInt)twosided;
 39:     b1[1] = (PetscMPIInt)twosided;
 40:     MPIU_Allreduce(b1,b2,2,MPI_INT,MPI_MAX,comm);
 41:     if (-b2[0] != b2[1]) SETERRQ(comm,PETSC_ERR_ARG_WRONG,"Enum value must be the same on all processes");
 42:   }
 43:   _twosided_type = twosided;
 44:   return 0;
 45: }
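
/* Illustrative usage sketch (not part of mpits.c; ExampleForceIbarrier and its use of
 * PETSC_COMM_WORLD are assumptions made for this example): force the Ibarrier rendezvous
 * algorithm programmatically and confirm the selection.  The same effect is available at
 * runtime via the option -build_twosided ibarrier. */
static PetscErrorCode ExampleForceIbarrier(void)
{
  PetscBuildTwoSidedType type;

  PetscCommBuildTwoSidedSetType(PETSC_COMM_WORLD,PETSC_BUILDTWOSIDED_IBARRIER); /* requires MPI-3 nonblocking collectives */
  PetscCommBuildTwoSidedGetType(PETSC_COMM_WORLD,&type);
  PetscPrintf(PETSC_COMM_WORLD,"two-sided build algorithm: %s\n",PetscBuildTwoSidedTypes[type]);
  return 0;
}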

 47: /*@
 48:    PetscCommBuildTwoSidedGetType - get the algorithm that will be used when building two-sided communication

 50:    Logically Collective

 52:    Input Parameter:
 53: .  comm - communicator on which to query the algorithm

 54:    Output Parameter:
 55: .  twosided - the algorithm that will be used by PetscCommBuildTwoSided()

 56:    Level: developer

 58: .seealso: PetscCommBuildTwoSided(), PetscCommBuildTwoSidedSetType()
 59: @*/
 60: PetscErrorCode PetscCommBuildTwoSidedGetType(MPI_Comm comm,PetscBuildTwoSidedType *twosided)
 61: {
 62:   PetscMPIInt    size;

 64:   *twosided = PETSC_BUILDTWOSIDED_NOTSET;
 65:   if (_twosided_type == PETSC_BUILDTWOSIDED_NOTSET) {
 66:     MPI_Comm_size(comm,&size);
 67:     _twosided_type = PETSC_BUILDTWOSIDED_ALLREDUCE; /* default for small comms, see https://gitlab.com/petsc/petsc/-/merge_requests/2611 */
 68: #if defined(PETSC_HAVE_MPI_NONBLOCKING_COLLECTIVES)
 69:     if (size > 1024) _twosided_type = PETSC_BUILDTWOSIDED_IBARRIER;
 70: #endif
 71:     PetscOptionsGetEnum(NULL,NULL,"-build_twosided",PetscBuildTwoSidedTypes,(PetscEnum*)&_twosided_type,NULL);
 72:   }
 73:   *twosided = _twosided_type;
 74:   return 0;
 75: }

 77: #if defined(PETSC_HAVE_MPI_NONBLOCKING_COLLECTIVES)
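/* PetscCommBuildTwoSided_Ibarrier implements the NBX-style rendezvous of Hoefler, Siebert and
 * Lumsdaine (2010), cited in the docstrings below: each rank posts synchronous sends (MPI_Issend)
 * of its rendezvous data and then loops, probing for and receiving incoming messages.  Once all of
 * its own synchronous sends have completed (i.e., have been matched by receivers), the rank starts
 * a nonblocking barrier (MPI_Ibarrier) while continuing to probe; when the barrier completes, every
 * rendezvous message has been received and the loop exits. */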
 78: static PetscErrorCode PetscCommBuildTwoSided_Ibarrier(MPI_Comm comm,PetscMPIInt count,MPI_Datatype dtype,PetscMPIInt nto,const PetscMPIInt *toranks,const void *todata,PetscMPIInt *nfrom,PetscMPIInt **fromranks,void *fromdata)
 79: {
 80:   PetscMPIInt    nrecvs,tag,done,i;
 81:   MPI_Aint       lb,unitbytes;
 82:   char           *tdata;
 83:   MPI_Request    *sendreqs,barrier;
 84:   PetscSegBuffer segrank,segdata;
 85:   PetscBool      barrier_started;

 87:   PetscCommDuplicate(comm,&comm,&tag);
 88:   MPI_Type_get_extent(dtype,&lb,&unitbytes);
 90:   tdata = (char*)todata;
 91:   PetscMalloc1(nto,&sendreqs);
 92:   for (i=0; i<nto; i++) {
 93:     MPI_Issend((void*)(tdata+count*unitbytes*i),count,dtype,toranks[i],tag,comm,sendreqs+i);
 94:   }
 95:   PetscSegBufferCreate(sizeof(PetscMPIInt),4,&segrank);
 96:   PetscSegBufferCreate(unitbytes,4*count,&segdata);

 98:   nrecvs  = 0;
 99:   barrier = MPI_REQUEST_NULL;
100:   /* MPICH-3.2 sometimes does not create a request in some "optimized" cases.  This is arguably a standard violation,
101:    * but we need to work around it. */
102:   barrier_started = PETSC_FALSE;
103:   for (done=0; !done;) {
104:     PetscMPIInt flag;
105:     MPI_Status  status;
106:     MPI_Iprobe(MPI_ANY_SOURCE,tag,comm,&flag,&status);
107:     if (flag) {                 /* incoming message */
108:       PetscMPIInt *recvrank;
109:       void        *buf;
110:       PetscSegBufferGet(segrank,1,&recvrank);
111:       PetscSegBufferGet(segdata,count,&buf);
112:       *recvrank = status.MPI_SOURCE;
113:       MPI_Recv(buf,count,dtype,status.MPI_SOURCE,tag,comm,MPI_STATUS_IGNORE);
114:       nrecvs++;
115:     }
116:     if (!barrier_started) {
117:       PetscMPIInt sent,nsends;
118:       PetscMPIIntCast(nto,&nsends);
119:       MPI_Testall(nsends,sendreqs,&sent,MPI_STATUSES_IGNORE);
120:       if (sent) {
121:         MPI_Ibarrier(comm,&barrier);
122:         barrier_started = PETSC_TRUE;
123:         PetscFree(sendreqs);
124:       }
125:     } else {
126:       MPI_Test(&barrier,&done,MPI_STATUS_IGNORE);
127:     }
128:   }
129:   *nfrom = nrecvs;
130:   PetscSegBufferExtractAlloc(segrank,fromranks);
131:   PetscSegBufferDestroy(&segrank);
132:   PetscSegBufferExtractAlloc(segdata,fromdata);
133:   PetscSegBufferDestroy(&segdata);
134:   PetscCommDestroy(&comm);
135:   return 0;
136: }
137: #endif

139: static PetscErrorCode PetscCommBuildTwoSided_Allreduce(MPI_Comm comm,PetscMPIInt count,MPI_Datatype dtype,PetscMPIInt nto,const PetscMPIInt *toranks,const void *todata,PetscMPIInt *nfrom,PetscMPIInt **fromranks,void *fromdata)
140: {
141:   PetscMPIInt      size,rank,*iflags,nrecvs,tag,*franks,i,flg;
142:   MPI_Aint         lb,unitbytes;
143:   char             *tdata,*fdata;
144:   MPI_Request      *reqs,*sendreqs;
145:   MPI_Status       *statuses;
146:   PetscCommCounter *counter;

148:   MPI_Comm_size(comm,&size);
149:   MPI_Comm_rank(comm,&rank);
150:   PetscCommDuplicate(comm,&comm,&tag);
151:   MPI_Comm_get_attr(comm,Petsc_Counter_keyval,&counter,&flg);
153:   if (!counter->iflags) {
154:     PetscCalloc1(size,&counter->iflags);
155:     iflags = counter->iflags;
156:   } else {
157:     iflags = counter->iflags;
158:     PetscArrayzero(iflags,size);
159:   }
160:   for (i=0; i<nto; i++) iflags[toranks[i]] = 1;
161:   MPIU_Allreduce(MPI_IN_PLACE,iflags,size,MPI_INT,MPI_SUM,comm);
162:   nrecvs   = iflags[rank];
163:   MPI_Type_get_extent(dtype,&lb,&unitbytes);
165:   PetscMalloc(nrecvs*count*unitbytes,&fdata);
166:   tdata    = (char*)todata;
167:   PetscMalloc2(nto+nrecvs,&reqs,nto+nrecvs,&statuses);
168:   sendreqs = reqs + nrecvs;
169:   for (i=0; i<nrecvs; i++) {
170:     MPI_Irecv((void*)(fdata+count*unitbytes*i),count,dtype,MPI_ANY_SOURCE,tag,comm,reqs+i);
171:   }
172:   for (i=0; i<nto; i++) {
173:     MPI_Isend((void*)(tdata+count*unitbytes*i),count,dtype,toranks[i],tag,comm,sendreqs+i);
174:   }
175:   MPI_Waitall(nto+nrecvs,reqs,statuses);
176:   PetscMalloc1(nrecvs,&franks);
177:   for (i=0; i<nrecvs; i++) franks[i] = statuses[i].MPI_SOURCE;
178:   PetscFree2(reqs,statuses);
179:   PetscCommDestroy(&comm);

181:   *nfrom            = nrecvs;
182:   *fromranks        = franks;
183:   *(void**)fromdata = fdata;
184:   return 0;
185: }
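
/* Worked example of the counting step above (illustrative, not part of the PETSc source): on a
 * 4-rank communicator where rank 0 sends to {1,3} and rank 2 sends to {1}, the local indicator
 * arrays before the allreduce are
 *   rank 0: {0,1,0,1}   rank 1: {0,0,0,0}   rank 2: {0,1,0,0}   rank 3: {0,0,0,0}
 * and after MPIU_Allreduce(...,MPI_SUM,...) every rank holds {0,2,0,1}.  Rank 1 reads entry 1 and
 * learns it will receive nrecvs = 2 messages; rank 3 reads entry 3 and learns nrecvs = 1. */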

187: #if defined(PETSC_HAVE_MPI_REDUCE_SCATTER_BLOCK)
188: static PetscErrorCode PetscCommBuildTwoSided_RedScatter(MPI_Comm comm,PetscMPIInt count,MPI_Datatype dtype,PetscMPIInt nto,const PetscMPIInt *toranks,const void *todata,PetscMPIInt *nfrom,PetscMPIInt **fromranks,void *fromdata)
189: {
190:   PetscMPIInt    size,*iflags,nrecvs,tag,*franks,i,flg;
191:   MPI_Aint       lb,unitbytes;
192:   char           *tdata,*fdata;
193:   MPI_Request    *reqs,*sendreqs;
194:   MPI_Status     *statuses;
195:   PetscCommCounter *counter;

197:   MPI_Comm_size(comm,&size);
198:   PetscCommDuplicate(comm,&comm,&tag);
199:   MPI_Comm_get_attr(comm,Petsc_Counter_keyval,&counter,&flg);
201:   if (!counter->iflags) {
202:     PetscCalloc1(size,&counter->iflags);
203:     iflags = counter->iflags;
204:   } else {
205:     iflags = counter->iflags;
206:     PetscArrayzero(iflags,size);
207:   }
208:   for (i=0; i<nto; i++) iflags[toranks[i]] = 1;
209:   MPI_Reduce_scatter_block(iflags,&nrecvs,1,MPI_INT,MPI_SUM,comm);
210:   MPI_Type_get_extent(dtype,&lb,&unitbytes);
212:   PetscMalloc(nrecvs*count*unitbytes,&fdata);
213:   tdata    = (char*)todata;
214:   PetscMalloc2(nto+nrecvs,&reqs,nto+nrecvs,&statuses);
215:   sendreqs = reqs + nrecvs;
216:   for (i=0; i<nrecvs; i++) {
217:     MPI_Irecv((void*)(fdata+count*unitbytes*i),count,dtype,MPI_ANY_SOURCE,tag,comm,reqs+i);
218:   }
219:   for (i=0; i<nto; i++) {
220:     MPI_Isend((void*)(tdata+count*unitbytes*i),count,dtype,toranks[i],tag,comm,sendreqs+i);
221:   }
222:   MPI_Waitall(nto+nrecvs,reqs,statuses);
223:   PetscMalloc1(nrecvs,&franks);
224:   for (i=0; i<nrecvs; i++) franks[i] = statuses[i].MPI_SOURCE;
225:   PetscFree2(reqs,statuses);
226:   PetscCommDestroy(&comm);

228:   *nfrom            = nrecvs;
229:   *fromranks        = franks;
230:   *(void**)fromdata = fdata;
231:   return 0;
232: }
233: #endif

235: /*@C
236:    PetscCommBuildTwoSided - discovers communicating ranks given one-sided information, moving constant-sized data in the process (often message lengths)

238:    Collective

240:    Input Parameters:
241: +  comm - communicator
242: .  count - number of entries to send/receive (must match on all ranks)
243: .  dtype - datatype to send/receive from each rank (must match on all ranks)
244: .  nto - number of ranks to send data to
245: .  toranks - ranks to send to (array of length nto)
246: -  todata - data to send to each rank (packed)

248:    Output Parameters:
249: +  nfrom - number of ranks from which messages will be received
250: .  fromranks - ranks from which messages will be received (length nfrom); caller should free with PetscFree()
251: -  fromdata - packed data from each sending rank, count entries of type dtype per rank (length nfrom); caller should free with PetscFree()

253:    Level: developer

255:    Options Database Keys:
256: .  -build_twosided <allreduce|ibarrier|redscatter> - algorithm used to set up two-sided communication. The default is allreduce for communicators with at most 1024 ranks, otherwise ibarrier (which requires MPI-3 nonblocking collectives).

258:    Notes:
259:    This memory-scalable interface is an alternative to calling PetscGatherNumberOfMessages() and
260:    PetscGatherMessageLengths(), possibly with a subsequent round of communication to send other constant-size data.

262:    Basic data types as well as contiguous types are supported, but non-contiguous (e.g., strided) types are not.

264:    References:
265: .  * - Hoefler, Siebert and Lumsdaine, "Scalable communication protocols for dynamic sparse data exchange", 2010.
266:    The MPI_Ibarrier-based implementation uses the NBX algorithm from this paper.

268: .seealso: PetscGatherNumberOfMessages(), PetscGatherMessageLengths()
269: @*/
270: PetscErrorCode PetscCommBuildTwoSided(MPI_Comm comm,PetscMPIInt count,MPI_Datatype dtype,PetscMPIInt nto,const PetscMPIInt *toranks,const void *todata,PetscMPIInt *nfrom,PetscMPIInt **fromranks,void *fromdata)
271: {
272:   PetscBuildTwoSidedType buildtype = PETSC_BUILDTWOSIDED_NOTSET;

274:   PetscSysInitializePackage();
275:   PetscLogEventSync(PETSC_BuildTwoSided,comm);
276:   PetscLogEventBegin(PETSC_BuildTwoSided,0,0,0,0);
277:   PetscCommBuildTwoSidedGetType(comm,&buildtype);
278:   switch (buildtype) {
279:   case PETSC_BUILDTWOSIDED_IBARRIER:
280: #if defined(PETSC_HAVE_MPI_NONBLOCKING_COLLECTIVES)
281:     PetscCommBuildTwoSided_Ibarrier(comm,count,dtype,nto,toranks,todata,nfrom,fromranks,fromdata);
282:     break;
283: #else
284:     SETERRQ(comm,PETSC_ERR_PLIB,"MPI implementation does not provide MPI_Ibarrier (part of MPI-3)");
285: #endif
286:   case PETSC_BUILDTWOSIDED_ALLREDUCE:
287:     PetscCommBuildTwoSided_Allreduce(comm,count,dtype,nto,toranks,todata,nfrom,fromranks,fromdata);
288:     break;
289:   case PETSC_BUILDTWOSIDED_REDSCATTER:
290: #if defined(PETSC_HAVE_MPI_REDUCE_SCATTER_BLOCK)
291:     PetscCommBuildTwoSided_RedScatter(comm,count,dtype,nto,toranks,todata,nfrom,fromranks,fromdata);
292:     break;
293: #else
294:     SETERRQ(comm,PETSC_ERR_PLIB,"MPI implementation does not provide MPI_Reduce_scatter_block (part of MPI-2.2)");
295: #endif
296:   default: SETERRQ(comm,PETSC_ERR_PLIB,"Unknown method for building two-sided communication");
297:   }
298:   PetscLogEventEnd(PETSC_BuildTwoSided,0,0,0,0);
299:   return 0;
300: }
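
/* Illustrative usage sketch (ExampleExchangeLengths is a hypothetical helper, not part of mpits.c):
 * the common pattern of exchanging message lengths.  Each rank passes the number of entries it will
 * later send to each destination in toranks[]; PetscCommBuildTwoSided() discovers the senders and
 * delivers their lengths. */
static PetscErrorCode ExampleExchangeLengths(MPI_Comm comm,PetscMPIInt nto,const PetscMPIInt *toranks,const PetscMPIInt *tolengths)
{
  PetscMPIInt nfrom,*fromranks,*fromlengths,i;

  PetscCommBuildTwoSided(comm,1,MPI_INT,nto,toranks,tolengths,&nfrom,&fromranks,&fromlengths);
  for (i=0; i<nfrom; i++) {
    PetscSynchronizedPrintf(comm,"will receive %d entries from rank %d\n",fromlengths[i],fromranks[i]);
  }
  PetscSynchronizedFlush(comm,PETSC_STDOUT);
  PetscFree(fromranks);
  PetscFree(fromlengths);
  return 0;
}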

302: static PetscErrorCode PetscCommBuildTwoSidedFReq_Reference(MPI_Comm comm,PetscMPIInt count,MPI_Datatype dtype,PetscMPIInt nto,const PetscMPIInt *toranks,const void *todata,
303:                                                            PetscMPIInt *nfrom,PetscMPIInt **fromranks,void *fromdata,PetscMPIInt ntags,MPI_Request **toreqs,MPI_Request **fromreqs,
304:                                                            PetscErrorCode (*send)(MPI_Comm,const PetscMPIInt[],PetscMPIInt,PetscMPIInt,void*,MPI_Request[],void*),
305:                                                            PetscErrorCode (*recv)(MPI_Comm,const PetscMPIInt[],PetscMPIInt,void*,MPI_Request[],void*),void *ctx)
306: {
307:   PetscMPIInt i,*tag;
308:   MPI_Aint    lb,unitbytes;
309:   MPI_Request *sendreq,*recvreq;

311:   PetscMalloc1(ntags,&tag);
312:   if (ntags > 0) {
313:     PetscCommDuplicate(comm,&comm,&tag[0]);
314:   }
315:   for (i=1; i<ntags; i++) {
316:     PetscCommGetNewTag(comm,&tag[i]);
317:   }

319:   /* Perform complete initial rendezvous */
320:   PetscCommBuildTwoSided(comm,count,dtype,nto,toranks,todata,nfrom,fromranks,fromdata);

322:   PetscMalloc1(nto*ntags,&sendreq);
323:   PetscMalloc1(*nfrom*ntags,&recvreq);

325:   MPI_Type_get_extent(dtype,&lb,&unitbytes);
327:   for (i=0; i<nto; i++) {
328:     PetscMPIInt k;
329:     for (k=0; k<ntags; k++) sendreq[i*ntags+k] = MPI_REQUEST_NULL;
330:     (*send)(comm,tag,i,toranks[i],((char*)todata)+count*unitbytes*i,sendreq+i*ntags,ctx);
331:   }
332:   for (i=0; i<*nfrom; i++) {
333:     void *header = (*(char**)fromdata) + count*unitbytes*i;
334:     PetscMPIInt k;
335:     for (k=0; k<ntags; k++) recvreq[i*ntags+k] = MPI_REQUEST_NULL;
336:     (*recv)(comm,tag,(*fromranks)[i],header,recvreq+i*ntags,ctx);
337:   }
338:   PetscFree(tag);
339:   PetscCommDestroy(&comm);
340:   *toreqs = sendreq;
341:   *fromreqs = recvreq;
342:   return 0;
343: }

345: #if defined(PETSC_HAVE_MPI_NONBLOCKING_COLLECTIVES)

347: static PetscErrorCode PetscCommBuildTwoSidedFReq_Ibarrier(MPI_Comm comm,PetscMPIInt count,MPI_Datatype dtype,PetscMPIInt nto,const PetscMPIInt *toranks,const void *todata,
348:                                                           PetscMPIInt *nfrom,PetscMPIInt **fromranks,void *fromdata,PetscMPIInt ntags,MPI_Request **toreqs,MPI_Request **fromreqs,
349:                                                           PetscErrorCode (*send)(MPI_Comm,const PetscMPIInt[],PetscMPIInt,PetscMPIInt,void*,MPI_Request[],void*),
350:                                                           PetscErrorCode (*recv)(MPI_Comm,const PetscMPIInt[],PetscMPIInt,void*,MPI_Request[],void*),void *ctx)
351: {
352:   PetscMPIInt    nrecvs,tag,*tags,done,i;
353:   MPI_Aint       lb,unitbytes;
354:   char           *tdata;
355:   MPI_Request    *sendreqs,*usendreqs,*req,barrier;
356:   PetscSegBuffer segrank,segdata,segreq;
357:   PetscBool      barrier_started;

359:   PetscCommDuplicate(comm,&comm,&tag);
360:   PetscMalloc1(ntags,&tags);
361:   for (i=0; i<ntags; i++) {
362:     PetscCommGetNewTag(comm,&tags[i]);
363:   }
364:   MPI_Type_get_extent(dtype,&lb,&unitbytes);
366:   tdata = (char*)todata;
367:   PetscMalloc1(nto,&sendreqs);
368:   PetscMalloc1(nto*ntags,&usendreqs);
369:   /* Post synchronous sends */
370:   for (i=0; i<nto; i++) {
371:     MPI_Issend((void*)(tdata+count*unitbytes*i),count,dtype,toranks[i],tag,comm,sendreqs+i);
372:   }
373:   /* Post actual payloads.  These are typically larger messages.  Hopefully sending these later does not slow down the
374:    * synchronous messages above. */
375:   for (i=0; i<nto; i++) {
376:     PetscMPIInt k;
377:     for (k=0; k<ntags; k++) usendreqs[i*ntags+k] = MPI_REQUEST_NULL;
378:     (*send)(comm,tags,i,toranks[i],tdata+count*unitbytes*i,usendreqs+i*ntags,ctx);
379:   }

381:   PetscSegBufferCreate(sizeof(PetscMPIInt),4,&segrank);
382:   PetscSegBufferCreate(unitbytes,4*count,&segdata);
383:   PetscSegBufferCreate(sizeof(MPI_Request),4,&segreq);

385:   nrecvs  = 0;
386:   barrier = MPI_REQUEST_NULL;
387:   /* MPICH-3.2 sometimes does not create a request in some "optimized" cases.  This is arguably a standard violation,
388:    * but we need to work around it. */
389:   barrier_started = PETSC_FALSE;
390:   for (done=0; !done;) {
391:     PetscMPIInt flag;
392:     MPI_Status  status;
393:     MPI_Iprobe(MPI_ANY_SOURCE,tag,comm,&flag,&status);
394:     if (flag) {                 /* incoming message */
395:       PetscMPIInt *recvrank,k;
396:       void        *buf;
397:       PetscSegBufferGet(segrank,1,&recvrank);
398:       PetscSegBufferGet(segdata,count,&buf);
399:       *recvrank = status.MPI_SOURCE;
400:       MPI_Recv(buf,count,dtype,status.MPI_SOURCE,tag,comm,MPI_STATUS_IGNORE);
401:       PetscSegBufferGet(segreq,ntags,&req);
402:       for (k=0; k<ntags; k++) req[k] = MPI_REQUEST_NULL;
403:       (*recv)(comm,tags,status.MPI_SOURCE,buf,req,ctx);
404:       nrecvs++;
405:     }
406:     if (!barrier_started) {
407:       PetscMPIInt sent,nsends;
408:       PetscMPIIntCast(nto,&nsends);
409:       MPI_Testall(nsends,sendreqs,&sent,MPI_STATUSES_IGNORE);
410:       if (sent) {
411:         MPI_Ibarrier(comm,&barrier);
412:         barrier_started = PETSC_TRUE;
413:       }
414:     } else {
415:       MPI_Test(&barrier,&done,MPI_STATUS_IGNORE);
416:     }
417:   }
418:   *nfrom = nrecvs;
419:   PetscSegBufferExtractAlloc(segrank,fromranks);
420:   PetscSegBufferDestroy(&segrank);
421:   PetscSegBufferExtractAlloc(segdata,fromdata);
422:   PetscSegBufferDestroy(&segdata);
423:   *toreqs = usendreqs;
424:   PetscSegBufferExtractAlloc(segreq,fromreqs);
425:   PetscSegBufferDestroy(&segreq);
426:   PetscFree(sendreqs);
427:   PetscFree(tags);
428:   PetscCommDestroy(&comm);
429:   return 0;
430: }
431: #endif

433: /*@C
434:    PetscCommBuildTwoSidedF - discovers communicating ranks given one-sided information, calling user-defined functions during rendezvous

436:    Collective

438:    Input Parameters:
439: +  comm - communicator
440: .  count - number of entries to send/receive in initial rendezvous (must match on all ranks)
441: .  dtype - datatype to send/receive from each rank (must match on all ranks)
442: .  nto - number of ranks to send data to
443: .  toranks - ranks to send to (array of length nto)
444: .  todata - data to send to each rank (packed)
445: .  ntags - number of tags needed by send/recv callbacks
446: .  send - callback invoked on sending process when ready to send primary payload
447: .  recv - callback invoked on receiving process after delivery of rendezvous message
448: -  ctx - context for callbacks

450:    Output Parameters:
451: +  nfrom - number of ranks from which messages will be received
452: .  fromranks - ranks from which messages will be received (length nfrom); caller should free with PetscFree()
453: -  fromdata - packed data from each sending rank, count entries of type dtype per rank (length nfrom); caller should free with PetscFree()

455:    Level: developer

457:    Notes:
458:    This memory-scalable interface is an alternative to calling PetscGatherNumberOfMessages() and
459:    PetscGatherMessageLengths(), possibly with a subsequent round of communication to send other data.

461:    Basic data types as well as contiguous types are supported, but non-contiguous (e.g., strided) types are not.

463:    References:
464: .  * - Hoefler, Siebert and Lumsdaine, "Scalable communication protocols for dynamic sparse data exchange", 2010.
465:    The MPI_Ibarrier-based implementation uses the NBX algorithm from this paper.

467: .seealso: PetscCommBuildTwoSided(), PetscCommBuildTwoSidedFReq(), PetscGatherNumberOfMessages(), PetscGatherMessageLengths()
468: @*/
469: PetscErrorCode PetscCommBuildTwoSidedF(MPI_Comm comm,PetscMPIInt count,MPI_Datatype dtype,PetscMPIInt nto,const PetscMPIInt *toranks,const void *todata,PetscMPIInt *nfrom,PetscMPIInt **fromranks,void *fromdata,PetscMPIInt ntags,
470:                                        PetscErrorCode (*send)(MPI_Comm,const PetscMPIInt[],PetscMPIInt,PetscMPIInt,void*,MPI_Request[],void*),
471:                                        PetscErrorCode (*recv)(MPI_Comm,const PetscMPIInt[],PetscMPIInt,void*,MPI_Request[],void*),void *ctx)
472: {
473:   MPI_Request    *toreqs,*fromreqs;

475:   PetscCommBuildTwoSidedFReq(comm,count,dtype,nto,toranks,todata,nfrom,fromranks,fromdata,ntags,&toreqs,&fromreqs,send,recv,ctx);
476:   MPI_Waitall(nto*ntags,toreqs,MPI_STATUSES_IGNORE);
477:   MPI_Waitall(*nfrom*ntags,fromreqs,MPI_STATUSES_IGNORE);
478:   PetscFree(toreqs);
479:   PetscFree(fromreqs);
480:   return 0;
481: }
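
/* Illustrative callback sketch (the ExamplePayloadCtx struct, both callbacks, and the driver below
 * are hypothetical, not part of mpits.c): the rendezvous message carries each payload's length, and
 * the callbacks post the variable-length payload transfers on the first (and only, ntags = 1) extra
 * tag.  Receive buffers are indexed by source rank for simplicity, which is not memory-scalable. */
typedef struct {
  const PetscScalar *const *sendbufs; /* sendbufs[i] is the payload destined for toranks[i] */
  PetscScalar              **recvbufs; /* recvbufs[r] receives the payload from rank r */
} ExamplePayloadCtx;

static PetscErrorCode ExampleSendPayload(MPI_Comm comm,const PetscMPIInt tags[],PetscMPIInt i,PetscMPIInt rank,void *len,MPI_Request req[],void *ctx)
{
  ExamplePayloadCtx *ec = (ExamplePayloadCtx*)ctx;

  /* len points at the rendezvous datum for this destination: the payload length */
  MPI_Isend((void*)ec->sendbufs[i],*(PetscMPIInt*)len,MPIU_SCALAR,rank,tags[0],comm,&req[0]);
  return 0;
}

static PetscErrorCode ExampleRecvPayload(MPI_Comm comm,const PetscMPIInt tags[],PetscMPIInt rank,void *len,MPI_Request req[],void *ctx)
{
  ExamplePayloadCtx *ec = (ExamplePayloadCtx*)ctx;
  PetscMPIInt        n  = *(PetscMPIInt*)len; /* length delivered by the rendezvous message */

  PetscMalloc1(n,&ec->recvbufs[rank]);
  MPI_Irecv(ec->recvbufs[rank],n,MPIU_SCALAR,rank,tags[0],comm,&req[0]);
  return 0;
}

static PetscErrorCode ExampleExchangePayloads(MPI_Comm comm,PetscMPIInt nto,const PetscMPIInt *toranks,const PetscMPIInt *tolens,const PetscScalar *const *sendbufs)
{
  ExamplePayloadCtx ec;
  PetscMPIInt       size,nfrom,*fromranks,*fromlens;

  MPI_Comm_size(comm,&size);
  ec.sendbufs = sendbufs;
  PetscCalloc1(size,&ec.recvbufs);
  PetscCommBuildTwoSidedF(comm,1,MPI_INT,nto,toranks,tolens,&nfrom,&fromranks,&fromlens,1,ExampleSendPayload,ExampleRecvPayload,&ec);
  /* on return all payload sends/receives have completed; ec.recvbufs[fromranks[i]] holds fromlens[i]
   * entries for i < nfrom.  Usage code would free those buffers, ec.recvbufs, fromranks, fromlens. */
  PetscFree(fromranks);
  PetscFree(fromlens);
  return 0;
}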

483: /*@C
484:    PetscCommBuildTwoSidedFReq - discovers communicating ranks given one-sided information, calling user-defined functions during rendezvous, returns requests

486:    Collective

488:    Input Parameters:
489: +  comm - communicator
490: .  count - number of entries to send/receive in initial rendezvous (must match on all ranks)
491: .  dtype - datatype to send/receive from each rank (must match on all ranks)
492: .  nto - number of ranks to send data to
493: .  toranks - ranks to send to (array of length nto)
494: .  todata - data to send to each rank (packed)
495: .  ntags - number of tags needed by send/recv callbacks
496: .  send - callback invoked on sending process when ready to send primary payload
497: .  recv - callback invoked on receiving process after delivery of rendezvous message
498: -  ctx - context for callbacks

500:    Output Parameters:
501: +  nfrom - number of ranks from which messages will be received
502: .  fromranks - ranks from which messages will be received (length nfrom); caller should free with PetscFree()
503: .  fromdata - packed data from each sending rank, count entries of type dtype per rank (length nfrom); caller should free with PetscFree()
504: .  toreqs - array of nto*ntags sender requests (caller must wait on these, then PetscFree())
505: -  fromreqs - array of nfrom*ntags receiver requests (caller must wait on these, then PetscFree())

507:    Level: developer

509:    Notes:
510:    This memory-scalable interface is an alternative to calling PetscGatherNumberOfMessages() and
511:    PetscGatherMessageLengths(), possibly with a subsequent round of communication to send other data.

513:    Basic data types as well as contiguous types are supported, but non-contiguous (e.g., strided) types are not.

515:    References:
516: .  * - Hoefler, Siebert and Lumsdaine, "Scalable communication protocols for dynamic sparse data exchange", 2010.
517:    The MPI_Ibarrier-based implementation uses the NBX algorithm from this paper.

519: .seealso: PetscCommBuildTwoSided(), PetscCommBuildTwoSidedF(), PetscGatherNumberOfMessages(), PetscGatherMessageLengths()
520: @*/
521: PetscErrorCode PetscCommBuildTwoSidedFReq(MPI_Comm comm,PetscMPIInt count,MPI_Datatype dtype,PetscMPIInt nto,const PetscMPIInt *toranks,const void *todata,
522:                                           PetscMPIInt *nfrom,PetscMPIInt **fromranks,void *fromdata,PetscMPIInt ntags,MPI_Request **toreqs,MPI_Request **fromreqs,
523:                                           PetscErrorCode (*send)(MPI_Comm,const PetscMPIInt[],PetscMPIInt,PetscMPIInt,void*,MPI_Request[],void*),
524:                                           PetscErrorCode (*recv)(MPI_Comm,const PetscMPIInt[],PetscMPIInt,void*,MPI_Request[],void*),void *ctx)
525: {
526:   PetscErrorCode         (*f)(MPI_Comm,PetscMPIInt,MPI_Datatype,PetscMPIInt,const PetscMPIInt[],const void*,
527:                               PetscMPIInt*,PetscMPIInt**,void*,PetscMPIInt,MPI_Request**,MPI_Request**,
528:                               PetscErrorCode (*send)(MPI_Comm,const PetscMPIInt[],PetscMPIInt,PetscMPIInt,void*,MPI_Request[],void*),
529:                               PetscErrorCode (*recv)(MPI_Comm,const PetscMPIInt[],PetscMPIInt,void*,MPI_Request[],void*),void *ctx);
530:   PetscBuildTwoSidedType buildtype = PETSC_BUILDTWOSIDED_NOTSET;
531:   PetscMPIInt i,size;

533:   PetscSysInitializePackage();
534:   MPI_Comm_size(comm,&size);
535:   for (i=0; i<nto; i++) { /* validate destination ranks against the communicator size */
536:     if (toranks[i] < 0 || size <= toranks[i]) SETERRQ(comm,PETSC_ERR_ARG_OUTOFRANGE,"toranks[] entry is not a valid rank in this communicator");
537:   }
538:   PetscLogEventSync(PETSC_BuildTwoSidedF,comm);
539:   PetscLogEventBegin(PETSC_BuildTwoSidedF,0,0,0,0);
540:   PetscCommBuildTwoSidedGetType(comm,&buildtype);
541:   switch (buildtype) {
542:   case PETSC_BUILDTWOSIDED_IBARRIER:
543: #if defined(PETSC_HAVE_MPI_NONBLOCKING_COLLECTIVES)
544:     f = PetscCommBuildTwoSidedFReq_Ibarrier;
545:     break;
546: #else
547:     SETERRQ(comm,PETSC_ERR_PLIB,"MPI implementation does not provide MPI_Ibarrier (part of MPI-3)");
548: #endif
549:   case PETSC_BUILDTWOSIDED_ALLREDUCE:
550:   case PETSC_BUILDTWOSIDED_REDSCATTER:
551:     f = PetscCommBuildTwoSidedFReq_Reference;
552:     break;
553:   default: SETERRQ(comm,PETSC_ERR_PLIB,"Unknown method for building two-sided communication");
554:   }
555:   (*f)(comm,count,dtype,nto,toranks,todata,nfrom,fromranks,fromdata,ntags,toreqs,fromreqs,send,recv,ctx);
556:   PetscLogEventEnd(PETSC_BuildTwoSidedF,0,0,0,0);
557:   return 0;
558: }