Actual source code: mpits.c

petsc-3.14.6 2021-03-30
  1: #include <petscsys.h>
  2: #include <petsc/private/petscimpl.h>

  4: PetscLogEvent PETSC_BuildTwoSided;
  5: PetscLogEvent PETSC_BuildTwoSidedF;

  7: const char *const PetscBuildTwoSidedTypes[] = {
  8:   "ALLREDUCE",
  9:   "IBARRIER",
 10:   "REDSCATTER",
 11:   "PetscBuildTwoSidedType",
 12:   "PETSC_BUILDTWOSIDED_",
 13:   NULL
 14: };

 16: static PetscBuildTwoSidedType _twosided_type = PETSC_BUILDTWOSIDED_NOTSET;

 18: /*@
 19:    PetscCommBuildTwoSidedSetType - set algorithm to use when building two-sided communication

 21:    Logically Collective

 23:    Input Arguments:
 24: +  comm - communicator (the setting is currently global, so typically PETSC_COMM_WORLD)
 25: -  twosided - algorithm to use in subsequent calls to PetscCommBuildTwoSided()

 27:    Level: developer

 29:    Note:
 30:    This option is currently global, but could be made per-communicator.

 32: .seealso: PetscCommBuildTwoSided(), PetscCommBuildTwoSidedGetType()
 33: @*/
 34: PetscErrorCode PetscCommBuildTwoSidedSetType(MPI_Comm comm,PetscBuildTwoSidedType twosided)
 35: {
 37:   if (PetscDefined(USE_DEBUG)) {   /* debug-only consistency check: all ranks must pass the same value */
 38:     PetscMPIInt ierr;
 39:     PetscMPIInt b1[2],b2[2];
 40:     b1[0] = -(PetscMPIInt)twosided;
 41:     b1[1] = (PetscMPIInt)twosided;
 42:     MPIU_Allreduce(b1,b2,2,MPI_INT,MPI_MAX,comm);
 43:     if (-b2[0] != b2[1]) SETERRQ(comm,PETSC_ERR_ARG_WRONG,"Enum value must be same on all processes");
 44:   }
 45:   _twosided_type = twosided;
 46:   return(0);
 47: }
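/* A minimal usage sketch (not part of mpits.c): the algorithm can be chosen programmatically,
 *
 *   PetscCommBuildTwoSidedSetType(PETSC_COMM_WORLD,PETSC_BUILDTWOSIDED_IBARRIER);
 *
 * or equivalently from the options database, parsed lazily in PetscCommBuildTwoSidedGetType() below:
 *
 *   ./app -build_twosided ibarrier
 */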

 49: /*@
 50:    PetscCommBuildTwoSidedGetType - get the algorithm to use when building two-sided communication

 52:    Logically Collective

 54:    Input Argument:
 55: .  comm - communicator on which to query the algorithm

 56:    Output Argument:
 57: .  twosided - algorithm that will be used by PetscCommBuildTwoSided()

 58:    Level: developer

 60: .seealso: PetscCommBuildTwoSided(), PetscCommBuildTwoSidedSetType()
 61: @*/
 62: PetscErrorCode PetscCommBuildTwoSidedGetType(MPI_Comm comm,PetscBuildTwoSidedType *twosided)
 63: {
 65:   PetscMPIInt    size;

 68:   *twosided = PETSC_BUILDTWOSIDED_NOTSET;
 69:   if (_twosided_type == PETSC_BUILDTWOSIDED_NOTSET) {
 70:     MPI_Comm_size(comm,&size);
 71:     _twosided_type = PETSC_BUILDTWOSIDED_ALLREDUCE; /* default for small comms, see https://gitlab.com/petsc/petsc/-/merge_requests/2611 */
 72: #if defined(PETSC_HAVE_MPI_IBARRIER)
 73:     if (size > 1024) _twosided_type = PETSC_BUILDTWOSIDED_IBARRIER;
 74: #endif
 75:     PetscOptionsGetEnum(NULL,NULL,"-build_twosided",PetscBuildTwoSidedTypes,(PetscEnum*)&_twosided_type,NULL);
 76:   }
 77:   *twosided = _twosided_type;
 78:   return(0);
 79: }
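/* Usage sketch (hypothetical caller): query which algorithm the next PetscCommBuildTwoSided()
 * will use; after this call the result is never PETSC_BUILDTWOSIDED_NOTSET, so indexing the
 * string table above is safe (assuming the enum starts at ALLREDUCE = 0, as that table suggests).
 *
 *   PetscBuildTwoSidedType t;
 *   PetscCommBuildTwoSidedGetType(PETSC_COMM_WORLD,&t);
 *   PetscPrintf(PETSC_COMM_WORLD,"two-sided algorithm: %s\n",PetscBuildTwoSidedTypes[t]);
 */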

 81: #if defined(PETSC_HAVE_MPI_IBARRIER) || defined(PETSC_HAVE_MPIX_IBARRIER)

 83: static PetscErrorCode PetscCommBuildTwoSided_Ibarrier(MPI_Comm comm,PetscMPIInt count,MPI_Datatype dtype,PetscMPIInt nto,const PetscMPIInt *toranks,const void *todata,PetscMPIInt *nfrom,PetscMPIInt **fromranks,void *fromdata)
 84: {
 86:   PetscMPIInt    nrecvs,tag,done,i;
 87:   MPI_Aint       lb,unitbytes;
 88:   char           *tdata;
 89:   MPI_Request    *sendreqs,barrier;
 90:   PetscSegBuffer segrank,segdata;
 91:   PetscBool      barrier_started;

 94:   PetscCommDuplicate(comm,&comm,&tag);
 95:   MPI_Type_get_extent(dtype,&lb,&unitbytes);
 96:   if (lb != 0) SETERRQ1(comm,PETSC_ERR_SUP,"Datatype with nonzero lower bound %ld\n",(long)lb);
 97:   tdata = (char*)todata;
 98:   PetscMalloc1(nto,&sendreqs);
 99:   for (i=0; i<nto; i++) {
100:     MPI_Issend((void*)(tdata+count*unitbytes*i),count,dtype,toranks[i],tag,comm,sendreqs+i);
101:   }
102:   PetscSegBufferCreate(sizeof(PetscMPIInt),4,&segrank);
103:   PetscSegBufferCreate(unitbytes,4*count,&segdata);

105:   nrecvs  = 0;
106:   barrier = MPI_REQUEST_NULL;
107:   /* MPICH-3.2 sometimes does not create a request in some "optimized" cases.  This is arguably a standard violation,
108:    * but we need to work around it. */
109:   barrier_started = PETSC_FALSE;
110:   for (done=0; !done;) {
111:     PetscMPIInt flag;
112:     MPI_Status  status;
113:     MPI_Iprobe(MPI_ANY_SOURCE,tag,comm,&flag,&status);
114:     if (flag) {                 /* incoming message */
115:       PetscMPIInt *recvrank;
116:       void        *buf;
117:       PetscSegBufferGet(segrank,1,&recvrank);
118:       PetscSegBufferGet(segdata,count,&buf);
119:       *recvrank = status.MPI_SOURCE;
120:       MPI_Recv(buf,count,dtype,status.MPI_SOURCE,tag,comm,MPI_STATUS_IGNORE);
121:       nrecvs++;
122:     }
123:     if (!barrier_started) {
124:       PetscMPIInt sent,nsends;
125:       PetscMPIIntCast(nto,&nsends);
126:       MPI_Testall(nsends,sendreqs,&sent,MPI_STATUSES_IGNORE);
127:       if (sent) {
128: #if defined(PETSC_HAVE_MPI_IBARRIER)
129:         MPI_Ibarrier(comm,&barrier);
130: #elif defined(PETSC_HAVE_MPIX_IBARRIER)
131:         MPIX_Ibarrier(comm,&barrier);
132: #endif
133:         barrier_started = PETSC_TRUE;
134:         PetscFree(sendreqs);
135:       }
136:     } else {
137:       MPI_Test(&barrier,&done,MPI_STATUS_IGNORE);
138:     }
139:   }
140:   *nfrom = nrecvs;
141:   PetscSegBufferExtractAlloc(segrank,fromranks);
142:   PetscSegBufferDestroy(&segrank);
143:   PetscSegBufferExtractAlloc(segdata,fromdata);
144:   PetscSegBufferDestroy(&segdata);
145:   PetscCommDestroy(&comm);
146:   return(0);
147: }
148: #endif
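/* The loop above is an instance of the NBX-style dynamic sparse data exchange of Hoefler,
 * Siebert and Lumsdaine (see the reference in PetscCommBuildTwoSided() below).  The essential
 * pattern, sketched here in plain MPI for illustration only:
 *
 *   for (i=0; i<nto; i++) MPI_Issend(sendbuf+i*len,len,MPI_BYTE,toranks[i],tag,comm,&sreq[i]);
 *   done = 0; barrier_active = 0;
 *   while (!done) {
 *     MPI_Iprobe(MPI_ANY_SOURCE,tag,comm,&flag,&status);           // did anyone announce to me?
 *     if (flag) MPI_Recv(recvbuf,len,MPI_BYTE,status.MPI_SOURCE,tag,comm,MPI_STATUS_IGNORE);
 *     if (!barrier_active) {
 *       MPI_Testall(nto,sreq,&sent,MPI_STATUSES_IGNORE);           // have all my Issends been matched?
 *       if (sent) { MPI_Ibarrier(comm,&breq); barrier_active = 1; }
 *     } else {
 *       MPI_Test(&breq,&done,MPI_STATUS_IGNORE);                   // everyone has entered the barrier
 *     }
 *   }
 *
 * (Here the received data is simply overwritten; the routine above appends it to PetscSegBuffers.)
 * Because the sends are synchronous (MPI_Issend), local send completion means every announced
 * message has begun to be received, so once all ranks have entered the barrier no rendezvous
 * message can still be in flight and the probe loop can terminate safely. */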

150: static PetscErrorCode PetscCommBuildTwoSided_Allreduce(MPI_Comm comm,PetscMPIInt count,MPI_Datatype dtype,PetscMPIInt nto,const PetscMPIInt *toranks,const void *todata,PetscMPIInt *nfrom,PetscMPIInt **fromranks,void *fromdata)
151: {
152:   PetscErrorCode   ierr;
153:   PetscMPIInt      size,rank,*iflags,nrecvs,tag,*franks,i,flg;
154:   MPI_Aint         lb,unitbytes;
155:   char             *tdata,*fdata;
156:   MPI_Request      *reqs,*sendreqs;
157:   MPI_Status       *statuses;
158:   PetscCommCounter *counter;

161:   MPI_Comm_size(comm,&size);
162:   MPI_Comm_rank(comm,&rank);
163:   PetscCommDuplicate(comm,&comm,&tag);
164:   MPI_Comm_get_attr(comm,Petsc_Counter_keyval,&counter,&flg);
165:   if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Inner PETSc communicator does not have its tag/name counter attribute set");
166:   if (!counter->iflags) {
167:     PetscCalloc1(size,&counter->iflags);
168:     iflags = counter->iflags;
169:   } else {
170:     iflags = counter->iflags;
171:     PetscArrayzero(iflags,size);
172:   }
173:   for (i=0; i<nto; i++) iflags[toranks[i]] = 1;
174:   MPIU_Allreduce(MPI_IN_PLACE,iflags,size,MPI_INT,MPI_SUM,comm);
175:   nrecvs   = iflags[rank];
176:   MPI_Type_get_extent(dtype,&lb,&unitbytes);
177:   if (lb != 0) SETERRQ1(comm,PETSC_ERR_SUP,"Datatype with nonzero lower bound %ld\n",(long)lb);
178:   PetscMalloc(nrecvs*count*unitbytes,&fdata);
179:   tdata    = (char*)todata;
180:   PetscMalloc2(nto+nrecvs,&reqs,nto+nrecvs,&statuses);
181:   sendreqs = reqs + nrecvs;
182:   for (i=0; i<nrecvs; i++) {
183:     MPI_Irecv((void*)(fdata+count*unitbytes*i),count,dtype,MPI_ANY_SOURCE,tag,comm,reqs+i);
184:   }
185:   for (i=0; i<nto; i++) {
186:     MPI_Isend((void*)(tdata+count*unitbytes*i),count,dtype,toranks[i],tag,comm,sendreqs+i);
187:   }
188:   MPI_Waitall(nto+nrecvs,reqs,statuses);
189:   PetscMalloc1(nrecvs,&franks);
190:   for (i=0; i<nrecvs; i++) franks[i] = statuses[i].MPI_SOURCE;
191:   PetscFree2(reqs,statuses);
192:   PetscCommDestroy(&comm);

194:   *nfrom            = nrecvs;
195:   *fromranks        = franks;
196:   *(void**)fromdata = fdata;
197:   return(0);
198: }
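/* Sketch of the counting step used above (illustration only): every rank fills a 0/1 vector of
 * length size, and after an in-place sum-reduction entry r holds how many ranks will send to
 * rank r, i.e. the number of receives it must post.
 *
 *   for (j=0; j<size; j++) iflags[j] = 0;
 *   for (i=0; i<nto; i++)  iflags[toranks[i]] = 1;
 *   MPI_Allreduce(MPI_IN_PLACE,iflags,size,MPI_INT,MPI_SUM,comm);
 *   nrecvs = iflags[rank];                      // messages this rank should expect
 *
 * This is simple and latency-friendly, but every rank holds an O(size) flag vector, which is why
 * the ibarrier algorithm is the default on communicators with more than 1024 ranks (see
 * PetscCommBuildTwoSidedGetType() above). */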

200: #if defined(PETSC_HAVE_MPI_REDUCE_SCATTER_BLOCK)
201: static PetscErrorCode PetscCommBuildTwoSided_RedScatter(MPI_Comm comm,PetscMPIInt count,MPI_Datatype dtype,PetscMPIInt nto,const PetscMPIInt *toranks,const void *todata,PetscMPIInt *nfrom,PetscMPIInt **fromranks,void *fromdata)
202: {
204:   PetscMPIInt    size,*iflags,nrecvs,tag,*franks,i,flg;
205:   MPI_Aint       lb,unitbytes;
206:   char           *tdata,*fdata;
207:   MPI_Request    *reqs,*sendreqs;
208:   MPI_Status     *statuses;
209:   PetscCommCounter *counter;

212:   MPI_Comm_size(comm,&size);
213:   PetscCommDuplicate(comm,&comm,&tag);
214:   MPI_Comm_get_attr(comm,Petsc_Counter_keyval,&counter,&flg);
215:   if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Inner PETSc communicator does not have its tag/name counter attribute set");
216:   if (!counter->iflags) {
217:     PetscCalloc1(size,&counter->iflags);
218:     iflags = counter->iflags;
219:   } else {
220:     iflags = counter->iflags;
221:     PetscArrayzero(iflags,size);
222:   }
223:   for (i=0; i<nto; i++) iflags[toranks[i]] = 1;
224:   MPI_Reduce_scatter_block(iflags,&nrecvs,1,MPI_INT,MPI_SUM,comm);
225:   MPI_Type_get_extent(dtype,&lb,&unitbytes);
226:   if (lb != 0) SETERRQ1(comm,PETSC_ERR_SUP,"Datatype with nonzero lower bound %ld\n",(long)lb);
227:   PetscMalloc(nrecvs*count*unitbytes,&fdata);
228:   tdata    = (char*)todata;
229:   PetscMalloc2(nto+nrecvs,&reqs,nto+nrecvs,&statuses);
230:   sendreqs = reqs + nrecvs;
231:   for (i=0; i<nrecvs; i++) {
232:     MPI_Irecv((void*)(fdata+count*unitbytes*i),count,dtype,MPI_ANY_SOURCE,tag,comm,reqs+i);
233:   }
234:   for (i=0; i<nto; i++) {
235:     MPI_Isend((void*)(tdata+count*unitbytes*i),count,dtype,toranks[i],tag,comm,sendreqs+i);
236:   }
237:   MPI_Waitall(nto+nrecvs,reqs,statuses);
238:   PetscMalloc1(nrecvs,&franks);
239:   for (i=0; i<nrecvs; i++) franks[i] = statuses[i].MPI_SOURCE;
240:   PetscFree2(reqs,statuses);
241:   PetscCommDestroy(&comm);

243:   *nfrom            = nrecvs;
244:   *fromranks        = franks;
245:   *(void**)fromdata = fdata;
246:   return(0);
247: }
248: #endif
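/* Sketch (illustration only): MPI_Reduce_scatter_block with a receive count of 1 sums the
 * per-rank 0/1 flag vectors and delivers to each rank just its own entry, so only the scalar
 * nrecvs comes back instead of the full length-size vector of the allreduce variant above.
 *
 *   for (i=0; i<nto; i++) iflags[toranks[i]] = 1;    // iflags zeroed beforehand, length = size
 *   MPI_Reduce_scatter_block(iflags,&nrecvs,1,MPI_INT,MPI_SUM,comm);   // nrecvs = number of incoming messages
 */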

250: /*@C
251:    PetscCommBuildTwoSided - discovers communicating ranks given one-sided information, moving constant-sized data in the process (often message lengths)

253:    Collective

255:    Input Arguments:
256: +  comm - communicator
257: .  count - number of entries to send/receive (must match on all ranks)
258: .  dtype - datatype to send/receive from each rank (must match on all ranks)
259: .  nto - number of ranks to send data to
260: .  toranks - ranks to send to (array of length nto)
261: -  todata - data to send to each rank (packed)

263:    Output Arguments:
264: +  nfrom - number of ranks from which messages will be received
265: .  fromranks - ranks from which messages will be received (length nfrom; caller should PetscFree())
266: -  fromdata - packed data from each rank, each with count entries of type dtype (length nfrom; caller responsible for PetscFree())

268:    Level: developer

270:    Options Database Keys:
271: .  -build_twosided <allreduce|ibarrier|redscatter> - algorithm to set up two-sided communication. Default is allreduce for communicators with <= 1024 ranks, otherwise ibarrier.

273:    Notes:
274:    This memory-scalable interface is an alternative to calling PetscGatherNumberOfMessages() and
275:    PetscGatherMessageLengths(), possibly with a subsequent round of communication to send other constant-size data.

277:    Basic data types as well as contiguous types are supported, but non-contiguous (e.g., strided) types are not.

279:    References:
280: .  1. - Hoefler, Siebert and Lumsdaine, Scalable communication protocols for dynamic sparse data exchange, 2010.
281:    The ibarrier implementation uses the algorithm from this paper.

283: .seealso: PetscGatherNumberOfMessages(), PetscGatherMessageLengths()
284: @*/
285: PetscErrorCode PetscCommBuildTwoSided(MPI_Comm comm,PetscMPIInt count,MPI_Datatype dtype,PetscMPIInt nto,const PetscMPIInt *toranks,const void *todata,PetscMPIInt *nfrom,PetscMPIInt **fromranks,void *fromdata)
286: {
287:   PetscErrorCode         ierr;
288:   PetscBuildTwoSidedType buildtype = PETSC_BUILDTWOSIDED_NOTSET;

291:   PetscSysInitializePackage();
292:   PetscLogEventSync(PETSC_BuildTwoSided,comm);
293:   PetscLogEventBegin(PETSC_BuildTwoSided,0,0,0,0);
294:   PetscCommBuildTwoSidedGetType(comm,&buildtype);
295:   switch (buildtype) {
296:   case PETSC_BUILDTWOSIDED_IBARRIER:
297: #if defined(PETSC_HAVE_MPI_IBARRIER) || defined(PETSC_HAVE_MPIX_IBARRIER)
298:     PetscCommBuildTwoSided_Ibarrier(comm,count,dtype,nto,toranks,todata,nfrom,fromranks,fromdata);
299:     break;
300: #else
301:     SETERRQ(comm,PETSC_ERR_PLIB,"MPI implementation does not provide MPI_Ibarrier (part of MPI-3)");
302: #endif
303:   case PETSC_BUILDTWOSIDED_ALLREDUCE:
304:     PetscCommBuildTwoSided_Allreduce(comm,count,dtype,nto,toranks,todata,nfrom,fromranks,fromdata);
305:     break;
306:   case PETSC_BUILDTWOSIDED_REDSCATTER:
307: #if defined(PETSC_HAVE_MPI_REDUCE_SCATTER_BLOCK)
308:     PetscCommBuildTwoSided_RedScatter(comm,count,dtype,nto,toranks,todata,nfrom,fromranks,fromdata);
309:     break;
310: #else
311:     SETERRQ(comm,PETSC_ERR_PLIB,"MPI implementation does not provide MPI_Reduce_scatter_block (part of MPI-2.2)");
312: #endif
313:   default: SETERRQ(comm,PETSC_ERR_PLIB,"Unknown method for building two-sided communication");
314:   }
315:   PetscLogEventEnd(PETSC_BuildTwoSided,0,0,0,0);
316:   return(0);
317: }
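/* Typical usage sketch (hypothetical caller, not part of mpits.c): each rank announces one
 * PetscMPIInt (here, an upcoming message length) to the ranks it wants to talk to and discovers
 * who wants to talk to it.  Assumes size >= 3 so the two destinations are distinct.
 *
 *   PetscMPIInt nto = 2,toranks[2],tolen[2],nfrom,*fromranks,*fromlen;
 *   toranks[0] = (rank+1)%size; tolen[0] = 10;    // example destinations and announced lengths
 *   toranks[1] = (rank+2)%size; tolen[1] = 20;
 *   PetscCommBuildTwoSided(comm,1,MPI_INT,nto,toranks,tolen,&nfrom,&fromranks,&fromlen);
 *   // fromranks[i] will send this rank a message of length fromlen[i]
 *   PetscFree(fromranks);
 *   PetscFree(fromlen);
 */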

319: static PetscErrorCode PetscCommBuildTwoSidedFReq_Reference(MPI_Comm comm,PetscMPIInt count,MPI_Datatype dtype,PetscMPIInt nto,const PetscMPIInt *toranks,const void *todata,
320:                                                            PetscMPIInt *nfrom,PetscMPIInt **fromranks,void *fromdata,PetscMPIInt ntags,MPI_Request **toreqs,MPI_Request **fromreqs,
321:                                                            PetscErrorCode (*send)(MPI_Comm,const PetscMPIInt[],PetscMPIInt,PetscMPIInt,void*,MPI_Request[],void*),
322:                                                            PetscErrorCode (*recv)(MPI_Comm,const PetscMPIInt[],PetscMPIInt,void*,MPI_Request[],void*),void *ctx)
323: {
325:   PetscMPIInt i,*tag;
326:   MPI_Aint    lb,unitbytes;
327:   MPI_Request *sendreq,*recvreq;

330:   PetscMalloc1(ntags,&tag);
331:   if (ntags > 0) {
332:     PetscCommDuplicate(comm,&comm,&tag[0]);
333:   }
334:   for (i=1; i<ntags; i++) {
335:     PetscCommGetNewTag(comm,&tag[i]);
336:   }

338:   /* Perform complete initial rendezvous */
339:   PetscCommBuildTwoSided(comm,count,dtype,nto,toranks,todata,nfrom,fromranks,fromdata);

341:   PetscMalloc1(nto*ntags,&sendreq);
342:   PetscMalloc1(*nfrom*ntags,&recvreq);

344:   MPI_Type_get_extent(dtype,&lb,&unitbytes);
345:   if (lb != 0) SETERRQ1(comm,PETSC_ERR_SUP,"Datatype with nonzero lower bound %ld\n",(long)lb);
346:   for (i=0; i<nto; i++) {
347:     PetscMPIInt k;
348:     for (k=0; k<ntags; k++) sendreq[i*ntags+k] = MPI_REQUEST_NULL;
349:     (*send)(comm,tag,i,toranks[i],((char*)todata)+count*unitbytes*i,sendreq+i*ntags,ctx);
350:   }
351:   for (i=0; i<*nfrom; i++) {
352:     void *header = (*(char**)fromdata) + count*unitbytes*i;
353:     PetscMPIInt k;
354:     for (k=0; k<ntags; k++) recvreq[i*ntags+k] = MPI_REQUEST_NULL;
355:     (*recv)(comm,tag,(*fromranks)[i],header,recvreq+i*ntags,ctx);
356:   }
357:   PetscFree(tag);
358:   PetscCommDestroy(&comm);
359:   *toreqs = sendreq;
360:   *fromreqs = recvreq;
361:   return(0);
362: }

364: #if defined(PETSC_HAVE_MPI_IBARRIER) || defined(PETSC_HAVE_MPIX_IBARRIER)

366: static PetscErrorCode PetscCommBuildTwoSidedFReq_Ibarrier(MPI_Comm comm,PetscMPIInt count,MPI_Datatype dtype,PetscMPIInt nto,const PetscMPIInt *toranks,const void *todata,
367:                                                           PetscMPIInt *nfrom,PetscMPIInt **fromranks,void *fromdata,PetscMPIInt ntags,MPI_Request **toreqs,MPI_Request **fromreqs,
368:                                                           PetscErrorCode (*send)(MPI_Comm,const PetscMPIInt[],PetscMPIInt,PetscMPIInt,void*,MPI_Request[],void*),
369:                                                           PetscErrorCode (*recv)(MPI_Comm,const PetscMPIInt[],PetscMPIInt,void*,MPI_Request[],void*),void *ctx)
370: {
372:   PetscMPIInt    nrecvs,tag,*tags,done,i;
373:   MPI_Aint       lb,unitbytes;
374:   char           *tdata;
375:   MPI_Request    *sendreqs,*usendreqs,*req,barrier;
376:   PetscSegBuffer segrank,segdata,segreq;
377:   PetscBool      barrier_started;

380:   PetscCommDuplicate(comm,&comm,&tag);
381:   PetscMalloc1(ntags,&tags);
382:   for (i=0; i<ntags; i++) {
383:     PetscCommGetNewTag(comm,&tags[i]);
384:   }
385:   MPI_Type_get_extent(dtype,&lb,&unitbytes);
386:   if (lb != 0) SETERRQ1(comm,PETSC_ERR_SUP,"Datatype with nonzero lower bound %ld\n",(long)lb);
387:   tdata = (char*)todata;
388:   PetscMalloc1(nto,&sendreqs);
389:   PetscMalloc1(nto*ntags,&usendreqs);
390:   /* Post synchronous sends */
391:   for (i=0; i<nto; i++) {
392:     MPI_Issend((void*)(tdata+count*unitbytes*i),count,dtype,toranks[i],tag,comm,sendreqs+i);
393:   }
394:   /* Post actual payloads.  These are typically larger messages.  Hopefully sending these later does not slow down the
395:    * synchronous messages above. */
396:   for (i=0; i<nto; i++) {
397:     PetscMPIInt k;
398:     for (k=0; k<ntags; k++) usendreqs[i*ntags+k] = MPI_REQUEST_NULL;
399:     (*send)(comm,tags,i,toranks[i],tdata+count*unitbytes*i,usendreqs+i*ntags,ctx);
400:   }

402:   PetscSegBufferCreate(sizeof(PetscMPIInt),4,&segrank);
403:   PetscSegBufferCreate(unitbytes,4*count,&segdata);
404:   PetscSegBufferCreate(sizeof(MPI_Request),4,&segreq);

406:   nrecvs  = 0;
407:   barrier = MPI_REQUEST_NULL;
408:   /* MPICH-3.2 sometimes does not create a request in some "optimized" cases.  This is arguably a standard violation,
409:    * but we need to work around it. */
410:   barrier_started = PETSC_FALSE;
411:   for (done=0; !done;) {
412:     PetscMPIInt flag;
413:     MPI_Status  status;
414:     MPI_Iprobe(MPI_ANY_SOURCE,tag,comm,&flag,&status);
415:     if (flag) {                 /* incoming message */
416:       PetscMPIInt *recvrank,k;
417:       void        *buf;
418:       PetscSegBufferGet(segrank,1,&recvrank);
419:       PetscSegBufferGet(segdata,count,&buf);
420:       *recvrank = status.MPI_SOURCE;
421:       MPI_Recv(buf,count,dtype,status.MPI_SOURCE,tag,comm,MPI_STATUS_IGNORE);
422:       PetscSegBufferGet(segreq,ntags,&req);
423:       for (k=0; k<ntags; k++) req[k] = MPI_REQUEST_NULL;
424:       (*recv)(comm,tags,status.MPI_SOURCE,buf,req,ctx);
425:       nrecvs++;
426:     }
427:     if (!barrier_started) {
428:       PetscMPIInt sent,nsends;
429:       PetscMPIIntCast(nto,&nsends);
430:       MPI_Testall(nsends,sendreqs,&sent,MPI_STATUSES_IGNORE);
431:       if (sent) {
432: #if defined(PETSC_HAVE_MPI_IBARRIER)
433:         MPI_Ibarrier(comm,&barrier);
434: #elif defined(PETSC_HAVE_MPIX_IBARRIER)
435:         MPIX_Ibarrier(comm,&barrier);
436: #endif
437:         barrier_started = PETSC_TRUE;
438:       }
439:     } else {
440:       MPI_Test(&barrier,&done,MPI_STATUS_IGNORE);
441:     }
442:   }
443:   *nfrom = nrecvs;
444:   PetscSegBufferExtractAlloc(segrank,fromranks);
445:   PetscSegBufferDestroy(&segrank);
446:   PetscSegBufferExtractAlloc(segdata,fromdata);
447:   PetscSegBufferDestroy(&segdata);
448:   *toreqs = usendreqs;
449:   PetscSegBufferExtractAlloc(segreq,fromreqs);
450:   PetscSegBufferDestroy(&segreq);
451:   PetscFree(sendreqs);
452:   PetscFree(tags);
453:   PetscCommDestroy(&comm);
454:   return(0);
455: }
456: #endif

458: /*@C
459:    PetscCommBuildTwoSidedF - discovers communicating ranks given one-sided information, calling user-defined functions during rendezvous

461:    Collective

463:    Input Arguments:
464: +  comm - communicator
465: .  count - number of entries to send/receive in initial rendezvous (must match on all ranks)
466: .  dtype - datatype to send/receive from each rank (must match on all ranks)
467: .  nto - number of ranks to send data to
468: .  toranks - ranks to send to (array of length nto)
469: .  todata - data to send to each rank (packed)
470: .  ntags - number of tags needed by send/recv callbacks
471: .  send - callback invoked on sending process when ready to send primary payload
472: .  recv - callback invoked on receiving process after delivery of rendezvous message
473: -  ctx - context for callbacks

475:    Output Arguments:
476: +  nfrom - number of ranks from which messages will be received
477: .  fromranks - ranks from which messages will be received (length nfrom; caller should PetscFree())
478: -  fromdata - packed data from each rank, each with count entries of type dtype (length nfrom; caller responsible for PetscFree())

480:    Level: developer

482:    Notes:
483:    This memory-scalable interface is an alternative to calling PetscGatherNumberOfMessages() and
484:    PetscGatherMessageLengths(), possibly with a subsequent round of communication to send other data.

486:    Basic data types as well as contiguous types are supported, but non-contiguous (e.g., strided) types are not.

488:    References:
489: .  1. - Hoefler, Siebert and Lumsdaine, Scalable communication protocols for dynamic sparse data exchange, 2010.
490:    The ibarrier implementation uses the algorithm from this paper.

492: .seealso: PetscCommBuildTwoSided(), PetscCommBuildTwoSidedFReq(), PetscGatherNumberOfMessages(), PetscGatherMessageLengths()
493: @*/
494: PetscErrorCode PetscCommBuildTwoSidedF(MPI_Comm comm,PetscMPIInt count,MPI_Datatype dtype,PetscMPIInt nto,const PetscMPIInt *toranks,const void *todata,PetscMPIInt *nfrom,PetscMPIInt **fromranks,void *fromdata,PetscMPIInt ntags,
495:                                        PetscErrorCode (*send)(MPI_Comm,const PetscMPIInt[],PetscMPIInt,PetscMPIInt,void*,MPI_Request[],void*),
496:                                        PetscErrorCode (*recv)(MPI_Comm,const PetscMPIInt[],PetscMPIInt,void*,MPI_Request[],void*),void *ctx)
497: {
499:   MPI_Request    *toreqs,*fromreqs;

502:   PetscCommBuildTwoSidedFReq(comm,count,dtype,nto,toranks,todata,nfrom,fromranks,fromdata,ntags,&toreqs,&fromreqs,send,recv,ctx);
503:   MPI_Waitall(nto*ntags,toreqs,MPI_STATUSES_IGNORE);
504:   MPI_Waitall(*nfrom*ntags,fromreqs,MPI_STATUSES_IGNORE);
505:   PetscFree(toreqs);
506:   PetscFree(fromreqs);
507:   return(0);
508: }
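/* Callback sketch (hypothetical names, not part of mpits.c): here the rendezvous payload is a
 * single PetscMPIInt message length and the callbacks post the matching variable-length payload
 * transfer on the extra tag tags[0] (ntags = 1).  MySend, MyRecv and Ctx are illustrative only.
 *
 *   typedef struct {
 *     const PetscScalar *const *sendbufs;   // sendbufs[i] holds tolens[i] entries, ordered like toranks
 *     PetscScalar              **recvbufs;  // preallocated with comm-size entries as an upper bound
 *     PetscMPIInt              nrecvd;
 *   } Ctx;
 *
 *   static PetscErrorCode MySend(MPI_Comm comm,const PetscMPIInt tags[],PetscMPIInt i,PetscMPIInt rank,void *header,MPI_Request req[],void *ctx)
 *   {
 *     Ctx *c = (Ctx*)ctx;
 *     PetscMPIInt len = *(PetscMPIInt*)header;  // the length this rank announced in todata[i]
 *     MPI_Isend((void*)c->sendbufs[i],len,MPIU_SCALAR,rank,tags[0],comm,&req[0]);
 *     return(0);
 *   }
 *
 *   static PetscErrorCode MyRecv(MPI_Comm comm,const PetscMPIInt tags[],PetscMPIInt rank,void *header,MPI_Request req[],void *ctx)
 *   {
 *     Ctx *c = (Ctx*)ctx;
 *     PetscMPIInt len = *(PetscMPIInt*)header;  // length announced by the sending rank
 *     PetscMalloc1(len,&c->recvbufs[c->nrecvd]);
 *     MPI_Irecv(c->recvbufs[c->nrecvd++],len,MPIU_SCALAR,rank,tags[0],comm,&req[0]);
 *     return(0);
 *   }
 *
 *   PetscCommBuildTwoSidedF(comm,1,MPI_INT,nto,toranks,tolens,&nfrom,&fromranks,&fromlens,1,MySend,MyRecv,&myctx);
 */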

510: /*@C
511:    PetscCommBuildTwoSidedFReq - discovers communicating ranks given one-sided information, calling user-defined functions during rendezvous, returns requests

513:    Collective

515:    Input Arguments:
516: +  comm - communicator
517: .  count - number of entries to send/receive in initial rendezvous (must match on all ranks)
518: .  dtype - datatype to send/receive from each rank (must match on all ranks)
519: .  nto - number of ranks to send data to
520: .  toranks - ranks to send to (array of length nto)
521: .  todata - data to send to each rank (packed)
522: .  ntags - number of tags needed by send/recv callbacks
523: .  send - callback invoked on sending process when ready to send primary payload
524: .  recv - callback invoked on receiving process after delivery of rendezvous message
525: -  ctx - context for callbacks

527:    Output Arguments:
528: +  nfrom - number of ranks from which messages will be received
529: .  fromranks - ranks from which messages will be received (length nfrom; caller should PetscFree())
530: .  fromdata - packed data from each rank, each with count entries of type dtype (length nfrom; caller responsible for PetscFree())
531: .  toreqs - array of nto*ntags sender requests (caller must wait on these, then PetscFree())
532: -  fromreqs - array of nfrom*ntags receiver requests (caller must wait on these, then PetscFree())

534:    Level: developer

536:    Notes:
537:    This memory-scalable interface is an alternative to calling PetscGatherNumberOfMessages() and
538:    PetscGatherMessageLengths(), possibly with a subsequent round of communication to send other data.

540:    Basic data types as well as contiguous types are supported, but non-contiguous (e.g., strided) types are not.

542:    References:
543: .  1. - Hoefler, Siebert and Lumsdaine, Scalable communication protocols for dynamic sparse data exchange, 2010.
544:    The ibarrier implementation uses the algorithm from this paper.

546: .seealso: PetscCommBuildTwoSided(), PetscCommBuildTwoSidedF(), PetscGatherNumberOfMessages(), PetscGatherMessageLengths()
547: @*/
548: PetscErrorCode PetscCommBuildTwoSidedFReq(MPI_Comm comm,PetscMPIInt count,MPI_Datatype dtype,PetscMPIInt nto,const PetscMPIInt *toranks,const void *todata,
549:                                           PetscMPIInt *nfrom,PetscMPIInt **fromranks,void *fromdata,PetscMPIInt ntags,MPI_Request **toreqs,MPI_Request **fromreqs,
550:                                           PetscErrorCode (*send)(MPI_Comm,const PetscMPIInt[],PetscMPIInt,PetscMPIInt,void*,MPI_Request[],void*),
551:                                           PetscErrorCode (*recv)(MPI_Comm,const PetscMPIInt[],PetscMPIInt,void*,MPI_Request[],void*),void *ctx)
552: {
553:   PetscErrorCode         ierr,(*f)(MPI_Comm,PetscMPIInt,MPI_Datatype,PetscMPIInt,const PetscMPIInt[],const void*,
554:                                    PetscMPIInt*,PetscMPIInt**,void*,PetscMPIInt,MPI_Request**,MPI_Request**,
555:                                    PetscErrorCode (*send)(MPI_Comm,const PetscMPIInt[],PetscMPIInt,PetscMPIInt,void*,MPI_Request[],void*),
556:                                    PetscErrorCode (*recv)(MPI_Comm,const PetscMPIInt[],PetscMPIInt,void*,MPI_Request[],void*),void *ctx);
557:   PetscBuildTwoSidedType buildtype = PETSC_BUILDTWOSIDED_NOTSET;
558:   PetscMPIInt i,size;

561:   PetscSysInitializePackage();
562:   MPI_Comm_size(comm,&size);
563:   for (i=0; i<nto; i++) {
564:     if (toranks[i] < 0 || size <= toranks[i]) SETERRQ3(comm,PETSC_ERR_ARG_OUTOFRANGE,"toranks[%d] %d not in comm size %d",i,toranks[i],size);
565:   }
566:   PetscLogEventSync(PETSC_BuildTwoSidedF,comm);
567:   PetscLogEventBegin(PETSC_BuildTwoSidedF,0,0,0,0);
568:   PetscCommBuildTwoSidedGetType(comm,&buildtype);
569:   switch (buildtype) {
570:   case PETSC_BUILDTWOSIDED_IBARRIER:
571: #if defined(PETSC_HAVE_MPI_IBARRIER) || defined(PETSC_HAVE_MPIX_IBARRIER)
572:     f = PetscCommBuildTwoSidedFReq_Ibarrier;
573:     break;
574: #else
575:     SETERRQ(comm,PETSC_ERR_PLIB,"MPI implementation does not provide MPI_Ibarrier (part of MPI-3)");
576: #endif
577:   case PETSC_BUILDTWOSIDED_ALLREDUCE:
578:   case PETSC_BUILDTWOSIDED_REDSCATTER:
579:     f = PetscCommBuildTwoSidedFReq_Reference;
580:     break;
581:   default: SETERRQ(comm,PETSC_ERR_PLIB,"Unknown method for building two-sided communication");
582:   }
583:   (*f)(comm,count,dtype,nto,toranks,todata,nfrom,fromranks,fromdata,ntags,toreqs,fromreqs,send,recv,ctx);
584:   PetscLogEventEnd(PETSC_BuildTwoSidedF,0,0,0,0);
585:   return(0);
586: }
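
/* Usage sketch (illustration only): the non-blocking variant returns the requests produced by the
 * callbacks so the caller can overlap unrelated work before completing them, e.g. with the
 * hypothetical MySend/MyRecv callbacks sketched after PetscCommBuildTwoSidedF() above and ntags = 1:
 *
 *   MPI_Request *toreqs,*fromreqs;
 *   PetscCommBuildTwoSidedFReq(comm,1,MPI_INT,nto,toranks,tolens,&nfrom,&fromranks,&fromlens,1,&toreqs,&fromreqs,MySend,MyRecv,&myctx);
 *   // ... unrelated local work here ...
 *   MPI_Waitall(nto*1,toreqs,MPI_STATUSES_IGNORE);
 *   MPI_Waitall(nfrom*1,fromreqs,MPI_STATUSES_IGNORE);
 *   PetscFree(toreqs);
 *   PetscFree(fromreqs);
 *
 * PetscCommBuildTwoSidedF() above is exactly this pattern without the intervening work. */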