Actual source code: mpits.c

petsc-3.13.6 2020-09-29
  1:  #include <petscsys.h>
  2:  #include <petsc/private/petscimpl.h>

  4: PetscLogEvent PETSC_BuildTwoSided;
  5: PetscLogEvent PETSC_BuildTwoSidedF;

  7: const char *const PetscBuildTwoSidedTypes[] = {
  8:   "ALLREDUCE",
  9:   "IBARRIER",
 10:   "REDSCATTER",
 11:   "PetscBuildTwoSidedType",
 12:   "PETSC_BUILDTWOSIDED_",
 13:   NULL
 14: };
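/* The string table above is laid out for PetscOptionsGetEnum(): the first three entries name the
 * concrete algorithms, followed by the enum type name, the enum prefix, and a NULL terminator.
 * For context, the matching enum is declared in petscsys.h roughly as follows (a sketch; consult
 * the header for the authoritative definition):
 *
 *   typedef enum {PETSC_BUILDTWOSIDED_NOTSET     = -1,
 *                 PETSC_BUILDTWOSIDED_ALLREDUCE  =  0,
 *                 PETSC_BUILDTWOSIDED_IBARRIER   =  1,
 *                 PETSC_BUILDTWOSIDED_REDSCATTER =  2} PetscBuildTwoSidedType;
 */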

 16: static PetscBuildTwoSidedType _twosided_type = PETSC_BUILDTWOSIDED_NOTSET;

 18: /*@
 19:    PetscCommBuildTwoSidedSetType - set algorithm to use when building two-sided communication

 21:    Logically Collective

 23:    Input Arguments:
 24: +  comm - communicator (the setting is currently global, so typically PETSC_COMM_WORLD; see the Note below)
 25: -  twosided - algorithm to use in subsequent calls to PetscCommBuildTwoSided()

 27:    Level: developer

 29:    Note:
 30:    This option is currently global, but could be made per-communicator.

 32: .seealso: PetscCommBuildTwoSided(), PetscCommBuildTwoSidedGetType()
 33: @*/
 34: PetscErrorCode PetscCommBuildTwoSidedSetType(MPI_Comm comm,PetscBuildTwoSidedType twosided)
 35: {
 37: #if defined(PETSC_USE_DEBUG)
 38:   {
 39:     PetscMPIInt ierr;
 40:     PetscMPIInt b1[2],b2[2];
 41:     b1[0] = -(PetscMPIInt)twosided;
 42:     b1[1] = (PetscMPIInt)twosided;
 43:     ierr  = MPIU_Allreduce(b1,b2,2,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr);
 44:     if (-b2[0] != b2[1]) SETERRQ(comm,PETSC_ERR_ARG_WRONG,"Enum value must be same on all processes");
 45:   }
 46: #endif
 47:   _twosided_type = twosided;
 48:   return(0);
 49: }
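/* A minimal usage sketch (hypothetical caller, not part of mpits.c): force the Ibarrier-based
 * algorithm for all subsequent PetscCommBuildTwoSided() calls, similar in effect to passing the
 * command-line option -build_twosided ibarrier. */
static PetscErrorCode ExampleSelectIbarrier(void)
{
  PetscErrorCode ierr;

  ierr = PetscCommBuildTwoSidedSetType(PETSC_COMM_WORLD,PETSC_BUILDTWOSIDED_IBARRIER);CHKERRQ(ierr);
  return(0);
}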

 51: /*@
 52:    PetscCommBuildTwoSidedGetType - get the algorithm to use when building two-sided communication

 54:    Logically Collective

 56:    Input Argument:
 57: .  comm - communicator on which to query the algorithm

        Output Argument:
 58: .  twosided - algorithm that will be used by PetscCommBuildTwoSided()

 60:    Level: developer

 62: .seealso: PetscCommBuildTwoSided(), PetscCommBuildTwoSidedSetType()
 63: @*/
 64: PetscErrorCode PetscCommBuildTwoSidedGetType(MPI_Comm comm,PetscBuildTwoSidedType *twosided)
 65: {
 67:   PetscMPIInt    size;

 70:   *twosided = PETSC_BUILDTWOSIDED_NOTSET;
 71:   if (_twosided_type == PETSC_BUILDTWOSIDED_NOTSET) {
 72:     MPI_Comm_size(comm,&size);
 73:     _twosided_type = PETSC_BUILDTWOSIDED_ALLREDUCE; /* default for small comms, see https://gitlab.com/petsc/petsc/-/merge_requests/2611 */
 74: #if defined(PETSC_HAVE_MPI_IBARRIER)
 75:     if (size > 1024) _twosided_type = PETSC_BUILDTWOSIDED_IBARRIER;
 76: #endif
 77:     PetscOptionsGetEnum(NULL,NULL,"-build_twosided",PetscBuildTwoSidedTypes,(PetscEnum*)&_twosided_type,NULL);
 78:   }
 79:   *twosided = _twosided_type;
 80:   return(0);
 81: }
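/* A query sketch (hypothetical helper, not part of mpits.c): report which algorithm the current
 * settings would select on a given communicator.  Indexing PetscBuildTwoSidedTypes[] by the
 * returned value assumes the concrete enum values start at 0, as sketched above. */
static PetscErrorCode ExampleReportTwoSidedType(MPI_Comm comm)
{
  PetscErrorCode         ierr;
  PetscBuildTwoSidedType t;

  ierr = PetscCommBuildTwoSidedGetType(comm,&t);CHKERRQ(ierr);
  ierr = PetscPrintf(comm,"PetscCommBuildTwoSided() will use: %s\n",PetscBuildTwoSidedTypes[t]);CHKERRQ(ierr);
  return(0);
}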

 83: #if defined(PETSC_HAVE_MPI_IBARRIER) || defined(PETSC_HAVE_MPIX_IBARRIER)

 85: static PetscErrorCode PetscCommBuildTwoSided_Ibarrier(MPI_Comm comm,PetscMPIInt count,MPI_Datatype dtype,PetscMPIInt nto,const PetscMPIInt *toranks,const void *todata,PetscMPIInt *nfrom,PetscMPIInt **fromranks,void *fromdata)
 86: {
 88:   PetscMPIInt    nrecvs,tag,done,i;
 89:   MPI_Aint       lb,unitbytes;
 90:   char           *tdata;
 91:   MPI_Request    *sendreqs,barrier;
 92:   PetscSegBuffer segrank,segdata;
 93:   PetscBool      barrier_started;

 96:   PetscCommDuplicate(comm,&comm,&tag);
 97:   MPI_Type_get_extent(dtype,&lb,&unitbytes);
 98:   if (lb != 0) SETERRQ1(comm,PETSC_ERR_SUP,"Datatype with nonzero lower bound %ld\n",(long)lb);
 99:   tdata = (char*)todata;
100:   PetscMalloc1(nto,&sendreqs);
101:   for (i=0; i<nto; i++) {
102:     MPI_Issend((void*)(tdata+count*unitbytes*i),count,dtype,toranks[i],tag,comm,sendreqs+i);
103:   }
104:   PetscSegBufferCreate(sizeof(PetscMPIInt),4,&segrank);
105:   PetscSegBufferCreate(unitbytes,4*count,&segdata);

107:   nrecvs  = 0;
108:   barrier = MPI_REQUEST_NULL;
109:   /* MPICH-3.2 sometimes does not create a request in some "optimized" cases.  This is arguably a standard violation,
110:    * but we need to work around it. */
111:   barrier_started = PETSC_FALSE;
112:   for (done=0; !done; ) {
113:     PetscMPIInt flag;
114:     MPI_Status  status;
115:     MPI_Iprobe(MPI_ANY_SOURCE,tag,comm,&flag,&status);
116:     if (flag) {                 /* incoming message */
117:       PetscMPIInt *recvrank;
118:       void        *buf;
119:       PetscSegBufferGet(segrank,1,&recvrank);
120:       PetscSegBufferGet(segdata,count,&buf);
121:       *recvrank = status.MPI_SOURCE;
122:       MPI_Recv(buf,count,dtype,status.MPI_SOURCE,tag,comm,MPI_STATUS_IGNORE);
123:       nrecvs++;
124:     }
125:     if (!barrier_started) {
126:       PetscMPIInt sent,nsends;
127:       PetscMPIIntCast(nto,&nsends);
128:       MPI_Testall(nsends,sendreqs,&sent,MPI_STATUSES_IGNORE);
129:       if (sent) {
130: #if defined(PETSC_HAVE_MPI_IBARRIER)
131:         MPI_Ibarrier(comm,&barrier);
132: #elif defined(PETSC_HAVE_MPIX_IBARRIER)
133:         MPIX_Ibarrier(comm,&barrier);
134: #endif
135:         barrier_started = PETSC_TRUE;
136:         PetscFree(sendreqs);
137:       }
138:     } else {
139:       MPI_Test(&barrier,&done,MPI_STATUS_IGNORE);
140:     }
141:   }
142:   *nfrom = nrecvs;
143:   PetscSegBufferExtractAlloc(segrank,fromranks);
144:   PetscSegBufferDestroy(&segrank);
145:   PetscSegBufferExtractAlloc(segdata,fromdata);
146:   PetscSegBufferDestroy(&segdata);
147:   PetscCommDestroy(&comm);
148:   return(0);
149: }
150: #endif

152: static PetscErrorCode PetscCommBuildTwoSided_Allreduce(MPI_Comm comm,PetscMPIInt count,MPI_Datatype dtype,PetscMPIInt nto,const PetscMPIInt *toranks,const void *todata,PetscMPIInt *nfrom,PetscMPIInt **fromranks,void *fromdata)
153: {
154:   PetscErrorCode   ierr;
155:   PetscMPIInt      size,rank,*iflags,nrecvs,tag,*franks,i,flg;
156:   MPI_Aint         lb,unitbytes;
157:   char             *tdata,*fdata;
158:   MPI_Request      *reqs,*sendreqs;
159:   MPI_Status       *statuses;
160:   PetscCommCounter *counter;

163:   MPI_Comm_size(comm,&size);
164:   MPI_Comm_rank(comm,&rank);
165:   PetscCommDuplicate(comm,&comm,&tag);
166:   MPI_Comm_get_attr(comm,Petsc_Counter_keyval,&counter,&flg);
167:   if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Inner PETSc communicator does not have its tag/name counter attribute set");
168:   if (!counter->iflags) {
169:     PetscCalloc1(size,&counter->iflags);
170:     iflags = counter->iflags;
171:   } else {
172:     iflags = counter->iflags;
173:     PetscArrayzero(iflags,size);
174:   }
175:   for (i=0; i<nto; i++) iflags[toranks[i]] = 1;
176:   MPIU_Allreduce(MPI_IN_PLACE,iflags,size,MPI_INT,MPI_SUM,comm);
177:   nrecvs   = iflags[rank];
178:   MPI_Type_get_extent(dtype,&lb,&unitbytes);
179:   if (lb != 0) SETERRQ1(comm,PETSC_ERR_SUP,"Datatype with nonzero lower bound %ld\n",(long)lb);
180:   PetscMalloc(nrecvs*count*unitbytes,&fdata);
181:   tdata    = (char*)todata;
182:   PetscMalloc2(nto+nrecvs,&reqs,nto+nrecvs,&statuses);
183:   sendreqs = reqs + nrecvs;
184:   for (i=0; i<nrecvs; i++) {
185:     MPI_Irecv((void*)(fdata+count*unitbytes*i),count,dtype,MPI_ANY_SOURCE,tag,comm,reqs+i);
186:   }
187:   for (i=0; i<nto; i++) {
188:     MPI_Isend((void*)(tdata+count*unitbytes*i),count,dtype,toranks[i],tag,comm,sendreqs+i);
189:   }
190:   MPI_Waitall(nto+nrecvs,reqs,statuses);
191:   PetscMalloc1(nrecvs,&franks);
192:   for (i=0; i<nrecvs; i++) franks[i] = statuses[i].MPI_SOURCE;
193:   PetscFree2(reqs,statuses);
194:   PetscCommDestroy(&comm);

196:   *nfrom            = nrecvs;
197:   *fromranks        = franks;
198:   *(void**)fromdata = fdata;
199:   return(0);
200: }

202: #if defined(PETSC_HAVE_MPI_REDUCE_SCATTER_BLOCK)
203: static PetscErrorCode PetscCommBuildTwoSided_RedScatter(MPI_Comm comm,PetscMPIInt count,MPI_Datatype dtype,PetscMPIInt nto,const PetscMPIInt *toranks,const void *todata,PetscMPIInt *nfrom,PetscMPIInt **fromranks,void *fromdata)
204: {
206:   PetscMPIInt    size,*iflags,nrecvs,tag,*franks,i,flg;
207:   MPI_Aint       lb,unitbytes;
208:   char           *tdata,*fdata;
209:   MPI_Request    *reqs,*sendreqs;
210:   MPI_Status     *statuses;
211:   PetscCommCounter *counter;

214:   MPI_Comm_size(comm,&size);
215:   PetscCommDuplicate(comm,&comm,&tag);
216:   MPI_Comm_get_attr(comm,Petsc_Counter_keyval,&counter,&flg);
217:   if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Inner PETSc communicator does not have its tag/name counter attribute set");
218:   if (!counter->iflags) {
219:     PetscCalloc1(size,&counter->iflags);
220:     iflags = counter->iflags;
221:   } else {
222:     iflags = counter->iflags;
223:     PetscArrayzero(iflags,size);
224:   }
225:   for (i=0; i<nto; i++) iflags[toranks[i]] = 1;
226:   MPI_Reduce_scatter_block(iflags,&nrecvs,1,MPI_INT,MPI_SUM,comm);
227:   MPI_Type_get_extent(dtype,&lb,&unitbytes);
228:   if (lb != 0) SETERRQ1(comm,PETSC_ERR_SUP,"Datatype with nonzero lower bound %ld\n",(long)lb);
229:   PetscMalloc(nrecvs*count*unitbytes,&fdata);
230:   tdata    = (char*)todata;
231:   PetscMalloc2(nto+nrecvs,&reqs,nto+nrecvs,&statuses);
232:   sendreqs = reqs + nrecvs;
233:   for (i=0; i<nrecvs; i++) {
234:     MPI_Irecv((void*)(fdata+count*unitbytes*i),count,dtype,MPI_ANY_SOURCE,tag,comm,reqs+i);
235:   }
236:   for (i=0; i<nto; i++) {
237:     MPI_Isend((void*)(tdata+count*unitbytes*i),count,dtype,toranks[i],tag,comm,sendreqs+i);
238:   }
239:   MPI_Waitall(nto+nrecvs,reqs,statuses);
240:   PetscMalloc1(nrecvs,&franks);
241:   for (i=0; i<nrecvs; i++) franks[i] = statuses[i].MPI_SOURCE;
242:   PetscFree2(reqs,statuses);
243:   PetscCommDestroy(&comm);

245:   *nfrom            = nrecvs;
246:   *fromranks        = franks;
247:   *(void**)fromdata = fdata;
248:   return(0);
249: }
250: #endif

252: /*@C
253:    PetscCommBuildTwoSided - discovers communicating ranks given one-sided information, moving constant-sized data in the process (often message lengths)

255:    Collective

257:    Input Arguments:
258: +  comm - communicator
259: .  count - number of entries to send/receive (must match on all ranks)
260: .  dtype - datatype to send/receive from each rank (must match on all ranks)
261: .  nto - number of ranks to send data to
262: .  toranks - ranks to send to (array of length nto)
263: -  todata - data to send to each rank (packed)

265:    Output Arguments:
266: +  nfrom - number of ranks from which messages will be received
267: .  fromranks - ranks from which messages will be received (length nfrom; caller should PetscFree())
268: -  fromdata - packed data from each sending rank, each with count entries of type dtype (length nfrom; caller responsible for PetscFree())

270:    Level: developer

272:    Options Database Keys:
273: .  -build_twosided <allreduce|ibarrier|redscatter> - algorithm to set up two-sided communication. Default is allreduce for communicators with <= 1024 ranks, otherwise ibarrier.

275:    Notes:
276:    This memory-scalable interface is an alternative to calling PetscGatherNumberOfMessages() and
277:    PetscGatherMessageLengths(), possibly with a subsequent round of communication to send other constant-size data.

279:    Basic data types as well as contiguous types are supported, but non-contiguous (e.g., strided) types are not.

281:    References:
282: .  1. - Hoefler, Siebert and Lumsdaine, Scalable communication protocols for dynamic sparse data exchange, 2010.
283:    The MPI_Ibarrier-based implementation uses the algorithm from this reference.

285: .seealso: PetscGatherNumberOfMessages(), PetscGatherMessageLengths()
286: @*/
287: PetscErrorCode PetscCommBuildTwoSided(MPI_Comm comm,PetscMPIInt count,MPI_Datatype dtype,PetscMPIInt nto,const PetscMPIInt *toranks,const void *todata,PetscMPIInt *nfrom,PetscMPIInt **fromranks,void *fromdata)
288: {
289:   PetscErrorCode         ierr;
290:   PetscBuildTwoSidedType buildtype = PETSC_BUILDTWOSIDED_NOTSET;

293:   PetscSysInitializePackage();
294:   PetscLogEventSync(PETSC_BuildTwoSided,comm);
295:   PetscLogEventBegin(PETSC_BuildTwoSided,0,0,0,0);
296:   PetscCommBuildTwoSidedGetType(comm,&buildtype);
297:   switch (buildtype) {
298:   case PETSC_BUILDTWOSIDED_IBARRIER:
299: #if defined(PETSC_HAVE_MPI_IBARRIER) || defined(PETSC_HAVE_MPIX_IBARRIER)
300:     PetscCommBuildTwoSided_Ibarrier(comm,count,dtype,nto,toranks,todata,nfrom,fromranks,fromdata);
301: #else
302:     SETERRQ(comm,PETSC_ERR_PLIB,"MPI implementation does not provide MPI_Ibarrier (part of MPI-3)");
303: #endif
304:     break;
305:   case PETSC_BUILDTWOSIDED_ALLREDUCE:
306:     PetscCommBuildTwoSided_Allreduce(comm,count,dtype,nto,toranks,todata,nfrom,fromranks,fromdata);
307:     break;
308:   case PETSC_BUILDTWOSIDED_REDSCATTER:
309: #if defined(PETSC_HAVE_MPI_REDUCE_SCATTER_BLOCK)
310:     PetscCommBuildTwoSided_RedScatter(comm,count,dtype,nto,toranks,todata,nfrom,fromranks,fromdata);
311: #else
312:     SETERRQ(comm,PETSC_ERR_PLIB,"MPI implementation does not provide MPI_Reduce_scatter_block (part of MPI-2.2)");
313: #endif
314:     break;
315:   default: SETERRQ(comm,PETSC_ERR_PLIB,"Unknown method for building two-sided communication");
316:   }
317:   PetscLogEventEnd(PETSC_BuildTwoSided,0,0,0,0);
318:   return(0);
319: }
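/* A usage sketch (hypothetical routine, not part of mpits.c): each rank announces one integer
 * (say, an upcoming message length) to the ranks in toranks[] and discovers which ranks are
 * announcing to it.  The toranks/tolens arrays are assumed to be provided by the caller. */
static PetscErrorCode ExampleExchangeLengths(MPI_Comm comm,PetscMPIInt nto,const PetscMPIInt *toranks,const PetscMPIInt *tolens)
{
  PetscErrorCode ierr;
  PetscMPIInt    nfrom,*fromranks,*fromlens,i;

  ierr = PetscCommBuildTwoSided(comm,1,MPI_INT,nto,toranks,tolens,&nfrom,&fromranks,&fromlens);CHKERRQ(ierr);
  for (i=0; i<nfrom; i++) {
    ierr = PetscSynchronizedPrintf(comm,"expecting %d entries from rank %d\n",fromlens[i],fromranks[i]);CHKERRQ(ierr);
  }
  ierr = PetscSynchronizedFlush(comm,PETSC_STDOUT);CHKERRQ(ierr);
  ierr = PetscFree(fromranks);CHKERRQ(ierr);
  ierr = PetscFree(fromlens);CHKERRQ(ierr);
  return(0);
}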

321: static PetscErrorCode PetscCommBuildTwoSidedFReq_Reference(MPI_Comm comm,PetscMPIInt count,MPI_Datatype dtype,PetscMPIInt nto,const PetscMPIInt *toranks,const void *todata,
322:                                                            PetscMPIInt *nfrom,PetscMPIInt **fromranks,void *fromdata,PetscMPIInt ntags,MPI_Request **toreqs,MPI_Request **fromreqs,
323:                                                            PetscErrorCode (*send)(MPI_Comm,const PetscMPIInt[],PetscMPIInt,PetscMPIInt,void*,MPI_Request[],void*),
324:                                                            PetscErrorCode (*recv)(MPI_Comm,const PetscMPIInt[],PetscMPIInt,void*,MPI_Request[],void*),void *ctx)
325: {
327:   PetscMPIInt i,*tag;
328:   MPI_Aint    lb,unitbytes;
329:   MPI_Request *sendreq,*recvreq;

332:   PetscMalloc1(ntags,&tag);
333:   if (ntags > 0) {
334:     PetscCommDuplicate(comm,&comm,&tag[0]);
335:   }
336:   for (i=1; i<ntags; i++) {
337:     PetscCommGetNewTag(comm,&tag[i]);
338:   }

340:   /* Perform complete initial rendezvous */
341:   PetscCommBuildTwoSided(comm,count,dtype,nto,toranks,todata,nfrom,fromranks,fromdata);

343:   PetscMalloc1(nto*ntags,&sendreq);
344:   PetscMalloc1(*nfrom*ntags,&recvreq);

346:   MPI_Type_get_extent(dtype,&lb,&unitbytes);
347:   if (lb != 0) SETERRQ1(comm,PETSC_ERR_SUP,"Datatype with nonzero lower bound %ld\n",(long)lb);
348:   for (i=0; i<nto; i++) {
349:     PetscMPIInt k;
350:     for (k=0; k<ntags; k++) sendreq[i*ntags+k] = MPI_REQUEST_NULL;
351:     (*send)(comm,tag,i,toranks[i],((char*)todata)+count*unitbytes*i,sendreq+i*ntags,ctx);
352:   }
353:   for (i=0; i<*nfrom; i++) {
354:     void *header = (*(char**)fromdata) + count*unitbytes*i;
355:     PetscMPIInt k;
356:     for (k=0; k<ntags; k++) recvreq[i*ntags+k] = MPI_REQUEST_NULL;
357:     (*recv)(comm,tag,(*fromranks)[i],header,recvreq+i*ntags,ctx);
358:   }
359:   PetscFree(tag);
360:   PetscCommDestroy(&comm);
361:   *toreqs = sendreq;
362:   *fromreqs = recvreq;
363:   return(0);
364: }

366: #if defined(PETSC_HAVE_MPI_IBARRIER) || defined(PETSC_HAVE_MPIX_IBARRIER)

368: static PetscErrorCode PetscCommBuildTwoSidedFReq_Ibarrier(MPI_Comm comm,PetscMPIInt count,MPI_Datatype dtype,PetscMPIInt nto,const PetscMPIInt *toranks,const void *todata,
369:                                                           PetscMPIInt *nfrom,PetscMPIInt **fromranks,void *fromdata,PetscMPIInt ntags,MPI_Request **toreqs,MPI_Request **fromreqs,
370:                                                           PetscErrorCode (*send)(MPI_Comm,const PetscMPIInt[],PetscMPIInt,PetscMPIInt,void*,MPI_Request[],void*),
371:                                                           PetscErrorCode (*recv)(MPI_Comm,const PetscMPIInt[],PetscMPIInt,void*,MPI_Request[],void*),void *ctx)
372: {
374:   PetscMPIInt    nrecvs,tag,*tags,done,i;
375:   MPI_Aint       lb,unitbytes;
376:   char           *tdata;
377:   MPI_Request    *sendreqs,*usendreqs,*req,barrier;
378:   PetscSegBuffer segrank,segdata,segreq;
379:   PetscBool      barrier_started;

382:   PetscCommDuplicate(comm,&comm,&tag);
383:   PetscMalloc1(ntags,&tags);
384:   for (i=0; i<ntags; i++) {
385:     PetscCommGetNewTag(comm,&tags[i]);
386:   }
387:   MPI_Type_get_extent(dtype,&lb,&unitbytes);
388:   if (lb != 0) SETERRQ1(comm,PETSC_ERR_SUP,"Datatype with nonzero lower bound %ld\n",(long)lb);
389:   tdata = (char*)todata;
390:   PetscMalloc1(nto,&sendreqs);
391:   PetscMalloc1(nto*ntags,&usendreqs);
392:   /* Post synchronous sends */
393:   for (i=0; i<nto; i++) {
394:     MPI_Issend((void*)(tdata+count*unitbytes*i),count,dtype,toranks[i],tag,comm,sendreqs+i);
395:   }
396:   /* Post actual payloads.  These are typically larger messages.  Hopefully sending these later does not slow down the
397:    * synchronous messages above. */
398:   for (i=0; i<nto; i++) {
399:     PetscMPIInt k;
400:     for (k=0; k<ntags; k++) usendreqs[i*ntags+k] = MPI_REQUEST_NULL;
401:     (*send)(comm,tags,i,toranks[i],tdata+count*unitbytes*i,usendreqs+i*ntags,ctx);
402:   }

404:   PetscSegBufferCreate(sizeof(PetscMPIInt),4,&segrank);
405:   PetscSegBufferCreate(unitbytes,4*count,&segdata);
406:   PetscSegBufferCreate(sizeof(MPI_Request),4,&segreq);

408:   nrecvs  = 0;
409:   barrier = MPI_REQUEST_NULL;
410:   /* MPICH-3.2 sometimes does not create a request in some "optimized" cases.  This is arguably a standard violation,
411:    * but we need to work around it. */
412:   barrier_started = PETSC_FALSE;
413:   for (done=0; !done; ) {
414:     PetscMPIInt flag;
415:     MPI_Status  status;
416:     MPI_Iprobe(MPI_ANY_SOURCE,tag,comm,&flag,&status);
417:     if (flag) {                 /* incoming message */
418:       PetscMPIInt *recvrank,k;
419:       void        *buf;
420:       PetscSegBufferGet(segrank,1,&recvrank);
421:       PetscSegBufferGet(segdata,count,&buf);
422:       *recvrank = status.MPI_SOURCE;
423:       MPI_Recv(buf,count,dtype,status.MPI_SOURCE,tag,comm,MPI_STATUS_IGNORE);
424:       PetscSegBufferGet(segreq,ntags,&req);
425:       for (k=0; k<ntags; k++) req[k] = MPI_REQUEST_NULL;
426:       (*recv)(comm,tags,status.MPI_SOURCE,buf,req,ctx);
427:       nrecvs++;
428:     }
429:     if (!barrier_started) {
430:       PetscMPIInt sent,nsends;
431:       PetscMPIIntCast(nto,&nsends);
432:       MPI_Testall(nsends,sendreqs,&sent,MPI_STATUSES_IGNORE);
433:       if (sent) {
434: #if defined(PETSC_HAVE_MPI_IBARRIER)
435:         MPI_Ibarrier(comm,&barrier);
436: #elif defined(PETSC_HAVE_MPIX_IBARRIER)
437:         MPIX_Ibarrier(comm,&barrier);
438: #endif
439:         barrier_started = PETSC_TRUE;
440:       }
441:     } else {
442:       MPI_Test(&barrier,&done,MPI_STATUS_IGNORE);
443:     }
444:   }
445:   *nfrom = nrecvs;
446:   PetscSegBufferExtractAlloc(segrank,fromranks);
447:   PetscSegBufferDestroy(&segrank);
448:   PetscSegBufferExtractAlloc(segdata,fromdata);
449:   PetscSegBufferDestroy(&segdata);
450:   *toreqs = usendreqs;
451:   PetscSegBufferExtractAlloc(segreq,fromreqs);
452:   PetscSegBufferDestroy(&segreq);
453:   PetscFree(sendreqs);
454:   PetscFree(tags);
455:   PetscCommDestroy(&comm);
456:   return(0);
457: }
458: #endif

460: /*@C
461:    PetscCommBuildTwoSidedF - discovers communicating ranks given one-sided information, calling user-defined functions during rendezvous

463:    Collective

465:    Input Arguments:
466: +  comm - communicator
467: .  count - number of entries to send/receive in initial rendezvous (must match on all ranks)
468: .  dtype - datatype to send/receive from each rank (must match on all ranks)
469: .  nto - number of ranks to send data to
470: .  toranks - ranks to send to (array of length nto)
471: .  todata - data to send to each rank (packed)
472: .  ntags - number of tags needed by send/recv callbacks
473: .  send - callback invoked on sending process when ready to send primary payload
474: .  recv - callback invoked on receiving process after delivery of rendezvous message
475: -  ctx - context for callbacks

477:    Output Arguments:
478: +  nfrom - number of ranks from which messages will be received
479: .  fromranks - ranks from which messages will be received (length nfrom; caller should PetscFree())
480: -  fromdata - packed data from each sending rank, each with count entries of type dtype (length nfrom; caller responsible for PetscFree())

482:    Level: developer

484:    Notes:
485:    This memory-scalable interface is an alternative to calling PetscGatherNumberOfMessages() and
486:    PetscGatherMessageLengths(), possibly with a subsequent round of communication to send other data.

488:    Basic data types as well as contiguous types are supported, but non-contiguous (e.g., strided) types are not.

490:    References:
491: .  1. - Hoefler, Siebert and Lumsdaine, Scalable communication protocols for dynamic sparse data exchange, 2010.
492:    The MPI_Ibarrier-based implementation uses the algorithm from this reference.

494: .seealso: PetscCommBuildTwoSided(), PetscCommBuildTwoSidedFReq(), PetscGatherNumberOfMessages(), PetscGatherMessageLengths()
495: @*/
496: PetscErrorCode PetscCommBuildTwoSidedF(MPI_Comm comm,PetscMPIInt count,MPI_Datatype dtype,PetscMPIInt nto,const PetscMPIInt *toranks,const void *todata,PetscMPIInt *nfrom,PetscMPIInt **fromranks,void *fromdata,PetscMPIInt ntags,
497:                                        PetscErrorCode (*send)(MPI_Comm,const PetscMPIInt[],PetscMPIInt,PetscMPIInt,void*,MPI_Request[],void*),
498:                                        PetscErrorCode (*recv)(MPI_Comm,const PetscMPIInt[],PetscMPIInt,void*,MPI_Request[],void*),void *ctx)
499: {
501:   MPI_Request    *toreqs,*fromreqs;

504:   PetscCommBuildTwoSidedFReq(comm,count,dtype,nto,toranks,todata,nfrom,fromranks,fromdata,ntags,&toreqs,&fromreqs,send,recv,ctx);
505:   MPI_Waitall(nto*ntags,toreqs,MPI_STATUSES_IGNORE);
506:   MPI_Waitall(*nfrom*ntags,fromreqs,MPI_STATUSES_IGNORE);
507:   PetscFree(toreqs);
508:   PetscFree(fromreqs);
509:   return(0);
510: }
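/* A callback sketch (hypothetical, not part of mpits.c) for use with PetscCommBuildTwoSidedF()
 * or PetscCommBuildTwoSidedFReq() with ntags = 1: the rendezvous message carries a payload
 * length, the send callback posts an MPI_Isend of the corresponding payload, and the recv
 * callback allocates a buffer and posts the matching MPI_Irecv.  The ExampleCtx layout is an
 * assumption; recvbufs is presumed to have one slot per rank of the communicator. */
typedef struct {
  PetscScalar **sendbufs;   /* sendbufs[i] is the payload destined for toranks[i] */
  PetscScalar **recvbufs;   /* recvbufs[rank] receives the payload arriving from that rank */
} ExampleCtx;

static PetscErrorCode ExampleSend(MPI_Comm comm,const PetscMPIInt tags[],PetscMPIInt tonum,PetscMPIInt rank,void *header,MPI_Request reqs[],void *ctx)
{
  ExampleCtx     *ec  = (ExampleCtx*)ctx;
  PetscMPIInt     len = *(PetscMPIInt*)header;   /* length advertised in the rendezvous message */
  PetscErrorCode  ierr;

  ierr = MPI_Isend(ec->sendbufs[tonum],len,MPIU_SCALAR,rank,tags[0],comm,&reqs[0]);CHKERRQ(ierr);
  return(0);
}

static PetscErrorCode ExampleRecv(MPI_Comm comm,const PetscMPIInt tags[],PetscMPIInt rank,void *header,MPI_Request reqs[],void *ctx)
{
  ExampleCtx     *ec  = (ExampleCtx*)ctx;
  PetscMPIInt     len = *(PetscMPIInt*)header;
  PetscErrorCode  ierr;

  ierr = PetscMalloc1(len,&ec->recvbufs[rank]);CHKERRQ(ierr);
  ierr = MPI_Irecv(ec->recvbufs[rank],len,MPIU_SCALAR,rank,tags[0],comm,&reqs[0]);CHKERRQ(ierr);
  return(0);
}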

512: /*@C
513:    PetscCommBuildTwoSidedFReq - discovers communicating ranks given one-sided information, calling user-defined functions during rendezvous, returns requests

515:    Collective

517:    Input Arguments:
518: +  comm - communicator
519: .  count - number of entries to send/receive in initial rendezvous (must match on all ranks)
520: .  dtype - datatype to send/receive from each rank (must match on all ranks)
521: .  nto - number of ranks to send data to
522: .  toranks - ranks to send to (array of length nto)
523: .  todata - data to send to each rank (packed)
524: .  ntags - number of tags needed by send/recv callbacks
525: .  send - callback invoked on sending process when ready to send primary payload
526: .  recv - callback invoked on receiving process after delivery of rendezvous message
527: -  ctx - context for callbacks

529:    Output Arguments:
530: +  nfrom - number of ranks from which messages will be received
531: .  fromranks - ranks from which messages will be received (length nfrom; caller should PetscFree())
532: .  fromdata - packed data from each sending rank, each with count entries of type dtype (length nfrom; caller responsible for PetscFree())
533: .  toreqs - array of nto*ntags sender requests (caller must wait on these, then PetscFree())
534: -  fromreqs - array of nfrom*ntags receiver requests (caller must wait on these, then PetscFree())

536:    Level: developer

538:    Notes:
539:    This memory-scalable interface is an alternative to calling PetscGatherNumberOfMessages() and
540:    PetscGatherMessageLengths(), possibly with a subsequent round of communication to send other data.

542:    Basic data types as well as contiguous types are supported, but non-contiguous (e.g., strided) types are not.

544:    References:
545: .  1. - Hoefler, Siebert and Lumsdaine, Scalable communication protocols for dynamic sparse data exchange, 2010.
546:    The MPI_Ibarrier-based implementation uses the algorithm from this reference.

548: .seealso: PetscCommBuildTwoSided(), PetscCommBuildTwoSidedF(), PetscGatherNumberOfMessages(), PetscGatherMessageLengths()
549: @*/
550: PetscErrorCode PetscCommBuildTwoSidedFReq(MPI_Comm comm,PetscMPIInt count,MPI_Datatype dtype,PetscMPIInt nto,const PetscMPIInt *toranks,const void *todata,
551:                                           PetscMPIInt *nfrom,PetscMPIInt **fromranks,void *fromdata,PetscMPIInt ntags,MPI_Request **toreqs,MPI_Request **fromreqs,
552:                                           PetscErrorCode (*send)(MPI_Comm,const PetscMPIInt[],PetscMPIInt,PetscMPIInt,void*,MPI_Request[],void*),
553:                                           PetscErrorCode (*recv)(MPI_Comm,const PetscMPIInt[],PetscMPIInt,void*,MPI_Request[],void*),void *ctx)
554: {
555:   PetscErrorCode         ierr,(*f)(MPI_Comm,PetscMPIInt,MPI_Datatype,PetscMPIInt,const PetscMPIInt[],const void*,
556:                                    PetscMPIInt*,PetscMPIInt**,void*,PetscMPIInt,MPI_Request**,MPI_Request**,
557:                                    PetscErrorCode (*send)(MPI_Comm,const PetscMPIInt[],PetscMPIInt,PetscMPIInt,void*,MPI_Request[],void*),
558:                                    PetscErrorCode (*recv)(MPI_Comm,const PetscMPIInt[],PetscMPIInt,void*,MPI_Request[],void*),void *ctx);
559:   PetscBuildTwoSidedType buildtype = PETSC_BUILDTWOSIDED_NOTSET;
560:   PetscMPIInt i,size;

563:   PetscSysInitializePackage();
564:   MPI_Comm_size(comm,&size);
565:   for (i=0; i<nto; i++) {
566:     if (toranks[i] < 0 || size <= toranks[i]) SETERRQ3(comm,PETSC_ERR_ARG_OUTOFRANGE,"toranks[%d] %d not in comm size %d",i,toranks[i],size);
567:   }
568:   PetscLogEventSync(PETSC_BuildTwoSidedF,comm);
569:   PetscLogEventBegin(PETSC_BuildTwoSidedF,0,0,0,0);
570:   PetscCommBuildTwoSidedGetType(comm,&buildtype);
571:   switch (buildtype) {
572:   case PETSC_BUILDTWOSIDED_IBARRIER:
573: #if defined(PETSC_HAVE_MPI_IBARRIER) || defined(PETSC_HAVE_MPIX_IBARRIER)
574:     f = PetscCommBuildTwoSidedFReq_Ibarrier;
575: #else
576:     SETERRQ(comm,PETSC_ERR_PLIB,"MPI implementation does not provide MPI_Ibarrier (part of MPI-3)");
577: #endif
578:     break;
579:   case PETSC_BUILDTWOSIDED_ALLREDUCE:
580:   case PETSC_BUILDTWOSIDED_REDSCATTER:
581:     f = PetscCommBuildTwoSidedFReq_Reference;
582:     break;
583:   default: SETERRQ(comm,PETSC_ERR_PLIB,"Unknown method for building two-sided communication");
584:   }
585:   (*f)(comm,count,dtype,nto,toranks,todata,nfrom,fromranks,fromdata,ntags,toreqs,fromreqs,send,recv,ctx);
586:   PetscLogEventEnd(PETSC_BuildTwoSidedF,0,0,0,0);
587:   return(0);
588: }
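/* A driver sketch (hypothetical) tying the pieces together with the ExampleSend/ExampleRecv
 * callbacks sketched earlier and ntags = 1: tolens[] advertises per-destination payload lengths
 * in the rendezvous, and the caller completes and frees the callback-posted requests itself
 * (PetscCommBuildTwoSidedF() would perform the two MPI_Waitall() calls internally). */
static PetscErrorCode ExampleExchangePayloads(MPI_Comm comm,PetscMPIInt nto,const PetscMPIInt *toranks,const PetscMPIInt *tolens,ExampleCtx *ctx)
{
  PetscErrorCode ierr;
  PetscMPIInt    nfrom,*fromranks,*fromlens;
  MPI_Request    *toreqs,*fromreqs;

  ierr = PetscCommBuildTwoSidedFReq(comm,1,MPI_INT,nto,toranks,tolens,&nfrom,&fromranks,&fromlens,1,&toreqs,&fromreqs,ExampleSend,ExampleRecv,ctx);CHKERRQ(ierr);
  /* Unrelated work could be overlapped here before completing the payload transfers. */
  ierr = MPI_Waitall(nto,toreqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
  ierr = MPI_Waitall(nfrom,fromreqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
  ierr = PetscFree(toreqs);CHKERRQ(ierr);
  ierr = PetscFree(fromreqs);CHKERRQ(ierr);
  ierr = PetscFree(fromranks);CHKERRQ(ierr);
  ierr = PetscFree(fromlens);CHKERRQ(ierr);
  return(0);
}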