Actual source code: mpits.c

petsc-3.9.4 2018-09-11
  1:  #include <petscsys.h>

  3: PetscLogEvent PETSC_BuildTwoSided,PETSC_BuildTwoSidedF;

  5: const char *const PetscBuildTwoSidedTypes[] = {
  6:   "ALLREDUCE",
  7:   "IBARRIER",
  8:   "REDSCATTER",
  9:   "PetscBuildTwoSidedType",
 10:   "PETSC_BUILDTWOSIDED_",
 11:   0
 12: };

 14: static PetscBuildTwoSidedType _twosided_type = PETSC_BUILDTWOSIDED_NOTSET;

 16: /*@
 17:    PetscCommBuildTwoSidedSetType - set algorithm to use when building two-sided communication

 19:    Logically Collective

 21:    Input Arguments:
 22: +  comm - communicator (the setting is currently global, so use PETSC_COMM_WORLD)
 23: -  twosided - algorithm to use in subsequent calls to PetscCommBuildTwoSided()

 25:    Level: developer

 27:    Note:
 28:    This option is currently global, but could be made per-communicator.

 30: .seealso: PetscCommBuildTwoSided(), PetscCommBuildTwoSidedGetType()
 31: @*/
 32: PetscErrorCode PetscCommBuildTwoSidedSetType(MPI_Comm comm,PetscBuildTwoSidedType twosided)
 33: {
 35: #if defined(PETSC_USE_DEBUG)
 36:   {
 37:     PetscMPIInt ierr;
 38:     PetscMPIInt b1[2],b2[2];
 39:     b1[0] = -(PetscMPIInt)twosided;
 40:     b1[1] = (PetscMPIInt)twosided;
 41:     MPIU_Allreduce(b1,b2,2,MPI_INT,MPI_MAX,comm);
 42:     if (-b2[0] != b2[1]) SETERRQ(comm,PETSC_ERR_ARG_WRONG,"Enum value must be same on all processes");
 43:   }
 44: #endif
 45:   _twosided_type = twosided;
 46:   return(0);
 47: }
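
/* A minimal usage sketch (not from mpits.c): select the Ibarrier algorithm
   explicitly before the first rendezvous; the same choice can be made at run
   time with -build_twosided ibarrier.  The call is logically collective, so
   every rank must pass the same value. */
#include <petscsys.h>

int main(int argc,char **argv)
{
  PetscErrorCode ierr;

  ierr = PetscInitialize(&argc,&argv,NULL,NULL);if (ierr) return ierr;
  ierr = PetscCommBuildTwoSidedSetType(PETSC_COMM_WORLD,PETSC_BUILDTWOSIDED_IBARRIER);CHKERRQ(ierr);
  /* ... subsequent calls to PetscCommBuildTwoSided() now use the Ibarrier algorithm ... */
  ierr = PetscFinalize();
  return ierr;
}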

 49: /*@
 50:    PetscCommBuildTwoSidedGetType - get the algorithm to use when building two-sided communication

 52:    Logically Collective

 54:    Input/Output Arguments:
 55: +  comm - communicator on which to query the algorithm (input)
 56: -  twosided - algorithm that will be used by PetscCommBuildTwoSided() (output)

 58:    Level: developer

 60: .seealso: PetscCommBuildTwoSided(), PetscCommBuildTwoSidedSetType()
 61: @*/
 62: PetscErrorCode PetscCommBuildTwoSidedGetType(MPI_Comm comm,PetscBuildTwoSidedType *twosided)
 63: {

 67:   *twosided = PETSC_BUILDTWOSIDED_NOTSET;
 68:   if (_twosided_type == PETSC_BUILDTWOSIDED_NOTSET) {
 69: #if defined(PETSC_HAVE_MPI_IBARRIER)
 70: #  if defined(PETSC_HAVE_MPICH_CH3_SOCK) && !defined(PETSC_HAVE_MPICH_CH3_SOCK_FIXED_NBC_PROGRESS)
 71:     /* Deadlock in Ibarrier: http://trac.mpich.org/projects/mpich/ticket/1785 */
 72:     _twosided_type = PETSC_BUILDTWOSIDED_ALLREDUCE;
 73: #  else
 74:     _twosided_type = PETSC_BUILDTWOSIDED_IBARRIER;
 75: #  endif
 76: #else
 77:     _twosided_type = PETSC_BUILDTWOSIDED_ALLREDUCE;
 78: #endif
 79:     PetscOptionsGetEnum(NULL,NULL,"-build_twosided",PetscBuildTwoSidedTypes,(PetscEnum*)&_twosided_type,NULL);
 80:   }
 81:   *twosided = _twosided_type;
 82:   return(0);
 83: }
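
/* A small query sketch (not from mpits.c; the helper name is hypothetical).  If no
   type has been set, the call above installs the default (Ibarrier when the MPI
   implementation provides it, otherwise Allreduce) and honours -build_twosided. */
static PetscErrorCode QueryTwoSidedType(void)
{
  PetscErrorCode         ierr;
  PetscBuildTwoSidedType t;

  ierr = PetscCommBuildTwoSidedGetType(PETSC_COMM_WORLD,&t);CHKERRQ(ierr);
  ierr = PetscPrintf(PETSC_COMM_WORLD,"two-sided build algorithm: %s\n",PetscBuildTwoSidedTypes[t]);CHKERRQ(ierr);
  return 0;
}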

 85: #if defined(PETSC_HAVE_MPI_IBARRIER) || defined(PETSC_HAVE_MPIX_IBARRIER)

 87: static PetscErrorCode PetscCommBuildTwoSided_Ibarrier(MPI_Comm comm,PetscMPIInt count,MPI_Datatype dtype,PetscMPIInt nto,const PetscMPIInt *toranks,const void *todata,PetscMPIInt *nfrom,PetscMPIInt **fromranks,void *fromdata)
 88: {
 90:   PetscMPIInt    nrecvs,tag,done,i;
 91:   MPI_Aint       lb,unitbytes;
 92:   char           *tdata;
 93:   MPI_Request    *sendreqs,barrier;
 94:   PetscSegBuffer segrank,segdata;
 95:   PetscBool      barrier_started;

 98:   PetscCommDuplicate(comm,&comm,&tag);
 99:   MPI_Type_get_extent(dtype,&lb,&unitbytes);
100:   if (lb != 0) SETERRQ1(comm,PETSC_ERR_SUP,"Datatype with nonzero lower bound %ld\n",(long)lb);
101:   tdata = (char*)todata;
102:   PetscMalloc1(nto,&sendreqs);
103:   for (i=0; i<nto; i++) {
104:     MPI_Issend((void*)(tdata+count*unitbytes*i),count,dtype,toranks[i],tag,comm,sendreqs+i);
105:   }
106:   PetscSegBufferCreate(sizeof(PetscMPIInt),4,&segrank);
107:   PetscSegBufferCreate(unitbytes,4*count,&segdata);

109:   nrecvs  = 0;
110:   barrier = MPI_REQUEST_NULL;
111:   /* MPICH-3.2 sometimes does not create a request in some "optimized" cases.  This is arguably a standard violation,
112:    * but we need to work around it. */
113:   barrier_started = PETSC_FALSE;
114:   for (done=0; !done; ) {
115:     PetscMPIInt flag;
116:     MPI_Status  status;
117:     MPI_Iprobe(MPI_ANY_SOURCE,tag,comm,&flag,&status);
118:     if (flag) {                 /* incoming message */
119:       PetscMPIInt *recvrank;
120:       void        *buf;
121:       PetscSegBufferGet(segrank,1,&recvrank);
122:       PetscSegBufferGet(segdata,count,&buf);
123:       *recvrank = status.MPI_SOURCE;
124:       MPI_Recv(buf,count,dtype,status.MPI_SOURCE,tag,comm,MPI_STATUS_IGNORE);
125:       nrecvs++;
126:     }
127:     if (!barrier_started) {
128:       PetscMPIInt sent,nsends;
129:       PetscMPIIntCast(nto,&nsends);
130:       MPI_Testall(nsends,sendreqs,&sent,MPI_STATUSES_IGNORE);
131:       if (sent) {
132: #if defined(PETSC_HAVE_MPI_IBARRIER)
133:         MPI_Ibarrier(comm,&barrier);
134: #elif defined(PETSC_HAVE_MPIX_IBARRIER)
135:         MPIX_Ibarrier(comm,&barrier);
136: #endif
137:         barrier_started = PETSC_TRUE;
138:         PetscFree(sendreqs);
139:       }
140:     } else {
141:       MPI_Test(&barrier,&done,MPI_STATUS_IGNORE);
142:     }
143:   }
144:   *nfrom = nrecvs;
145:   PetscSegBufferExtractAlloc(segrank,fromranks);
146:   PetscSegBufferDestroy(&segrank);
147:   PetscSegBufferExtractAlloc(segdata,fromdata);
148:   PetscSegBufferDestroy(&segdata);
149:   PetscCommDestroy(&comm);
150:   return(0);
151: }
152: #endif
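
/* The routine above implements the NBX protocol of Hoefler, Siebert and Lumsdaine
   (2010).  Stripped of the PETSc machinery (duplicated communicator, PetscSegBuffer
   accumulation, general datatypes), the core of the protocol looks roughly like the
   sketch below, which sends one int to each destination and returns how many ranks
   sent to the caller.  All names and the fixed tag are illustrative only. */
#include <mpi.h>
#include <stdlib.h>

static int nbx_sketch(MPI_Comm comm,int nto,const int *toranks,const int *todata,int tag)
{
  MPI_Request *sendreqs,barrier = MPI_REQUEST_NULL;
  int          i,done = 0,barrier_started = 0,nrecvs = 0;

  sendreqs = (MPI_Request*)malloc(nto*sizeof(MPI_Request));
  for (i=0; i<nto; i++) {                       /* synchronous sends: complete only once matched */
    MPI_Issend((void*)&todata[i],1,MPI_INT,toranks[i],tag,comm,&sendreqs[i]);
  }
  while (!done) {
    int        flag,val;
    MPI_Status status;
    MPI_Iprobe(MPI_ANY_SOURCE,tag,comm,&flag,&status);
    if (flag) {                                 /* a rendezvous message arrived; receive it */
      MPI_Recv(&val,1,MPI_INT,status.MPI_SOURCE,tag,comm,MPI_STATUS_IGNORE);
      nrecvs++;
    }
    if (!barrier_started) {                     /* all local sends matched? then enter the barrier */
      int sent;
      MPI_Testall(nto,sendreqs,&sent,MPI_STATUSES_IGNORE);
      if (sent) {MPI_Ibarrier(comm,&barrier); barrier_started = 1;}
    } else {                                    /* barrier done on all ranks => no messages left */
      MPI_Test(&barrier,&done,MPI_STATUS_IGNORE);
    }
  }
  free(sendreqs);
  return nrecvs;
}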

154: static PetscErrorCode PetscCommBuildTwoSided_Allreduce(MPI_Comm comm,PetscMPIInt count,MPI_Datatype dtype,PetscMPIInt nto,const PetscMPIInt *toranks,const void *todata,PetscMPIInt *nfrom,PetscMPIInt **fromranks,void *fromdata)
155: {
157:   PetscMPIInt    size,*iflags,nrecvs,tag,*franks,i;
158:   MPI_Aint       lb,unitbytes;
159:   char           *tdata,*fdata;
160:   MPI_Request    *reqs,*sendreqs;
161:   MPI_Status     *statuses;

164:   MPI_Comm_size(comm,&size);
165:   PetscCalloc1(size,&iflags);
166:   for (i=0; i<nto; i++) iflags[toranks[i]] = 1;
167:   PetscGatherNumberOfMessages(comm,iflags,NULL,&nrecvs);
168:   PetscFree(iflags);

170:   PetscCommDuplicate(comm,&comm,&tag);
171:   MPI_Type_get_extent(dtype,&lb,&unitbytes);
172:   if (lb != 0) SETERRQ1(comm,PETSC_ERR_SUP,"Datatype with nonzero lower bound %ld\n",(long)lb);
173:   PetscMalloc(nrecvs*count*unitbytes,&fdata);
174:   tdata    = (char*)todata;
175:   PetscMalloc2(nto+nrecvs,&reqs,nto+nrecvs,&statuses);
176:   sendreqs = reqs + nrecvs;
177:   for (i=0; i<nrecvs; i++) {
178:     MPI_Irecv((void*)(fdata+count*unitbytes*i),count,dtype,MPI_ANY_SOURCE,tag,comm,reqs+i);
179:   }
180:   for (i=0; i<nto; i++) {
181:     MPI_Isend((void*)(tdata+count*unitbytes*i),count,dtype,toranks[i],tag,comm,sendreqs+i);
182:   }
183:   MPI_Waitall(nto+nrecvs,reqs,statuses);
184:   PetscMalloc1(nrecvs,&franks);
185:   for (i=0; i<nrecvs; i++) franks[i] = statuses[i].MPI_SOURCE;
186:   PetscFree2(reqs,statuses);
187:   PetscCommDestroy(&comm);

189:   *nfrom            = nrecvs;
190:   *fromranks        = franks;
191:   *(void**)fromdata = fdata;
192:   return(0);
193: }

195: #if defined(PETSC_HAVE_MPI_REDUCE_SCATTER_BLOCK)
196: static PetscErrorCode PetscCommBuildTwoSided_RedScatter(MPI_Comm comm,PetscMPIInt count,MPI_Datatype dtype,PetscMPIInt nto,const PetscMPIInt *toranks,const void *todata,PetscMPIInt *nfrom,PetscMPIInt **fromranks,void *fromdata)
197: {
199:   PetscMPIInt    size,*iflags,nrecvs,tag,*franks,i;
200:   MPI_Aint       lb,unitbytes;
201:   char           *tdata,*fdata;
202:   MPI_Request    *reqs,*sendreqs;
203:   MPI_Status     *statuses;

206:   MPI_Comm_size(comm,&size);
207:   PetscMalloc1(size,&iflags);
208:   PetscMemzero(iflags,size*sizeof(*iflags));
209:   for (i=0; i<nto; i++) iflags[toranks[i]] = 1;
210:   MPI_Reduce_scatter_block(iflags,&nrecvs,1,MPI_INT,MPI_SUM,comm);
211:   PetscFree(iflags);

213:   PetscCommDuplicate(comm,&comm,&tag);
214:   MPI_Type_get_extent(dtype,&lb,&unitbytes);
215:   if (lb != 0) SETERRQ1(comm,PETSC_ERR_SUP,"Datatype with nonzero lower bound %ld\n",(long)lb);
216:   PetscMalloc(nrecvs*count*unitbytes,&fdata);
217:   tdata    = (char*)todata;
218:   PetscMalloc2(nto+nrecvs,&reqs,nto+nrecvs,&statuses);
219:   sendreqs = reqs + nrecvs;
220:   for (i=0; i<nrecvs; i++) {
221:     MPI_Irecv((void*)(fdata+count*unitbytes*i),count,dtype,MPI_ANY_SOURCE,tag,comm,reqs+i);
222:   }
223:   for (i=0; i<nto; i++) {
224:     MPI_Isend((void*)(tdata+count*unitbytes*i),count,dtype,toranks[i],tag,comm,sendreqs+i);
225:   }
226:   MPI_Waitall(nto+nrecvs,reqs,statuses);
227:   PetscMalloc1(nrecvs,&franks);
228:   for (i=0; i<nrecvs; i++) franks[i] = statuses[i].MPI_SOURCE;
229:   PetscFree2(reqs,statuses);
230:   PetscCommDestroy(&comm);

232:   *nfrom            = nrecvs;
233:   *fromranks        = franks;
234:   *(void**)fromdata = fdata;
235:   return(0);
236: }
237: #endif
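
/* The counting step above uses the fact that if every rank raises a 0/1 flag for each
   of its destinations, summing the flag vectors over all ranks and scattering one
   entry back to each rank yields, on rank d, the number of ranks that will send to d.
   A stripped-down sketch of just that step (names are illustrative only): */
#include <mpi.h>
#include <stdlib.h>

static int count_incoming(MPI_Comm comm,int nto,const int *toranks)
{
  int size,i,nrecvs,*flags;

  MPI_Comm_size(comm,&size);
  flags = (int*)calloc(size,sizeof(int));
  for (i=0; i<nto; i++) flags[toranks[i]] = 1;  /* one flag per destination rank */
  MPI_Reduce_scatter_block(flags,&nrecvs,1,MPI_INT,MPI_SUM,comm);
  free(flags);
  return nrecvs;                                /* number of ranks that will send to me */
}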

239: /*@C
240:    PetscCommBuildTwoSided - discovers communicating ranks given one-sided information, moving constant-sized data in the process (often message lengths)

242:    Collective on MPI_Comm

244:    Input Arguments:
245: +  comm - communicator
246: .  count - number of entries to send/receive (must match on all ranks)
247: .  dtype - datatype to send/receive from each rank (must match on all ranks)
248: .  nto - number of ranks to send data to
249: .  toranks - ranks to send to (array of length nto)
250: -  todata - data to send to each rank (packed)

252:    Output Arguments:
 253: +  nfrom - number of ranks from which messages will be received
 254: .  fromranks - ranks from which messages are received (length nfrom; caller should PetscFree())
 255: -  fromdata - packed data from each rank, each with count entries of type dtype (length nfrom; caller responsible for PetscFree())

257:    Level: developer

259:    Options Database Keys:
260: .  -build_twosided <allreduce|ibarrier|redscatter> - algorithm to set up two-sided communication

262:    Notes:
263:    This memory-scalable interface is an alternative to calling PetscGatherNumberOfMessages() and
264:    PetscGatherMessageLengths(), possibly with a subsequent round of communication to send other constant-size data.

266:    Basic data types as well as contiguous types are supported, but non-contiguous (e.g., strided) types are not.

268:    References:
 269: .  1. - Hoefler, Siebert and Lumsdaine, Scalable communication protocols for dynamic sparse data exchange, 2010.
 270:    The MPI_Ibarrier-based implementation uses the algorithm from this reference.

272: .seealso: PetscGatherNumberOfMessages(), PetscGatherMessageLengths()
273: @*/
274: PetscErrorCode PetscCommBuildTwoSided(MPI_Comm comm,PetscMPIInt count,MPI_Datatype dtype,PetscMPIInt nto,const PetscMPIInt *toranks,const void *todata,PetscMPIInt *nfrom,PetscMPIInt **fromranks,void *fromdata)
275: {
276:   PetscErrorCode         ierr;
277:   PetscBuildTwoSidedType buildtype = PETSC_BUILDTWOSIDED_NOTSET;

280:   PetscSysInitializePackage();
281:   PetscLogEventBegin(PETSC_BuildTwoSided,0,0,0,0);
282:   PetscCommBuildTwoSidedGetType(comm,&buildtype);
283:   switch (buildtype) {
284:   case PETSC_BUILDTWOSIDED_IBARRIER:
285: #if defined(PETSC_HAVE_MPI_IBARRIER) || defined(PETSC_HAVE_MPIX_IBARRIER)
286:     PetscCommBuildTwoSided_Ibarrier(comm,count,dtype,nto,toranks,todata,nfrom,fromranks,fromdata);
287: #else
288:     SETERRQ(comm,PETSC_ERR_PLIB,"MPI implementation does not provide MPI_Ibarrier (part of MPI-3)");
289: #endif
290:     break;
291:   case PETSC_BUILDTWOSIDED_ALLREDUCE:
292:     PetscCommBuildTwoSided_Allreduce(comm,count,dtype,nto,toranks,todata,nfrom,fromranks,fromdata);
293:     break;
294:   case PETSC_BUILDTWOSIDED_REDSCATTER:
295: #if defined(PETSC_HAVE_MPI_REDUCE_SCATTER_BLOCK)
296:     PetscCommBuildTwoSided_RedScatter(comm,count,dtype,nto,toranks,todata,nfrom,fromranks,fromdata);
297: #else
298:     SETERRQ(comm,PETSC_ERR_PLIB,"MPI implementation does not provide MPI_Reduce_scatter_block (part of MPI-2.2)");
299: #endif
300:     break;
301:   default: SETERRQ(comm,PETSC_ERR_PLIB,"Unknown method for building two-sided communication");
302:   }
303:   PetscLogEventEnd(PETSC_BuildTwoSided,0,0,0,0);
304:   return(0);
305: }
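
/* A usage sketch (not from mpits.c; variable and function names are hypothetical):
   each rank knows the ranks it will send to and the length of each of those
   messages.  The rendezvous discovers who will send to this rank and delivers the
   matching lengths, after which correctly sized receives can be posted. */
static PetscErrorCode ExchangeLengths(MPI_Comm comm,PetscMPIInt nto,const PetscMPIInt *toranks,const PetscMPIInt *tolengths)
{
  PetscErrorCode ierr;
  PetscMPIInt    nfrom,*fromranks,*fromlengths,i;

  ierr = PetscCommBuildTwoSided(comm,1,MPI_INT,nto,toranks,tolengths,&nfrom,&fromranks,&fromlengths);CHKERRQ(ierr);
  for (i=0; i<nfrom; i++) {
    ierr = PetscSynchronizedPrintf(comm,"will receive %d entries from rank %d\n",fromlengths[i],fromranks[i]);CHKERRQ(ierr);
  }
  ierr = PetscSynchronizedFlush(comm,PETSC_STDOUT);CHKERRQ(ierr);
  ierr = PetscFree(fromranks);CHKERRQ(ierr);
  ierr = PetscFree(fromlengths);CHKERRQ(ierr);
  return 0;
}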

307: static PetscErrorCode PetscCommBuildTwoSidedFReq_Reference(MPI_Comm comm,PetscMPIInt count,MPI_Datatype dtype,PetscMPIInt nto,const PetscMPIInt *toranks,const void *todata,
308:                                                            PetscMPIInt *nfrom,PetscMPIInt **fromranks,void *fromdata,PetscMPIInt ntags,MPI_Request **toreqs,MPI_Request **fromreqs,
309:                                                            PetscErrorCode (*send)(MPI_Comm,const PetscMPIInt[],PetscMPIInt,PetscMPIInt,void*,MPI_Request[],void*),
310:                                                            PetscErrorCode (*recv)(MPI_Comm,const PetscMPIInt[],PetscMPIInt,void*,MPI_Request[],void*),void *ctx)
311: {
313:   PetscMPIInt i,*tag;
314:   MPI_Aint    lb,unitbytes;
315:   MPI_Request *sendreq,*recvreq;

318:   PetscMalloc1(ntags,&tag);
319:   if (ntags > 0) {
320:     PetscCommDuplicate(comm,&comm,&tag[0]);
321:   }
322:   for (i=1; i<ntags; i++) {
323:     PetscCommGetNewTag(comm,&tag[i]);
324:   }

326:   /* Perform complete initial rendezvous */
327:   PetscCommBuildTwoSided(comm,count,dtype,nto,toranks,todata,nfrom,fromranks,fromdata);

329:   PetscMalloc1(nto*ntags,&sendreq);
330:   PetscMalloc1(*nfrom*ntags,&recvreq);

332:   MPI_Type_get_extent(dtype,&lb,&unitbytes);
333:   if (lb != 0) SETERRQ1(comm,PETSC_ERR_SUP,"Datatype with nonzero lower bound %ld\n",(long)lb);
334:   for (i=0; i<nto; i++) {
335:     PetscMPIInt k;
336:     for (k=0; k<ntags; k++) sendreq[i*ntags+k] = MPI_REQUEST_NULL;
337:     (*send)(comm,tag,i,toranks[i],((char*)todata)+count*unitbytes*i,sendreq+i*ntags,ctx);
338:   }
339:   for (i=0; i<*nfrom; i++) {
340:     void *header = (*(char**)fromdata) + count*unitbytes*i;
341:     PetscMPIInt k;
342:     for (k=0; k<ntags; k++) recvreq[i*ntags+k] = MPI_REQUEST_NULL;
343:     (*recv)(comm,tag,(*fromranks)[i],header,recvreq+i*ntags,ctx);
344:   }
345:   PetscFree(tag);
346:   PetscCommDestroy(&comm);
347:   *toreqs = sendreq;
348:   *fromreqs = recvreq;
349:   return(0);
350: }

352: #if defined(PETSC_HAVE_MPI_IBARRIER) || defined(PETSC_HAVE_MPIX_IBARRIER)

354: static PetscErrorCode PetscCommBuildTwoSidedFReq_Ibarrier(MPI_Comm comm,PetscMPIInt count,MPI_Datatype dtype,PetscMPIInt nto,const PetscMPIInt *toranks,const void *todata,
355:                                                           PetscMPIInt *nfrom,PetscMPIInt **fromranks,void *fromdata,PetscMPIInt ntags,MPI_Request **toreqs,MPI_Request **fromreqs,
356:                                                           PetscErrorCode (*send)(MPI_Comm,const PetscMPIInt[],PetscMPIInt,PetscMPIInt,void*,MPI_Request[],void*),
357:                                                           PetscErrorCode (*recv)(MPI_Comm,const PetscMPIInt[],PetscMPIInt,void*,MPI_Request[],void*),void *ctx)
358: {
360:   PetscMPIInt    nrecvs,tag,*tags,done,i;
361:   MPI_Aint       lb,unitbytes;
362:   char           *tdata;
363:   MPI_Request    *sendreqs,*usendreqs,*req,barrier;
364:   PetscSegBuffer segrank,segdata,segreq;
365:   PetscBool      barrier_started;

368:   PetscCommDuplicate(comm,&comm,&tag);
369:   PetscMalloc1(ntags,&tags);
370:   for (i=0; i<ntags; i++) {
371:     PetscCommGetNewTag(comm,&tags[i]);
372:   }
373:   MPI_Type_get_extent(dtype,&lb,&unitbytes);
374:   if (lb != 0) SETERRQ1(comm,PETSC_ERR_SUP,"Datatype with nonzero lower bound %ld\n",(long)lb);
375:   tdata = (char*)todata;
376:   PetscMalloc1(nto,&sendreqs);
377:   PetscMalloc1(nto*ntags,&usendreqs);
378:   /* Post synchronous sends */
379:   for (i=0; i<nto; i++) {
380:     MPI_Issend((void*)(tdata+count*unitbytes*i),count,dtype,toranks[i],tag,comm,sendreqs+i);
381:   }
382:   /* Post actual payloads.  These are typically larger messages.  Hopefully sending these later does not slow down the
383:    * synchronous messages above. */
384:   for (i=0; i<nto; i++) {
385:     PetscMPIInt k;
386:     for (k=0; k<ntags; k++) usendreqs[i*ntags+k] = MPI_REQUEST_NULL;
387:     (*send)(comm,tags,i,toranks[i],tdata+count*unitbytes*i,usendreqs+i*ntags,ctx);
388:   }

390:   PetscSegBufferCreate(sizeof(PetscMPIInt),4,&segrank);
391:   PetscSegBufferCreate(unitbytes,4*count,&segdata);
392:   PetscSegBufferCreate(sizeof(MPI_Request),4,&segreq);

394:   nrecvs  = 0;
395:   barrier = MPI_REQUEST_NULL;
396:   /* MPICH-3.2 sometimes does not create a request in some "optimized" cases.  This is arguably a standard violation,
397:    * but we need to work around it. */
398:   barrier_started = PETSC_FALSE;
399:   for (done=0; !done; ) {
400:     PetscMPIInt flag;
401:     MPI_Status  status;
402:     MPI_Iprobe(MPI_ANY_SOURCE,tag,comm,&flag,&status);
403:     if (flag) {                 /* incoming message */
404:       PetscMPIInt *recvrank,k;
405:       void        *buf;
406:       PetscSegBufferGet(segrank,1,&recvrank);
407:       PetscSegBufferGet(segdata,count,&buf);
408:       *recvrank = status.MPI_SOURCE;
409:       MPI_Recv(buf,count,dtype,status.MPI_SOURCE,tag,comm,MPI_STATUS_IGNORE);
410:       PetscSegBufferGet(segreq,ntags,&req);
411:       for (k=0; k<ntags; k++) req[k] = MPI_REQUEST_NULL;
412:       (*recv)(comm,tags,status.MPI_SOURCE,buf,req,ctx);
413:       nrecvs++;
414:     }
415:     if (!barrier_started) {
416:       PetscMPIInt sent,nsends;
417:       PetscMPIIntCast(nto,&nsends);
418:       MPI_Testall(nsends,sendreqs,&sent,MPI_STATUSES_IGNORE);
419:       if (sent) {
420: #if defined(PETSC_HAVE_MPI_IBARRIER)
421:         MPI_Ibarrier(comm,&barrier);
422: #elif defined(PETSC_HAVE_MPIX_IBARRIER)
423:         MPIX_Ibarrier(comm,&barrier);
424: #endif
425:         barrier_started = PETSC_TRUE;
426:       }
427:     } else {
428:       MPI_Test(&barrier,&done,MPI_STATUS_IGNORE);
429:     }
430:   }
431:   *nfrom = nrecvs;
432:   PetscSegBufferExtractAlloc(segrank,fromranks);
433:   PetscSegBufferDestroy(&segrank);
434:   PetscSegBufferExtractAlloc(segdata,fromdata);
435:   PetscSegBufferDestroy(&segdata);
436:   *toreqs = usendreqs;
437:   PetscSegBufferExtractAlloc(segreq,fromreqs);
438:   PetscSegBufferDestroy(&segreq);
439:   PetscFree(sendreqs);
440:   PetscFree(tags);
441:   PetscCommDestroy(&comm);
442:   return(0);
443: }
444: #endif

446: /*@C
447:    PetscCommBuildTwoSidedF - discovers communicating ranks given one-sided information, calling user-defined functions during rendezvous

449:    Collective on MPI_Comm

451:    Input Arguments:
452: +  comm - communicator
453: .  count - number of entries to send/receive in initial rendezvous (must match on all ranks)
454: .  dtype - datatype to send/receive from each rank (must match on all ranks)
455: .  nto - number of ranks to send data to
456: .  toranks - ranks to send to (array of length nto)
457: .  todata - data to send to each rank (packed)
458: .  ntags - number of tags needed by send/recv callbacks
459: .  send - callback invoked on sending process when ready to send primary payload
460: .  recv - callback invoked on receiving process after delivery of rendezvous message
461: -  ctx - context for callbacks

463:    Output Arguments:
 464: +  nfrom - number of ranks from which messages will be received
 465: .  fromranks - ranks from which messages are received (length nfrom; caller should PetscFree())
 466: -  fromdata - packed data from each rank, each with count entries of type dtype (length nfrom; caller responsible for PetscFree())

468:    Level: developer

470:    Notes:
471:    This memory-scalable interface is an alternative to calling PetscGatherNumberOfMessages() and
472:    PetscGatherMessageLengths(), possibly with a subsequent round of communication to send other data.

474:    Basic data types as well as contiguous types are supported, but non-contiguous (e.g., strided) types are not.

476:    References:
 477: .  1. - Hoefler, Siebert and Lumsdaine, Scalable communication protocols for dynamic sparse data exchange, 2010.
 478:    The MPI_Ibarrier-based implementation uses the algorithm from this reference.

480: .seealso: PetscCommBuildTwoSided(), PetscCommBuildTwoSidedFReq(), PetscGatherNumberOfMessages(), PetscGatherMessageLengths()
481: @*/
482: PetscErrorCode PetscCommBuildTwoSidedF(MPI_Comm comm,PetscMPIInt count,MPI_Datatype dtype,PetscMPIInt nto,const PetscMPIInt *toranks,const void *todata,PetscMPIInt *nfrom,PetscMPIInt **fromranks,void *fromdata,PetscMPIInt ntags,
483:                                        PetscErrorCode (*send)(MPI_Comm,const PetscMPIInt[],PetscMPIInt,PetscMPIInt,void*,MPI_Request[],void*),
484:                                        PetscErrorCode (*recv)(MPI_Comm,const PetscMPIInt[],PetscMPIInt,void*,MPI_Request[],void*),void *ctx)
485: {
487:   MPI_Request    *toreqs,*fromreqs;

490:   PetscCommBuildTwoSidedFReq(comm,count,dtype,nto,toranks,todata,nfrom,fromranks,fromdata,ntags,&toreqs,&fromreqs,send,recv,ctx);
491:   MPI_Waitall(nto*ntags,toreqs,MPI_STATUSES_IGNORE);
492:   MPI_Waitall(*nfrom*ntags,fromreqs,MPI_STATUSES_IGNORE);
493:   PetscFree(toreqs);
494:   PetscFree(fromreqs);
495:   return(0);
496: }
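
/* A callback sketch for PetscCommBuildTwoSidedF() (not from mpits.c; the context
   structure and all names are hypothetical).  The rendezvous message carries a
   payload length (count=1, MPI_INT); the send callback posts the payload with
   MPI_Isend and the recv callback allocates space and posts the matching MPI_Irecv,
   each using the single provided tag (ntags=1).  A call would then look like
     PetscCommBuildTwoSidedF(comm,1,MPI_INT,nto,toranks,lengths,
                             &nfrom,&fromranks,&fromlengths,1,SendPayload,RecvPayload,&ctx);
   which waits on all payload requests before returning. */
typedef struct {
  PetscScalar **sendbufs;  /* sendbufs[i] holds the payload destined for toranks[i]          */
  PetscScalar **recvbufs;  /* indexed by source rank here for simplicity (comm-size entries) */
} ExchangeCtx;

static PetscErrorCode SendPayload(MPI_Comm comm,const PetscMPIInt tags[],PetscMPIInt i,PetscMPIInt rank,void *header,MPI_Request req[],void *ctx)
{
  ExchangeCtx *ex  = (ExchangeCtx*)ctx;
  PetscMPIInt  len = *(PetscMPIInt*)header;          /* rendezvous data = payload length */

  MPI_Isend(ex->sendbufs[i],len,MPIU_SCALAR,rank,tags[0],comm,&req[0]);
  return 0;
}

static PetscErrorCode RecvPayload(MPI_Comm comm,const PetscMPIInt tags[],PetscMPIInt rank,void *header,MPI_Request req[],void *ctx)
{
  ExchangeCtx    *ex  = (ExchangeCtx*)ctx;
  PetscMPIInt     len = *(PetscMPIInt*)header;
  PetscErrorCode  ierr;

  ierr = PetscMalloc1(len,&ex->recvbufs[rank]);CHKERRQ(ierr);
  MPI_Irecv(ex->recvbufs[rank],len,MPIU_SCALAR,rank,tags[0],comm,&req[0]);
  return 0;
}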

498: /*@C
499:    PetscCommBuildTwoSidedFReq - discovers communicating ranks given one-sided information, calling user-defined functions during rendezvous, returns requests

501:    Collective on MPI_Comm

503:    Input Arguments:
504: +  comm - communicator
505: .  count - number of entries to send/receive in initial rendezvous (must match on all ranks)
506: .  dtype - datatype to send/receive from each rank (must match on all ranks)
507: .  nto - number of ranks to send data to
508: .  toranks - ranks to send to (array of length nto)
509: .  todata - data to send to each rank (packed)
510: .  ntags - number of tags needed by send/recv callbacks
511: .  send - callback invoked on sending process when ready to send primary payload
512: .  recv - callback invoked on receiving process after delivery of rendezvous message
513: -  ctx - context for callbacks

515:    Output Arguments:
 516: +  nfrom - number of ranks from which messages will be received
 517: .  fromranks - ranks from which messages are received (length nfrom; caller should PetscFree())
 518: .  fromdata - packed data from each rank, each with count entries of type dtype (length nfrom; caller responsible for PetscFree())
519: .  toreqs - array of nto*ntags sender requests (caller must wait on these, then PetscFree())
520: -  fromreqs - array of nfrom*ntags receiver requests (caller must wait on these, then PetscFree())

522:    Level: developer

524:    Notes:
525:    This memory-scalable interface is an alternative to calling PetscGatherNumberOfMessages() and
526:    PetscGatherMessageLengths(), possibly with a subsequent round of communication to send other data.

528:    Basic data types as well as contiguous types are supported, but non-contiguous (e.g., strided) types are not.

530:    References:
 531: .  1. - Hoefler, Siebert and Lumsdaine, Scalable communication protocols for dynamic sparse data exchange, 2010.
 532:    The MPI_Ibarrier-based implementation uses the algorithm from this reference.

534: .seealso: PetscCommBuildTwoSided(), PetscCommBuildTwoSidedF(), PetscGatherNumberOfMessages(), PetscGatherMessageLengths()
535: @*/
536: PetscErrorCode PetscCommBuildTwoSidedFReq(MPI_Comm comm,PetscMPIInt count,MPI_Datatype dtype,PetscMPIInt nto,const PetscMPIInt *toranks,const void *todata,
537:                                           PetscMPIInt *nfrom,PetscMPIInt **fromranks,void *fromdata,PetscMPIInt ntags,MPI_Request **toreqs,MPI_Request **fromreqs,
538:                                           PetscErrorCode (*send)(MPI_Comm,const PetscMPIInt[],PetscMPIInt,PetscMPIInt,void*,MPI_Request[],void*),
539:                                           PetscErrorCode (*recv)(MPI_Comm,const PetscMPIInt[],PetscMPIInt,void*,MPI_Request[],void*),void *ctx)
540: {
541:   PetscErrorCode         ierr,(*f)(MPI_Comm,PetscMPIInt,MPI_Datatype,PetscMPIInt,const PetscMPIInt[],const void*,
542:                                    PetscMPIInt*,PetscMPIInt**,void*,PetscMPIInt,MPI_Request**,MPI_Request**,
543:                                    PetscErrorCode (*send)(MPI_Comm,const PetscMPIInt[],PetscMPIInt,PetscMPIInt,void*,MPI_Request[],void*),
544:                                    PetscErrorCode (*recv)(MPI_Comm,const PetscMPIInt[],PetscMPIInt,void*,MPI_Request[],void*),void *ctx);
545:   PetscBuildTwoSidedType buildtype = PETSC_BUILDTWOSIDED_NOTSET;
546:   PetscMPIInt i,size;

549:   PetscSysInitializePackage();
550:   MPI_Comm_size(comm,&size);
551:   for (i=0; i<nto; i++) {
552:     if (toranks[i] < 0 || size <= toranks[i]) SETERRQ3(comm,PETSC_ERR_ARG_OUTOFRANGE,"toranks[%d] %d not in comm size %d",i,toranks[i],size);
553:   }
554:   PetscLogEventBegin(PETSC_BuildTwoSidedF,0,0,0,0);
555:   PetscCommBuildTwoSidedGetType(comm,&buildtype);
556:   switch (buildtype) {
557:   case PETSC_BUILDTWOSIDED_IBARRIER:
558: #if defined(PETSC_HAVE_MPI_IBARRIER) || defined(PETSC_HAVE_MPIX_IBARRIER)
559:     f = PetscCommBuildTwoSidedFReq_Ibarrier;
560: #else
561:     SETERRQ(comm,PETSC_ERR_PLIB,"MPI implementation does not provide MPI_Ibarrier (part of MPI-3)");
562: #endif
563:     break;
564:   case PETSC_BUILDTWOSIDED_ALLREDUCE:
565:   case PETSC_BUILDTWOSIDED_REDSCATTER:
566:     f = PetscCommBuildTwoSidedFReq_Reference;
567:     break;
568:   default: SETERRQ(comm,PETSC_ERR_PLIB,"Unknown method for building two-sided communication");
569:   }
570:   (*f)(comm,count,dtype,nto,toranks,todata,nfrom,fromranks,fromdata,ntags,toreqs,fromreqs,send,recv,ctx);
571:   PetscLogEventEnd(PETSC_BuildTwoSidedF,0,0,0,0);
572:   return(0);
573: }