Actual source code: mpimesg.c


#include <petscsys.h>
#include <petsc/private/mpiutils.h>

/*@C
  PetscGatherNumberOfMessages - Computes the number of messages a node expects to receive

  Collective

  Input Parameters:
+ comm     - Communicator
. iflags   - an array of integers of length equal to the size of comm. A '1' in iflags[i] indicates a
             message from the current node to the ith node. Optionally NULL
- ilengths - a nonzero ilengths[i] represents a message to node i of length ilengths[i].
             Optionally NULL

  Output Parameter:
. nrecvs   - number of messages the current node will receive

  Level: developer

  Notes:
  With this info, the correct message lengths can be determined using
  PetscGatherMessageLengths()

  Either iflags or ilengths should be provided. If iflags is not
  provided (NULL), it is computed from ilengths. If iflags is
  provided, ilengths is not required.

.seealso: PetscGatherMessageLengths()
@*/
PetscErrorCode  PetscGatherNumberOfMessages(MPI_Comm comm,const PetscMPIInt iflags[],const PetscMPIInt ilengths[],PetscMPIInt *nrecvs)
{
  PetscMPIInt    size,rank,*recv_buf,i,*iflags_local = NULL,*iflags_localm = NULL;

  MPI_Comm_size(comm,&size);
  MPI_Comm_rank(comm,&rank);

  PetscMalloc2(size,&recv_buf,size,&iflags_localm);

  /* If iflags not provided, compute iflags from ilengths */
  if (!iflags) {
    iflags_local = iflags_localm;
    for (i=0; i<size; i++) {
      if (ilengths[i]) iflags_local[i] = 1;
      else iflags_local[i] = 0;
    }
  } else iflags_local = (PetscMPIInt*) iflags;

  /* Post an allreduce to determine the number of messages the current node will receive */
  MPIU_Allreduce(iflags_local,recv_buf,size,MPI_INT,MPI_SUM,comm);
  *nrecvs = recv_buf[rank];

  PetscFree2(recv_buf,iflags_localm);
  return 0;
}
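
/*
   A minimal usage sketch (illustrative names; error checking omitted, and comm is assumed to be
   a valid communicator): each rank sends a single message of length 5 to the next rank, iflags
   is passed as NULL so the flags are derived from ilengths, and every rank therefore ends up
   with nrecvs == 1.

     PetscMPIInt size, rank, nrecvs, *ilengths;
     MPI_Comm_size(comm, &size);
     MPI_Comm_rank(comm, &rank);
     PetscCalloc1(size, &ilengths);            // zero-initialized: no messages by default
     ilengths[(rank + 1) % size] = 5;          // one message of length 5 to the next rank
     PetscGatherNumberOfMessages(comm, NULL, ilengths, &nrecvs);
     PetscFree(ilengths);
*/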

/*@C
  PetscGatherMessageLengths - Computes info about messages that an MPI node will receive,
  including (from-id,length) pairs for each message.

  Collective

  Input Parameters:
+ comm      - Communicator
. nsends    - number of messages that are to be sent
. nrecvs    - number of messages being received
- ilengths  - an array of integers of length equal to the size of comm;
              a nonzero ilengths[i] represents a message to node i of length ilengths[i]

  Output Parameters:
+ onodes    - list of node-ids from which messages are expected
- olengths  - corresponding message lengths

  Level: developer

  Notes:
  With this info, the correct MPI_Irecv() can be posted with the correct
  from-id and a buffer of the required size.

  The arrays onodes and olengths are allocated by this function; the calling function is
  responsible for deallocating them.

  To determine nrecvs, one can use PetscGatherNumberOfMessages()

.seealso: PetscGatherNumberOfMessages()
@*/
PetscErrorCode  PetscGatherMessageLengths(MPI_Comm comm,PetscMPIInt nsends,PetscMPIInt nrecvs,const PetscMPIInt ilengths[],PetscMPIInt **onodes,PetscMPIInt **olengths)
{
  PetscMPIInt    size,rank,tag,i,j;
  MPI_Request    *s_waits  = NULL,*r_waits = NULL;
  MPI_Status     *w_status = NULL;

  MPI_Comm_size(comm,&size);
  MPI_Comm_rank(comm,&rank);
  PetscCommGetNewTag(comm,&tag);

  /* cannot use PetscMalloc3() here because the requests passed to MPI_Waitall() MUST be contiguous */
  PetscMalloc2(nrecvs+nsends,&r_waits,nrecvs+nsends,&w_status);
  s_waits = r_waits+nrecvs;

  /* Post the Irecvs to get the message length-info */
  PetscMalloc1(nrecvs,olengths);
  for (i=0; i<nrecvs; i++) {
    MPI_Irecv((*olengths)+i,1,MPI_INT,MPI_ANY_SOURCE,tag,comm,r_waits+i);
  }

  /* Post the Isends with the message length-info */
  for (i=0,j=0; i<size; ++i) {
    if (ilengths[i]) {
      MPI_Isend((void*)(ilengths+i),1,MPI_INT,i,tag,comm,s_waits+j);
      j++;
    }
  }

  /* Post waits on sends and receives */
  if (nrecvs+nsends) MPI_Waitall(nrecvs+nsends,r_waits,w_status);

  /* Pack up the received data */
  PetscMalloc1(nrecvs,onodes);
  for (i=0; i<nrecvs; ++i) {
    (*onodes)[i] = w_status[i].MPI_SOURCE;
#if defined(PETSC_HAVE_OMPI_MAJOR_VERSION)
    /* This line is a workaround for a bug in OpenMPI-2.1.1 distributed by Ubuntu-18.04.2 LTS.
       It happens in self-to-self MPI_Send/Recv using MPI_ANY_SOURCE for message matching. OpenMPI
       does not put the correct value in the recv buffer. See also
       https://lists.mcs.anl.gov/pipermail/petsc-dev/2019-July/024803.html
       https://www.mail-archive.com/users@lists.open-mpi.org//msg33383.html
     */
    if (w_status[i].MPI_SOURCE == rank) (*olengths)[i] = ilengths[rank];
#endif
  }
  PetscFree2(r_waits,w_status);
  return 0;
}
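
/*
   A minimal sketch of how the two gather routines are meant to be paired (illustrative names;
   error checking omitted; ilengths[] is assumed to be already filled so that ilengths[i] is the
   length of the message destined for rank i): PetscGatherNumberOfMessages() supplies nrecvs,
   PetscGatherMessageLengths() then returns the sender ranks and their message lengths, and the
   caller frees onodes and olengths.

     PetscMPIInt i, size, nsends = 0, nrecvs, *onodes, *olengths;
     MPI_Comm_size(comm, &size);
     for (i = 0; i < size; i++) if (ilengths[i]) nsends++;   // count outgoing messages
     PetscGatherNumberOfMessages(comm, NULL, ilengths, &nrecvs);
     PetscGatherMessageLengths(comm, nsends, nrecvs, ilengths, &onodes, &olengths);
     // onodes[i] is the rank that will send a message of length olengths[i]
     PetscFree(onodes);
     PetscFree(olengths);
*/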

/* Same as PetscGatherNumberOfMessages(), except using PetscInt for ilengths[] */
PetscErrorCode  PetscGatherNumberOfMessages_Private(MPI_Comm comm,const PetscMPIInt iflags[],const PetscInt ilengths[],PetscMPIInt *nrecvs)
{
  PetscMPIInt    size,rank,*recv_buf,i,*iflags_local = NULL,*iflags_localm = NULL;

  MPI_Comm_size(comm,&size);
  MPI_Comm_rank(comm,&rank);

  PetscMalloc2(size,&recv_buf,size,&iflags_localm);

  /* If iflags not provided, compute iflags from ilengths */
  if (!iflags) {
    iflags_local = iflags_localm;
    for (i=0; i<size; i++) {
      if (ilengths[i]) iflags_local[i] = 1;
      else iflags_local[i] = 0;
    }
  } else iflags_local = (PetscMPIInt*) iflags;

  /* Post an allreduce to determine the number of messages the current node will receive */
  MPIU_Allreduce(iflags_local,recv_buf,size,MPI_INT,MPI_SUM,comm);
  *nrecvs = recv_buf[rank];

  PetscFree2(recv_buf,iflags_localm);
  return 0;
}

/* Same as PetscGatherMessageLengths(), except using PetscInt for message lengths */
PetscErrorCode  PetscGatherMessageLengths_Private(MPI_Comm comm,PetscMPIInt nsends,PetscMPIInt nrecvs,const PetscInt ilengths[],PetscMPIInt **onodes,PetscInt **olengths)
{
  PetscMPIInt    size,rank,tag,i,j;
  MPI_Request    *s_waits  = NULL,*r_waits = NULL;
  MPI_Status     *w_status = NULL;

  MPI_Comm_size(comm,&size);
  MPI_Comm_rank(comm,&rank);
  PetscCommGetNewTag(comm,&tag);

  /* cannot use PetscMalloc3() here because the requests passed to MPI_Waitall() MUST be contiguous */
  PetscMalloc2(nrecvs+nsends,&r_waits,nrecvs+nsends,&w_status);
  s_waits = r_waits+nrecvs;

  /* Post the Irecvs to get the message length-info */
  PetscMalloc1(nrecvs,olengths);
  for (i=0; i<nrecvs; i++) {
    MPI_Irecv((*olengths)+i,1,MPIU_INT,MPI_ANY_SOURCE,tag,comm,r_waits+i);
  }

  /* Post the Isends with the message length-info */
  for (i=0,j=0; i<size; ++i) {
    if (ilengths[i]) {
      MPI_Isend((void*)(ilengths+i),1,MPIU_INT,i,tag,comm,s_waits+j);
      j++;
    }
  }

  /* Post waits on sends and receives */
  if (nrecvs+nsends) MPI_Waitall(nrecvs+nsends,r_waits,w_status);

  /* Pack up the received data */
  PetscMalloc1(nrecvs,onodes);
  for (i=0; i<nrecvs; ++i) {
    (*onodes)[i] = w_status[i].MPI_SOURCE;
    if (w_status[i].MPI_SOURCE == rank) (*olengths)[i] = ilengths[rank]; /* See comments in PetscGatherMessageLengths */
  }
  PetscFree2(r_waits,w_status);
  return 0;
}

/*@C
  PetscGatherMessageLengths2 - Computes info about messages that an MPI node will receive,
  including (from-id,length) pairs for each message. Same functionality as PetscGatherMessageLengths()
  except it takes TWO ilengths arrays and outputs TWO olengths arrays.

  Collective

  Input Parameters:
+ comm      - Communicator
. nsends    - number of messages that are to be sent
. nrecvs    - number of messages being received
. ilengths1 - first array of integers of length equal to the size of comm
- ilengths2 - second array of integers of length equal to the size of comm

  Output Parameters:
+ onodes    - list of node-ids from which messages are expected
. olengths1 - first corresponding message lengths
- olengths2 - second corresponding message lengths

  Level: developer

  Notes:
  With this info, the correct MPI_Irecv() can be posted with the correct
  from-id and a buffer of the required size.

  The arrays onodes, olengths1, and olengths2 are allocated by this function; the calling function
  is responsible for deallocating them.

  To determine nrecvs, one can use PetscGatherNumberOfMessages()

.seealso: PetscGatherMessageLengths(), PetscGatherNumberOfMessages()
@*/
PetscErrorCode  PetscGatherMessageLengths2(MPI_Comm comm,PetscMPIInt nsends,PetscMPIInt nrecvs,const PetscMPIInt ilengths1[],const PetscMPIInt ilengths2[],PetscMPIInt **onodes,PetscMPIInt **olengths1,PetscMPIInt **olengths2)
{
  PetscMPIInt    size,tag,i,j,*buf_s = NULL,*buf_r = NULL,*buf_j = NULL;
  MPI_Request    *s_waits  = NULL,*r_waits = NULL;
  MPI_Status     *w_status = NULL;

  MPI_Comm_size(comm,&size);
  PetscCommGetNewTag(comm,&tag);

  /* cannot use PetscMalloc5() because r_waits and s_waits must be contiguous for the call to MPI_Waitall() */
  PetscMalloc4(nrecvs+nsends,&r_waits,2*nrecvs,&buf_r,2*nsends,&buf_s,nrecvs+nsends,&w_status);
  s_waits = r_waits + nrecvs;

  /* Post the Irecvs to get the message length-info */
  PetscMalloc1(nrecvs+1,olengths1);
  PetscMalloc1(nrecvs+1,olengths2);
  for (i=0; i<nrecvs; i++) {
    buf_j = buf_r + (2*i);
    MPI_Irecv(buf_j,2,MPI_INT,MPI_ANY_SOURCE,tag,comm,r_waits+i);
  }

  /* Post the Isends with the message length-info */
  for (i=0,j=0; i<size; ++i) {
    if (ilengths1[i]) {
      buf_j    = buf_s + (2*j);
      buf_j[0] = *(ilengths1+i);
      buf_j[1] = *(ilengths2+i);
      MPI_Isend(buf_j,2,MPI_INT,i,tag,comm,s_waits+j);
      j++;
    }
  }

  /* Post waits on sends and receives */
  if (nrecvs+nsends) MPI_Waitall(nrecvs+nsends,r_waits,w_status);

  /* Pack up the received data */
  PetscMalloc1(nrecvs+1,onodes);
  for (i=0; i<nrecvs; ++i) {
    (*onodes)[i]    = w_status[i].MPI_SOURCE;
    buf_j           = buf_r + (2*i);
    (*olengths1)[i] = buf_j[0];
    (*olengths2)[i] = buf_j[1];
  }

  PetscFree4(r_waits,buf_r,buf_s,w_status);
  return 0;
}
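
/*
   A minimal sketch (illustrative names; error checking omitted; nsends, nrecvs, ilengths1 and
   ilengths2 are assumed to be set up as for PetscGatherMessageLengths(), with ilengths1[i]
   nonzero exactly when a message goes to rank i): both length arrays are exchanged in a single
   round, and the caller frees the three output arrays.

     PetscMPIInt *onodes, *olengths1, *olengths2;
     PetscGatherMessageLengths2(comm, nsends, nrecvs, ilengths1, ilengths2, &onodes, &olengths1, &olengths2);
     // rank onodes[i] will send olengths1[i] items of one kind and olengths2[i] of the other
     PetscFree(onodes);
     PetscFree(olengths1);
     PetscFree(olengths2);
*/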

/*
  Allocate a buffer sufficient to hold the messages of the sizes specified in olengths,
  and post Irecvs on these buffers using the node info from onodes
 */
PetscErrorCode  PetscPostIrecvInt(MPI_Comm comm,PetscMPIInt tag,PetscMPIInt nrecvs,const PetscMPIInt onodes[],const PetscMPIInt olengths[],PetscInt ***rbuf,MPI_Request **r_waits)
{
  PetscInt       **rbuf_t,i,len = 0;
  MPI_Request    *r_waits_t;

  /* compute memory required for recv buffers */
  for (i=0; i<nrecvs; i++) len += olengths[i];  /* each message length */

  /* allocate memory for recv buffers */
  PetscMalloc1(nrecvs+1,&rbuf_t);
  PetscMalloc1(len,&rbuf_t[0]);
  for (i=1; i<nrecvs; ++i) rbuf_t[i] = rbuf_t[i-1] + olengths[i-1];

  /* Post the receives */
  PetscMalloc1(nrecvs,&r_waits_t);
  for (i=0; i<nrecvs; ++i) {
    MPI_Irecv(rbuf_t[i],olengths[i],MPIU_INT,onodes[i],tag,comm,r_waits_t+i);
  }

  *rbuf    = rbuf_t;
  *r_waits = r_waits_t;
  return 0;
}
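
/*
   A minimal sketch of pairing PetscPostIrecvInt() with the matching sends (illustrative names;
   error checking omitted; nrecvs, onodes and olengths are assumed to come from the gather
   routines above): the receives are posted on the buffers allocated here, the matching
   MPI_Isend() calls of MPIU_INT data use the same tag, and the receives are completed with
   MPI_Waitall(). Since rbuf[0] holds the single contiguous allocation, only rbuf[0] and rbuf
   are freed.

     PetscInt    **rbuf;
     MPI_Request *r_waits;
     PetscMPIInt tag;
     PetscCommGetNewTag(comm, &tag);
     PetscPostIrecvInt(comm, tag, nrecvs, onodes, olengths, &rbuf, &r_waits);
     // ... post the matching MPI_Isend() calls with datatype MPIU_INT and the same tag ...
     MPI_Waitall(nrecvs, r_waits, MPI_STATUSES_IGNORE);
     PetscFree(r_waits);
     PetscFree(rbuf[0]);
     PetscFree(rbuf);
*/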

PetscErrorCode  PetscPostIrecvScalar(MPI_Comm comm,PetscMPIInt tag,PetscMPIInt nrecvs,const PetscMPIInt onodes[],const PetscMPIInt olengths[],PetscScalar ***rbuf,MPI_Request **r_waits)
{
  PetscMPIInt    i;
  PetscScalar    **rbuf_t;
  MPI_Request    *r_waits_t;
  PetscInt       len = 0;

  /* compute memory required for recv buffers */
  for (i=0; i<nrecvs; i++) len += olengths[i];  /* each message length */

  /* allocate memory for recv buffers */
  PetscMalloc1(nrecvs+1,&rbuf_t);
  PetscMalloc1(len,&rbuf_t[0]);
  for (i=1; i<nrecvs; ++i) rbuf_t[i] = rbuf_t[i-1] + olengths[i-1];

  /* Post the receives */
  PetscMalloc1(nrecvs,&r_waits_t);
  for (i=0; i<nrecvs; ++i) {
    MPI_Irecv(rbuf_t[i],olengths[i],MPIU_SCALAR,onodes[i],tag,comm,r_waits_t+i);
  }

  *rbuf    = rbuf_t;
  *r_waits = r_waits_t;
  return 0;
}