Actual source code: mpimesg.c


  2: #include <petscsys.h>


  5: /*@C
  6:   PetscGatherNumberOfMessages -  Computes the number of messages a node expects to receive

  8:   Collective

 10:   Input Parameters:
 11: + comm     - Communicator
 12: . iflags   - an array of integers of length sizeof(comm). A '1' in ilengths[i] represent a
 13:              message from current node to ith node. Optionally NULL
 14: - ilengths - Non zero ilengths[i] represent a message to i of length ilengths[i].
 15:              Optionally NULL.

 17:   Output Parameters:
 18: . nrecvs    - number of messages received

 20:   Level: developer

 22:   Notes:
 23:   With this info, the correct message lengths can be determined using
 24:   PetscGatherMessageLengths()

 26:   Either iflags or ilengths should be provided.  If iflags is not
 27:   provided (NULL) it can be computed from ilengths. If iflags is
 28:   provided, ilengths is not required.

 30: .seealso: PetscGatherMessageLengths()
 31: @*/
 32: PetscErrorCode  PetscGatherNumberOfMessages(MPI_Comm comm,const PetscMPIInt iflags[],const PetscMPIInt ilengths[],PetscMPIInt *nrecvs)
 33: {
 34:   PetscMPIInt    size,rank,*recv_buf,i,*iflags_local = NULL,*iflags_localm = NULL;

 38:   MPI_Comm_size(comm,&size);
 39:   MPI_Comm_rank(comm,&rank);

 41:   PetscMalloc2(size,&recv_buf,size,&iflags_localm);

 43:   /* If iflags not provided, compute iflags from ilengths */
 44:   if (!iflags) {
 45:     if (!ilengths) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Either iflags or ilengths should be provided");
 46:     iflags_local = iflags_localm;
 47:     for (i=0; i<size; i++) {
 48:       if (ilengths[i]) iflags_local[i] = 1;
 49:       else iflags_local[i] = 0;
 50:     }
 51:   } else iflags_local = (PetscMPIInt*) iflags;

 53:   /* Post an allreduce to determine the numer of messages the current node will receive */
 54:   MPIU_Allreduce(iflags_local,recv_buf,size,MPI_INT,MPI_SUM,comm);
 55:   *nrecvs = recv_buf[rank];

 57:   PetscFree2(recv_buf,iflags_localm);
 58:   return(0);
 59: }


 62: /*@C
 63:   PetscGatherMessageLengths - Computes info about messages that a MPI-node will receive,
 64:   including (from-id,length) pairs for each message.

 66:   Collective

 68:   Input Parameters:
 69: + comm      - Communicator
 70: . nsends    - number of messages that are to be sent.
 71: . nrecvs    - number of messages being received
 72: - ilengths  - an array of integers of length sizeof(comm)
 73:               a non zero ilengths[i] represent a message to i of length ilengths[i]


 76:   Output Parameters:
 77: + onodes    - list of node-ids from which messages are expected
 78: - olengths  - corresponding message lengths

 80:   Level: developer

 82:   Notes:
 83:   With this info, the correct MPI_Irecv() can be posted with the correct
 84:   from-id, with a buffer with the right amount of memory required.

 86:   The calling function deallocates the memory in onodes and olengths

 88:   To determine nrecvs, one can use PetscGatherNumberOfMessages()

 90: .seealso: PetscGatherNumberOfMessages()
 91: @*/
 92: PetscErrorCode  PetscGatherMessageLengths(MPI_Comm comm,PetscMPIInt nsends,PetscMPIInt nrecvs,const PetscMPIInt ilengths[],PetscMPIInt **onodes,PetscMPIInt **olengths)
 93: {
 95:   PetscMPIInt    size,rank,tag,i,j;
 96:   MPI_Request    *s_waits  = NULL,*r_waits = NULL;
 97:   MPI_Status     *w_status = NULL;

100:   MPI_Comm_size(comm,&size);
101:   MPI_Comm_rank(comm,&rank);
102:   PetscCommGetNewTag(comm,&tag);

104:   /* cannot use PetscMalloc3() here because in the call to MPI_Waitall() they MUST be contiguous */
105:   PetscMalloc2(nrecvs+nsends,&r_waits,nrecvs+nsends,&w_status);
106:   s_waits = r_waits+nrecvs;

108:   /* Post the Irecv to get the message length-info */
109:   PetscMalloc1(nrecvs,olengths);
110:   for (i=0; i<nrecvs; i++) {
111:     MPI_Irecv((*olengths)+i,1,MPI_INT,MPI_ANY_SOURCE,tag,comm,r_waits+i);
112:   }

114:   /* Post the Isends with the message length-info */
115:   for (i=0,j=0; i<size; ++i) {
116:     if (ilengths[i]) {
117:       MPI_Isend((void*)(ilengths+i),1,MPI_INT,i,tag,comm,s_waits+j);
118:       j++;
119:     }
120:   }

122:   /* Post waits on sends and receivs */
123:   if (nrecvs+nsends) {MPI_Waitall(nrecvs+nsends,r_waits,w_status);}

125:   /* Pack up the received data */
126:   PetscMalloc1(nrecvs,onodes);
127:   for (i=0; i<nrecvs; ++i) {
128:     (*onodes)[i] = w_status[i].MPI_SOURCE;
129: #if defined(PETSC_HAVE_OMPI_MAJOR_VERSION)
130:     /* This line is a workaround for a bug in OpenMPI-2.1.1 distributed by Ubuntu-18.04.2 LTS.
131:        It happens in self-to-self MPI_Send/Recv using MPI_ANY_SOURCE for message matching. OpenMPI
132:        does not put correct value in recv buffer. See also
133:        https://lists.mcs.anl.gov/pipermail/petsc-dev/2019-July/024803.html
134:        https://www.mail-archive.com/users@lists.open-mpi.org//msg33383.html
135:      */
136:     if (w_status[i].MPI_SOURCE == rank) (*olengths)[i] = ilengths[rank];
137: #endif
138:   }
139:   PetscFree2(r_waits,w_status);
140:   return(0);
141: }

143: /*@C
144:   PetscGatherMessageLengths2 - Computes info about messages that a MPI-node will receive,
145:   including (from-id,length) pairs for each message. Same functionality as PetscGatherMessageLengths()
146:   except it takes TWO ilenths and output TWO olengths.

148:   Collective

150:   Input Parameters:
151: + comm      - Communicator
152: . nsends    - number of messages that are to be sent.
153: . nrecvs    - number of messages being received
154: - ilengths1, ilengths2 - array of integers of length sizeof(comm)
155:               a non zero ilengths[i] represent a message to i of length ilengths[i]

157:   Output Parameters:
158: + onodes    - list of node-ids from which messages are expected
159: - olengths1, olengths2 - corresponding message lengths

161:   Level: developer

163:   Notes:
164:   With this info, the correct MPI_Irecv() can be posted with the correct
165:   from-id, with a buffer with the right amount of memory required.

167:   The calling function deallocates the memory in onodes and olengths

169:   To determine nrecvs, one can use PetscGatherNumberOfMessages()

171: .seealso: PetscGatherMessageLengths() and PetscGatherNumberOfMessages()
172: @*/
173: PetscErrorCode  PetscGatherMessageLengths2(MPI_Comm comm,PetscMPIInt nsends,PetscMPIInt nrecvs,const PetscMPIInt ilengths1[],const PetscMPIInt ilengths2[],PetscMPIInt **onodes,PetscMPIInt **olengths1,PetscMPIInt **olengths2)
174: {
176:   PetscMPIInt    size,tag,i,j,*buf_s = NULL,*buf_r = NULL,*buf_j = NULL;
177:   MPI_Request    *s_waits  = NULL,*r_waits = NULL;
178:   MPI_Status     *w_status = NULL;

181:   MPI_Comm_size(comm,&size);
182:   PetscCommGetNewTag(comm,&tag);

184:   /* cannot use PetscMalloc5() because r_waits and s_waits must be contiguous for the call to MPI_Waitall() */
185:   PetscMalloc4(nrecvs+nsends,&r_waits,2*nrecvs,&buf_r,2*nsends,&buf_s,nrecvs+nsends,&w_status);
186:   s_waits = r_waits + nrecvs;

188:   /* Post the Irecv to get the message length-info */
189:   PetscMalloc1(nrecvs+1,olengths1);
190:   PetscMalloc1(nrecvs+1,olengths2);
191:   for (i=0; i<nrecvs; i++) {
192:     buf_j = buf_r + (2*i);
193:     MPI_Irecv(buf_j,2,MPI_INT,MPI_ANY_SOURCE,tag,comm,r_waits+i);
194:   }

196:   /* Post the Isends with the message length-info */
197:   for (i=0,j=0; i<size; ++i) {
198:     if (ilengths1[i]) {
199:       buf_j    = buf_s + (2*j);
200:       buf_j[0] = *(ilengths1+i);
201:       buf_j[1] = *(ilengths2+i);
202:       MPI_Isend(buf_j,2,MPI_INT,i,tag,comm,s_waits+j);
203:       j++;
204:     }
205:   }
206:   if (j != nsends) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"j %d not equal to expected number of sends %d\n",j,nsends);

208:   /* Post waits on sends and receivs */
209:   if (nrecvs+nsends) {MPI_Waitall(nrecvs+nsends,r_waits,w_status);}


212:   /* Pack up the received data */
213:   PetscMalloc1(nrecvs+1,onodes);
214:   for (i=0; i<nrecvs; ++i) {
215:     (*onodes)[i]    = w_status[i].MPI_SOURCE;
216:     buf_j           = buf_r + (2*i);
217:     (*olengths1)[i] = buf_j[0];
218:     (*olengths2)[i] = buf_j[1];
219:   }

221:   PetscFree4(r_waits,buf_r,buf_s,w_status);
222:   return(0);
223: }

225: /*

227:   Allocate a bufffer sufficient to hold messages of size specified in olengths.
228:   And post Irecvs on these buffers using node info from onodes

230:  */
231: PetscErrorCode  PetscPostIrecvInt(MPI_Comm comm,PetscMPIInt tag,PetscMPIInt nrecvs,const PetscMPIInt onodes[],const PetscMPIInt olengths[],PetscInt ***rbuf,MPI_Request **r_waits)
232: {
234:   PetscInt       **rbuf_t,i,len = 0;
235:   MPI_Request    *r_waits_t;

238:   /* compute memory required for recv buffers */
239:   for (i=0; i<nrecvs; i++) len += olengths[i];  /* each message length */

241:   /* allocate memory for recv buffers */
242:   PetscMalloc1(nrecvs+1,&rbuf_t);
243:   PetscMalloc1(len,&rbuf_t[0]);
244:   for (i=1; i<nrecvs; ++i) rbuf_t[i] = rbuf_t[i-1] + olengths[i-1];

246:   /* Post the receives */
247:   PetscMalloc1(nrecvs,&r_waits_t);
248:   for (i=0; i<nrecvs; ++i) {
249:     MPI_Irecv(rbuf_t[i],olengths[i],MPIU_INT,onodes[i],tag,comm,r_waits_t+i);
250:   }

252:   *rbuf    = rbuf_t;
253:   *r_waits = r_waits_t;
254:   return(0);
255: }

257: PetscErrorCode  PetscPostIrecvScalar(MPI_Comm comm,PetscMPIInt tag,PetscMPIInt nrecvs,const PetscMPIInt onodes[],const PetscMPIInt olengths[],PetscScalar ***rbuf,MPI_Request **r_waits)
258: {
260:   PetscMPIInt    i;
261:   PetscScalar    **rbuf_t;
262:   MPI_Request    *r_waits_t;
263:   PetscInt       len = 0;

266:   /* compute memory required for recv buffers */
267:   for (i=0; i<nrecvs; i++) len += olengths[i];  /* each message length */

269:   /* allocate memory for recv buffers */
270:   PetscMalloc1(nrecvs+1,&rbuf_t);
271:   PetscMalloc1(len,&rbuf_t[0]);
272:   for (i=1; i<nrecvs; ++i) rbuf_t[i] = rbuf_t[i-1] + olengths[i-1];

274:   /* Post the receives */
275:   PetscMalloc1(nrecvs,&r_waits_t);
276:   for (i=0; i<nrecvs; ++i) {
277:     MPI_Irecv(rbuf_t[i],olengths[i],MPIU_SCALAR,onodes[i],tag,comm,r_waits_t+i);
278:   }

280:   *rbuf    = rbuf_t;
281:   *r_waits = r_waits_t;
282:   return(0);
283: }