Actual source code: aomemscalable.c (petsc-3.11.4, 2019-09-28)

  2: /*
  3:     The memory scalable AO application ordering routines. These store the
  4:   orderings on each processor for that processor's range of values
  5: */

  7:  #include <../src/vec/is/ao/aoimpl.h>

  9: typedef struct {
 10:   PetscInt    *app_loc;    /* app_loc[i] is the partner for the ith local PETSc slot */
 11:   PetscInt    *petsc_loc;  /* petsc_loc[j] is the partner for the jth local app slot */
 12:   PetscLayout map;         /* determines the local sizes of ao */
 13: } AO_MemoryScalable;
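
Each process keeps only its slice of the two orderings: for the contiguous block of global PETSc indices that the PetscLayout map assigns to it, app_loc holds the application partners, and petsc_loc the reverse direction. The helper below is an illustrative sketch only (it is not part of PETSc); it assumes the PetscLayout fields rstart and rend that delimit the local range, mirroring the maploc[idx - owners[rank]] lookups used later in this file.

    /* Illustrative only (hypothetical helper): translate a locally owned global
       PETSc index into its application partner.  Indices owned by another
       process must go through the communication in AOMap_MemoryScalable_private(). */
    static PetscInt AOMemScalableLookupLocal(const AO_MemoryScalable *aomems,PetscInt gpetsc)
    {
      PetscInt rstart = aomems->map->rstart;            /* first global index owned here */
      PetscInt rend   = aomems->map->rend;              /* one past the last owned index */
      if (gpetsc < rstart || gpetsc >= rend) return -1; /* not owned on this process */
      return aomems->app_loc[gpetsc - rstart];
    }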

 15: /*
 16:        All processors ship the data to process 0 to be printed; note that this is not scalable because
 17:        process 0 allocates space for all the ordering entries across all the processes
 18: */
 19: PetscErrorCode AOView_MemoryScalable(AO ao,PetscViewer viewer)
 20: {
 21:   PetscErrorCode    ierr;
 22:   PetscMPIInt       rank,size;
 23:   AO_MemoryScalable *aomems = (AO_MemoryScalable*)ao->data;
 24:   PetscBool         iascii;
 25:   PetscMPIInt       tag_app,tag_petsc;
 26:   PetscLayout       map = aomems->map;
 27:   PetscInt          *app,*app_loc,*petsc,*petsc_loc,len,i,j;
 28:   MPI_Status        status;

 31:   PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);
 32:   if (!iascii) SETERRQ1(PetscObjectComm((PetscObject)viewer),PETSC_ERR_SUP,"Viewer type %s not supported for AO MemoryScalable",((PetscObject)viewer)->type_name);

 34:   MPI_Comm_rank(PetscObjectComm((PetscObject)ao),&rank);
 35:   MPI_Comm_size(PetscObjectComm((PetscObject)ao),&size);

 37:   PetscObjectGetNewTag((PetscObject)ao,&tag_app);
 38:   PetscObjectGetNewTag((PetscObject)ao,&tag_petsc);

 40:   if (!rank) {
 41:     PetscViewerASCIIPrintf(viewer,"Number of elements in ordering %D\n",ao->N);
 42:     PetscViewerASCIIPrintf(viewer,  "PETSc->App  App->PETSc\n");

 44:     PetscMalloc2(map->N,&app,map->N,&petsc);
 45:     len  = map->n;
 46:     /* print local AO */
 47:     PetscViewerASCIIPrintf(viewer,"Process [%d]\n",rank);
 48:     for (i=0; i<len; i++) {
 49:       PetscViewerASCIIPrintf(viewer,"%3D  %3D    %3D  %3D\n",i,aomems->app_loc[i],i,aomems->petsc_loc[i]);
 50:     }

 52:     /* recv and print off-processor's AO */
 53:     for (i=1; i<size; i++) {
 54:       len       = map->range[i+1] - map->range[i];
 55:       app_loc   = app  + map->range[i];
 56:       petsc_loc = petsc+ map->range[i];
 57:       MPI_Recv(app_loc,(PetscMPIInt)len,MPIU_INT,i,tag_app,PetscObjectComm((PetscObject)ao),&status);
 58:       MPI_Recv(petsc_loc,(PetscMPIInt)len,MPIU_INT,i,tag_petsc,PetscObjectComm((PetscObject)ao),&status);
 59:       PetscViewerASCIIPrintf(viewer,"Process [%D]\n",i);
 60:       for (j=0; j<len; j++) {
 61:         PetscViewerASCIIPrintf(viewer,"%3D  %3D    %3D  %3D\n",map->range[i]+j,app_loc[j],map->range[i]+j,petsc_loc[j]);
 62:       }
 63:     }
 64:     PetscFree2(app,petsc);

 66:   } else {
 67:     /* send values */
 68:     MPI_Send((void*)aomems->app_loc,map->n,MPIU_INT,0,tag_app,PetscObjectComm((PetscObject)ao));
 69:     MPI_Send((void*)aomems->petsc_loc,map->n,MPIU_INT,0,tag_petsc,PetscObjectComm((PetscObject)ao));
 70:   }
 71:   PetscViewerFlush(viewer);
 72:   return(0);
 73: }

 75: PetscErrorCode AODestroy_MemoryScalable(AO ao)
 76: {
 77:   AO_MemoryScalable *aomems = (AO_MemoryScalable*)ao->data;
 78:   PetscErrorCode    ierr;

 81:   PetscFree2(aomems->app_loc,aomems->petsc_loc);
 82:   PetscLayoutDestroy(&aomems->map);
 83:   PetscFree(aomems);
 84:   return(0);
 85: }

 87: /*
 88:    Input Parameters:
 89: +   ao - the application ordering context
 90: .   n  - the number of integers in ia[]
 91: .   ia - the integers; these are replaced with their mapped values
 92: -   maploc - app_loc or petsc_loc in struct "AO_MemoryScalable"

 94:    Output Parameter:
 95: .   ia - the mapped integers
 96:  */
 97: PetscErrorCode AOMap_MemoryScalable_private(AO ao,PetscInt n,PetscInt *ia,const PetscInt *maploc)
 98: {
 99:   PetscErrorCode    ierr;
100:   AO_MemoryScalable *aomems = (AO_MemoryScalable*)ao->data;
101:   MPI_Comm          comm;
102:   PetscMPIInt       rank,size,tag1,tag2;
103:   PetscInt          *owner,*start,*sizes,nsends,nreceives;
104:   PetscInt          nmax,count,*sindices,*rindices,i,j,idx,lastidx,*sindices2,*rindices2;
105:   const PetscInt    *owners = aomems->map->range;
106:   MPI_Request       *send_waits,*recv_waits,*send_waits2,*recv_waits2;
107:   MPI_Status        recv_status;
108:   PetscMPIInt       nindices,source,widx;
109:   PetscInt          *rbuf,*sbuf;
110:   MPI_Status        *send_status,*send_status2;

113:   PetscObjectGetComm((PetscObject)ao,&comm);
114:   MPI_Comm_rank(comm,&rank);
115:   MPI_Comm_size(comm,&size);

117:   /*  first count number of contributors to each processor */
118:   PetscMalloc2(2*size,&sizes,size,&start);
119:   PetscMemzero(sizes,2*size*sizeof(PetscInt));
120:   PetscCalloc1(n,&owner);

122:   j       = 0;
123:   lastidx = -1;
124:   for (i=0; i<n; i++) {
125:     if (ia[i] < 0) owner[i] = -1; /* mark negative entries (which are not to be mapped) with a special negative value */
126:     else if (ia[i] >= ao->N) owner[i] = -2; /* mark out of range entries with special negative value */
127:     else {
128:       /* if indices are NOT locally sorted, need to start search at the beginning */
129:       if (lastidx > (idx = ia[i])) j = 0;
130:       lastidx = idx;
131:       for (; j<size; j++) {
132:         if (idx >= owners[j] && idx < owners[j+1]) {
133:           sizes[2*j]++;     /* num of indices to be sent */
134:           sizes[2*j+1] = 1; /* send to proc[j] */
135:           owner[i]     = j;
136:           break;
137:         }
138:       }
139:     }
140:   }
141:   sizes[2*rank]=sizes[2*rank+1]=0; /* do not receive from self! */
142:   nsends        = 0;
143:   for (i=0; i<size; i++) nsends += sizes[2*i+1];

145:   /* inform other processors of number of messages and max length*/
146:   PetscMaxSum(comm,sizes,&nmax,&nreceives);

148:   /* allocate arrays */
149:   PetscObjectGetNewTag((PetscObject)ao,&tag1);
150:   PetscObjectGetNewTag((PetscObject)ao,&tag2);

152:   PetscMalloc2(nreceives*nmax,&rindices,nreceives,&recv_waits);
153:   PetscMalloc2(nsends*nmax,&rindices2,nsends,&recv_waits2);

155:   PetscMalloc3(n,&sindices,nsends,&send_waits,nsends,&send_status);
156:   PetscMalloc3(n,&sindices2,nreceives,&send_waits2,nreceives,&send_status2);

158:   /* post 1st receives: receive others' requests;
159:      since we don't know how long each individual message is, we
160:      allocate the largest needed buffer for each receive. Potentially
161:      this is a lot of wasted space.
162:   */
163:   for (i=0,count=0; i<nreceives; i++) {
164:     MPI_Irecv(rindices+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,recv_waits+count++);
165:   }

167:   /* do 1st sends:
168:       1) start[i] gives the starting index in sindices[] for the entries going to
169:          the ith processor
170:   */
171:   start[0] = 0;
172:   for (i=1; i<size; i++) start[i] = start[i-1] + sizes[2*i-2];
173:   for (i=0; i<n; i++) {
174:     j = owner[i];
175:     if (j == -1) continue; /* do not remap negative entries in ia[] */
176:     else if (j == -2) { /* out of range entries get mapped to -1 */
177:       ia[i] = -1;
178:       continue;
179:     } else if (j != rank) {
180:       sindices[start[j]++]  = ia[i];
181:     } else { /* compute my own map */
182:       ia[i] = maploc[ia[i]-owners[rank]];
183:     }
184:   }

186:   start[0] = 0;
187:   for (i=1; i<size; i++) start[i] = start[i-1] + sizes[2*i-2];
188:   for (i=0,count=0; i<size; i++) {
189:     if (sizes[2*i+1]) {
190:       /* send my request to others */
191:       MPI_Isend(sindices+start[i],sizes[2*i],MPIU_INT,i,tag1,comm,send_waits+count);
192:       /* post receive for the answer of my request */
193:       MPI_Irecv(sindices2+start[i],sizes[2*i],MPIU_INT,i,tag2,comm,recv_waits2+count);
194:       count++;
195:     }
196:   }
197:   if (nsends != count) SETERRQ2(comm,PETSC_ERR_SUP,"nsends %d != count %d",nsends,count);

199:   /* wait on 1st sends */
200:   if (nsends) {
201:     MPI_Waitall(nsends,send_waits,send_status);
202:   }

204:   /* 1st recvs: others' requests */
205:   for (j=0; j< nreceives; j++) {
206:     MPI_Waitany(nreceives,recv_waits,&widx,&recv_status); /* widx: index of the handle for the operation that completed */
207:     MPI_Get_count(&recv_status,MPIU_INT,&nindices);
208:     rbuf   = rindices+nmax*widx; /* global index */
209:     source = recv_status.MPI_SOURCE;

211:     /* compute mapping */
212:     sbuf = rbuf;
213:     for (i=0; i<nindices; i++) sbuf[i] = maploc[rbuf[i]-owners[rank]];

215:     /* send mapping back to the sender */
216:     MPI_Isend(sbuf,nindices,MPIU_INT,source,tag2,comm,send_waits2+widx);
217:   }

219:   /* wait on 2nd sends */
220:   if (nreceives) {
221:     MPI_Waitall(nreceives,send_waits2,send_status2);
222:   }

224:   /* 2nd recvs: for the answer of my request */
225:   for (j=0; j< nsends; j++) {
226:     MPI_Waitany(nsends,recv_waits2,&widx,&recv_status);
227:     MPI_Get_count(&recv_status,MPIU_INT,&nindices);
228:     source = recv_status.MPI_SOURCE;
229:     /* pack output ia[] */
230:     rbuf  = sindices2+start[source];
231:     count = 0;
232:     for (i=0; i<n; i++) {
233:       if (source == owner[i]) ia[i] = rbuf[count++];
234:     }
235:   }

237:   /* free arrays */
238:   PetscFree2(sizes,start);
239:   PetscFree(owner);
240:   PetscFree2(rindices,recv_waits);
241:   PetscFree2(rindices2,recv_waits2);
242:   PetscFree3(sindices,send_waits,send_status);
243:   PetscFree3(sindices2,send_waits2,send_status2);
244:   return(0);
245: }
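
In the routine above, the owner of each global index is found by a linear scan over the ownership ranges in owners[], restarting the scan only when the input indices are not locally sorted. Since the range boundaries are sorted, the same lookup can also be done by binary search; the helper below is a hypothetical sketch of that alternative and is not part of this file (PETSc also provides PetscLayoutFindOwner() for this purpose).

    /* Hypothetical helper: locate the owner of global index idx by binary search
       over the sorted range boundaries owners[0..size], where owners[size] == N. */
    static PetscMPIInt FindOwnerBinary(PetscInt idx,const PetscInt *owners,PetscMPIInt size)
    {
      PetscMPIInt lo = 0,hi = size;
      while (hi - lo > 1) {
        PetscMPIInt mid = lo + (hi - lo)/2;
        if (idx >= owners[mid]) lo = mid;
        else                    hi = mid;
      }
      return lo;  /* owners[lo] <= idx < owners[lo+1] */
    }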

247: PetscErrorCode AOPetscToApplication_MemoryScalable(AO ao,PetscInt n,PetscInt *ia)
248: {
249:   PetscErrorCode    ierr;
250:   AO_MemoryScalable *aomems  = (AO_MemoryScalable*)ao->data;
251:   PetscInt          *app_loc = aomems->app_loc;

254:   AOMap_MemoryScalable_private(ao,n,ia,app_loc);
255:   return(0);
256: }

258: PetscErrorCode AOApplicationToPetsc_MemoryScalable(AO ao,PetscInt n,PetscInt *ia)
259: {
260:   PetscErrorCode    ierr;
261:   AO_MemoryScalable *aomems    = (AO_MemoryScalable*)ao->data;
262:   PetscInt          *petsc_loc = aomems->petsc_loc;

265:   AOMap_MemoryScalable_private(ao,n,ia,petsc_loc);
266:   return(0);
267: }

269: static struct _AOOps AOOps_MemoryScalable = {
270:   AOView_MemoryScalable,
271:   AODestroy_MemoryScalable,
272:   AOPetscToApplication_MemoryScalable,
273:   AOApplicationToPetsc_MemoryScalable,
274:   0,
275:   0,
276:   0,
277:   0
278: };

280: PetscErrorCode  AOCreateMemoryScalable_private(MPI_Comm comm,PetscInt napp,const PetscInt from_array[],const PetscInt to_array[],AO ao, PetscInt *aomap_loc)
281: {
282:   PetscErrorCode    ierr;
283:   AO_MemoryScalable *aomems = (AO_MemoryScalable*)ao->data;
284:   PetscLayout       map     = aomems->map;
285:   PetscInt          n_local = map->n,i,j;
286:   PetscMPIInt       rank,size,tag;
287:   PetscInt          *owner,*start,*sizes,nsends,nreceives;
288:   PetscInt          nmax,count,*sindices,*rindices,idx,lastidx;
289:   PetscInt          *owners = aomems->map->range;
290:   MPI_Request       *send_waits,*recv_waits;
291:   MPI_Status        recv_status;
292:   PetscMPIInt       nindices,widx;
293:   PetscInt          *rbuf;
294:   PetscInt          n=napp,ip,ia;
295:   MPI_Status        *send_status;

298:   PetscMemzero(aomap_loc,n_local*sizeof(PetscInt));

300:   MPI_Comm_rank(comm,&rank);
301:   MPI_Comm_size(comm,&size);

303:   /*  first count number of contributors (of from_array[]) to each processor */
304:   PetscCalloc1(2*size,&sizes);
305:   PetscMalloc1(n,&owner);

307:   j       = 0;
308:   lastidx = -1;
309:   for (i=0; i<n; i++) {
310:     /* if indices are NOT locally sorted, need to start search at the beginning */
311:     if (lastidx > (idx = from_array[i])) j = 0;
312:     lastidx = idx;
313:     for (; j<size; j++) {
314:       if (idx >= owners[j] && idx < owners[j+1]) {
315:         sizes[2*j]  += 2; /* num of indices to be sent - in pairs (ip,ia) */
316:         sizes[2*j+1] = 1; /* send to proc[j] */
317:         owner[i]      = j;
318:         break;
319:       }
320:     }
321:   }
322:   sizes[2*rank]=sizes[2*rank+1]=0; /* do not receive from self! */
323:   nsends        = 0;
324:   for (i=0; i<size; i++) nsends += sizes[2*i+1];

326:   /* inform other processors of number of messages and max length*/
327:   PetscMaxSum(comm,sizes,&nmax,&nreceives);

329:   /* allocate arrays */
330:   PetscObjectGetNewTag((PetscObject)ao,&tag);
331:   PetscMalloc2(nreceives*nmax,&rindices,nreceives,&recv_waits);
332:   PetscMalloc3(2*n,&sindices,nsends,&send_waits,nsends,&send_status);
333:   PetscMalloc1(size,&start);

335:   /* post receives: */
336:   for (i=0; i<nreceives; i++) {
337:     MPI_Irecv(rindices+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
338:   }

340:   /* do sends:
341:       1) start[i] gives the starting index in sindices[] for the entries going to
342:          the ith processor
343:   */
344:   start[0] = 0;
345:   for (i=1; i<size; i++) start[i] = start[i-1] + sizes[2*i-2];
346:   for (i=0; i<n; i++) {
347:     j = owner[i];
348:     if (j != rank) {
349:       ip                   = from_array[i];
350:       ia                   = to_array[i];
351:       sindices[start[j]++] = ip;
352:       sindices[start[j]++] = ia;
353:     } else { /* compute my own map */
354:       ip            = from_array[i] - owners[rank];
355:       ia            = to_array[i];
356:       aomap_loc[ip] = ia;
357:     }
358:   }

360:   start[0] = 0;
361:   for (i=1; i<size; i++) start[i] = start[i-1] + sizes[2*i-2];
362:   for (i=0,count=0; i<size; i++) {
363:     if (sizes[2*i+1]) {
364:       MPI_Isend(sindices+start[i],sizes[2*i],MPIU_INT,i,tag,comm,send_waits+count);
365:       count++;
366:     }
367:   }
368:   if (nsends != count) SETERRQ2(comm,PETSC_ERR_SUP,"nsends %d != count %d",nsends,count);

370:   /* wait on sends */
371:   if (nsends) {
372:     MPI_Waitall(nsends,send_waits,send_status);
373:   }

375:   /* recvs */
376:   count=0;
377:   for (j= nreceives; j>0; j--) {
378:     MPI_Waitany(nreceives,recv_waits,&widx,&recv_status);
379:     MPI_Get_count(&recv_status,MPIU_INT,&nindices);
380:     rbuf = rindices+nmax*widx; /* global index */

382:     /* compute local mapping */
383:     for (i=0; i<nindices; i+=2) { /* pack aomap_loc */
384:       ip            = rbuf[i] - owners[rank]; /* local index */
385:       ia            = rbuf[i+1];
386:       aomap_loc[ip] = ia;
387:     }
388:     count++;
389:   }

391:   PetscFree(start);
392:   PetscFree3(sindices,send_waits,send_status);
393:   PetscFree2(rindices,recv_waits);
394:   PetscFree(owner);
395:   PetscFree(sizes);
396:   return(0);
397: }

399: PETSC_EXTERN PetscErrorCode AOCreate_MemoryScalable(AO ao)
400: {
401:   PetscErrorCode    ierr;
402:   IS                isapp=ao->isapp,ispetsc=ao->ispetsc;
403:   const PetscInt    *mypetsc,*myapp;
404:   PetscInt          napp,n_local,N,i,start,*petsc,*lens,*disp;
405:   MPI_Comm          comm;
406:   AO_MemoryScalable *aomems;
407:   PetscLayout       map;
408:   PetscMPIInt       size,rank;

411:   if (!isapp) SETERRQ(PetscObjectComm((PetscObject)ao),PETSC_ERR_ARG_WRONGSTATE,"AOSetIS() must be called before AOSetType()");
412:   /* create special struct aomems */
413:   PetscNewLog(ao,&aomems);
414:   ao->data = (void*) aomems;
415:   PetscMemcpy(ao->ops,&AOOps_MemoryScalable,sizeof(struct _AOOps));
416:   PetscObjectChangeTypeName((PetscObject)ao,AOMEMORYSCALABLE);

418:   /* transmit all local lengths of isapp to all processors */
419:   PetscObjectGetComm((PetscObject)isapp,&comm);
420:   MPI_Comm_size(comm, &size);
421:   MPI_Comm_rank(comm, &rank);
422:   PetscMalloc2(size,&lens,size,&disp);
423:   ISGetLocalSize(isapp,&napp);
424:   MPI_Allgather(&napp, 1, MPIU_INT, lens, 1, MPIU_INT, comm);

426:   N = 0;
427:   for (i = 0; i < size; i++) {
428:     disp[i] = N;
429:     N      += lens[i];
430:   }

432:   /* If ispetsc is 0 then use "natural" numbering */
433:   if (napp) {
434:     if (!ispetsc) {
435:       start = disp[rank];
436:       PetscMalloc1(napp+1, &petsc);
437:       for (i=0; i<napp; i++) petsc[i] = start + i;
438:     } else {
439:       ISGetIndices(ispetsc,&mypetsc);
440:       petsc = (PetscInt*)mypetsc;
441:     }
442:   } else {
443:     petsc = NULL;
444:   }

446:   /* create a map with global size N - used to determine the local sizes of ao - shall we use local napp instead of N? */
447:   PetscLayoutCreate(comm,&map);
448:   map->bs = 1;
449:   map->N  = N;
450:   PetscLayoutSetUp(map);

452:   ao->N       = N;
453:   ao->n       = map->n;
454:   aomems->map = map;

456:   /* create distributed indices app_loc: petsc->app and petsc_loc: app->petsc */
457:   n_local = map->n;
458:   PetscMalloc2(n_local, &aomems->app_loc,n_local,&aomems->petsc_loc);
459:   PetscLogObjectMemory((PetscObject)ao,2*n_local*sizeof(PetscInt));
460:   PetscMemzero(aomems->app_loc,n_local*sizeof(PetscInt));
461:   PetscMemzero(aomems->petsc_loc,n_local*sizeof(PetscInt));
462:   ISGetIndices(isapp,&myapp);

464:   AOCreateMemoryScalable_private(comm,napp,petsc,myapp,ao,aomems->app_loc);
465:   AOCreateMemoryScalable_private(comm,napp,myapp,petsc,ao,aomems->petsc_loc);

467:   ISRestoreIndices(isapp,&myapp);
468:   if (napp) {
469:     if (ispetsc) {
470:       ISRestoreIndices(ispetsc,&mypetsc);
471:     } else {
472:       PetscFree(petsc);
473:     }
474:   }
475:   PetscFree2(lens,disp);
476:   return(0);
477: }

479: /*@C
480:    AOCreateMemoryScalable - Creates a memory scalable application ordering using two integer arrays.

482:    Collective on MPI_Comm

484:    Input Parameters:
485: +  comm - MPI communicator that is to share AO
486: .  napp - size of integer arrays
487: .  myapp - integer array that defines an ordering
488: -  mypetsc - integer array that defines another ordering (may be NULL to
489:              indicate the natural ordering, that is 0,1,2,3,...)

491:    Output Parameter:
492: .  aoout - the new application ordering

494:    Level: beginner

496:     Notes:
497:     The arrays myapp and mypetsc must contain all the integers 0 to napp-1 with no duplicates; that is, there cannot be any "holes"
498:            in the indices. Use AOCreateMapping() or AOCreateMappingIS() if you wish to have "holes" in the indices.
499:            Compared with AOCreateBasic(), this routine trades memory for message communication.

501: .keywords: AO, create

503: .seealso: AOCreateMemoryScalableIS(), AODestroy(), AOPetscToApplication(), AOApplicationToPetsc()
504: @*/
505: PetscErrorCode AOCreateMemoryScalable(MPI_Comm comm,PetscInt napp,const PetscInt myapp[],const PetscInt mypetsc[],AO *aoout)
506: {
508:   IS             isapp,ispetsc;
509:   const PetscInt *app=myapp,*petsc=mypetsc;

512:   ISCreateGeneral(comm,napp,app,PETSC_USE_POINTER,&isapp);
513:   if (mypetsc) {
514:     ISCreateGeneral(comm,napp,petsc,PETSC_USE_POINTER,&ispetsc);
515:   } else {
516:     ispetsc = NULL;
517:   }
518:   AOCreateMemoryScalableIS(isapp,ispetsc,aoout);
519:   ISDestroy(&isapp);
520:   if (mypetsc) {
521:     ISDestroy(&ispetsc);
522:   }
523:   return(0);
524: }
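
A minimal usage sketch of AOCreateMemoryScalable(), not taken from the PETSc distribution: two MPI ranks each contribute two entries of a four-element application ordering, and NULL is passed for mypetsc so the natural ordering 0,1,2,3 is used on the PETSc side. Error-code checking with CHKERRQ() is omitted for brevity, just as it is suppressed in this listing.

    #include <petscao.h>

    int main(int argc,char **argv)
    {
      AO          ao;
      PetscMPIInt rank;
      PetscInt    app[2],ia[2];

      PetscInitialize(&argc,&argv,NULL,NULL);
      MPI_Comm_rank(PETSC_COMM_WORLD,&rank);
      /* application ordering 3,2,1,0 distributed across exactly two ranks */
      app[0] = rank ? 1 : 3;
      app[1] = rank ? 0 : 2;
      AOCreateMemoryScalable(PETSC_COMM_WORLD,2,app,NULL,&ao);

      ia[0] = 2*rank;                      /* two application indices to translate */
      ia[1] = 2*rank + 1;
      AOApplicationToPetsc(ao,2,ia);       /* ia[] is overwritten with PETSc indices */
      AOView(ao,PETSC_VIEWER_STDOUT_WORLD);
      AODestroy(&ao);
      PetscFinalize();
      return 0;
    }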

526: /*@C
527:    AOCreateMemoryScalableIS - Creates a memory scalable application ordering using two index sets.

529:    Collective on IS

531:    Input Parameters:
532: +  isapp - index set that defines an ordering
533: -  ispetsc - index set that defines another ordering (may be NULL to use the
534:              natural ordering)

536:    Output Parameter:
537: .  aoout - the new application ordering

539:    Level: beginner

541:     Notes:
542:     The index sets isapp and ispetsc must contain all the integers 0 to napp-1 (where napp is the length of the index sets) with no duplicates;
543:            that is, there cannot be any "holes".
544:            Compared with AOCreateBasicIS(), this routine trades memory for message communication.

545: .keywords: AO, create

547: .seealso: AOCreateMemoryScalable(),  AODestroy()
548: @*/
549: PetscErrorCode  AOCreateMemoryScalableIS(IS isapp,IS ispetsc,AO *aoout)
550: {
552:   MPI_Comm       comm;
553:   AO             ao;

556:   PetscObjectGetComm((PetscObject)isapp,&comm);
557:   AOCreate(comm,&ao);
558:   AOSetIS(ao,isapp,ispetsc);
559:   AOSetType(ao,AOMEMORYSCALABLE);
560:   AOViewFromOptions(ao,NULL,"-ao_view");
561:   *aoout = ao;
562:   return(0);
563: }
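
Correspondingly, a hedged sketch of driving the IS-based interface: the wrapper function below is hypothetical, and only ISCreateGeneral(), AOCreateMemoryScalableIS(), and ISDestroy() are the PETSc calls involved. Passing NULL for the second index set selects the natural PETSc ordering, exactly as AOCreateMemoryScalable() does when mypetsc is NULL.

    #include <petscao.h>

    /* Hypothetical convenience wrapper: build a memory-scalable AO from a plain
       array holding this process's slice of the application ordering. */
    static PetscErrorCode CreateAOFromArray(MPI_Comm comm,PetscInt napp,const PetscInt app[],AO *ao)
    {
      IS             isapp;
      PetscErrorCode ierr;

      PetscFunctionBegin;
      ierr = ISCreateGeneral(comm,napp,app,PETSC_COPY_VALUES,&isapp);CHKERRQ(ierr);
      ierr = AOCreateMemoryScalableIS(isapp,NULL,ao);CHKERRQ(ierr); /* NULL: natural PETSc ordering */
      ierr = ISDestroy(&isapp);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    }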