Actual source code: aomemscalable.c

petsc-3.9.4 2018-09-11
Report Typos and Errors

  2: /*
  3:     The memory scalable AO application ordering routines. These store the
  4:   local orderings on each processor.
  5: */

  7:  #include <../src/vec/is/ao/aoimpl.h>

  9: typedef struct {
 10:   PetscInt    *app_loc;    /* app_loc[i] is the partner for the ith local PETSc slot */
 11:   PetscInt    *petsc_loc;  /* petsc_loc[j] is the partner for the jth local app slot */
 12:   PetscLayout map;         /* determines the local sizes of ao */
 13: } AO_MemoryScalable;

 15: /*
 16:        All processors have the same data so processor 1 prints it
 17: */
 18: PetscErrorCode AOView_MemoryScalable(AO ao,PetscViewer viewer)
 19: {
 20:   PetscErrorCode    ierr;
 21:   PetscMPIInt       rank,size;
 22:   AO_MemoryScalable *aomems = (AO_MemoryScalable*)ao->data;
 23:   PetscBool         iascii;
 24:   PetscMPIInt       tag_app,tag_petsc;
 25:   PetscLayout       map = aomems->map;
 26:   PetscInt          *app,*app_loc,*petsc,*petsc_loc,len,i,j;
 27:   MPI_Status        status;

 30:   PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);
 31:   if (!iascii) SETERRQ1(PetscObjectComm((PetscObject)viewer),PETSC_ERR_SUP,"Viewer type %s not supported for AO MemoryScalable",((PetscObject)viewer)->type_name);

 33:   MPI_Comm_rank(PetscObjectComm((PetscObject)ao),&rank);
 34:   MPI_Comm_size(PetscObjectComm((PetscObject)ao),&size);

 36:   PetscObjectGetNewTag((PetscObject)ao,&tag_app);
 37:   PetscObjectGetNewTag((PetscObject)ao,&tag_petsc);

 39:   if (!rank) {
 40:     PetscViewerASCIIPrintf(viewer,"Number of elements in ordering %D\n",ao->N);
 41:     PetscViewerASCIIPrintf(viewer,  "PETSc->App  App->PETSc\n");

 43:     PetscMalloc2(map->N,&app,map->N,&petsc);
 44:     len  = map->n;
 45:     /* print local AO */
 46:     PetscViewerASCIIPrintf(viewer,"Process [%D]\n",rank);
 47:     for (i=0; i<len; i++) {
 48:       PetscViewerASCIIPrintf(viewer,"%3D  %3D    %3D  %3D\n",i,aomems->app_loc[i],i,aomems->petsc_loc[i]);
 49:     }

 51:     /* recv and print off-processor's AO */
 52:     for (i=1; i<size; i++) {
 53:       len       = map->range[i+1] - map->range[i];
 54:       app_loc   = app  + map->range[i];
 55:       petsc_loc = petsc+ map->range[i];
 56:       MPI_Recv(app_loc,(PetscMPIInt)len,MPIU_INT,i,tag_app,PetscObjectComm((PetscObject)ao),&status);
 57:       MPI_Recv(petsc_loc,(PetscMPIInt)len,MPIU_INT,i,tag_petsc,PetscObjectComm((PetscObject)ao),&status);
 58:       PetscViewerASCIIPrintf(viewer,"Process [%D]\n",i);
 59:       for (j=0; j<len; j++) {
 60:         PetscViewerASCIIPrintf(viewer,"%3D  %3D    %3D  %3D\n",map->range[i]+j,app_loc[j],map->range[i]+j,petsc_loc[j]);
 61:       }
 62:     }
 63:     PetscFree2(app,petsc);

 65:   } else {
 66:     /* send values */
 67:     MPI_Send((void*)aomems->app_loc,map->n,MPIU_INT,0,tag_app,PetscObjectComm((PetscObject)ao));
 68:     MPI_Send((void*)aomems->petsc_loc,map->n,MPIU_INT,0,tag_petsc,PetscObjectComm((PetscObject)ao));
 69:   }
 70:   PetscViewerFlush(viewer);
 71:   return(0);
 72: }

 74: PetscErrorCode AODestroy_MemoryScalable(AO ao)
 75: {
 76:   AO_MemoryScalable *aomems = (AO_MemoryScalable*)ao->data;
 77:   PetscErrorCode    ierr;

 80:   PetscFree2(aomems->app_loc,aomems->petsc_loc);
 81:   PetscLayoutDestroy(&aomems->map);
 82:   PetscFree(aomems);
 83:   return(0);
 84: }

 86: /*
 87:    Input Parameters:
 88: +   ao - the application ordering context
 89: .   n  - the number of integers in ia[]
 90: .   ia - the integers; these are replaced with their mapped value
 91: -   maploc - app_loc or petsc_loc in struct "AO_MemoryScalable"

 93:    Output Parameter:
 94: .   ia - the mapped interges
 95:  */
 96: PetscErrorCode AOMap_MemoryScalable_private(AO ao,PetscInt n,PetscInt *ia,PetscInt *maploc)
 97: {
 98:   PetscErrorCode    ierr;
 99:   AO_MemoryScalable *aomems = (AO_MemoryScalable*)ao->data;
100:   MPI_Comm          comm;
101:   PetscMPIInt       rank,size,tag1,tag2;
102:   PetscInt          *owner,*start,*sizes,nsends,nreceives;
103:   PetscInt          nmax,count,*sindices,*rindices,i,j,idx,lastidx,*sindices2,*rindices2;
104:   PetscInt          *owners = aomems->map->range;
105:   MPI_Request       *send_waits,*recv_waits,*send_waits2,*recv_waits2;
106:   MPI_Status        recv_status;
107:   PetscMPIInt       nindices,source,widx;
108:   PetscInt          *rbuf,*sbuf;
109:   MPI_Status        *send_status,*send_status2;

112:   PetscObjectGetComm((PetscObject)ao,&comm);
113:   MPI_Comm_rank(comm,&rank);
114:   MPI_Comm_size(comm,&size);

116:   /*  first count number of contributors to each processor */
117:   PetscMalloc2(2*size,&sizes,size,&start);
118:   PetscMemzero(sizes,2*size*sizeof(PetscInt));
119:   PetscCalloc1(n,&owner);

121:   j       = 0;
122:   lastidx = -1;
123:   for (i=0; i<n; i++) {
124:     /* if indices are NOT locally sorted, need to start search at the beginning */
125:     if (lastidx > (idx = ia[i])) j = 0;
126:     lastidx = idx;
127:     for (; j<size; j++) {
128:       if (idx >= owners[j] && idx < owners[j+1]) {
129:         sizes[2*j]++;     /* num of indices to be sent */
130:         sizes[2*j+1] = 1; /* send to proc[j] */
131:         owner[i]      = j;
132:         break;
133:       }
134:     }
135:   }
136:   sizes[2*rank]=sizes[2*rank+1]=0; /* do not receive from self! */
137:   nsends        = 0;
138:   for (i=0; i<size; i++) nsends += sizes[2*i+1];

140:   /* inform other processors of number of messages and max length*/
141:   PetscMaxSum(comm,sizes,&nmax,&nreceives);

143:   /* allocate arrays */
144:   PetscObjectGetNewTag((PetscObject)ao,&tag1);
145:   PetscObjectGetNewTag((PetscObject)ao,&tag2);

147:   PetscMalloc2(nreceives*nmax,&rindices,nreceives,&recv_waits);
148:   PetscMalloc2(nsends*nmax,&rindices2,nsends,&recv_waits2);

150:   PetscMalloc3(n,&sindices,nsends,&send_waits,nsends,&send_status);
151:   PetscMalloc3(n,&sindices2,nreceives,&send_waits2,nreceives,&send_status2);

153:   /* post 1st receives: receive others requests
154:      since we don't know how long each individual message is we
155:      allocate the largest needed buffer for each receive. Potentially
156:      this is a lot of wasted space.
157:   */
158:   for (i=0,count=0; i<nreceives; i++) {
159:     MPI_Irecv(rindices+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,recv_waits+count++);
160:   }

162:   /* do 1st sends:
163:       1) starts[i] gives the starting index in svalues for stuff going to
164:          the ith processor
165:   */
166:   start[0] = 0;
167:   for (i=1; i<size; i++) start[i] = start[i-1] + sizes[2*i-2];
168:   for (i=0; i<n; i++) {
169:     j = owner[i];
170:     if (j != rank) {
171:       sindices[start[j]++]  = ia[i];
172:     } else { /* compute my own map */
173:       if (ia[i] >= owners[rank] && ia[i] < owners[rank+1]) {
174:         ia[i] = maploc[ia[i]-owners[rank]];
175:       } else {
176:         ia[i] = -1;  /* ia[i] is not in the range of 0 and N-1, maps it to -1 */
177:       }
178:     }
179:   }

181:   start[0] = 0;
182:   for (i=1; i<size; i++) start[i] = start[i-1] + sizes[2*i-2];
183:   for (i=0,count=0; i<size; i++) {
184:     if (sizes[2*i+1]) {
185:       /* send my request to others */
186:       MPI_Isend(sindices+start[i],sizes[2*i],MPIU_INT,i,tag1,comm,send_waits+count);
187:       /* post receive for the answer of my request */
188:       MPI_Irecv(sindices2+start[i],sizes[2*i],MPIU_INT,i,tag2,comm,recv_waits2+count);
189:       count++;
190:     }
191:   }
192:   if (nsends != count) SETERRQ2(comm,PETSC_ERR_SUP,"nsends %d != count %d",nsends,count);

194:   /* wait on 1st sends */
195:   if (nsends) {
196:     MPI_Waitall(nsends,send_waits,send_status);
197:   }

199:   /* 1st recvs: other's requests */
200:   for (j=0; j< nreceives; j++) {
201:     MPI_Waitany(nreceives,recv_waits,&widx,&recv_status); /* idx: index of handle for operation that completed */
202:     MPI_Get_count(&recv_status,MPIU_INT,&nindices);
203:     rbuf   = rindices+nmax*widx; /* global index */
204:     source = recv_status.MPI_SOURCE;

206:     /* compute mapping */
207:     sbuf = rbuf;
208:     for (i=0; i<nindices; i++) sbuf[i] = maploc[rbuf[i]-owners[rank]];

210:     /* send mapping back to the sender */
211:     MPI_Isend(sbuf,nindices,MPIU_INT,source,tag2,comm,send_waits2+widx);
212:   }

214:   /* wait on 2nd sends */
215:   if (nreceives) {
216:     MPI_Waitall(nreceives,send_waits2,send_status2);
217:   }

219:   /* 2nd recvs: for the answer of my request */
220:   for (j=0; j< nsends; j++) {
221:     MPI_Waitany(nsends,recv_waits2,&widx,&recv_status);
222:     MPI_Get_count(&recv_status,MPIU_INT,&nindices);
223:     source = recv_status.MPI_SOURCE;
224:     /* pack output ia[] */
225:     rbuf  = sindices2+start[source];
226:     count = 0;
227:     for (i=0; i<n; i++) {
228:       if (source == owner[i]) ia[i] = rbuf[count++];
229:     }
230:   }

232:   /* free arrays */
233:   PetscFree2(sizes,start);
234:   PetscFree(owner);
235:   PetscFree2(rindices,recv_waits);
236:   PetscFree2(rindices2,recv_waits2);
237:   PetscFree3(sindices,send_waits,send_status);
238:   PetscFree3(sindices2,send_waits2,send_status2);
239:   return(0);
240: }

242: PetscErrorCode AOPetscToApplication_MemoryScalable(AO ao,PetscInt n,PetscInt *ia)
243: {
244:   PetscErrorCode    ierr;
245:   AO_MemoryScalable *aomems  = (AO_MemoryScalable*)ao->data;
246:   PetscInt          *app_loc = aomems->app_loc;

249:   AOMap_MemoryScalable_private(ao,n,ia,app_loc);
250:   return(0);
251: }

253: PetscErrorCode AOApplicationToPetsc_MemoryScalable(AO ao,PetscInt n,PetscInt *ia)
254: {
255:   PetscErrorCode    ierr;
256:   AO_MemoryScalable *aomems    = (AO_MemoryScalable*)ao->data;
257:   PetscInt          *petsc_loc = aomems->petsc_loc;

260:   AOMap_MemoryScalable_private(ao,n,ia,petsc_loc);
261:   return(0);
262: }

264: static struct _AOOps AOOps_MemoryScalable = {
265:   AOView_MemoryScalable,
266:   AODestroy_MemoryScalable,
267:   AOPetscToApplication_MemoryScalable,
268:   AOApplicationToPetsc_MemoryScalable,
269:   0,
270:   0,
271:   0,
272:   0
273: };

275: PetscErrorCode  AOCreateMemoryScalable_private(MPI_Comm comm,PetscInt napp,const PetscInt from_array[],const PetscInt to_array[],AO ao, PetscInt *aomap_loc)
276: {
277:   PetscErrorCode    ierr;
278:   AO_MemoryScalable *aomems = (AO_MemoryScalable*)ao->data;
279:   PetscLayout       map     = aomems->map;
280:   PetscInt          n_local = map->n,i,j;
281:   PetscMPIInt       rank,size,tag;
282:   PetscInt          *owner,*start,*sizes,nsends,nreceives;
283:   PetscInt          nmax,count,*sindices,*rindices,idx,lastidx;
284:   PetscInt          *owners = aomems->map->range;
285:   MPI_Request       *send_waits,*recv_waits;
286:   MPI_Status        recv_status;
287:   PetscMPIInt       nindices,widx;
288:   PetscInt          *rbuf;
289:   PetscInt          n=napp,ip,ia;
290:   MPI_Status        *send_status;

293:   PetscMemzero(aomap_loc,n_local*sizeof(PetscInt));

295:   MPI_Comm_rank(comm,&rank);
296:   MPI_Comm_size(comm,&size);

298:   /*  first count number of contributors (of from_array[]) to each processor */
299:   PetscCalloc1(2*size,&sizes);
300:   PetscMalloc1(n,&owner);

302:   j       = 0;
303:   lastidx = -1;
304:   for (i=0; i<n; i++) {
305:     /* if indices are NOT locally sorted, need to start search at the beginning */
306:     if (lastidx > (idx = from_array[i])) j = 0;
307:     lastidx = idx;
308:     for (; j<size; j++) {
309:       if (idx >= owners[j] && idx < owners[j+1]) {
310:         sizes[2*j]  += 2; /* num of indices to be sent - in pairs (ip,ia) */
311:         sizes[2*j+1] = 1; /* send to proc[j] */
312:         owner[i]      = j;
313:         break;
314:       }
315:     }
316:   }
317:   sizes[2*rank]=sizes[2*rank+1]=0; /* do not receive from self! */
318:   nsends        = 0;
319:   for (i=0; i<size; i++) nsends += sizes[2*i+1];

321:   /* inform other processors of number of messages and max length*/
322:   PetscMaxSum(comm,sizes,&nmax,&nreceives);

324:   /* allocate arrays */
325:   PetscObjectGetNewTag((PetscObject)ao,&tag);
326:   PetscMalloc2(nreceives*nmax,&rindices,nreceives,&recv_waits);
327:   PetscMalloc3(2*n,&sindices,nsends,&send_waits,nsends,&send_status);
328:   PetscMalloc1(size,&start);

330:   /* post receives: */
331:   for (i=0; i<nreceives; i++) {
332:     MPI_Irecv(rindices+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
333:   }

335:   /* do sends:
336:       1) starts[i] gives the starting index in svalues for stuff going to
337:          the ith processor
338:   */
339:   start[0] = 0;
340:   for (i=1; i<size; i++) start[i] = start[i-1] + sizes[2*i-2];
341:   for (i=0; i<n; i++) {
342:     j = owner[i];
343:     if (j != rank) {
344:       ip                   = from_array[i];
345:       ia                   = to_array[i];
346:       sindices[start[j]++] = ip;
347:       sindices[start[j]++] = ia;
348:     } else { /* compute my own map */
349:       ip            = from_array[i] - owners[rank];
350:       ia            = to_array[i];
351:       aomap_loc[ip] = ia;
352:     }
353:   }

355:   start[0] = 0;
356:   for (i=1; i<size; i++) start[i] = start[i-1] + sizes[2*i-2];
357:   for (i=0,count=0; i<size; i++) {
358:     if (sizes[2*i+1]) {
359:       MPI_Isend(sindices+start[i],sizes[2*i],MPIU_INT,i,tag,comm,send_waits+count);
360:       count++;
361:     }
362:   }
363:   if (nsends != count) SETERRQ2(comm,PETSC_ERR_SUP,"nsends %d != count %d",nsends,count);

365:   /* wait on sends */
366:   if (nsends) {
367:     MPI_Waitall(nsends,send_waits,send_status);
368:   }

370:   /* recvs */
371:   count=0;
372:   for (j= nreceives; j>0; j--) {
373:     MPI_Waitany(nreceives,recv_waits,&widx,&recv_status);
374:     MPI_Get_count(&recv_status,MPIU_INT,&nindices);
375:     rbuf = rindices+nmax*widx; /* global index */

377:     /* compute local mapping */
378:     for (i=0; i<nindices; i+=2) { /* pack aomap_loc */
379:       ip            = rbuf[i] - owners[rank]; /* local index */
380:       ia            = rbuf[i+1];
381:       aomap_loc[ip] = ia;
382:     }
383:     count++;
384:   }

386:   PetscFree(start);
387:   PetscFree3(sindices,send_waits,send_status);
388:   PetscFree2(rindices,recv_waits);
389:   PetscFree(owner);
390:   PetscFree(sizes);
391:   return(0);
392: }

394: PETSC_EXTERN PetscErrorCode AOCreate_MemoryScalable(AO ao)
395: {
396:   PetscErrorCode    ierr;
397:   IS                isapp=ao->isapp,ispetsc=ao->ispetsc;
398:   const PetscInt    *mypetsc,*myapp;
399:   PetscInt          napp,n_local,N,i,start,*petsc,*lens,*disp;
400:   MPI_Comm          comm;
401:   AO_MemoryScalable *aomems;
402:   PetscLayout       map;
403:   PetscMPIInt       size,rank;

406:   if (!isapp) SETERRQ(PetscObjectComm((PetscObject)ao),PETSC_ERR_ARG_WRONGSTATE,"AOSetIS() must be called before AOSetType()");
407:   /* create special struct aomems */
408:   PetscNewLog(ao,&aomems);
409:   ao->data = (void*) aomems;
410:   PetscMemcpy(ao->ops,&AOOps_MemoryScalable,sizeof(struct _AOOps));
411:   PetscObjectChangeTypeName((PetscObject)ao,AOMEMORYSCALABLE);

413:   /* transmit all local lengths of isapp to all processors */
414:   PetscObjectGetComm((PetscObject)isapp,&comm);
415:   MPI_Comm_size(comm, &size);
416:   MPI_Comm_rank(comm, &rank);
417:   PetscMalloc2(size,&lens,size,&disp);
418:   ISGetLocalSize(isapp,&napp);
419:   MPI_Allgather(&napp, 1, MPIU_INT, lens, 1, MPIU_INT, comm);

421:   N = 0;
422:   for (i = 0; i < size; i++) {
423:     disp[i] = N;
424:     N      += lens[i];
425:   }

427:   /* If ispetsc is 0 then use "natural" numbering */
428:   if (napp) {
429:     if (!ispetsc) {
430:       start = disp[rank];
431:       PetscMalloc1(napp+1, &petsc);
432:       for (i=0; i<napp; i++) petsc[i] = start + i;
433:     } else {
434:       ISGetIndices(ispetsc,&mypetsc);
435:       petsc = (PetscInt*)mypetsc;
436:     }
437:   } else {
438:     petsc = NULL;
439:   }

441:   /* create a map with global size N - used to determine the local sizes of ao - shall we use local napp instead of N? */
442:   PetscLayoutCreate(comm,&map);
443:   map->bs = 1;
444:   map->N  = N;
445:   PetscLayoutSetUp(map);

447:   ao->N       = N;
448:   ao->n       = map->n;
449:   aomems->map = map;

451:   /* create distributed indices app_loc: petsc->app and petsc_loc: app->petsc */
452:   n_local = map->n;
453:   PetscMalloc2(n_local, &aomems->app_loc,n_local,&aomems->petsc_loc);
454:   PetscLogObjectMemory((PetscObject)ao,2*n_local*sizeof(PetscInt));
455:   PetscMemzero(aomems->app_loc,n_local*sizeof(PetscInt));
456:   PetscMemzero(aomems->petsc_loc,n_local*sizeof(PetscInt));
457:   ISGetIndices(isapp,&myapp);

459:   AOCreateMemoryScalable_private(comm,napp,petsc,myapp,ao,aomems->app_loc);
460:   AOCreateMemoryScalable_private(comm,napp,myapp,petsc,ao,aomems->petsc_loc);

462:   ISRestoreIndices(isapp,&myapp);
463:   if (napp) {
464:     if (ispetsc) {
465:       ISRestoreIndices(ispetsc,&mypetsc);
466:     } else {
467:       PetscFree(petsc);
468:     }
469:   }
470:   PetscFree2(lens,disp);
471:   return(0);
472: }

474: /*@C
475:    AOCreateMemoryScalable - Creates a memory scalable application ordering using two integer arrays.

477:    Collective on MPI_Comm

479:    Input Parameters:
480: +  comm - MPI communicator that is to share AO
481: .  napp - size of integer arrays
482: .  myapp - integer array that defines an ordering
483: -  mypetsc - integer array that defines another ordering (may be NULL to
484:              indicate the natural ordering, that is 0,1,2,3,...)

486:    Output Parameter:
487: .  aoout - the new application ordering

489:    Level: beginner

491:     Notes: The arrays myapp and mypetsc must contain the all the integers 0 to napp-1 with no duplicates; that is there cannot be any "holes"
492:            in the indices. Use AOCreateMapping() or AOCreateMappingIS() if you wish to have "holes" in the indices.
493:            Comparing with AOCreateBasic(), this routine trades memory with message communication.

495: .keywords: AO, create

497: .seealso: AOCreateMemoryScalableIS(), AODestroy(), AOPetscToApplication(), AOApplicationToPetsc()
498: @*/
499: PetscErrorCode AOCreateMemoryScalable(MPI_Comm comm,PetscInt napp,const PetscInt myapp[],const PetscInt mypetsc[],AO *aoout)
500: {
502:   IS             isapp,ispetsc;
503:   const PetscInt *app=myapp,*petsc=mypetsc;

506:   ISCreateGeneral(comm,napp,app,PETSC_USE_POINTER,&isapp);
507:   if (mypetsc) {
508:     ISCreateGeneral(comm,napp,petsc,PETSC_USE_POINTER,&ispetsc);
509:   } else {
510:     ispetsc = NULL;
511:   }
512:   AOCreateMemoryScalableIS(isapp,ispetsc,aoout);
513:   ISDestroy(&isapp);
514:   if (mypetsc) {
515:     ISDestroy(&ispetsc);
516:   }
517:   return(0);
518: }

520: /*@C
521:    AOCreateMemoryScalableIS - Creates a memory scalable application ordering using two index sets.

523:    Collective on IS

525:    Input Parameters:
526: +  isapp - index set that defines an ordering
527: -  ispetsc - index set that defines another ordering (may be NULL to use the
528:              natural ordering)

530:    Output Parameter:
531: .  aoout - the new application ordering

533:    Level: beginner

535:     Notes: The index sets isapp and ispetsc must contain the all the integers 0 to napp-1 (where napp is the length of the index sets) with no duplicates;
536:            that is there cannot be any "holes".
537:            Comparing with AOCreateBasicIS(), this routine trades memory with message communication.
538: .keywords: AO, create

540: .seealso: AOCreateMemoryScalable(),  AODestroy()
541: @*/
542: PetscErrorCode  AOCreateMemoryScalableIS(IS isapp,IS ispetsc,AO *aoout)
543: {
545:   MPI_Comm       comm;
546:   AO             ao;

549:   PetscObjectGetComm((PetscObject)isapp,&comm);
550:   AOCreate(comm,&ao);
551:   AOSetIS(ao,isapp,ispetsc);
552:   AOSetType(ao,AOMEMORYSCALABLE);
553:   AOViewFromOptions(ao,NULL,"-ao_view");
554:   *aoout = ao;
555:   return(0);
556: }