Actual source code: aomemscalable.c
petsc-3.13.6 2020-09-29
2: /*
3: The memory scalable AO application ordering routines. These store the
4: orderings on each processor for that processor's range of values
5: */
7: #include <../src/vec/is/ao/aoimpl.h>
9: typedef struct {
10: PetscInt *app_loc; /* app_loc[i] is the partner for the ith local PETSc slot */
11: PetscInt *petsc_loc; /* petsc_loc[j] is the partner for the jth local app slot */
12: PetscLayout map; /* determines the local sizes of ao */
13: } AO_MemoryScalable;
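/*
   Illustrative sketch (not part of the PETSc source): how the two local arrays are addressed.
   For a PETSc index pidx owned by this process the matching application index is
   app_loc[pidx - rstart], and for an owned application index aidx the matching PETSc index is
   petsc_loc[aidx - rstart], where rstart = map->range[rank] is the first value owned by this
   process. The helper below is hypothetical and assumes pidx lies in the local range.
*/
static PetscInt AOMemScalableOwnedPetscToApp_Sketch(const AO_MemoryScalable *aomems,PetscMPIInt rank,PetscInt pidx)
{
  return aomems->app_loc[pidx - aomems->map->range[rank]];  /* offset into this process's slice */
}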
15: /*
16: All processors ship the data to process 0 to be printed; note that this is not scalable because
17: process 0 allocates space for all the ordering entries across all the processes
18: */
19: PetscErrorCode AOView_MemoryScalable(AO ao,PetscViewer viewer)
20: {
21: PetscErrorCode ierr;
22: PetscMPIInt rank,size;
23: AO_MemoryScalable *aomems = (AO_MemoryScalable*)ao->data;
24: PetscBool iascii;
25: PetscMPIInt tag_app,tag_petsc;
26: PetscLayout map = aomems->map;
27: PetscInt *app,*app_loc,*petsc,*petsc_loc,len,i,j;
28: MPI_Status status;
31: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);
32: if (!iascii) SETERRQ1(PetscObjectComm((PetscObject)viewer),PETSC_ERR_SUP,"Viewer type %s not supported for AO MemoryScalable",((PetscObject)viewer)->type_name);
34: MPI_Comm_rank(PetscObjectComm((PetscObject)ao),&rank);
35: MPI_Comm_size(PetscObjectComm((PetscObject)ao),&size);
37: PetscObjectGetNewTag((PetscObject)ao,&tag_app);
38: PetscObjectGetNewTag((PetscObject)ao,&tag_petsc);
40: if (!rank) {
41: PetscViewerASCIIPrintf(viewer,"Number of elements in ordering %D\n",ao->N);
42: PetscViewerASCIIPrintf(viewer, "PETSc->App App->PETSc\n");
44: PetscMalloc2(map->N,&app,map->N,&petsc);
45: len = map->n;
46: /* print local AO */
47: PetscViewerASCIIPrintf(viewer,"Process [%d]\n",rank);
48: for (i=0; i<len; i++) {
49: PetscViewerASCIIPrintf(viewer,"%3D %3D %3D %3D\n",i,aomems->app_loc[i],i,aomems->petsc_loc[i]);
50: }
52: /* recv and print off-processor's AO */
53: for (i=1; i<size; i++) {
54: len = map->range[i+1] - map->range[i];
55: app_loc = app + map->range[i];
56: petsc_loc = petsc+ map->range[i];
57: MPI_Recv(app_loc,(PetscMPIInt)len,MPIU_INT,i,tag_app,PetscObjectComm((PetscObject)ao),&status);
58: MPI_Recv(petsc_loc,(PetscMPIInt)len,MPIU_INT,i,tag_petsc,PetscObjectComm((PetscObject)ao),&status);
59: PetscViewerASCIIPrintf(viewer,"Process [%D]\n",i);
60: for (j=0; j<len; j++) {
61: PetscViewerASCIIPrintf(viewer,"%3D %3D %3D %3D\n",map->range[i]+j,app_loc[j],map->range[i]+j,petsc_loc[j]);
62: }
63: }
64: PetscFree2(app,petsc);
66: } else {
67: /* send values */
68: MPI_Send((void*)aomems->app_loc,map->n,MPIU_INT,0,tag_app,PetscObjectComm((PetscObject)ao));
69: MPI_Send((void*)aomems->petsc_loc,map->n,MPIU_INT,0,tag_petsc,PetscObjectComm((PetscObject)ao));
70: }
71: PetscViewerFlush(viewer);
72: return(0);
73: }
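/*
   Example (a minimal sketch, not part of this file): create a small memory scalable AO and
   print it through the ASCII path handled by AOView_MemoryScalable() above. The sizes and
   index values are made up; each process contributes two application indices and the PETSc
   ordering is the natural one (mypetsc = NULL).
*/
static PetscErrorCode AOViewMemoryScalableExample_Sketch(MPI_Comm comm)
{
  AO             ao;
  PetscMPIInt    rank;
  PetscInt       app[2];
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr   = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  app[0] = 2*rank+1;   /* e.g. rank 0 contributes {1,0}, rank 1 contributes {3,2}, ... */
  app[1] = 2*rank;
  ierr   = AOCreateMemoryScalable(comm,2,app,NULL,&ao);CHKERRQ(ierr);
  ierr   = AOView(ao,PETSC_VIEWER_STDOUT_(comm));CHKERRQ(ierr);   /* calls the routine above */
  ierr   = AODestroy(&ao);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}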
75: PetscErrorCode AODestroy_MemoryScalable(AO ao)
76: {
77: AO_MemoryScalable *aomems = (AO_MemoryScalable*)ao->data;
78: PetscErrorCode ierr;
81: PetscFree2(aomems->app_loc,aomems->petsc_loc);
82: PetscLayoutDestroy(&aomems->map);
83: PetscFree(aomems);
84: return(0);
85: }
87: /*
88: Input Parameters:
89: + ao - the application ordering context
90: . n - the number of integers in ia[]
91: . ia - the integers; these are replaced with their mapped value
92: - maploc - app_loc or petsc_loc in struct "AO_MemoryScalable"
94: Output Parameter:
95: . ia - the mapped integers
96: */
97: PetscErrorCode AOMap_MemoryScalable_private(AO ao,PetscInt n,PetscInt *ia,const PetscInt *maploc)
98: {
99: PetscErrorCode ierr;
100: AO_MemoryScalable *aomems = (AO_MemoryScalable*)ao->data;
101: MPI_Comm comm;
102: PetscMPIInt rank,size,tag1,tag2;
103: PetscInt *owner,*start,*sizes,nsends,nreceives;
104: PetscInt nmax,count,*sindices,*rindices,i,j,idx,lastidx,*sindices2,*rindices2;
105: const PetscInt *owners = aomems->map->range;
106: MPI_Request *send_waits,*recv_waits,*send_waits2,*recv_waits2;
107: MPI_Status recv_status;
108: PetscMPIInt nindices,source,widx;
109: PetscInt *rbuf,*sbuf;
110: MPI_Status *send_status,*send_status2;
113: PetscObjectGetComm((PetscObject)ao,&comm);
114: MPI_Comm_rank(comm,&rank);
115: MPI_Comm_size(comm,&size);
117: /* first count number of contributors to each processor */
118: PetscMalloc1(size,&start);
119: PetscCalloc2(2*size,&sizes,n,&owner);
121: j = 0;
122: lastidx = -1;
123: for (i=0; i<n; i++) {
124: if (ia[i] < 0) owner[i] = -1; /* mark negative entries (which are not to be mapped) with a special negative value */
125: if (ia[i] >= ao->N) owner[i] = -2; /* mark out of range entries with special negative value */
126: else {
127: /* if indices are NOT locally sorted, need to start search at the beginning */
128: if (lastidx > (idx = ia[i])) j = 0;
129: lastidx = idx;
130: for (; j<size; j++) {
131: if (idx >= owners[j] && idx < owners[j+1]) {
132: sizes[2*j]++; /* num of indices to be sent */
133: sizes[2*j+1] = 1; /* send to proc[j] */
134: owner[i] = j;
135: break;
136: }
137: }
138: }
139: }
140: sizes[2*rank]=sizes[2*rank+1]=0; /* do not receive from self! */
141: nsends = 0;
142: for (i=0; i<size; i++) nsends += sizes[2*i+1];
144: /* inform other processors of number of messages and max length*/
145: PetscMaxSum(comm,sizes,&nmax,&nreceives);
147: /* allocate arrays */
148: PetscObjectGetNewTag((PetscObject)ao,&tag1);
149: PetscObjectGetNewTag((PetscObject)ao,&tag2);
151: PetscMalloc2(nreceives*nmax,&rindices,nreceives,&recv_waits);
152: PetscMalloc2(nsends*nmax,&rindices2,nsends,&recv_waits2);
154: PetscMalloc3(n,&sindices,nsends,&send_waits,nsends,&send_status);
155: PetscMalloc3(n,&sindices2,nreceives,&send_waits2,nreceives,&send_status2);
157: /* post 1st receives: receive other processes' requests
158: since we don't know how long each individual message is we
159: allocate the largest needed buffer for each receive. Potentially
160: this is a lot of wasted space.
161: */
162: for (i=0,count=0; i<nreceives; i++) {
163: MPI_Irecv(rindices+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,recv_waits+count++);
164: }
166: /* do 1st sends:
167: 1) start[i] gives the starting index in sindices for entries going to
168: the ith processor
169: */
170: start[0] = 0;
171: for (i=1; i<size; i++) start[i] = start[i-1] + sizes[2*i-2];
172: for (i=0; i<n; i++) {
173: j = owner[i];
174: if (j == -1) continue; /* do not remap negative entries in ia[] */
175: else if (j == -2) { /* out of range entries get mapped to -1 */
176: ia[i] = -1;
177: continue;
178: } else if (j != rank) {
179: sindices[start[j]++] = ia[i];
180: } else { /* compute my own map */
181: ia[i] = maploc[ia[i]-owners[rank]];
182: }
183: }
185: start[0] = 0;
186: for (i=1; i<size; i++) start[i] = start[i-1] + sizes[2*i-2];
187: for (i=0,count=0; i<size; i++) {
188: if (sizes[2*i+1]) {
189: /* send my request to others */
190: MPI_Isend(sindices+start[i],sizes[2*i],MPIU_INT,i,tag1,comm,send_waits+count);
191: /* post receive for the answer of my request */
192: MPI_Irecv(sindices2+start[i],sizes[2*i],MPIU_INT,i,tag2,comm,recv_waits2+count);
193: count++;
194: }
195: }
196: if (nsends != count) SETERRQ2(comm,PETSC_ERR_SUP,"nsends %d != count %d",nsends,count);
198: /* wait on 1st sends */
199: if (nsends) {
200: MPI_Waitall(nsends,send_waits,send_status);
201: }
203: /* 1st recvs: other's requests */
204: for (j=0; j< nreceives; j++) {
205: MPI_Waitany(nreceives,recv_waits,&widx,&recv_status); /* widx: index of the receive that completed */
206: MPI_Get_count(&recv_status,MPIU_INT,&nindices);
207: rbuf = rindices+nmax*widx; /* global index */
208: source = recv_status.MPI_SOURCE;
210: /* compute mapping */
211: sbuf = rbuf;
212: for (i=0; i<nindices; i++) sbuf[i] = maploc[rbuf[i]-owners[rank]];
214: /* send mapping back to the sender */
215: MPI_Isend(sbuf,nindices,MPIU_INT,source,tag2,comm,send_waits2+widx);
216: }
218: /* wait on 2nd sends */
219: if (nreceives) {
220: MPI_Waitall(nreceives,send_waits2,send_status2);
221: }
223: /* 2nd recvs: for the answer of my request */
224: for (j=0; j< nsends; j++) {
225: MPI_Waitany(nsends,recv_waits2,&widx,&recv_status);
226: MPI_Get_count(&recv_status,MPIU_INT,&nindices);
227: source = recv_status.MPI_SOURCE;
228: /* pack output ia[] */
229: rbuf = sindices2+start[source];
230: count = 0;
231: for (i=0; i<n; i++) {
232: if (source == owner[i]) ia[i] = rbuf[count++];
233: }
234: }
236: /* free arrays */
237: PetscFree(start);
238: PetscFree2(sizes,owner);
239: PetscFree2(rindices,recv_waits);
240: PetscFree2(rindices2,recv_waits2);
241: PetscFree3(sindices,send_waits,send_status);
242: PetscFree3(sindices2,send_waits2,send_status2);
243: return(0);
244: }
246: PetscErrorCode AOPetscToApplication_MemoryScalable(AO ao,PetscInt n,PetscInt *ia)
247: {
248: PetscErrorCode ierr;
249: AO_MemoryScalable *aomems = (AO_MemoryScalable*)ao->data;
250: PetscInt *app_loc = aomems->app_loc;
253: AOMap_MemoryScalable_private(ao,n,ia,app_loc);
254: return(0);
255: }
257: PetscErrorCode AOApplicationToPetsc_MemoryScalable(AO ao,PetscInt n,PetscInt *ia)
258: {
259: PetscErrorCode ierr;
260: AO_MemoryScalable *aomems = (AO_MemoryScalable*)ao->data;
261: PetscInt *petsc_loc = aomems->petsc_loc;
264: AOMap_MemoryScalable_private(ao,n,ia,petsc_loc);
265: return(0);
266: }
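/*
   Example (illustrative sketch): the index conventions implemented by
   AOMap_MemoryScalable_private() as seen through the public interface. Negative entries in
   ia[] are passed through unchanged and entries >= ao->N come back as -1; the values below
   are made up and 100000 is assumed to be out of range for the ordering at hand.
*/
static PetscErrorCode AOMapConventionsExample_Sketch(AO ao)
{
  PetscInt       ia[3];
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ia[0] = 0;       /* valid application index: replaced by its PETSc partner */
  ia[1] = -3;      /* negative: not remapped, returned unchanged             */
  ia[2] = 100000;  /* assumed >= ao->N: returned as -1                       */
  ierr  = AOApplicationToPetsc(ao,3,ia);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}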
268: static struct _AOOps AOOps_MemoryScalable = {
269: AOView_MemoryScalable,
270: AODestroy_MemoryScalable,
271: AOPetscToApplication_MemoryScalable,
272: AOApplicationToPetsc_MemoryScalable,
273: 0,
274: 0,
275: 0,
276: 0
277: };
279: PetscErrorCode AOCreateMemoryScalable_private(MPI_Comm comm,PetscInt napp,const PetscInt from_array[],const PetscInt to_array[],AO ao, PetscInt *aomap_loc)
280: {
281: PetscErrorCode ierr;
282: AO_MemoryScalable *aomems = (AO_MemoryScalable*)ao->data;
283: PetscLayout map = aomems->map;
284: PetscInt n_local = map->n,i,j;
285: PetscMPIInt rank,size,tag;
286: PetscInt *owner,*start,*sizes,nsends,nreceives;
287: PetscInt nmax,count,*sindices,*rindices,idx,lastidx;
288: PetscInt *owners = aomems->map->range;
289: MPI_Request *send_waits,*recv_waits;
290: MPI_Status recv_status;
291: PetscMPIInt nindices,widx;
292: PetscInt *rbuf;
293: PetscInt n=napp,ip,ia;
294: MPI_Status *send_status;
297: PetscArrayzero(aomap_loc,n_local);
299: MPI_Comm_rank(comm,&rank);
300: MPI_Comm_size(comm,&size);
302: /* first count number of contributors (of from_array[]) to each processor */
303: PetscCalloc1(2*size,&sizes);
304: PetscMalloc1(n,&owner);
306: j = 0;
307: lastidx = -1;
308: for (i=0; i<n; i++) {
309: /* if indices are NOT locally sorted, need to start search at the beginning */
310: if (lastidx > (idx = from_array[i])) j = 0;
311: lastidx = idx;
312: for (; j<size; j++) {
313: if (idx >= owners[j] && idx < owners[j+1]) {
314: sizes[2*j] += 2; /* num of indices to be sent - in pairs (ip,ia) */
315: sizes[2*j+1] = 1; /* send to proc[j] */
316: owner[i] = j;
317: break;
318: }
319: }
320: }
321: sizes[2*rank]=sizes[2*rank+1]=0; /* do not receive from self! */
322: nsends = 0;
323: for (i=0; i<size; i++) nsends += sizes[2*i+1];
325: /* inform other processors of number of messages and max length*/
326: PetscMaxSum(comm,sizes,&nmax,&nreceives);
328: /* allocate arrays */
329: PetscObjectGetNewTag((PetscObject)ao,&tag);
330: PetscMalloc2(nreceives*nmax,&rindices,nreceives,&recv_waits);
331: PetscMalloc3(2*n,&sindices,nsends,&send_waits,nsends,&send_status);
332: PetscMalloc1(size,&start);
334: /* post receives: */
335: for (i=0; i<nreceives; i++) {
336: MPI_Irecv(rindices+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
337: }
339: /* do sends:
340: 1) start[i] gives the starting index in sindices for entries going to
341: the ith processor
342: */
343: start[0] = 0;
344: for (i=1; i<size; i++) start[i] = start[i-1] + sizes[2*i-2];
345: for (i=0; i<n; i++) {
346: j = owner[i];
347: if (j != rank) {
348: ip = from_array[i];
349: ia = to_array[i];
350: sindices[start[j]++] = ip;
351: sindices[start[j]++] = ia;
352: } else { /* compute my own map */
353: ip = from_array[i] - owners[rank];
354: ia = to_array[i];
355: aomap_loc[ip] = ia;
356: }
357: }
359: start[0] = 0;
360: for (i=1; i<size; i++) start[i] = start[i-1] + sizes[2*i-2];
361: for (i=0,count=0; i<size; i++) {
362: if (sizes[2*i+1]) {
363: MPI_Isend(sindices+start[i],sizes[2*i],MPIU_INT,i,tag,comm,send_waits+count);
364: count++;
365: }
366: }
367: if (nsends != count) SETERRQ2(comm,PETSC_ERR_SUP,"nsends %d != count %d",nsends,count);
369: /* wait on sends */
370: if (nsends) {
371: MPI_Waitall(nsends,send_waits,send_status);
372: }
374: /* recvs */
375: count=0;
376: for (j= nreceives; j>0; j--) {
377: MPI_Waitany(nreceives,recv_waits,&widx,&recv_status);
378: MPI_Get_count(&recv_status,MPIU_INT,&nindices);
379: rbuf = rindices+nmax*widx; /* global index */
381: /* compute local mapping */
382: for (i=0; i<nindices; i+=2) { /* pack aomap_loc */
383: ip = rbuf[i] - owners[rank]; /* local index */
384: ia = rbuf[i+1];
385: aomap_loc[ip] = ia;
386: }
387: count++;
388: }
390: PetscFree(start);
391: PetscFree3(sindices,send_waits,send_status);
392: PetscFree2(rindices,recv_waits);
393: PetscFree(owner);
394: PetscFree(sizes);
395: return(0);
396: }
398: PETSC_EXTERN PetscErrorCode AOCreate_MemoryScalable(AO ao)
399: {
400: PetscErrorCode ierr;
401: IS isapp=ao->isapp,ispetsc=ao->ispetsc;
402: const PetscInt *mypetsc,*myapp;
403: PetscInt napp,n_local,N,i,start,*petsc,*lens,*disp;
404: MPI_Comm comm;
405: AO_MemoryScalable *aomems;
406: PetscLayout map;
407: PetscMPIInt size,rank;
410: if (!isapp) SETERRQ(PetscObjectComm((PetscObject)ao),PETSC_ERR_ARG_WRONGSTATE,"AOSetIS() must be called before AOSetType()");
411: /* create special struct aomems */
412: PetscNewLog(ao,&aomems);
413: ao->data = (void*) aomems;
414: PetscMemcpy(ao->ops,&AOOps_MemoryScalable,sizeof(struct _AOOps));
415: PetscObjectChangeTypeName((PetscObject)ao,AOMEMORYSCALABLE);
417: /* transmit all local lengths of isapp to all processors */
418: PetscObjectGetComm((PetscObject)isapp,&comm);
419: MPI_Comm_size(comm, &size);
420: MPI_Comm_rank(comm, &rank);
421: PetscMalloc2(size,&lens,size,&disp);
422: ISGetLocalSize(isapp,&napp);
423: MPI_Allgather(&napp, 1, MPIU_INT, lens, 1, MPIU_INT, comm);
425: N = 0;
426: for (i = 0; i < size; i++) {
427: disp[i] = N;
428: N += lens[i];
429: }
431: /* If ispetsc is 0 then use "natural" numbering */
432: if (napp) {
433: if (!ispetsc) {
434: start = disp[rank];
435: PetscMalloc1(napp+1, &petsc);
436: for (i=0; i<napp; i++) petsc[i] = start + i;
437: } else {
438: ISGetIndices(ispetsc,&mypetsc);
439: petsc = (PetscInt*)mypetsc;
440: }
441: } else {
442: petsc = NULL;
443: }
445: /* create a map with global size N - used to determine the local sizes of ao - shall we use local napp instead of N? */
446: PetscLayoutCreate(comm,&map);
447: map->bs = 1;
448: map->N = N;
449: PetscLayoutSetUp(map);
451: ao->N = N;
452: ao->n = map->n;
453: aomems->map = map;
455: /* create distributed indices app_loc: petsc->app and petsc_loc: app->petsc */
456: n_local = map->n;
457: PetscCalloc2(n_local, &aomems->app_loc,n_local,&aomems->petsc_loc);
458: PetscLogObjectMemory((PetscObject)ao,2*n_local*sizeof(PetscInt));
459: ISGetIndices(isapp,&myapp);
461: AOCreateMemoryScalable_private(comm,napp,petsc,myapp,ao,aomems->app_loc);
462: AOCreateMemoryScalable_private(comm,napp,myapp,petsc,ao,aomems->petsc_loc);
464: ISRestoreIndices(isapp,&myapp);
465: if (napp) {
466: if (ispetsc) {
467: ISRestoreIndices(ispetsc,&mypetsc);
468: } else {
469: PetscFree(petsc);
470: }
471: }
472: PetscFree2(lens,disp);
473: return(0);
474: }
476: /*@C
477: AOCreateMemoryScalable - Creates a memory scalable application ordering using two integer arrays.
479: Collective
481: Input Parameters:
482: + comm - MPI communicator that is to share AO
483: . napp - size of integer arrays
484: . myapp - integer array that defines an ordering
485: - mypetsc - integer array that defines another ordering (may be NULL to
486: indicate the natural ordering, that is 0,1,2,3,...)
488: Output Parameter:
489: . aoout - the new application ordering
491: Level: beginner
493: Notes:
494: The arrays myapp and mypetsc must contain all the integers 0 to napp-1 with no duplicates; that is, there cannot be any "holes"
495: in the indices. Use AOCreateMapping() or AOCreateMappingIS() if you wish to have "holes" in the indices.
496: Compared with AOCreateBasic(), this routine trades extra message communication for lower memory usage.
498: .seealso: AOCreateMemoryScalableIS(), AODestroy(), AOPetscToApplication(), AOApplicationToPetsc()
499: @*/
500: PetscErrorCode AOCreateMemoryScalable(MPI_Comm comm,PetscInt napp,const PetscInt myapp[],const PetscInt mypetsc[],AO *aoout)
501: {
503: IS isapp,ispetsc;
504: const PetscInt *app=myapp,*petsc=mypetsc;
507: ISCreateGeneral(comm,napp,app,PETSC_USE_POINTER,&isapp);
508: if (mypetsc) {
509: ISCreateGeneral(comm,napp,petsc,PETSC_USE_POINTER,&ispetsc);
510: } else {
511: ispetsc = NULL;
512: }
513: AOCreateMemoryScalableIS(isapp,ispetsc,aoout);
514: ISDestroy(&isapp);
515: if (mypetsc) {
516: ISDestroy(&ispetsc);
517: }
518: return(0);
519: }
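/*
   Sample use (a sketch under stated assumptions, not part of the PETSc source): comm is
   assumed to contain exactly two processes, each contributing two application indices so that
   together they form a permutation of 0..3; mypetsc is NULL, so the PETSc ordering is the
   natural one 0,1,2,3.
*/
static PetscErrorCode AOCreateMemoryScalableExample_Sketch(MPI_Comm comm)
{
  AO             ao;
  PetscMPIInt    rank;
  PetscInt       myapp[2],idx[1];
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr     = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  myapp[0] = 3 - 2*rank;   /* rank 0 contributes {3,2}, rank 1 contributes {1,0} */
  myapp[1] = 2 - 2*rank;
  ierr     = AOCreateMemoryScalable(comm,2,myapp,NULL,&ao);CHKERRQ(ierr);
  idx[0]   = 3;                                             /* application index 3 ...   */
  ierr     = AOApplicationToPetsc(ao,1,idx);CHKERRQ(ierr);  /* ... maps to PETSc index 0 */
  ierr     = AODestroy(&ao);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}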
521: /*@C
522: AOCreateMemoryScalableIS - Creates a memory scalable application ordering using two index sets.
524: Collective on IS
526: Input Parameters:
527: + isapp - index set that defines an ordering
528: - ispetsc - index set that defines another ordering (may be NULL to use the
529: natural ordering)
531: Output Parameter:
532: . aoout - the new application ordering
534: Level: beginner
536: Notes:
537: The index sets isapp and ispetsc must contain all the integers 0 to napp-1 (where napp is the length of the index sets) with no duplicates;
538: that is, there cannot be any "holes".
539: Compared with AOCreateBasicIS(), this routine trades extra message communication for lower memory usage.
540: .seealso: AOCreateMemoryScalable(), AODestroy()
541: @*/
542: PetscErrorCode AOCreateMemoryScalableIS(IS isapp,IS ispetsc,AO *aoout)
543: {
545: MPI_Comm comm;
546: AO ao;
549: PetscObjectGetComm((PetscObject)isapp,&comm);
550: AOCreate(comm,&ao);
551: AOSetIS(ao,isapp,ispetsc);
552: AOSetType(ao,AOMEMORYSCALABLE);
553: AOViewFromOptions(ao,NULL,"-ao_view");
554: *aoout = ao;
555: return(0);
556: }
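/*
   Sample use (a sketch, not part of the PETSc source): build the ordering from an index set
   instead of a raw array. The entries below are illustrative; ispetsc is NULL, so the natural
   PETSc ordering is used.
*/
static PetscErrorCode AOCreateMemoryScalableISExample_Sketch(MPI_Comm comm)
{
  AO             ao;
  IS             isapp;
  PetscMPIInt    rank;
  PetscInt       app[2];
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr   = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  app[0] = 2*rank;     /* each process contributes two consecutive indices, so the  */
  app[1] = 2*rank+1;   /* application ordering happens to equal the natural one     */
  ierr   = ISCreateGeneral(comm,2,app,PETSC_COPY_VALUES,&isapp);CHKERRQ(ierr);
  ierr   = AOCreateMemoryScalableIS(isapp,NULL,&ao);CHKERRQ(ierr);
  ierr   = ISDestroy(&isapp);CHKERRQ(ierr);
  ierr   = AODestroy(&ao);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}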