Actual source code: aomemscalable.c
petsc-3.9.4 2018-09-11
2: /*
3: The memory scalable AO application ordering routines. These store the
4: local orderings on each processor.
5: */
7: #include <../src/vec/is/ao/aoimpl.h>
9: typedef struct {
10: PetscInt *app_loc; /* app_loc[i] is the partner for the ith local PETSc slot */
11: PetscInt *petsc_loc; /* petsc_loc[j] is the partner for the jth local app slot */
12: PetscLayout map; /* determines the local sizes of ao */
13: } AO_MemoryScalable;
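As a concrete illustration of this layout (an editorial sketch, not part of the PETSc source): take N = 4 split over two ranks, so map->range = {0,2,4}, and the pairing petsc {0,1,2,3} <-> app {3,1,0,2}. The struct then holds

    /* rank 0 (owns global slots 0,1):  app_loc   = {3,1}   app partners of petsc indices 0,1
                                        petsc_loc = {2,1}   petsc partners of app indices 0,1
       rank 1 (owns global slots 2,3):  app_loc   = {0,2}   app partners of petsc indices 2,3
                                        petsc_loc = {3,0}   petsc partners of app indices 2,3 */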
15: /*
16: The local orderings are distributed; process 0 receives each process's portion and prints it
17: */
18: PetscErrorCode AOView_MemoryScalable(AO ao,PetscViewer viewer)
19: {
20: PetscErrorCode ierr;
21: PetscMPIInt rank,size;
22: AO_MemoryScalable *aomems = (AO_MemoryScalable*)ao->data;
23: PetscBool iascii;
24: PetscMPIInt tag_app,tag_petsc;
25: PetscLayout map = aomems->map;
26: PetscInt *app,*app_loc,*petsc,*petsc_loc,len,i,j;
27: MPI_Status status;
30: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);
31: if (!iascii) SETERRQ1(PetscObjectComm((PetscObject)viewer),PETSC_ERR_SUP,"Viewer type %s not supported for AO MemoryScalable",((PetscObject)viewer)->type_name);
33: MPI_Comm_rank(PetscObjectComm((PetscObject)ao),&rank);
34: MPI_Comm_size(PetscObjectComm((PetscObject)ao),&size);
36: PetscObjectGetNewTag((PetscObject)ao,&tag_app);
37: PetscObjectGetNewTag((PetscObject)ao,&tag_petsc);
39: if (!rank) {
40: PetscViewerASCIIPrintf(viewer,"Number of elements in ordering %D\n",ao->N);
41: PetscViewerASCIIPrintf(viewer, "PETSc->App App->PETSc\n");
43: PetscMalloc2(map->N,&app,map->N,&petsc);
44: len = map->n;
45: /* print local AO */
46: PetscViewerASCIIPrintf(viewer,"Process [%D]\n",rank);
47: for (i=0; i<len; i++) {
48: PetscViewerASCIIPrintf(viewer,"%3D %3D %3D %3D\n",i,aomems->app_loc[i],i,aomems->petsc_loc[i]);
49: }
51: /* receive and print the other processes' portions of the AO */
52: for (i=1; i<size; i++) {
53: len = map->range[i+1] - map->range[i];
54: app_loc = app + map->range[i];
55: petsc_loc = petsc+ map->range[i];
56: MPI_Recv(app_loc,(PetscMPIInt)len,MPIU_INT,i,tag_app,PetscObjectComm((PetscObject)ao),&status);
57: MPI_Recv(petsc_loc,(PetscMPIInt)len,MPIU_INT,i,tag_petsc,PetscObjectComm((PetscObject)ao),&status);
58: PetscViewerASCIIPrintf(viewer,"Process [%D]\n",i);
59: for (j=0; j<len; j++) {
60: PetscViewerASCIIPrintf(viewer,"%3D %3D %3D %3D\n",map->range[i]+j,app_loc[j],map->range[i]+j,petsc_loc[j]);
61: }
62: }
63: PetscFree2(app,petsc);
65: } else {
66: /* send values */
67: MPI_Send((void*)aomems->app_loc,map->n,MPIU_INT,0,tag_app,PetscObjectComm((PetscObject)ao));
68: MPI_Send((void*)aomems->petsc_loc,map->n,MPIU_INT,0,tag_petsc,PetscObjectComm((PetscObject)ao));
69: }
70: PetscViewerFlush(viewer);
71: return(0);
72: }
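A hedged usage fragment (editorial; it assumes ao is an existing memory-scalable AO and ierr is a declared PetscErrorCode): this viewer is reached through the generic interface, for example

    ierr = AOView(ao,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);   /* or run with -ao_view, see AOViewFromOptions() at the bottom of this file */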
74: PetscErrorCode AODestroy_MemoryScalable(AO ao)
75: {
76: AO_MemoryScalable *aomems = (AO_MemoryScalable*)ao->data;
77: PetscErrorCode ierr;
80: PetscFree2(aomems->app_loc,aomems->petsc_loc);
81: PetscLayoutDestroy(&aomems->map);
82: PetscFree(aomems);
83: return(0);
84: }
86: /*
87: Input Parameters:
88: + ao - the application ordering context
89: . n - the number of integers in ia[]
90: . ia - the integers; these are replaced with their mapped value
91: - maploc - app_loc or petsc_loc in struct "AO_MemoryScalable"
93: Output Parameter:
94: . ia - the mapped integers
95: */
96: PetscErrorCode AOMap_MemoryScalable_private(AO ao,PetscInt n,PetscInt *ia,PetscInt *maploc)
97: {
98: PetscErrorCode ierr;
99: AO_MemoryScalable *aomems = (AO_MemoryScalable*)ao->data;
100: MPI_Comm comm;
101: PetscMPIInt rank,size,tag1,tag2;
102: PetscInt *owner,*start,*sizes,nsends,nreceives;
103: PetscInt nmax,count,*sindices,*rindices,i,j,idx,lastidx,*sindices2,*rindices2;
104: PetscInt *owners = aomems->map->range;
105: MPI_Request *send_waits,*recv_waits,*send_waits2,*recv_waits2;
106: MPI_Status recv_status;
107: PetscMPIInt nindices,source,widx;
108: PetscInt *rbuf,*sbuf;
109: MPI_Status *send_status,*send_status2;
112: PetscObjectGetComm((PetscObject)ao,&comm);
113: MPI_Comm_rank(comm,&rank);
114: MPI_Comm_size(comm,&size);
116: /* first count number of contributors to each processor */
117: PetscMalloc2(2*size,&sizes,size,&start);
118: PetscMemzero(sizes,2*size*sizeof(PetscInt));
119: PetscCalloc1(n,&owner);
121: j = 0;
122: lastidx = -1;
123: for (i=0; i<n; i++) {
124: /* if indices are NOT locally sorted, need to start search at the beginning */
125: if (lastidx > (idx = ia[i])) j = 0;
126: lastidx = idx;
127: for (; j<size; j++) {
128: if (idx >= owners[j] && idx < owners[j+1]) {
129: sizes[2*j]++; /* num of indices to be sent */
130: sizes[2*j+1] = 1; /* send to proc[j] */
131: owner[i] = j;
132: break;
133: }
134: }
135: }
136: sizes[2*rank]=sizes[2*rank+1]=0; /* do not receive from self! */
137: nsends = 0;
138: for (i=0; i<size; i++) nsends += sizes[2*i+1];
140: /* inform other processors of number of messages and max length */
141: PetscMaxSum(comm,sizes,&nmax,&nreceives);
143: /* allocate arrays */
144: PetscObjectGetNewTag((PetscObject)ao,&tag1);
145: PetscObjectGetNewTag((PetscObject)ao,&tag2);
147: PetscMalloc2(nreceives*nmax,&rindices,nreceives,&recv_waits);
148: PetscMalloc2(nsends*nmax,&rindices2,nsends,&recv_waits2);
150: PetscMalloc3(n,&sindices,nsends,&send_waits,nsends,&send_status);
151: PetscMalloc3(n,&sindices2,nreceives,&send_waits2,nreceives,&send_status2);
153: /* post 1st receives: receive others' requests
154: since we don't know how long each individual message is, we
155: allocate the largest needed buffer for each receive. Potentially
156: this is a lot of wasted space.
157: */
158: for (i=0,count=0; i<nreceives; i++) {
159: MPI_Irecv(rindices+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,recv_waits+count++);
160: }
162: /* do 1st sends:
163: start[i] gives the starting index in sindices[] for the entries going to
164: the ith processor
165: */
166: start[0] = 0;
167: for (i=1; i<size; i++) start[i] = start[i-1] + sizes[2*i-2];
168: for (i=0; i<n; i++) {
169: j = owner[i];
170: if (j != rank) {
171: sindices[start[j]++] = ia[i];
172: } else { /* compute my own map */
173: if (ia[i] >= owners[rank] && ia[i] < owners[rank+1]) {
174: ia[i] = maploc[ia[i]-owners[rank]];
175: } else {
176: ia[i] = -1; /* ia[i] is not in the range [0,N-1], so map it to -1 */
177: }
178: }
179: }
181: start[0] = 0;
182: for (i=1; i<size; i++) start[i] = start[i-1] + sizes[2*i-2];
183: for (i=0,count=0; i<size; i++) {
184: if (sizes[2*i+1]) {
185: /* send my request to others */
186: MPI_Isend(sindices+start[i],sizes[2*i],MPIU_INT,i,tag1,comm,send_waits+count);
188: /* post the receive for the answer to my request */
188: MPI_Irecv(sindices2+start[i],sizes[2*i],MPIU_INT,i,tag2,comm,recv_waits2+count);
189: count++;
190: }
191: }
192: if (nsends != count) SETERRQ2(comm,PETSC_ERR_SUP,"nsends %d != count %d",nsends,count);
194: /* wait on 1st sends */
195: if (nsends) {
196: MPI_Waitall(nsends,send_waits,send_status);
197: }
199: /* 1st recvs: others' requests */
200: for (j=0; j< nreceives; j++) {
201: MPI_Waitany(nreceives,recv_waits,&widx,&recv_status); /* widx: index of the receive that completed */
202: MPI_Get_count(&recv_status,MPIU_INT,&nindices);
203: rbuf = rindices+nmax*widx; /* global index */
204: source = recv_status.MPI_SOURCE;
206: /* compute mapping */
207: sbuf = rbuf;
208: for (i=0; i<nindices; i++) sbuf[i] = maploc[rbuf[i]-owners[rank]];
210: /* send mapping back to the sender */
211: MPI_Isend(sbuf,nindices,MPIU_INT,source,tag2,comm,send_waits2+widx);
212: }
214: /* wait on 2nd sends */
215: if (nreceives) {
216: MPI_Waitall(nreceives,send_waits2,send_status2);
217: }
219: /* 2nd recvs: the answers to my requests */
220: for (j=0; j< nsends; j++) {
221: MPI_Waitany(nsends,recv_waits2,&widx,&recv_status);
222: MPI_Get_count(&recv_status,MPIU_INT,&nindices);
223: source = recv_status.MPI_SOURCE;
224: /* pack output ia[] */
225: rbuf = sindices2+start[source];
226: count = 0;
227: for (i=0; i<n; i++) {
228: if (source == owner[i]) ia[i] = rbuf[count++];
229: }
230: }
232: /* free arrays */
233: PetscFree2(sizes,start);
234: PetscFree(owner);
235: PetscFree2(rindices,recv_waits);
236: PetscFree2(rindices2,recv_waits2);
237: PetscFree3(sindices,send_waits,send_status);
238: PetscFree3(sindices2,send_waits2,send_status2);
239: return(0);
240: }
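A worked trace of this routine, continuing the two-rank illustration near the top of the file (editorial sketch; the call is collective, so rank 1 makes the matching call with its own ia):

    /* rank 0 calls with maploc = app_loc and ia = {0,3}
         index 0: owned locally            -> ia[0] = app_loc[0] = 3
         index 3: owned by rank 1, sent with tag1;
                  rank 1 replies app_loc[3-owners[1]] = app_loc[1] = 2 with tag2
       result on rank 0: ia = {3,2}, i.e. petsc -> app under the pairing above */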
242: PetscErrorCode AOPetscToApplication_MemoryScalable(AO ao,PetscInt n,PetscInt *ia)
243: {
244: PetscErrorCode ierr;
245: AO_MemoryScalable *aomems = (AO_MemoryScalable*)ao->data;
246: PetscInt *app_loc = aomems->app_loc;
249: AOMap_MemoryScalable_private(ao,n,ia,app_loc);
250: return(0);
251: }
253: PetscErrorCode AOApplicationToPetsc_MemoryScalable(AO ao,PetscInt n,PetscInt *ia)
254: {
255: PetscErrorCode ierr;
256: AO_MemoryScalable *aomems = (AO_MemoryScalable*)ao->data;
257: PetscInt *petsc_loc = aomems->petsc_loc;
260: AOMap_MemoryScalable_private(ao,n,ia,petsc_loc);
261: return(0);
262: }
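These two wrappers are reached through the public AO interface. A minimal hedged fragment (editorial; it assumes ao is an AOMEMORYSCALABLE ordering with at least two global entries, created as in the examples further below; both calls must be made on every rank of the AO's communicator since the lookup communicates):

    PetscErrorCode ierr;
    PetscInt       idx[2] = {0,1};                        /* two valid global indices     */
    ierr = AOPetscToApplication(ao,2,idx);CHKERRQ(ierr);  /* in place: petsc -> app       */
    ierr = AOApplicationToPetsc(ao,2,idx);CHKERRQ(ierr);  /* and back again: app -> petsc */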
264: static struct _AOOps AOOps_MemoryScalable = {
265: AOView_MemoryScalable,
266: AODestroy_MemoryScalable,
267: AOPetscToApplication_MemoryScalable,
268: AOApplicationToPetsc_MemoryScalable,
269: 0,
270: 0,
271: 0,
272: 0
273: };
275: PetscErrorCode AOCreateMemoryScalable_private(MPI_Comm comm,PetscInt napp,const PetscInt from_array[],const PetscInt to_array[],AO ao, PetscInt *aomap_loc)
276: {
277: PetscErrorCode ierr;
278: AO_MemoryScalable *aomems = (AO_MemoryScalable*)ao->data;
279: PetscLayout map = aomems->map;
280: PetscInt n_local = map->n,i,j;
281: PetscMPIInt rank,size,tag;
282: PetscInt *owner,*start,*sizes,nsends,nreceives;
283: PetscInt nmax,count,*sindices,*rindices,idx,lastidx;
284: PetscInt *owners = aomems->map->range;
285: MPI_Request *send_waits,*recv_waits;
286: MPI_Status recv_status;
287: PetscMPIInt nindices,widx;
288: PetscInt *rbuf;
289: PetscInt n=napp,ip,ia;
290: MPI_Status *send_status;
293: PetscMemzero(aomap_loc,n_local*sizeof(PetscInt));
295: MPI_Comm_rank(comm,&rank);
296: MPI_Comm_size(comm,&size);
298: /* first count number of contributors (of from_array[]) to each processor */
299: PetscCalloc1(2*size,&sizes);
300: PetscMalloc1(n,&owner);
302: j = 0;
303: lastidx = -1;
304: for (i=0; i<n; i++) {
305: /* if indices are NOT locally sorted, need to start search at the beginning */
306: if (lastidx > (idx = from_array[i])) j = 0;
307: lastidx = idx;
308: for (; j<size; j++) {
309: if (idx >= owners[j] && idx < owners[j+1]) {
310: sizes[2*j] += 2; /* num of indices to be sent - in pairs (ip,ia) */
311: sizes[2*j+1] = 1; /* send to proc[j] */
312: owner[i] = j;
313: break;
314: }
315: }
316: }
317: sizes[2*rank]=sizes[2*rank+1]=0; /* do not receive from self! */
318: nsends = 0;
319: for (i=0; i<size; i++) nsends += sizes[2*i+1];
321: /* inform other processors of number of messages and max length */
322: PetscMaxSum(comm,sizes,&nmax,&nreceives);
324: /* allocate arrays */
325: PetscObjectGetNewTag((PetscObject)ao,&tag);
326: PetscMalloc2(nreceives*nmax,&rindices,nreceives,&recv_waits);
327: PetscMalloc3(2*n,&sindices,nsends,&send_waits,nsends,&send_status);
328: PetscMalloc1(size,&start);
330: /* post receives: */
331: for (i=0; i<nreceives; i++) {
332: MPI_Irecv(rindices+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
333: }
335: /* do sends:
336: start[i] gives the starting index in sindices[] for the entries going to
337: the ith processor
338: */
339: start[0] = 0;
340: for (i=1; i<size; i++) start[i] = start[i-1] + sizes[2*i-2];
341: for (i=0; i<n; i++) {
342: j = owner[i];
343: if (j != rank) {
344: ip = from_array[i];
345: ia = to_array[i];
346: sindices[start[j]++] = ip;
347: sindices[start[j]++] = ia;
348: } else { /* compute my own map */
349: ip = from_array[i] - owners[rank];
350: ia = to_array[i];
351: aomap_loc[ip] = ia;
352: }
353: }
355: start[0] = 0;
356: for (i=1; i<size; i++) start[i] = start[i-1] + sizes[2*i-2];
357: for (i=0,count=0; i<size; i++) {
358: if (sizes[2*i+1]) {
359: MPI_Isend(sindices+start[i],sizes[2*i],MPIU_INT,i,tag,comm,send_waits+count);
360: count++;
361: }
362: }
363: if (nsends != count) SETERRQ2(comm,PETSC_ERR_SUP,"nsends %d != count %d",nsends,count);
365: /* wait on sends */
366: if (nsends) {
367: MPI_Waitall(nsends,send_waits,send_status);
368: }
370: /* recvs */
371: count=0;
372: for (j= nreceives; j>0; j--) {
373: MPI_Waitany(nreceives,recv_waits,&widx,&recv_status);
374: MPI_Get_count(&recv_status,MPIU_INT,&nindices);
375: rbuf = rindices+nmax*widx; /* global index */
377: /* compute local mapping */
378: for (i=0; i<nindices; i+=2) { /* pack aomap_loc */
379: ip = rbuf[i] - owners[rank]; /* local index */
380: ia = rbuf[i+1];
381: aomap_loc[ip] = ia;
382: }
383: count++;
384: }
386: PetscFree(start);
387: PetscFree3(sindices,send_waits,send_status);
388: PetscFree2(rindices,recv_waits);
389: PetscFree(owner);
390: PetscFree(sizes);
391: return(0);
392: }
394: PETSC_EXTERN PetscErrorCode AOCreate_MemoryScalable(AO ao)
395: {
396: PetscErrorCode ierr;
397: IS isapp=ao->isapp,ispetsc=ao->ispetsc;
398: const PetscInt *mypetsc,*myapp;
399: PetscInt napp,n_local,N,i,start,*petsc,*lens,*disp;
400: MPI_Comm comm;
401: AO_MemoryScalable *aomems;
402: PetscLayout map;
403: PetscMPIInt size,rank;
406: if (!isapp) SETERRQ(PetscObjectComm((PetscObject)ao),PETSC_ERR_ARG_WRONGSTATE,"AOSetIS() must be called before AOSetType()");
407: /* create special struct aomems */
408: PetscNewLog(ao,&aomems);
409: ao->data = (void*) aomems;
410: PetscMemcpy(ao->ops,&AOOps_MemoryScalable,sizeof(struct _AOOps));
411: PetscObjectChangeTypeName((PetscObject)ao,AOMEMORYSCALABLE);
413: /* transmit all local lengths of isapp to all processors */
414: PetscObjectGetComm((PetscObject)isapp,&comm);
415: MPI_Comm_size(comm, &size);
416: MPI_Comm_rank(comm, &rank);
417: PetscMalloc2(size,&lens,size,&disp);
418: ISGetLocalSize(isapp,&napp);
419: MPI_Allgather(&napp, 1, MPIU_INT, lens, 1, MPIU_INT, comm);
421: N = 0;
422: for (i = 0; i < size; i++) {
423: disp[i] = N;
424: N += lens[i];
425: }
427: /* If ispetsc is NULL then use the "natural" numbering 0,1,2,... */
428: if (napp) {
429: if (!ispetsc) {
430: start = disp[rank];
431: PetscMalloc1(napp+1, &petsc);
432: for (i=0; i<napp; i++) petsc[i] = start + i;
433: } else {
434: ISGetIndices(ispetsc,&mypetsc);
435: petsc = (PetscInt*)mypetsc;
436: }
437: } else {
438: petsc = NULL;
439: }
441: /* create a map with global size N - used to determine the local sizes of ao - shall we use local napp instead of N? */
442: PetscLayoutCreate(comm,&map);
443: map->bs = 1;
444: map->N = N;
445: PetscLayoutSetUp(map);
447: ao->N = N;
448: ao->n = map->n;
449: aomems->map = map;
451: /* create distributed indices app_loc: petsc->app and petsc_loc: app->petsc */
452: n_local = map->n;
453: PetscMalloc2(n_local, &aomems->app_loc,n_local,&aomems->petsc_loc);
454: PetscLogObjectMemory((PetscObject)ao,2*n_local*sizeof(PetscInt));
455: PetscMemzero(aomems->app_loc,n_local*sizeof(PetscInt));
456: PetscMemzero(aomems->petsc_loc,n_local*sizeof(PetscInt));
457: ISGetIndices(isapp,&myapp);
459: AOCreateMemoryScalable_private(comm,napp,petsc,myapp,ao,aomems->app_loc);
460: AOCreateMemoryScalable_private(comm,napp,myapp,petsc,ao,aomems->petsc_loc);
462: ISRestoreIndices(isapp,&myapp);
463: if (napp) {
464: if (ispetsc) {
465: ISRestoreIndices(ispetsc,&mypetsc);
466: } else {
467: PetscFree(petsc);
468: }
469: }
470: PetscFree2(lens,disp);
471: return(0);
472: }
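This constructor is normally reached through AOSetType(). A hedged fragment equivalent to what AOCreateMemoryScalableIS() does at the bottom of this file (editorial; comm, ao, isapp and ierr are assumed to be declared as in the surrounding routines):

    ierr = AOCreate(comm,&ao);CHKERRQ(ierr);
    ierr = AOSetIS(ao,isapp,NULL);CHKERRQ(ierr);          /* NULL -> natural PETSc ordering 0,1,2,... */
    ierr = AOSetType(ao,AOMEMORYSCALABLE);CHKERRQ(ierr);  /* dispatches to AOCreate_MemoryScalable()  */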
474: /*@C
475: AOCreateMemoryScalable - Creates a memory scalable application ordering using two integer arrays.
477: Collective on MPI_Comm
479: Input Parameters:
480: + comm - MPI communicator that is to share AO
481: . napp - size of integer arrays
482: . myapp - integer array that defines an ordering
483: - mypetsc - integer array that defines another ordering (may be NULL to
484: indicate the natural ordering, that is 0,1,2,3,...)
486: Output Parameter:
487: . aoout - the new application ordering
489: Level: beginner
491: Notes: The arrays myapp and mypetsc must contain all the integers 0 to napp-1 with no duplicates; that is, there cannot be any "holes"
492: in the indices. Use AOCreateMapping() or AOCreateMappingIS() if you wish to have "holes" in the indices.
493: Compared with AOCreateBasic(), this routine trades memory for message communication.
495: .keywords: AO, create
497: .seealso: AOCreateMemoryScalableIS(), AODestroy(), AOPetscToApplication(), AOApplicationToPetsc()
498: @*/
499: PetscErrorCode AOCreateMemoryScalable(MPI_Comm comm,PetscInt napp,const PetscInt myapp[],const PetscInt mypetsc[],AO *aoout)
500: {
502: IS isapp,ispetsc;
503: const PetscInt *app=myapp,*petsc=mypetsc;
506: ISCreateGeneral(comm,napp,app,PETSC_USE_POINTER,&isapp);
507: if (mypetsc) {
508: ISCreateGeneral(comm,napp,petsc,PETSC_USE_POINTER,&ispetsc);
509: } else {
510: ispetsc = NULL;
511: }
512: AOCreateMemoryScalableIS(isapp,ispetsc,aoout);
513: ISDestroy(&isapp);
514: if (mypetsc) {
515: ISDestroy(&ispetsc);
516: }
517: return(0);
518: }
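A self-contained usage sketch for this routine (editorial, not part of the PETSc distribution): each of the P processes contributes two pairs, so the arrays collectively cover 0..2P-1 on both sides with no holes.

    #include <petscao.h>

    int main(int argc,char **argv)
    {
      PetscErrorCode ierr;
      AO             ao;
      PetscMPIInt    rank;
      PetscInt       napp = 2,app[2],petsc[2],ia[2];

      ierr = PetscInitialize(&argc,&argv,NULL,NULL);if (ierr) return ierr;
      ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr);

      /* each process contributes two (application,PETSc) pairs */
      app[0]   = 2*rank;   app[1]   = 2*rank+1;
      petsc[0] = 2*rank+1; petsc[1] = 2*rank;

      ierr = AOCreateMemoryScalable(PETSC_COMM_WORLD,napp,app,petsc,&ao);CHKERRQ(ierr);

      /* translate this process's two application indices in place */
      ia[0] = app[0]; ia[1] = app[1];
      ierr  = AOApplicationToPetsc(ao,napp,ia);CHKERRQ(ierr);   /* ia becomes {2*rank+1,2*rank} */

      ierr = AOView(ao,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);
      ierr = AODestroy(&ao);CHKERRQ(ierr);
      ierr = PetscFinalize();
      return ierr;
    }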
520: /*@C
521: AOCreateMemoryScalableIS - Creates a memory scalable application ordering using two index sets.
523: Collective on IS
525: Input Parameters:
526: + isapp - index set that defines an ordering
527: - ispetsc - index set that defines another ordering (may be NULL to use the
528: natural ordering)
530: Output Parameter:
531: . aoout - the new application ordering
533: Level: beginner
535: Notes: The index sets isapp and ispetsc must contain all the integers 0 to napp-1 (where napp is the length of the index sets) with no duplicates;
536: that is, there cannot be any "holes".
537: Compared with AOCreateBasicIS(), this routine trades memory for message communication.
538: .keywords: AO, create
540: .seealso: AOCreateMemoryScalable(), AODestroy()
541: @*/
542: PetscErrorCode AOCreateMemoryScalableIS(IS isapp,IS ispetsc,AO *aoout)
543: {
545: MPI_Comm comm;
546: AO ao;
549: PetscObjectGetComm((PetscObject)isapp,&comm);
550: AOCreate(comm,&ao);
551: AOSetIS(ao,isapp,ispetsc);
552: AOSetType(ao,AOMEMORYSCALABLE);
553: AOViewFromOptions(ao,NULL,"-ao_view");
554: *aoout = ao;
555: return(0);
556: }
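And a hedged fragment for the IS-based variant (editorial; it assumes a running PETSc program with <petscao.h> included):

    PetscErrorCode ierr;
    IS             isapp;
    AO             ao;
    PetscMPIInt    rank;
    PetscInt       app[2];

    ierr   = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr);
    app[0] = 2*rank; app[1] = 2*rank+1;
    ierr   = ISCreateGeneral(PETSC_COMM_WORLD,2,app,PETSC_COPY_VALUES,&isapp);CHKERRQ(ierr);
    ierr   = AOCreateMemoryScalableIS(isapp,NULL,&ao);CHKERRQ(ierr);   /* NULL second IS -> natural ordering */
    ierr   = ISDestroy(&isapp);CHKERRQ(ierr);
    ierr   = AODestroy(&ao);CHKERRQ(ierr);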