Actual source code: sfallgatherv.c
petsc-3.12.5 2020-03-29
1: #include <../src/vec/is/sf/impls/basic/allgatherv/sfallgatherv.h>
3: PETSC_INTERN PetscErrorCode PetscSFBcastAndOpBegin_Gatherv(PetscSF,MPI_Datatype,PetscMemType,const void*,PetscMemType,void*,MPI_Op);
5: /*===================================================================================*/
6: /* Internal routines for PetscSFPack */
7: /*===================================================================================*/
8: PETSC_INTERN PetscErrorCode PetscSFPackGet_Allgatherv(PetscSF sf,MPI_Datatype unit,PetscMemType rootmtype,const void *rootdata,PetscMemType leafmtype,const void *leafdata,PetscSFPack *mylink)
9: {
10: PetscErrorCode ierr;
11: PetscSF_Allgatherv *dat = (PetscSF_Allgatherv*)sf->data;
12: PetscSFPack link,*p;
13: PetscBool match;
14: PetscInt i,j;
17: PetscSFPackSetErrorOnUnsupportedOverlap(sf,unit,rootdata,leafdata);
18: /* Look for types in cache */
19: for (p=&dat->avail; (link=*p); p=&link->next) {
20: MPIPetsc_Type_compare(unit,link->unit,&match);
21: if (match) {
22: *p = link->next; /* Remove from available list */
23: goto found;
24: }
25: }
27: PetscNew(&link);
28: PetscSFPackSetUp_Host(sf,link,unit);
29: link->nrootreqs = 1;
30: link->nleafreqs = 0;
31: PetscMalloc1(4,&link->reqs); /* 4 = (nrootreqs+nleafreqs)*4 */
32: for (i=0; i<4; i++) link->reqs[i] = MPI_REQUEST_NULL; /* Initialized to NULL so that we know which need to be freed in Destroy */
34: for (i=0; i<2; i++) {
35: for (j=0; j<2; j++) {
36: link->rootreqs[i][j] = link->reqs + (2*i+j);
37: link->leafreqs[i][j] = NULL; /* Leaf requests are not needed. Set them to NULL so that accidental use segfaults */
38: }
39: }
41: #if defined(PETSC_HAVE_CUDA)
42: link->stream = NULL; /* CUDA default stream */
43: #endif
45: /* DO NOT allocate link->rootbuf[]/leafbuf[]. We use lazy allocation since these buffers are likely not needed */
46: found:
47: link->rootmtype = rootmtype;
48: link->leafmtype = leafmtype;
49: #if defined(PETSC_HAVE_CUDA)
50: if (rootmtype == PETSC_MEMTYPE_DEVICE || leafmtype == PETSC_MEMTYPE_DEVICE) {PetscSFPackSetUp_Device(sf,link,unit);}
51: #endif
52: link->rkey = rootdata;
53: link->lkey = leafdata;
54: link->next = dat->inuse;
55: dat->inuse = link;
57: *mylink = link;
58: return(0);
59: }
61: /*===================================================================================*/
62: /* Implementations of SF public APIs */
63: /*===================================================================================*/
65: /* PetscSFGetGraph is non-collective. An implementation should not have collective calls */
66: PETSC_INTERN PetscErrorCode PetscSFGetGraph_Allgatherv(PetscSF sf,PetscInt *nroots,PetscInt *nleaves,const PetscInt **ilocal,const PetscSFNode **iremote)
67: {
69: PetscInt i,j,k;
70: const PetscInt *range;
71: PetscMPIInt size;
74: MPI_Comm_size(PetscObjectComm((PetscObject)sf),&size);
75: if (nroots) *nroots = sf->nroots;
76: if (nleaves) *nleaves = sf->nleaves;
77: if (ilocal) *ilocal = NULL; /* Contiguous leaves */
78: if (iremote) {
79: if (!sf->remote && sf->nleaves) { /* The && sf->nleaves makes sfgatherv able to inherit this routine */
80: PetscLayoutGetRanges(sf->map,&range);
81: PetscMalloc1(sf->nleaves,&sf->remote);
82: sf->remote_alloc = sf->remote;
83: for (i=0; i<size; i++) {
84: for (j=range[i],k=0; j<range[i+1]; j++,k++) {
85: sf->remote[j].rank = i;
86: sf->remote[j].index = k;
87: }
88: }
89: }
90: *iremote = sf->remote;
91: }
92: return(0);
93: }
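/* A worked sketch (assumed layout, not from the source): with 3 ranks and range[] = {0,3,5,9},
   every rank has the same N = 9 contiguous leaves and the loop above fills
     sf->remote[0..2] = (rank 0, index 0..2)
     sf->remote[3..4] = (rank 1, index 0..1)
     sf->remote[5..8] = (rank 2, index 0..3)
   i.e. leaf j points at the root with global index j, owned by the rank whose range contains j. */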
95: PETSC_INTERN PetscErrorCode PetscSFSetUp_Allgatherv(PetscSF sf)
96: {
97: PetscErrorCode ierr;
98: PetscSF_Allgatherv *dat = (PetscSF_Allgatherv*)sf->data;
99: PetscMPIInt size;
100: PetscInt i;
101: const PetscInt *range;
104: MPI_Comm_size(PetscObjectComm((PetscObject)sf),&size);
105: if (sf->nleaves) { /* This if (sf->nleaves) test makes sfgatherv able to inherit this routine */
106: PetscMalloc1(size,&dat->recvcounts);
107: PetscMalloc1(size,&dat->displs);
108: PetscLayoutGetRanges(sf->map,&range);
110: for (i=0; i<size; i++) {
111: PetscMPIIntCast(range[i],&dat->displs[i]);
112: PetscMPIIntCast(range[i+1]-range[i],&dat->recvcounts[i]);
113: }
114: }
115: return(0);
116: }
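/* A worked sketch (same assumed layout range[] = {0,3,5,9}): the loop above yields
     dat->recvcounts[] = {3,2,4}   dat->displs[] = {0,3,5}
   which are the per-rank counts and displacements later handed to MPIU_Iallgatherv()
   and MPIU_Iscatterv(). */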
118: PETSC_INTERN PetscErrorCode PetscSFReset_Allgatherv(PetscSF sf)
119: {
120: PetscErrorCode ierr;
121: PetscSF_Allgatherv *dat = (PetscSF_Allgatherv*)sf->data;
124: PetscFree(dat->iranks);
125: PetscFree(dat->ioffset);
126: PetscFree(dat->irootloc);
127: PetscFree(dat->recvcounts);
128: PetscFree(dat->displs);
129: if (dat->inuse) SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_WRONGSTATE,"Outstanding operation has not been completed");
130: PetscSFPackDestroyAvailable(&dat->avail);
131: return(0);
132: }
134: PETSC_INTERN PetscErrorCode PetscSFDestroy_Allgatherv(PetscSF sf)
135: {
139: PetscSFReset_Allgatherv(sf);
140: PetscFree(sf->data);
141: return(0);
142: }
144: static PetscErrorCode PetscSFBcastAndOpBegin_Allgatherv(PetscSF sf,MPI_Datatype unit,PetscMemType rootmtype,const void *rootdata,PetscMemType leafmtype,void *leafdata,MPI_Op op)
145: {
146: PetscErrorCode ierr;
147: PetscSFPack link;
148: PetscMPIInt sendcount;
149: MPI_Comm comm;
150: char *recvbuf;
151: PetscSF_Allgatherv *dat = (PetscSF_Allgatherv*)sf->data;
154: PetscSFPackGet_Allgatherv(sf,unit,rootmtype,rootdata,leafmtype,leafdata,&link);
155: PetscObjectGetComm((PetscObject)sf,&comm);
156: PetscMPIIntCast(sf->nroots,&sendcount);
158: if (op == MPIU_REPLACE) {recvbuf = (char*)leafdata;}
159: else {
160: if (!link->leafbuf[leafmtype]) {PetscMallocWithMemType(leafmtype,sf->nleaves*link->unitbytes,(void**)&link->leafbuf[leafmtype]);}
161: recvbuf = link->leafbuf[leafmtype];
162: }
163: MPIU_Iallgatherv(rootdata,sendcount,unit,recvbuf,dat->recvcounts,dat->displs,unit,comm,link->rootreqs[PETSCSF_ROOT2LEAF_BCAST][rootmtype]);
164: return(0);
165: }
167: PETSC_INTERN PetscErrorCode PetscSFBcastAndOpEnd_Allgatherv(PetscSF sf,MPI_Datatype unit,PetscMemType rootmtype,const void *rootdata,PetscMemType leafmtype,void *leafdata,MPI_Op op)
168: {
169: PetscErrorCode ierr;
170: PetscSFPack link;
173: PetscSFPackGetInUse(sf,unit,rootdata,leafdata,PETSC_OWN_POINTER,&link);
174: MPI_Wait(link->rootreqs[PETSCSF_ROOT2LEAF_BCAST][rootmtype],MPI_STATUS_IGNORE);
175: if (op != MPIU_REPLACE) {PetscSFUnpackAndOpLeafData(sf,link,NULL,leafdata,op,PETSC_FALSE);}
176: PetscSFPackReclaim(sf,&link);
177: return(0);
178: }
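/* A minimal caller-side sketch (assumed SF of this type with PetscScalar data; sf, rootdata
   and leafdata are placeholders):

     PetscSFBcastAndOpBegin(sf,MPIU_SCALAR,rootdata,leafdata,MPIU_REPLACE);
     ...independent local work can overlap the outstanding MPIU_Iallgatherv...
     PetscSFBcastAndOpEnd(sf,MPIU_SCALAR,rootdata,leafdata,MPIU_REPLACE);

   With MPIU_REPLACE the allgatherv targets leafdata directly; with any other op it targets an
   internal leaf buffer that the End routine unpacks into leafdata with the op. */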
180: static PetscErrorCode PetscSFReduceBegin_Allgatherv(PetscSF sf,MPI_Datatype unit,PetscMemType leafmtype,const void *leafdata,PetscMemType rootmtype,void *rootdata,MPI_Op op)
181: {
182: PetscErrorCode ierr;
183: PetscSFPack link;
184: PetscSF_Allgatherv *dat = (PetscSF_Allgatherv*)sf->data;
185: PetscInt rstart;
186: PetscMPIInt rank,count,recvcount;
187: MPI_Comm comm;
190: PetscSFPackGet_Allgatherv(sf,unit,rootmtype,rootdata,leafmtype,leafdata,&link);
191: PetscObjectGetComm((PetscObject)sf,&comm);
192: MPI_Comm_rank(comm,&rank);
194: if (op == MPIU_REPLACE) {
195: /* REPLACE is only meaningful when all processes have the same leafdata to reduce. Therefore copying from the local leafdata is fine */
196: PetscLayoutGetRange(sf->map,&rstart,NULL);
197: PetscMemcpyWithMemType(rootmtype,leafmtype,rootdata,(const char*)leafdata+(size_t)rstart*link->unitbytes,(size_t)sf->nroots*link->unitbytes);
198: } else {
199: /* Reduce all leafdata on rank 0, then scatter the result to root buffer, then reduce root buffer to leafdata */
200: if (!rank && !link->leafbuf[leafmtype]) {PetscMallocWithMemType(leafmtype,sf->nleaves*link->unitbytes,(void**)&link->leafbuf[leafmtype]);}
201: PetscMPIIntCast(sf->nleaves*link->bs,&count);
202: PetscMPIIntCast(sf->nroots,&recvcount);
203: MPI_Reduce(leafdata,link->leafbuf[leafmtype],count,link->basicunit,op,0,comm); /* Must do the reduce with the MPI built-in datatype basicunit */
204: if (!link->rootbuf[rootmtype]) {PetscMallocWithMemType(rootmtype,sf->nroots*link->unitbytes,(void**)&link->rootbuf[rootmtype]);}
205: MPIU_Iscatterv(link->leafbuf[leafmtype],dat->recvcounts,dat->displs,unit,link->rootbuf[rootmtype],recvcount,unit,0,comm,link->rootreqs[PETSCSF_LEAF2ROOT_REDUCE][rootmtype]);
206: }
207: return(0);
208: }
210: PETSC_INTERN PetscErrorCode PetscSFReduceEnd_Allgatherv(PetscSF sf,MPI_Datatype unit,PetscMemType leafmtype,const void *leafdata,PetscMemType rootmtype,void *rootdata,MPI_Op op)
211: {
212: PetscErrorCode ierr;
213: PetscSFPack link;
216: PetscSFPackGetInUse(sf,unit,rootdata,leafdata,PETSC_OWN_POINTER,&link);
217: MPI_Wait(link->rootreqs[PETSCSF_LEAF2ROOT_REDUCE][rootmtype],MPI_STATUS_IGNORE);
218: if (op != MPIU_REPLACE) {PetscSFUnpackAndOpRootData(sf,link,NULL,rootdata,op,PETSC_FALSE);}
219: PetscSFPackReclaim(sf,&link);
220: return(0);
221: }
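/* A minimal caller-side sketch (assumed PetscScalar data; names are placeholders):

     PetscSFReduceBegin(sf,MPIU_SCALAR,leafdata,rootdata,MPIU_SUM);
     PetscSFReduceEnd(sf,MPIU_SCALAR,leafdata,rootdata,MPIU_SUM);

   For ops other than MPIU_REPLACE, Begin reduces all leafdata onto rank 0 and starts an
   MPIU_Iscatterv into the root buffer; End waits on that request and then combines the
   buffer into rootdata with the op. */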
223: static PetscErrorCode PetscSFBcastToZero_Allgatherv(PetscSF sf,MPI_Datatype unit,PetscMemType rootmtype,const void *rootdata,PetscMemType leafmtype,void *leafdata)
224: {
225: PetscErrorCode ierr;
226: PetscSFPack link;
229: PetscSFBcastAndOpBegin_Gatherv(sf,unit,rootmtype,rootdata,leafmtype,leafdata,MPIU_REPLACE);
230: /* A simplified PetscSFBcastAndOpEnd_Allgatherv */
231: PetscSFPackGetInUse(sf,unit,rootdata,leafdata,PETSC_OWN_POINTER,&link);
232: MPI_Wait(link->rootreqs[PETSCSF_ROOT2LEAF_BCAST][rootmtype],MPI_STATUS_IGNORE);
233: PetscSFPackReclaim(sf,&link);
234: return(0);
235: }
237: /* This routine is very tricky (I believe it is rarely used with this kind of graph, so we just provide a simple but not optimal implementation).
239: Suppose we have three ranks. Rank 0 has a root with value 1. Rank 0,1,2 has a leaf with value 2,3,4 respectively. The leaves are connected
240: to the root on rank 0. Suppose op=MPI_SUM and rank 0,1,2 gets root state in their rank order. By definition of this routine, rank 0 sees 1
241: in root, fetches it into its leafupdate, then updates root to 1 + 2 = 3; rank 1 sees 3 in root, fetches it into its leafupdate, then updates
242: root to 3 + 3 = 6; rank 2 sees 6 in root, fetches it into its leafupdate, then updates root to 6 + 4 = 10. At the end, leafupdate on rank
243: 0,1,2 is 1,3,6 respectively. root is 10.
245: One optimized implementation could be: starting from the initial state:
246: rank-0 rank-1 rank-2
247: Root 1
248: Leaf 2 3 4
250: Shift leaves rightwards to leafupdate. Rank 0 gathers the root value and puts it in leafupdate. We have:
251: rank-0 rank-1 rank-2
252: Root 1
253: Leaf 2 3 4
254: Leafupdate 1 2 3
256: Then, do MPI_Scan on leafupdate and get:
257: rank-0 rank-1 rank-2
258: Root 1
259: Leaf 2 3 4
260: Leafupdate 1 3 6
262: Rank 2 sums its leaf and leafupdate, scatters the result to the root, and gets
263: rank-0 rank-1 rank-2
264: Root 10
265: Leaf 2 3 4
266: Leafupdate 1 3 6
268: We use a simpler implementation. From the same initial state, we copy leafdata to leafupdate
269: rank-0 rank-1 rank-2
270: Root 1
271: Leaf 2 3 4
272: Leafupdate 2 3 4
274: Do MPI_Exscan on leafupdate,
275: rank-0 rank-1 rank-2
276: Root 1
277: Leaf 2 3 4
278: Leafupdate 2 2 5
280: BcastAndOp from root to leafupdate,
281: rank-0 rank-1 rank-2
282: Root 1
283: Leaf 2 3 4
284: Leafupdate 3 3 6
286: Copy root to leafupdate on rank-0
287: rank-0 rank-1 rank-2
288: Root 1
289: Leaf 2 3 4
290: Leafupdate 1 3 6
292: Reduce from leaf to root,
293: rank-0 rank-1 rank-2
294: Root 10
295: Leaf 2 3 4
296: Leafupdate 1 3 6
297: */
298: PETSC_INTERN PetscErrorCode PetscSFFetchAndOpBegin_Allgatherv(PetscSF sf,MPI_Datatype unit,PetscMemType rootmtype,void *rootdata,PetscMemType leafmtype,const void *leafdata,void *leafupdate,MPI_Op op)
299: {
300: PetscErrorCode ierr;
301: PetscSFPack link;
302: MPI_Comm comm;
303: PetscMPIInt count;
306: /* Copy leafdata to leafupdate */
307: PetscSFPackGet_Allgatherv(sf,unit,rootmtype,rootdata,leafmtype,leafdata,&link);
308: PetscMemcpyWithMemType(leafmtype,leafmtype,leafupdate,leafdata,sf->nleaves*link->unitbytes);
309: PetscSFPackGetInUse(sf,unit,rootdata,leafdata,PETSC_OWN_POINTER,&link);
311: /* Exscan on leafupdate and then BcastAndOp rootdata to leafupdate */
312: PetscObjectGetComm((PetscObject)sf,&comm);
313: PetscMPIIntCast(sf->nleaves,&count);
314: if (op == MPIU_REPLACE) {
315: PetscMPIInt size,rank,prev,next;
316: MPI_Comm_rank(comm,&rank);
317: MPI_Comm_size(comm,&size);
318: prev = rank ? rank-1 : MPI_PROC_NULL;
319: next = (rank < size-1) ? rank+1 : MPI_PROC_NULL;
320: MPI_Sendrecv_replace(leafupdate,count,unit,next,link->tag,prev,link->tag,comm,MPI_STATUS_IGNORE);
321: } else {MPI_Exscan(MPI_IN_PLACE,leafupdate,count,unit,op,comm);}
322: PetscSFPackReclaim(sf,&link);
323: PetscSFBcastAndOpBegin(sf,unit,rootdata,leafupdate,op);
324: PetscSFBcastAndOpEnd(sf,unit,rootdata,leafupdate,op);
326: /* Bcast roots to rank 0's leafupdate */
327: PetscSFBcastToZero_Private(sf,unit,rootdata,leafupdate); /* Using this line makes Allgather SFs able to inherit this routine */
329: /* Reduce leafdata to rootdata */
330: PetscSFReduceBegin(sf,unit,leafdata,rootdata,op);
331: return(0);
332: }
334: PETSC_INTERN PetscErrorCode PetscSFFetchAndOpEnd_Allgatherv(PetscSF sf,MPI_Datatype unit,PetscMemType rootmtype,void *rootdata,PetscMemType leafmtype,const void *leafdata,void *leafupdate,MPI_Op op)
335: {
336: PetscErrorCode ierr;
339: PetscSFReduceEnd(sf,unit,leafdata,rootdata,op);
340: return(0);
341: }
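/* A minimal caller-side sketch using the 3-rank example from the comment above (root = 1 on
   rank 0, leaves = 2,3,4; assumed PetscScalar data, names are placeholders):

     PetscSFFetchAndOpBegin(sf,MPIU_SCALAR,rootdata,leafdata,leafupdate,MPIU_SUM);
     PetscSFFetchAndOpEnd(sf,MPIU_SCALAR,rootdata,leafdata,leafupdate,MPIU_SUM);

   Afterwards leafupdate holds 1,3,6 on ranks 0,1,2 and the root holds 10, matching the
   walk-through above. */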
343: /* Get root ranks accessing my leaves */
344: PETSC_INTERN PetscErrorCode PetscSFGetRootRanks_Allgatherv(PetscSF sf,PetscInt *nranks,const PetscMPIInt **ranks,const PetscInt **roffset,const PetscInt **rmine,const PetscInt **rremote)
345: {
347: PetscInt i,j,k,size;
348: const PetscInt *range;
351: /* Lazily construct these large arrays if users really need them for this type of SF. Very likely, they do not */
352: if (sf->nranks && !sf->ranks) { /* On rank!=0, sf->nranks=0. The sf->nranks test makes this routine also work for sfgatherv */
353: size = sf->nranks;
354: PetscLayoutGetRanges(sf->map,&range);
355: PetscMalloc4(size,&sf->ranks,size+1,&sf->roffset,sf->nleaves,&sf->rmine,sf->nleaves,&sf->rremote);
356: for (i=0; i<size; i++) sf->ranks[i] = i;
357: PetscArraycpy(sf->roffset,range,size+1);
358: for (i=0; i<sf->nleaves; i++) sf->rmine[i] = i; /* rmine is never NULL even for contiguous leaves */
359: for (i=0; i<size; i++) {
360: for (j=range[i],k=0; j<range[i+1]; j++,k++) sf->rremote[j] = k;
361: }
362: }
364: if (nranks) *nranks = sf->nranks;
365: if (ranks) *ranks = sf->ranks;
366: if (roffset) *roffset = sf->roffset;
367: if (rmine) *rmine = sf->rmine;
368: if (rremote) *rremote = sf->rremote;
369: return(0);
370: }
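/* A worked sketch (assumed 3-rank layout range[] = {0,3,5,9}): the lazily built arrays are
     sf->ranks[]   = {0,1,2}            sf->roffset[] = {0,3,5,9}
     sf->rmine[]   = {0,1,...,8}        sf->rremote[] = {0,1,2, 0,1, 0,1,2,3}
   so leaf j is served by the rank whose range contains j, at that rank's local root index. */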
372: /* Get leaf ranks accessing my roots */
373: PETSC_INTERN PetscErrorCode PetscSFGetLeafRanks_Allgatherv(PetscSF sf,PetscInt *niranks,const PetscMPIInt **iranks,const PetscInt **ioffset,const PetscInt **irootloc)
374: {
375: PetscErrorCode ierr;
376: PetscSF_Allgatherv *dat = (PetscSF_Allgatherv*)sf->data;
377: MPI_Comm comm;
378: PetscMPIInt size,rank;
379: PetscInt i,j;
382: /* Lazily construct these large arrays if users really need them for this type of SF. Very likely, they do not */
383: PetscObjectGetComm((PetscObject)sf,&comm);
384: MPI_Comm_size(comm,&size);
385: MPI_Comm_rank(comm,&rank);
386: if (niranks) *niranks = size;
388: /* PetscSF_Basic distinguishes incoming ranks. Here we do not need that, but we must put self first and
389: sort the other ranks. See the comments in PetscSFSetUp_Basic about MatGetBrowsOfAoCols_MPIAIJ for why.
390: */
391: if (iranks) {
392: if (!dat->iranks) {
393: PetscMalloc1(size,&dat->iranks);
394: dat->iranks[0] = rank;
395: for (i=0,j=1; i<size; i++) {if (i == rank) continue; dat->iranks[j++] = i;}
396: }
397: *iranks = dat->iranks; /* dat->iranks was init'ed to NULL by PetscNewLog */
398: }
400: if (ioffset) {
401: if (!dat->ioffset) {
402: PetscMalloc1(size+1,&dat->ioffset);
403: for (i=0; i<=size; i++) dat->ioffset[i] = i*sf->nroots;
404: }
405: *ioffset = dat->ioffset;
406: }
408: if (irootloc) {
409: if (!dat->irootloc) {
410: PetscMalloc1(sf->nleaves,&dat->irootloc);
411: for (i=0; i<size; i++) {
412: for (j=0; j<sf->nroots; j++) dat->irootloc[i*sf->nroots+j] = j;
413: }
414: }
415: *irootloc = dat->irootloc;
416: }
417: return(0);
418: }
420: PETSC_INTERN PetscErrorCode PetscSFCreateLocalSF_Allgatherv(PetscSF sf,PetscSF *out)
421: {
422: PetscInt i,nroots,nleaves,rstart,*ilocal;
423: PetscSFNode *iremote;
424: PetscSF lsf;
428: nleaves = sf->nleaves ? sf->nroots : 0; /* sf->nleaves can be zero with SFGather(v) */
429: nroots = nleaves;
430: PetscMalloc1(nleaves,&ilocal);
431: PetscMalloc1(nleaves,&iremote);
432: PetscLayoutGetRange(sf->map,&rstart,NULL);
434: for (i=0; i<nleaves; i++) {
435: ilocal[i] = rstart + i; /* lsf does not change leaf indices */
436: iremote[i].rank = 0; /* rank in PETSC_COMM_SELF */
437: iremote[i].index = i; /* root index */
438: }
440: PetscSFCreate(PETSC_COMM_SELF,&lsf);
441: PetscSFSetGraph(lsf,nroots,nleaves,ilocal,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER);
442: PetscSFSetUp(lsf);
443: *out = lsf;
444: return(0);
445: }
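/* A worked sketch (assumed 3-rank layout range[] = {0,3,5,9}): on rank 1, rstart = 3 and
   nroots = nleaves = 2, so the local SF has ilocal[] = {3,4} (leaf indices are unchanged) and
   iremote[] = {(0,0),(0,1)}, i.e. each local leaf points at the root with the same local
   offset on PETSC_COMM_SELF. */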
447: PETSC_INTERN PetscErrorCode PetscSFCreate_Allgatherv(PetscSF sf)
448: {
449: PetscErrorCode ierr;
450: PetscSF_Allgatherv *dat = (PetscSF_Allgatherv*)sf->data;
453: sf->ops->SetUp = PetscSFSetUp_Allgatherv;
454: sf->ops->Reset = PetscSFReset_Allgatherv;
455: sf->ops->Destroy = PetscSFDestroy_Allgatherv;
456: sf->ops->GetRootRanks = PetscSFGetRootRanks_Allgatherv;
457: sf->ops->GetLeafRanks = PetscSFGetLeafRanks_Allgatherv;
458: sf->ops->GetGraph = PetscSFGetGraph_Allgatherv;
459: sf->ops->BcastAndOpBegin = PetscSFBcastAndOpBegin_Allgatherv;
460: sf->ops->BcastAndOpEnd = PetscSFBcastAndOpEnd_Allgatherv;
461: sf->ops->ReduceBegin = PetscSFReduceBegin_Allgatherv;
462: sf->ops->ReduceEnd = PetscSFReduceEnd_Allgatherv;
463: sf->ops->FetchAndOpBegin = PetscSFFetchAndOpBegin_Allgatherv;
464: sf->ops->FetchAndOpEnd = PetscSFFetchAndOpEnd_Allgatherv;
465: sf->ops->CreateLocalSF = PetscSFCreateLocalSF_Allgatherv;
466: sf->ops->BcastToZero = PetscSFBcastToZero_Allgatherv;
468: PetscNewLog(sf,&dat);
469: sf->data = (void*)dat;
470: return(0);
471: }