Actual source code: sfalltoall.c

#include <../src/vec/is/sf/impls/basic/allgatherv/sfallgatherv.h>
#include <../src/vec/is/sf/impls/basic/allgather/sfallgather.h>
#include <../src/vec/is/sf/impls/basic/gatherv/sfgatherv.h>

/* Reuse the type. The difference is some fields (i.e., displs, recvcounts) are not used, which is not a big deal */
typedef PetscSF_Allgatherv PetscSF_Alltoall;

/*===================================================================================*/
/*              Implementations of SF public APIs                                    */
/*===================================================================================*/
static PetscErrorCode PetscSFGetGraph_Alltoall(PetscSF sf,PetscInt *nroots,PetscInt *nleaves,const PetscInt **ilocal,const PetscSFNode **iremote)
{
  PetscInt       i;

  if (nroots)  *nroots  = sf->nroots;
  if (nleaves) *nleaves = sf->nleaves;
  if (ilocal)  *ilocal  = NULL; /* Contiguous local indices */
  if (iremote) {
    if (!sf->remote) {
      PetscMalloc1(sf->nleaves,&sf->remote);
      sf->remote_alloc = sf->remote;
      for (i=0; i<sf->nleaves; i++) {
        sf->remote[i].rank  = i;
        sf->remote[i].index = i;
      }
    }
    *iremote = sf->remote;
  }
  return 0;
}
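
/*
   For illustration: an alltoall SF has one root and one leaf per process in its communicator, so
   PetscSFGetGraph() above reports contiguous leaf indices (ilocal == NULL) and an iremote[] in
   which leaf i is connected to root i on rank i. A minimal sketch of querying it:

     PetscInt           nroots,nleaves;
     const PetscSFNode *iremote;
     PetscSFGetGraph(sf,&nroots,&nleaves,NULL,&iremote); // iremote[i].rank == i, iremote[i].index == i
*/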

static PetscErrorCode PetscSFBcastBegin_Alltoall(PetscSF sf,MPI_Datatype unit,PetscMemType rootmtype,const void *rootdata,PetscMemType leafmtype,void *leafdata,MPI_Op op)
{
  PetscSFLink          link;
  MPI_Comm             comm;
  void                 *rootbuf = NULL,*leafbuf = NULL; /* buffer used by MPI */
  MPI_Request          *req;

  PetscSFLinkCreate(sf,unit,rootmtype,rootdata,leafmtype,leafdata,op,PETSCSF_BCAST,&link);
  PetscSFLinkPackRootData(sf,link,PETSCSF_REMOTE,rootdata);
  PetscSFLinkCopyRootBufferInCaseNotUseGpuAwareMPI(sf,link,PETSC_TRUE/* device2host before sending */);
  PetscObjectGetComm((PetscObject)sf,&comm);
  PetscSFLinkGetMPIBuffersAndRequests(sf,link,PETSCSF_ROOT2LEAF,&rootbuf,&leafbuf,&req,NULL);
  PetscSFLinkSyncStreamBeforeCallMPI(sf,link,PETSCSF_ROOT2LEAF);
  MPIU_Ialltoall(rootbuf,1,unit,leafbuf,1,unit,comm,req);
  return 0;
}
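
/*
   A minimal usage sketch, not taken from this file: assuming the SF was given the alltoall
   pattern (e.g. with PetscSFSetGraphWithPattern() and PETSCSF_PATTERN_ALLTOALL; the layout
   argument is assumed to be ignored for this pattern), a broadcast amounts to the
   MPIU_Ialltoall() above, i.e. block i of each rank's rootdata is delivered to rank i,
   landing at the sender's position in leafdata there:

     PetscSF     sf;
     PetscScalar rootdata[SIZE],leafdata[SIZE]; // SIZE stands for the communicator size; fill rootdata first
     PetscSFCreate(comm,&sf);
     PetscSFSetGraphWithPattern(sf,NULL,PETSCSF_PATTERN_ALLTOALL);
     PetscSFBcastBegin(sf,MPIU_SCALAR,rootdata,leafdata,MPI_REPLACE);
     PetscSFBcastEnd(sf,MPIU_SCALAR,rootdata,leafdata,MPI_REPLACE);
     PetscSFDestroy(&sf);
*/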

static PetscErrorCode PetscSFReduceBegin_Alltoall(PetscSF sf,MPI_Datatype unit,PetscMemType leafmtype,const void *leafdata,PetscMemType rootmtype,void *rootdata,MPI_Op op)
{
  PetscSFLink          link;
  MPI_Comm             comm;
  void                 *rootbuf = NULL,*leafbuf = NULL; /* buffer used by MPI */
  MPI_Request          *req;

  PetscSFLinkCreate(sf,unit,rootmtype,rootdata,leafmtype,leafdata,op,PETSCSF_REDUCE,&link);
  PetscSFLinkPackLeafData(sf,link,PETSCSF_REMOTE,leafdata);
  PetscSFLinkCopyLeafBufferInCaseNotUseGpuAwareMPI(sf,link,PETSC_TRUE/* device2host before sending */);
  PetscObjectGetComm((PetscObject)sf,&comm);
  PetscSFLinkGetMPIBuffersAndRequests(sf,link,PETSCSF_LEAF2ROOT,&rootbuf,&leafbuf,&req,NULL);
  PetscSFLinkSyncStreamBeforeCallMPI(sf,link,PETSCSF_LEAF2ROOT);
  MPIU_Ialltoall(leafbuf,1,unit,rootbuf,1,unit,comm,req);
  return 0;
}
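
/*
   Reduce runs the same MPIU_Ialltoall() in the opposite direction: leaf buffers are sent and
   root buffers received, and the MPI_Op is applied to rootdata when PetscSFReduceEnd_Basic()
   unpacks the received buffer. A matching sketch for the SF assumed above:

     PetscSFReduceBegin(sf,MPIU_SCALAR,leafdata,rootdata,MPI_SUM);
     PetscSFReduceEnd(sf,MPIU_SCALAR,leafdata,rootdata,MPI_SUM);
*/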

static PetscErrorCode PetscSFCreateLocalSF_Alltoall(PetscSF sf,PetscSF *out)
{
  PetscInt       nroots = 1,nleaves = 1,*ilocal;
  PetscSFNode    *iremote = NULL;
  PetscSF        lsf;
  PetscMPIInt    rank;

  nroots  = 1;
  nleaves = 1;
  MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);
  PetscMalloc1(nleaves,&ilocal);
  PetscMalloc1(nleaves,&iremote);
  ilocal[0]        = rank;
  iremote[0].rank  = 0;    /* rank in PETSC_COMM_SELF */
  iremote[0].index = rank; /* LocalSF is an embedded SF. Indices are not remapped */

  PetscSFCreate(PETSC_COMM_SELF,&lsf);
  PetscSFSetGraph(lsf,nroots,nleaves,ilocal/*un-remapped leaf index*/,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER);
  PetscSFSetUp(lsf);
  *out = lsf;
  return 0;
}
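
/*
   In the alltoall graph the only root-leaf pair that stays on the calling rank is leaf `rank`
   connected to root `rank` (see PetscSFGetGraph_Alltoall above), and the one-edge SF on
   PETSC_COMM_SELF built here records exactly that pair, keeping the original indices.
*/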

static PetscErrorCode PetscSFCreateEmbeddedRootSF_Alltoall(PetscSF sf,PetscInt nselected,const PetscInt *selected,PetscSF *newsf)
{
  PetscInt       i,*tmproots,*ilocal,ndranks,ndiranks;
  PetscSFNode    *iremote;
  PetscMPIInt    nroots,*roots,nleaves,*leaves,rank;
  MPI_Comm       comm;
  PetscSF_Basic  *bas;
  PetscSF        esf;

  PetscObjectGetComm((PetscObject)sf,&comm);
  MPI_Comm_rank(comm,&rank);

  /* Uniq selected[] and store the result in roots[] */
  PetscMalloc1(nselected,&tmproots);
  PetscArraycpy(tmproots,selected,nselected);
  PetscSortRemoveDupsInt(&nselected,tmproots); /* nselected might be changed */
  nroots = nselected;   /* For Alltoall, we know root indices will not overflow MPI_INT */
  PetscMalloc1(nselected,&roots);
  for (i=0; i<nselected; i++) roots[i] = tmproots[i];
  PetscFree(tmproots);

  /* Find out which leaves are still connected to roots in the embedded sf. Expect PetscCommBuildTwoSided is more scalable than MPI_Alltoall */
  PetscCommBuildTwoSided(comm,0/*empty msg*/,MPI_INT/*fake*/,nroots,roots,NULL/*todata*/,&nleaves,&leaves,NULL/*fromdata*/);

  /* Move myself ahead if rank is in leaves[], since I am a distinguished rank */
  ndranks = 0;
  for (i=0; i<nleaves; i++) {
    if (leaves[i] == rank) {leaves[i] = -rank; ndranks = 1; break;}
  }
  PetscSortMPIInt(nleaves,leaves);
  if (nleaves && leaves[0] < 0) leaves[0] = rank;

  /* Build esf and fill its fields manually (without calling PetscSFSetUp) */
  PetscMalloc1(nleaves,&ilocal);
  PetscMalloc1(nleaves,&iremote);
  for (i=0; i<nleaves; i++) { /* 1:1 map from roots to leaves */
    ilocal[i]        = leaves[i];
    iremote[i].rank  = leaves[i];
    iremote[i].index = leaves[i];
  }
  PetscSFCreate(comm,&esf);
  PetscSFSetType(esf,PETSCSFBASIC); /* This optimized routine can only create a basic sf */
  PetscSFSetGraph(esf,sf->nleaves,nleaves,ilocal,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER);

  /* As if we are calling PetscSFSetUpRanks(esf,self's group) */
  PetscMalloc4(nleaves,&esf->ranks,nleaves+1,&esf->roffset,nleaves,&esf->rmine,nleaves,&esf->rremote);
  esf->nranks     = nleaves;
  esf->ndranks    = ndranks;
  esf->roffset[0] = 0;
  for (i=0; i<nleaves; i++) {
    esf->ranks[i]     = leaves[i];
    esf->roffset[i+1] = i+1;
    esf->rmine[i]     = leaves[i];
    esf->rremote[i]   = leaves[i];
  }

  /* Set up esf->data, the incoming communication (i.e., recv info), which is usually done by PetscSFSetUp_Basic */
  bas  = (PetscSF_Basic*)esf->data;
  PetscMalloc2(nroots,&bas->iranks,nroots+1,&bas->ioffset);
  PetscMalloc1(nroots,&bas->irootloc);
  /* Move myself ahead if rank is in roots[], since I am a distinguished irank */
  ndiranks = 0;
  for (i=0; i<nroots; i++) {
    if (roots[i] == rank) {roots[i] = -rank; ndiranks = 1; break;}
  }
  PetscSortMPIInt(nroots,roots);
  if (nroots && roots[0] < 0) roots[0] = rank;

  bas->niranks    = nroots;
  bas->ndiranks   = ndiranks;
  bas->ioffset[0] = 0;
  bas->itotal     = nroots;
  for (i=0; i<nroots; i++) {
    bas->iranks[i]    = roots[i];
    bas->ioffset[i+1] = i+1;
    bas->irootloc[i]  = roots[i];
  }
  PetscFree(roots);  /* roots[] and leaves[] were copied into esf/bas above and are no longer needed */
  PetscFree(leaves);

  /* See PetscSFCreateEmbeddedRootSF_Basic */
  esf->nleafreqs  = esf->nranks - esf->ndranks;
  bas->nrootreqs  = bas->niranks - bas->ndiranks;
  esf->persistent = PETSC_TRUE;
  /* Setup packing related fields */
  PetscSFSetUpPackFields(esf);

  esf->setupcalled = PETSC_TRUE; /* We have done setup ourselves! */
  *newsf = esf;
  return 0;
}
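
/*
   The hand-rolled setup above mirrors what PetscSFSetUpRanks() and PetscSFSetUp_Basic() would
   compute: the leaf side of esf (ranks/roffset/rmine/rremote) gets one entry per rank in
   leaves[], with the calling rank (if present) moved to the front as the distinguished rank,
   and the root side (bas->iranks/ioffset/irootloc) gets one entry per rank in roots[], handled
   the same way.
*/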

PETSC_INTERN PetscErrorCode PetscSFCreate_Alltoall(PetscSF sf)
{
  PetscSF_Alltoall *dat = (PetscSF_Alltoall*)sf->data;

  sf->ops->BcastEnd        = PetscSFBcastEnd_Basic;
  sf->ops->ReduceEnd       = PetscSFReduceEnd_Basic;

  /* Inherit from Allgatherv. It is astonishing Alltoall can inherit so much from Allgather(v) */
  sf->ops->Destroy         = PetscSFDestroy_Allgatherv;
  sf->ops->Reset           = PetscSFReset_Allgatherv;
  sf->ops->FetchAndOpEnd   = PetscSFFetchAndOpEnd_Allgatherv;
  sf->ops->GetRootRanks    = PetscSFGetRootRanks_Allgatherv;

  /* Inherit from Allgather. Every process gathers equal-sized data from others, which enables this inheritance. */
  sf->ops->GetLeafRanks    = PetscSFGetLeafRanks_Allgatherv;
  sf->ops->SetUp           = PetscSFSetUp_Allgather;

  /* Inherit from Gatherv. Each root has only one leaf connected, which enables this inheritance */
  sf->ops->FetchAndOpBegin = PetscSFFetchAndOpBegin_Gatherv;

  /* Alltoall stuff */
  sf->ops->GetGraph             = PetscSFGetGraph_Alltoall;
  sf->ops->BcastBegin           = PetscSFBcastBegin_Alltoall;
  sf->ops->ReduceBegin          = PetscSFReduceBegin_Alltoall;
  sf->ops->CreateLocalSF        = PetscSFCreateLocalSF_Alltoall;
  sf->ops->CreateEmbeddedRootSF = PetscSFCreateEmbeddedRootSF_Alltoall;

  PetscNewLog(sf,&dat);
  sf->data = (void*)dat;
  return 0;
}
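
/*
   This constructor is registered under the type name PETSCSFALLTOALL ("alltoall"), so it is
   reached through PetscSFSetType(sf,PETSCSFALLTOALL), the -sf_type alltoall option, or
   indirectly when PetscSFSetGraphWithPattern() is asked for the alltoall pattern, as assumed in
   the sketch after PetscSFBcastBegin_Alltoall() above.
*/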