Actual source code: vpscat_mpi1.h

petsc-3.14.6 2021-03-30

/*
     Defines the methods VecScatterBegin/End_1,2,...
     This is included by vpscat.c with different values for BS

     This is a terrible way of doing "templates" in C.
*/
#define PETSCMAP1_a(a,b)  a ## _ ## b
#define PETSCMAP1_b(a,b)  PETSCMAP1_a(a,b)
#define PETSCMAP1(a)      PETSCMAP1_b(a,BS)
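/*
   Usage sketch (illustrative, not part of this header): per the comment
   above, vpscat.c instantiates these routines by defining BS and then
   including this file; the block-size values shown here are assumptions:

       #define BS 1
       #include "vpscat_mpi1.h"
       #define BS 2
       #include "vpscat_mpi1.h"

   Each inclusion expands PETSCMAP1(VecScatterBeginMPI1) into
   VecScatterBeginMPI1_1, VecScatterBeginMPI1_2, ... via the token-pasting
   macros above; the #undef BS at the bottom of this file lets the next
   inclusion redefine BS.
*/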

PetscErrorCode PETSCMAP1(VecScatterBeginMPI1)(VecScatter ctx,Vec xin,Vec yin,InsertMode addv,ScatterMode mode)
{
  VecScatter_MPI_General *to,*from;
  const PetscScalar      *xv;
  PetscScalar            *yv,*svalues;
  MPI_Request            *rwaits,*swaits;
  PetscErrorCode         ierr;
  PetscInt               i,*indices,*sstarts,nrecvs,nsends,bs;
#if defined(PETSC_HAVE_CUDA)
  PetscBool              is_cudatype = PETSC_FALSE;
#endif
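  /*
     Overview: pick the forward or reverse communication data, obtain the
     array pointers of xin and yin, post the receives, pack and start the
     sends, and perform the purely local (on-process) part of the scatter.
     The matching waits and unpacking are done in the End routine below.
  */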

  /* If xin != yin, lock xin for read-only access; otherwise, we need to lock xin (yin) for read/write access once we get its data */
  if (xin != yin) {VecLockReadPush(xin);}

  if (mode & SCATTER_REVERSE) {
    to     = (VecScatter_MPI_General*)ctx->fromdata;
    from   = (VecScatter_MPI_General*)ctx->todata;
    rwaits = from->rev_requests;
    swaits = to->rev_requests;
  } else {
    to     = (VecScatter_MPI_General*)ctx->todata;
    from   = (VecScatter_MPI_General*)ctx->fromdata;
    rwaits = from->requests;
    swaits = to->requests;
  }
  bs      = to->bs;
  svalues = to->values;
  nrecvs  = from->n;
  nsends  = to->n;
  indices = to->indices;
  sstarts = to->starts;
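  /*
     If xin is a CUDA vector whose current values live on the GPU, copy them
     (or, when a GPU index set is attached to the scatter, only the needed
     entries) back to the host before packing; otherwise take the usual
     VecGetArrayRead path below.
  */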
#if defined(PETSC_HAVE_CUDA)
  PetscObjectTypeCompareAny((PetscObject)xin,&is_cudatype,VECSEQCUDA,VECMPICUDA,VECCUDA,"");
  if (is_cudatype) {
    VecCUDAAllocateCheckHost(xin);
    if (xin->offloadmask == PETSC_OFFLOAD_GPU) {
      if (xin->spptr && ctx->spptr) {
        VecCUDACopyFromGPUSome_Public(xin,(PetscCUDAIndices)ctx->spptr,mode);
      } else {
        VecCUDACopyFromGPU(xin);
      }
    }
    xv = *((PetscScalar**)xin->data);
  } else
#endif
  {
    VecGetArrayRead(xin,(const PetscScalar**)&xv);
  }

  if (xin != yin) {VecGetArray(yin,&yv);}
  else yv = (PetscScalar *)xv;
  VecLockWriteSet_Private(yin,PETSC_TRUE);

  ctx->xdata = xv;
  ctx->ydata = yv;

  if (!(mode & SCATTER_LOCAL)) {
    /* post receives since they were not previously posted */
    MPI_Startall_irecv(from->starts[nrecvs]*bs,MPIU_SCALAR,nrecvs,rwaits);

    /* this version packs and sends one message at a time */
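    /* memcpy_plan.optimized[i] indicates that the indices of message i can be
       handled with a few memcpy() calls (e.g. they form contiguous blocks)
       instead of the indexed gather in the else branch */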
    for (i=0; i<nsends; i++) {
      if (to->memcpy_plan.optimized[i]) { /* use memcpy instead of individual loads/stores */
        VecScatterMemcpyPlanExecute_Pack(i,xv,&to->memcpy_plan,svalues+bs*sstarts[i],INSERT_VALUES,bs);
      } else {
        PETSCMAP1(Pack_MPI1)(sstarts[i+1]-sstarts[i],indices + sstarts[i],xv,svalues + bs*sstarts[i],bs);
      }
      MPI_Start_isend((sstarts[i+1]-sstarts[i])*bs,MPIU_SCALAR,swaits+i);
    }
  }

  /* take care of local scatters */
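  /* local (on-process) part of the scatter; for an in-place INSERT_VALUES
     scatter, entries whose source and destination addresses coincide are
     skipped (same_copy_starts / slots_nonmatching below) */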
  if (to->local.n) {
    if (to->local.memcpy_plan.optimized[0] && addv == INSERT_VALUES) {
      /* copy only when it is not a self-to-self copy */
      if (!(xv == yv && to->local.memcpy_plan.same_copy_starts)) { VecScatterMemcpyPlanExecute_Scatter(0,xv,&to->local.memcpy_plan,yv,&from->local.memcpy_plan,addv); }
    } else if (to->local.memcpy_plan.optimized[0]) {
      VecScatterMemcpyPlanExecute_Scatter(0,xv,&to->local.memcpy_plan,yv,&from->local.memcpy_plan,addv);
    } else {
      if (xv == yv && addv == INSERT_VALUES && to->local.nonmatching_computed) {
        /* only copy entries that do not share identical memory locations */
        PETSCMAP1(Scatter_MPI1)(to->local.n_nonmatching,to->local.slots_nonmatching,xv,from->local.slots_nonmatching,yv,addv,bs);
      } else {
        PETSCMAP1(Scatter_MPI1)(to->local.n,to->local.vslots,xv,from->local.vslots,yv,addv,bs);
      }
    }
  }
  return(0);
}

/* --------------------------------------------------------------------------------------*/

PetscErrorCode PETSCMAP1(VecScatterEndMPI1)(VecScatter ctx,Vec xin,Vec yin,InsertMode addv,ScatterMode mode)
{
  VecScatter_MPI_General *to,*from;
  PetscScalar            *rvalues,*yv;
  PetscErrorCode         ierr;
  PetscInt               nrecvs,nsends,*indices,count,*rstarts,bs;
  PetscMPIInt            imdex;
  MPI_Request            *rwaits,*swaits;
  MPI_Status             xrstatus,*sstatus;
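  /*
     Overview: wait for each posted receive (in any order) and unpack it into
     the destination array, then wait on the sends and finally release the
     array pointers and locks taken in the Begin routine.  A purely local
     scatter jumps straight to that cleanup.
  */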

  if (mode & SCATTER_LOCAL) goto functionend;
  yv      = ctx->ydata;
  to      = (VecScatter_MPI_General*)ctx->todata;
  from    = (VecScatter_MPI_General*)ctx->fromdata;
  rwaits  = from->requests;
  swaits  = to->requests;
  sstatus = to->sstatus;    /* sstatus and rstatus are always stored in to */
  if (mode & SCATTER_REVERSE) {
    to     = (VecScatter_MPI_General*)ctx->fromdata;
    from   = (VecScatter_MPI_General*)ctx->todata;
    rwaits = from->rev_requests;
    swaits = to->rev_requests;
  }
  bs      = from->bs;
  rvalues = from->values;
  nrecvs  = from->n;
  nsends  = to->n;
  indices = from->indices;
  rstarts = from->starts;

  /* unpack the receives one at a time */
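  /* MPI_Waitany hands back whichever receive completed first, so unpacking
     of early arrivals overlaps with the remaining communication */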
  count = nrecvs;
  while (count) {
    MPI_Waitany(nrecvs,rwaits,&imdex,&xrstatus);
    /* unpack receives into our local space */
    if (from->memcpy_plan.optimized[imdex]) {
      VecScatterMemcpyPlanExecute_Unpack(imdex,rvalues+bs*rstarts[imdex],yv,&from->memcpy_plan,addv,bs);
    } else {
      PETSCMAP1(UnPack_MPI1)(rstarts[imdex+1] - rstarts[imdex],rvalues + bs*rstarts[imdex],indices + rstarts[imdex],yv,addv,bs);
    }
    count--;
  }

  /* wait on sends */
  if (nsends) {MPI_Waitall(nsends,swaits,sstatus);}

functionend:
  if (xin != yin) {
    VecRestoreArrayRead(xin,&ctx->xdata);
    VecLockReadPop(xin);
  }
  VecRestoreArray(yin,&ctx->ydata);
  VecLockWriteSet_Private(yin,PETSC_FALSE);
  return(0);
}

#undef PETSCMAP1_a
#undef PETSCMAP1_b
#undef PETSCMAP1
#undef BS
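/*
   Caller-level sketch (not from this file): these routines are reached
   through the public VecScatter interface.  The vector and index-set names
   below are made up for illustration:

       VecScatter sf;
       ierr = VecScatterCreate(x,ix,y,iy,&sf);CHKERRQ(ierr);
       ierr = VecScatterBegin(sf,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
       ierr = VecScatterEnd(sf,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
       ierr = VecScatterDestroy(&sf);CHKERRQ(ierr);

   ADD_VALUES accumulates into y instead of overwriting it, and
   SCATTER_REVERSE runs the same communication pattern backwards, which is
   why the Begin/End routines above swap the roles of todata and fromdata in
   that mode.
*/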