Actual source code: vpscat_mpi1.h
petsc-3.14.6 2021-03-30
/*
     Defines the methods VecScatterBegin/End_1,2,......
     This is included by vpscat.c with different values for BS

     This is a terrible way of doing "templates" in C.
*/
#define PETSCMAP1_a(a,b)  a ## _ ## b
#define PETSCMAP1_b(a,b)  PETSCMAP1_a(a,b)
#define PETSCMAP1(a)      PETSCMAP1_b(a,BS)
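/*
   For example, when the including file defines BS as 2, the token pasting above
   turns PETSCMAP1(VecScatterBeginMPI1) into VecScatterBeginMPI1_2, so every
   routine in this header is instantiated once per block size (see the sketch of
   the include pattern at the end of the file).
*/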
PetscErrorCode PETSCMAP1(VecScatterBeginMPI1)(VecScatter ctx,Vec xin,Vec yin,InsertMode addv,ScatterMode mode)
{
  VecScatter_MPI_General *to,*from;
  const PetscScalar      *xv;
  PetscScalar            *yv,*svalues;
  MPI_Request            *rwaits,*swaits;
  PetscErrorCode         ierr;
  PetscInt               i,*indices,*sstarts,nrecvs,nsends,bs;
#if defined(PETSC_HAVE_CUDA)
  PetscBool              is_cudatype = PETSC_FALSE;
#endif

  PetscFunctionBegin;
  /* If xin != yin, lock xin for read-only access; otherwise, we need to lock xin (yin) for read/write access once we get its data */
  if (xin != yin) {ierr = VecLockReadPush(xin);CHKERRQ(ierr);}

  if (mode & SCATTER_REVERSE) {
    to     = (VecScatter_MPI_General*)ctx->fromdata;
    from   = (VecScatter_MPI_General*)ctx->todata;
    rwaits = from->rev_requests;
    swaits = to->rev_requests;
  } else {
    to     = (VecScatter_MPI_General*)ctx->todata;
    from   = (VecScatter_MPI_General*)ctx->fromdata;
    rwaits = from->requests;
    swaits = to->requests;
  }
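  /* For a reverse scatter the to/from roles are swapped so the same precomputed
     communication pattern is replayed backwards; rev_requests are the persistent
     MPI requests built for that reversed pattern, while requests serve the
     forward direction. */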
  bs      = to->bs;
  svalues = to->values;
  nrecvs  = from->n;
  nsends  = to->n;
  indices = to->indices;
  sstarts = to->starts;
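  /* If the vector data currently lives on the GPU, copy it (possibly only the
     entries this scatter needs) back to the host before packing: the message
     buffers and the pack/scatter loops below operate on host memory. */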
#if defined(PETSC_HAVE_CUDA)
  ierr = PetscObjectTypeCompareAny((PetscObject)xin,&is_cudatype,VECSEQCUDA,VECMPICUDA,VECCUDA,"");CHKERRQ(ierr);
  if (is_cudatype) {
    ierr = VecCUDAAllocateCheckHost(xin);CHKERRQ(ierr);
    if (xin->offloadmask == PETSC_OFFLOAD_GPU) {
      if (xin->spptr && ctx->spptr) {
        ierr = VecCUDACopyFromGPUSome_Public(xin,(PetscCUDAIndices)ctx->spptr,mode);CHKERRQ(ierr);
      } else {
        ierr = VecCUDACopyFromGPU(xin);CHKERRQ(ierr);
      }
    }
    xv = *((PetscScalar**)xin->data);
  } else
#endif
  {
    ierr = VecGetArrayRead(xin,(const PetscScalar**)&xv);CHKERRQ(ierr);
  }

  if (xin != yin) {ierr = VecGetArray(yin,&yv);CHKERRQ(ierr);}
  else yv = (PetscScalar*)xv;
  ierr = VecLockWriteSet_Private(yin,PETSC_TRUE);CHKERRQ(ierr);

  ctx->xdata = xv;
  ctx->ydata = yv;
  if (!(mode & SCATTER_LOCAL)) {
    /* post receives since they were not previously posted */
    ierr = MPI_Startall_irecv(from->starts[nrecvs]*bs,MPIU_SCALAR,nrecvs,rwaits);CHKERRQ(ierr);

    /* this version packs and sends one at a time */
    for (i=0; i<nsends; i++) {
      if (to->memcpy_plan.optimized[i]) { /* use memcpy instead of individual load/store */
        ierr = VecScatterMemcpyPlanExecute_Pack(i,xv,&to->memcpy_plan,svalues+bs*sstarts[i],INSERT_VALUES,bs);CHKERRQ(ierr);
      } else {
        PETSCMAP1(Pack_MPI1)(sstarts[i+1]-sstarts[i],indices + sstarts[i],xv,svalues + bs*sstarts[i],bs);
      }
      ierr = MPI_Start_isend((sstarts[i+1]-sstarts[i])*bs,MPIU_SCALAR,swaits+i);CHKERRQ(ierr);
    }
  }
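  /* Sketch of the gather performed by the Pack_MPI1 routines used above (the
     real routines are block-size-specialized elsewhere in this header, so the
     indexing shown is illustrative only): for each of the n indexed blocks,
     bs consecutive scalars are copied from xv into the contiguous send buffer,

         for (i=0; i<n; i++)
           for (j=0; j<bs; j++) buf[i*bs+j] = xv[indices[i]+j];

     so each message can be sent as a single contiguous MPIU_SCALAR buffer. */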
  /* take care of local scatters */
  if (to->local.n) {
    if (to->local.memcpy_plan.optimized[0] && addv == INSERT_VALUES) {
      /* do copy when it is not a self-to-self copy */
      if (!(xv == yv && to->local.memcpy_plan.same_copy_starts)) { ierr = VecScatterMemcpyPlanExecute_Scatter(0,xv,&to->local.memcpy_plan,yv,&from->local.memcpy_plan,addv);CHKERRQ(ierr); }
    } else if (to->local.memcpy_plan.optimized[0]) {
      ierr = VecScatterMemcpyPlanExecute_Scatter(0,xv,&to->local.memcpy_plan,yv,&from->local.memcpy_plan,addv);CHKERRQ(ierr);
    } else {
      if (xv == yv && addv == INSERT_VALUES && to->local.nonmatching_computed) {
        /* only copy entries that do not share identical memory locations */
        ierr = PETSCMAP1(Scatter_MPI1)(to->local.n_nonmatching,to->local.slots_nonmatching,xv,from->local.slots_nonmatching,yv,addv,bs);CHKERRQ(ierr);
      } else {
        ierr = PETSCMAP1(Scatter_MPI1)(to->local.n,to->local.vslots,xv,from->local.vslots,yv,addv,bs);CHKERRQ(ierr);
      }
    }
  }
  PetscFunctionReturn(0);
}
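/*
   These routines are not called directly by applications; they are the
   block-size-BS implementations installed behind the generic VecScatterBegin()
   and VecScatterEnd() interface. An illustrative (not verbatim) caller:

       VecScatter sct;
       ierr = VecScatterCreate(x,ix,y,iy,&sct);CHKERRQ(ierr);
       ierr = VecScatterBegin(sct,x,y,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
       ierr = VecScatterEnd(sct,x,y,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
       ierr = VecScatterDestroy(&sct);CHKERRQ(ierr);

   The addv and mode arguments must match between the Begin and End calls, since
   Begin stashes state (ctx->xdata, ctx->ydata, the outstanding requests) that
   End consumes.
*/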
/* --------------------------------------------------------------------------------------*/

PetscErrorCode PETSCMAP1(VecScatterEndMPI1)(VecScatter ctx,Vec xin,Vec yin,InsertMode addv,ScatterMode mode)
{
  VecScatter_MPI_General *to,*from;
  PetscScalar            *rvalues,*yv;
  PetscErrorCode         ierr;
  PetscInt               nrecvs,nsends,*indices,count,*rstarts,bs;
  PetscMPIInt            imdex;
  MPI_Request            *rwaits,*swaits;
  MPI_Status             xrstatus,*sstatus;

  PetscFunctionBegin;
  if (mode & SCATTER_LOCAL) goto functionend;
  yv      = ctx->ydata;
  to      = (VecScatter_MPI_General*)ctx->todata;
  from    = (VecScatter_MPI_General*)ctx->fromdata;
  rwaits  = from->requests;
  swaits  = to->requests;
  sstatus = to->sstatus;    /* sstatus and rstatus are always stored in to */
  if (mode & SCATTER_REVERSE) {
    to     = (VecScatter_MPI_General*)ctx->fromdata;
    from   = (VecScatter_MPI_General*)ctx->todata;
    rwaits = from->rev_requests;
    swaits = to->rev_requests;
  }
  bs      = from->bs;
  rvalues = from->values;
  nrecvs  = from->n;
  nsends  = to->n;
  indices = from->indices;
  rstarts = from->starts;

  /* unpack one at a time */
  count = nrecvs;
  while (count) {
    ierr = MPI_Waitany(nrecvs,rwaits,&imdex,&xrstatus);CHKERRQ(ierr);
    /* unpack receives into our local space */
    if (from->memcpy_plan.optimized[imdex]) {
      ierr = VecScatterMemcpyPlanExecute_Unpack(imdex,rvalues+bs*rstarts[imdex],yv,&from->memcpy_plan,addv,bs);CHKERRQ(ierr);
    } else {
      ierr = PETSCMAP1(UnPack_MPI1)(rstarts[imdex+1] - rstarts[imdex],rvalues + bs*rstarts[imdex],indices + rstarts[imdex],yv,addv,bs);CHKERRQ(ierr);
    }
    count--;
  }
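  /* Sketch of what the UnPack_MPI1 routines above do (again block-size
     specialized elsewhere in this header; the indexing shown is illustrative):
     scatter the contiguous receive buffer into yv at the stored indices,
     honoring the insert mode, e.g. for INSERT_VALUES and ADD_VALUES

         for (i=0; i<n; i++)
           for (j=0; j<bs; j++)
             if (addv == INSERT_VALUES) yv[indices[i]+j]  = buf[i*bs+j];
             else                       yv[indices[i]+j] += buf[i*bs+j];
  */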
  /* wait on sends */
  if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}

  functionend:
  if (xin != yin) {
    ierr = VecRestoreArrayRead(xin,&ctx->xdata);CHKERRQ(ierr);
    ierr = VecLockReadPop(xin);CHKERRQ(ierr);
  }
  ierr = VecRestoreArray(yin,&ctx->ydata);CHKERRQ(ierr);
  ierr = VecLockWriteSet_Private(yin,PETSC_FALSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef PETSCMAP1_a
#undef PETSCMAP1_b
#undef PETSCMAP1
#undef BS
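/*
   Illustrative (not verbatim) use of this header by the including source file:
   define BS, include, and repeat for every block size that needs a specialized
   implementation, e.g.

       #define BS 1
       #include "vpscat_mpi1.h"
       #define BS 2
       #include "vpscat_mpi1.h"

   Each inclusion generates VecScatterBeginMPI1_<BS> and VecScatterEndMPI1_<BS>;
   the #undef of BS just above is what allows the header to be included again
   with a different block size.
*/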