Actual source code: vpscat.h
petsc-3.14.6 2021-03-30
/*
     Defines the methods VecScatterBegin/End_1,2,......
     This is included by vpscat.c with different values for BS

     This is a terrible way of doing "templates" in C.
*/
#define PETSCMAP1_a(a,b)  a ## _ ## b
#define PETSCMAP1_b(a,b)  PETSCMAP1_a(a,b)
#define PETSCMAP1(a)      PETSCMAP1_b(a,BS)
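
/*
   Illustration (added commentary, not part of the original header): if the including
   vpscat.c compiles this file as, say,

       #define BS 1
       #include "vpscat.h"

   then PETSCMAP1(VecScatterBegin) expands through PETSCMAP1_b and the token pasting in
   PETSCMAP1_a to the name VecScatterBegin_1, so each inclusion with a different BS
   emits a separately named set of routines specialized for that block size.
*/
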
PetscErrorCode PETSCMAP1(VecScatterBegin)(VecScatter ctx,Vec xin,Vec yin,InsertMode addv,ScatterMode mode)
{
  VecScatter_MPI_General *to,*from;
  PetscScalar            *xv,*yv,*svalues;
  MPI_Request            *rwaits,*swaits;
  PetscErrorCode         ierr;
  PetscInt               i,*indices,*sstarts,nrecvs,nsends,bs;
#if defined(PETSC_HAVE_CUDA)
  PetscBool              is_cudatype = PETSC_FALSE;
#endif

  if (mode & SCATTER_REVERSE) {
    to     = (VecScatter_MPI_General*)ctx->fromdata;
    from   = (VecScatter_MPI_General*)ctx->todata;
    rwaits = from->rev_requests;
    swaits = to->rev_requests;
  } else {
    to     = (VecScatter_MPI_General*)ctx->todata;
    from   = (VecScatter_MPI_General*)ctx->fromdata;
    rwaits = from->requests;
    swaits = to->requests;
  }
  bs      = to->bs;
  svalues = to->values;
  nrecvs  = from->n;
  nsends  = to->n;
  indices = to->indices;
  sstarts = to->starts;
#if defined(PETSC_HAVE_CUDA)
  PetscObjectTypeCompareAny((PetscObject)xin,&is_cudatype,VECSEQCUDA,VECMPICUDA,VECCUDA,"");
  if (is_cudatype) {
    VecCUDAAllocateCheckHost(xin);
    if (xin->offloadmask == PETSC_OFFLOAD_GPU) {
      if (xin->spptr && ctx->spptr) {
        VecCUDACopyFromGPUSome_Public(xin,(PetscCUDAIndices)ctx->spptr,mode);
      } else {
        VecCUDACopyFromGPU(xin);
      }
    }
    xv = *((PetscScalar**)xin->data);
  } else
#endif
  {
    VecGetArrayRead(xin,(const PetscScalar**)&xv);
  }

  if (xin != yin) {VecGetArray(yin,&yv);}
  else yv = xv;

  if (!(mode & SCATTER_LOCAL)) {
    /* post receives since they were not previously posted */
    MPI_Startall_irecv(from->starts[nrecvs]*bs,MPIU_SCALAR,nrecvs,rwaits);
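
    /* Note (added commentary): rwaits/swaits point at persistent MPI requests that were set
       up when the scatter context was created; MPI_Startall_irecv() here and MPI_Start_isend()
       below are PETSc convenience macros that (re)start those persistent receives/sends.
       This description is inferred from their use in this file; their definitions live elsewhere. */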
    if (to->sharedspace) {
      /* Pack the send data into my shared memory buffer --- this is the normal forward scatter */
      PETSCMAP1(Pack)(to->sharedcnt,to->sharedspaceindices,xv,to->sharedspace,bs);
    } else {
      /* Pack the send data into the receivers' shared memory buffers --- this is the normal backward scatter */
      for (i=0; i<to->msize; i++) {
        if (to->sharedspacesoffset && to->sharedspacesoffset[i] > -1) {
          PETSCMAP1(Pack)(to->sharedspacestarts[i+1] - to->sharedspacestarts[i],to->sharedspaceindices + to->sharedspacestarts[i],xv,&to->sharedspaces[i][bs*to->sharedspacesoffset[i]],bs);
        }
      }
    }
    /* this version packs and sends one at a time */
    for (i=0; i<nsends; i++) {
      PETSCMAP1(Pack)(sstarts[i+1]-sstarts[i],indices + sstarts[i],xv,svalues + bs*sstarts[i],bs);
      MPI_Start_isend((sstarts[i+1]-sstarts[i])*bs,MPIU_SCALAR,swaits+i);
    }
  }
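
  /*
     For reference, a minimal sketch of what a PETSCMAP1(Pack) routine does for a general
     block size. This is an illustration inferred from the call sites above (count, source
     indices, source array, contiguous send buffer, block size), not the library's actual
     implementation:

         static void PETSCMAP1(Pack)(PetscInt n,const PetscInt *indices,const PetscScalar *x,PetscScalar *y,PetscInt bs)
         {
           PetscInt i,j;
           for (i=0; i<n; i++) {
             for (j=0; j<bs; j++) y[i*bs+j] = x[indices[i]+j];
           }
         }
  */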
  /* take care of local scatters */
  if (to->local.n) {
    if (to->local.memcpy_plan.optimized[0] && addv == INSERT_VALUES) {
      /* only do the copy when it is not a self-to-self copy */
      if (!(yv == xv && to->local.memcpy_plan.same_copy_starts)) {
        for (i=to->local.memcpy_plan.copy_offsets[0]; i<to->local.memcpy_plan.copy_offsets[1]; i++) {
          /* Do we need to take care of overlaps? We could, but overlaps sound more like a bug than a requirement,
             so I just leave it and let PetscMemcpy detect this bug.
          */
          PetscArraycpy(yv + from->local.memcpy_plan.copy_starts[i],xv + to->local.memcpy_plan.copy_starts[i],to->local.memcpy_plan.copy_lengths[i]);
        }
      }
    } else {
      if (xv == yv && addv == INSERT_VALUES && to->local.nonmatching_computed) {
        /* only copy entries that do not share identical memory locations */
        PETSCMAP1(Scatter)(to->local.n_nonmatching,to->local.slots_nonmatching,xv,from->local.slots_nonmatching,yv,addv,bs);
      } else {
        PETSCMAP1(Scatter)(to->local.n,to->local.vslots,xv,from->local.vslots,yv,addv,bs);
      }
    }
  }
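
  /*
     Similarly, a minimal sketch of PETSCMAP1(Scatter), which moves values directly between
     two index sets of equal length. Illustration only, with the argument order inferred from
     the calls above (count, source indices, source array, destination indices, destination
     array, insert mode, block size); other insert modes are omitted here:

         static void PETSCMAP1(Scatter)(PetscInt n,const PetscInt *ix,const PetscScalar *x,const PetscInt *iy,PetscScalar *y,InsertMode addv,PetscInt bs)
         {
           PetscInt i,j;
           if (addv == INSERT_VALUES) {
             for (i=0; i<n; i++) for (j=0; j<bs; j++) y[iy[i]+j] = x[ix[i]+j];
           } else if (addv == ADD_VALUES) {
             for (i=0; i<n; i++) for (j=0; j<bs; j++) y[iy[i]+j] += x[ix[i]+j];
           }
         }
  */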
  VecRestoreArrayRead(xin,(const PetscScalar**)&xv);
  if (xin != yin) {VecRestoreArray(yin,&yv);}
  return(0);
}

/* --------------------------------------------------------------------------------------*/

PetscErrorCode PETSCMAP1(VecScatterEnd)(VecScatter ctx,Vec xin,Vec yin,InsertMode addv,ScatterMode mode)
{
  VecScatter_MPI_General *to,*from;
  PetscScalar            *rvalues,*yv;
  PetscErrorCode         ierr;
  PetscInt               nrecvs,nsends,*indices,count,*rstarts,bs;
  PetscMPIInt            imdex;
  MPI_Request            *rwaits,*swaits;
  MPI_Status             xrstatus,*sstatus;
  PetscMPIInt            i;

  if (mode & SCATTER_LOCAL) return(0);
  VecGetArray(yin,&yv);

  to      = (VecScatter_MPI_General*)ctx->todata;
  from    = (VecScatter_MPI_General*)ctx->fromdata;
  rwaits  = from->requests;
  swaits  = to->requests;
  sstatus = to->sstatus;    /* sstatus and rstatus are always stored in to */
  if (mode & SCATTER_REVERSE) {
    to     = (VecScatter_MPI_General*)ctx->fromdata;
    from   = (VecScatter_MPI_General*)ctx->todata;
    rwaits = from->rev_requests;
    swaits = to->rev_requests;
  }
  bs      = from->bs;
  rvalues = from->values;
  nrecvs  = from->n;
  nsends  = to->n;
  indices = from->indices;
  rstarts = from->starts;

  MPI_Barrier(PetscObjectComm((PetscObject)ctx));

  /* unpack one at a time */
  count = nrecvs;
  while (count) {
    MPI_Waitany(nrecvs,rwaits,&imdex,&xrstatus);
    /* unpack receives into our local space */
    PETSCMAP1(UnPack)(rstarts[imdex+1] - rstarts[imdex],rvalues + bs*rstarts[imdex],indices + rstarts[imdex],yv,addv,bs);
    count--;
  }
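
  /*
     For reference, a minimal sketch of what PETSCMAP1(UnPack) does. Illustration only, with
     the argument order inferred from the call above (count, packed receive buffer, destination
     indices, destination array, insert mode, block size); other insert modes are omitted here:

         static void PETSCMAP1(UnPack)(PetscInt n,const PetscScalar *x,const PetscInt *indices,PetscScalar *y,InsertMode addv,PetscInt bs)
         {
           PetscInt i,j;
           if (addv == INSERT_VALUES) {
             for (i=0; i<n; i++) for (j=0; j<bs; j++) y[indices[i]+j] = x[i*bs+j];
           } else if (addv == ADD_VALUES) {
             for (i=0; i<n; i++) for (j=0; j<bs; j++) y[indices[i]+j] += x[i*bs+j];
           }
         }
  */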
  /* handle processes that share the same shared memory communicator */
  if (from->sharedspace) {
    /* unpack the data from my shared memory buffer --- this is the normal backward scatter */
    PETSCMAP1(UnPack)(from->sharedcnt,from->sharedspace,from->sharedspaceindices,yv,addv,bs);
  } else {
    /* unpack the data from each of my sending partners' shared memory buffers --- this is the normal forward scatter */
    for (i=0; i<from->msize; i++) {
      if (from->sharedspacesoffset && from->sharedspacesoffset[i] > -1) {
        PETSCMAP1(UnPack)(from->sharedspacestarts[i+1] - from->sharedspacestarts[i],&from->sharedspaces[i][bs*from->sharedspacesoffset[i]],from->sharedspaceindices + from->sharedspacestarts[i],yv,addv,bs);
      }
    }
  }
  MPI_Barrier(PetscObjectComm((PetscObject)ctx));

  /* wait on sends */
  if (nsends) {MPI_Waitall(nsends,swaits,sstatus);}
  VecRestoreArray(yin,&yv);
  return(0);
}

/* -------------------------------------------------------- */
#include <../src/vec/vec/impls/node/vecnodeimpl.h>

PetscErrorCode PETSCMAP1(VecScatterBeginMPI3Node)(VecScatter ctx,Vec xin,Vec yin,InsertMode addv,ScatterMode mode)
{
  VecScatter_MPI_General *to,*from;
  PetscScalar            *xv,*yv,*svalues;
  MPI_Request            *rwaits,*swaits;
  PetscErrorCode         ierr;
  PetscInt               i,*indices,*sstarts,nrecvs,nsends,bs;

  if (mode & SCATTER_REVERSE) {
    to     = (VecScatter_MPI_General*)ctx->fromdata;
    from   = (VecScatter_MPI_General*)ctx->todata;
    rwaits = from->rev_requests;
    swaits = to->rev_requests;
  } else {
    to     = (VecScatter_MPI_General*)ctx->todata;
    from   = (VecScatter_MPI_General*)ctx->fromdata;
    rwaits = from->requests;
    swaits = to->requests;
  }
  bs      = to->bs;
  svalues = to->values;
  nrecvs  = from->n;
  nsends  = to->n;
  indices = to->indices;
  sstarts = to->starts;

  VecGetArrayRead(xin,(const PetscScalar**)&xv);

  if (xin != yin) {VecGetArray(yin,&yv);}
  else yv = xv;

  if (!(mode & SCATTER_LOCAL)) {
    /* post receives since they were not previously posted */
    MPI_Startall_irecv(from->starts[nrecvs]*bs,MPIU_SCALAR,nrecvs,rwaits);

    /* this version packs and sends one at a time */
    for (i=0; i<nsends; i++) {
      PETSCMAP1(Pack)(sstarts[i+1]-sstarts[i],indices + sstarts[i],xv,svalues + bs*sstarts[i],bs);
      MPI_Start_isend((sstarts[i+1]-sstarts[i])*bs,MPIU_SCALAR,swaits+i);
    }
  }

  /* take care of local scatters */
  if (to->local.n) {
    if (to->local.memcpy_plan.optimized[0] && addv == INSERT_VALUES) {
      /* only do the copy when it is not a self-to-self copy */
      if (!(yv == xv && to->local.memcpy_plan.same_copy_starts)) {
        for (i=to->local.memcpy_plan.copy_offsets[0]; i<to->local.memcpy_plan.copy_offsets[1]; i++) {
          /* Do we need to take care of overlaps? We could, but overlaps sound more like a bug than a requirement,
             so I just leave it and let PetscMemcpy detect this bug.
          */
          PetscArraycpy(yv + from->local.memcpy_plan.copy_starts[i],xv + to->local.memcpy_plan.copy_starts[i],to->local.memcpy_plan.copy_lengths[i]);
        }
      }
    } else {
      if (xv == yv && addv == INSERT_VALUES && to->local.nonmatching_computed) {
        /* only copy entries that do not share identical memory locations */
        PETSCMAP1(Scatter)(to->local.n_nonmatching,to->local.slots_nonmatching,xv,from->local.slots_nonmatching,yv,addv,bs);
      } else {
        PETSCMAP1(Scatter)(to->local.n,to->local.vslots,xv,from->local.vslots,yv,addv,bs);
      }
    }
  }
  VecRestoreArrayRead(xin,(const PetscScalar**)&xv);
  if (xin != yin) {VecRestoreArray(yin,&yv);}
  return(0);
}

#define PETSC_MEMSHARE_SAFE
PetscErrorCode PETSCMAP1(VecScatterEndMPI3Node)(VecScatter ctx,Vec xin,Vec yin,InsertMode addv,ScatterMode mode)
{
  VecScatter_MPI_General *to,*from;
  PetscScalar            *rvalues,*yv;
  const PetscScalar      *xv;
  PetscErrorCode         ierr;
  PetscInt               nrecvs,nsends,*indices,count,*rstarts,bs;
  PetscMPIInt            imdex;
  MPI_Request            *rwaits,*swaits;
  MPI_Status             xrstatus,*sstatus;
  Vec_Node               *vnode;
  PetscInt               cnt,*idx,*idy;
  MPI_Comm               comm,mscomm,veccomm;
  PetscShmComm           scomm;
  PetscMPIInt            i,xsize;
  PetscInt               k,k1;
  PetscScalar            *sharedspace;

  if (mode & SCATTER_LOCAL) return(0);

  PetscObjectGetComm((PetscObject)ctx,&comm);
  PetscShmCommGet(comm,&scomm);
  PetscShmCommGetMpiShmComm(scomm,&mscomm);

  VecGetArray(yin,&yv);

  to      = (VecScatter_MPI_General*)ctx->todata;
  from    = (VecScatter_MPI_General*)ctx->fromdata;
  rwaits  = from->requests;
  swaits  = to->requests;
  sstatus = to->sstatus;    /* sstatus and rstatus are always stored in to */
  if (mode & SCATTER_REVERSE) {
    to     = (VecScatter_MPI_General*)ctx->fromdata;
    from   = (VecScatter_MPI_General*)ctx->todata;
    rwaits = from->rev_requests;
    swaits = to->rev_requests;
  }
  bs      = from->bs;
  rvalues = from->values;
  nrecvs  = from->n;
  nsends  = to->n;
  indices = from->indices;
  rstarts = from->starts;

  /* unpack one at a time */
  count = nrecvs;
  while (count) {
    MPI_Waitany(nrecvs,rwaits,&imdex,&xrstatus);
    /* unpack receives into our local space */
    PETSCMAP1(UnPack)(rstarts[imdex+1] - rstarts[imdex],rvalues + bs*rstarts[imdex],indices + rstarts[imdex],yv,addv,bs);
    count--;
  }

  /* handle processes that share the same shared memory communicator */
#if defined(PETSC_MEMSHARE_SAFE)
  MPI_Barrier(mscomm);
#endif

  /* check if xin is sequential */
  PetscObjectGetComm((PetscObject)xin,&veccomm);
  MPI_Comm_size(veccomm,&xsize);

  if (xsize == 1 || from->sharedspace) { /* 'from->sharedspace' indicates this core's shared memory will be written */
    /* StoP: read sequential local xvalues, then write to shared yvalues */
    PetscInt notdone = to->notdone;
    vnode = (Vec_Node*)yin->data;
    if (!vnode->win) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_NULL,"vector y must have type VECNODE with shared memory");
    if (ctx->is_duplicate) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Duplicate index is not supported");
    VecGetArrayRead(xin,&xv);

    i = 0;
    while (notdone) {
      while (i < to->msize) {
        if (to->sharedspacesoffset && to->sharedspacesoffset[i] > -1) {
          cnt = to->sharedspacestarts[i+1] - to->sharedspacestarts[i];
          idx = to->sharedspaceindices + to->sharedspacestarts[i];
          idy = idx + to->sharedcnt;

          sharedspace = vnode->winarray[i];
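
          /* Element [-1] of each VECNODE shared array appears to hold the vector's state
             counter (the error message below calls it "statecnt"): spin until the i-th
             core's shared buffer has reached the same state as this process's own vector
             before touching its entries. (Added commentary inferred from the code below.) */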
          if (sharedspace[-1] != yv[-1]) {
            if (PetscRealPart(sharedspace[-1] - yv[-1]) > 0.0) {
              PetscMPIInt msrank;
              MPI_Comm_rank(mscomm,&msrank);
              SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"[%d] statecnt %g > [%d] my_statecnt %g",i,PetscRealPart(sharedspace[-1]),msrank,PetscRealPart(yv[-1]));
            }
            /* the i-th core has not reached the current object state count yet, wait ... */
            continue;
          }

          if (addv == ADD_VALUES) {
            for (k=0; k<cnt; k++) {
              for (k1=0; k1<bs; k1++) sharedspace[idy[k]+k1] += xv[idx[k]+k1];
            }
          } else if (addv == INSERT_VALUES) {
            for (k=0; k<cnt; k++) {
              for (k1=0; k1<bs; k1++) sharedspace[idy[k]+k1] = xv[idx[k]+k1];
            }
          } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot handle insert mode %D",addv);
          notdone--;
        }
        i++;
      }
    }
    VecRestoreArrayRead(xin,&xv);
  } else {
    /* PtoS: read shared xvalues, then write to sequential local yvalues */
    PetscInt notdone = from->notdone;

    vnode = (Vec_Node*)xin->data;
    if (!vnode->win && notdone) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_NULL,"vector x must have type VECNODE with shared memory");
    VecGetArrayRead(xin,&xv);

    i = 0;
    while (notdone) {
      while (i < from->msize) {
        if (from->sharedspacesoffset && from->sharedspacesoffset[i] > -1) {
          cnt = from->sharedspacestarts[i+1] - from->sharedspacestarts[i];
          idy = from->sharedspaceindices + from->sharedspacestarts[i]; /* recv local y indices */
          idx = idy + from->sharedcnt;

          sharedspace = vnode->winarray[i];

          if (sharedspace[-1] != xv[-1]) {
            if (PetscRealPart(sharedspace[-1] - xv[-1]) > 0.0) {
              PetscMPIInt msrank;
              MPI_Comm_rank(mscomm,&msrank);
              SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"[%d] statecnt %g > [%d] my_statecnt %g",i,PetscRealPart(sharedspace[-1]),msrank,PetscRealPart(xv[-1]));
            }
            /* the i-th core has not reached the current object state count yet, wait ... */
            continue;
          }

          if (addv == ADD_VALUES) {
            for (k=0; k<cnt; k++) {
              for (k1=0; k1<bs; k1++) yv[idy[k]+k1] += sharedspace[idx[k]+k1]; /* read x shared values */
            }
          } else if (addv == INSERT_VALUES) {
            for (k=0; k<cnt; k++) {
              for (k1=0; k1<bs; k1++) yv[idy[k]+k1] = sharedspace[idx[k]+k1]; /* read x shared values */
            }
          } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot handle insert mode %D",addv);

          notdone--;
        }
        i++;
      }
    }
    VecRestoreArrayRead(xin,&xv);
  }
  /* output y is parallel (shared across the node); the barrier ensures all updates to it are complete -- this costs some performance */
  MPI_Barrier(mscomm);

  /* wait on sends */
  if (nsends) {MPI_Waitall(nsends,swaits,sstatus);}
  VecRestoreArray(yin,&yv);
  return(0);
}

#undef PETSCMAP1_a
#undef PETSCMAP1_b
#undef PETSCMAP1
#undef BS