/* Actual source code: vpscat.h  (petsc-3.9.4, 2018-09-11) */
/*
     Defines the methods VecScatterBegin/End_1,2,......
     This is included by vpscat.c with different values for BS

     This is a terrible way of doing "templates" in C.
*/
#define PETSCMAP1_a(a,b)  a ## _ ## b
#define PETSCMAP1_b(a,b)  PETSCMAP1_a(a,b)
#define PETSCMAP1(a)      PETSCMAP1_b(a,BS)
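/*
   For reference, with BS defined to, say, 2 by the including file, the
   preprocessor expands

     PETSCMAP1(VecScatterBegin)
       -> PETSCMAP1_b(VecScatterBegin,BS)
       -> PETSCMAP1_a(VecScatterBegin,2)    (BS expands before the paste)
       -> VecScatterBegin_2

   The intermediate PETSCMAP1_b level forces BS to be macro-expanded before the
   ## paste; pasting directly against BS would instead produce VecScatterBegin_BS.
*/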
PetscErrorCode PETSCMAP1(VecScatterBegin)(VecScatter ctx,Vec xin,Vec yin,InsertMode addv,ScatterMode mode)
{
  VecScatter_MPI_General *to,*from;
  PetscScalar            *xv,*yv,*svalues;
  MPI_Request            *rwaits,*swaits;
  PetscErrorCode         ierr;
  PetscInt               i,*indices,*sstarts,nrecvs,nsends,bs;
#if defined(PETSC_HAVE_VIENNACL)
  PetscBool              is_viennacltype = PETSC_FALSE;
#endif
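  /*
     ctx->todata describes the messages this rank sends and ctx->fromdata the
     messages it receives in a forward scatter; SCATTER_REVERSE runs the same
     communication pattern in the opposite direction, so the two roles (and the
     matching sets of persistent MPI requests) are simply swapped below.
  */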
  if (mode & SCATTER_REVERSE) {
    to     = (VecScatter_MPI_General*)ctx->fromdata;
    from   = (VecScatter_MPI_General*)ctx->todata;
    rwaits = from->rev_requests;
    swaits = to->rev_requests;
  } else {
    to     = (VecScatter_MPI_General*)ctx->todata;
    from   = (VecScatter_MPI_General*)ctx->fromdata;
    rwaits = from->requests;
    swaits = to->requests;
  }
  bs      = to->bs;
  svalues = to->values;
  nrecvs  = from->n;
  nsends  = to->n;
  indices = to->indices;
  sstarts = to->starts;
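  /*
     The preprocessor blocks below obtain a host pointer xv to the entries of xin:
     ViennaCL vectors go through VecGetArrayRead(), CUDA vectors are copied back
     to the host first (possibly only the needed entries, via ctx->spptr), and
     ordinary host vectors use VecGetArrayRead() directly.
  */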
#if defined(PETSC_HAVE_VIENNACL)
  PetscObjectTypeCompareAny((PetscObject)xin,&is_viennacltype,VECSEQVIENNACL,VECMPIVIENNACL,VECVIENNACL,"");
  if (is_viennacltype) {
    VecGetArrayRead(xin,(const PetscScalar**)&xv);
  } else
#endif
#if defined(PETSC_HAVE_VECCUDA)
  {
    VecCUDAAllocateCheckHost(xin);
    if (xin->valid_GPU_array == PETSC_OFFLOAD_GPU) {
      if (xin->spptr && ctx->spptr) {
        VecCUDACopyFromGPUSome_Public(xin,(PetscCUDAIndices)ctx->spptr);
      } else {
        VecCUDACopyFromGPU(xin);
      }
    }
    xv = *((PetscScalar**)xin->data);
  }
#else
  {
    VecGetArrayRead(xin,(const PetscScalar**)&xv);
  }
#endif
  if (xin != yin) {VecGetArray(yin,&yv);}
  else yv = xv;

  if (!(mode & SCATTER_LOCAL)) {
    if (!from->use_readyreceiver && !to->sendfirst && !to->use_alltoallv && !to->use_window) {
      /* post receives since they were not previously posted */
      if (nrecvs) {MPI_Startall_irecv(from->starts[nrecvs]*bs,nrecvs,rwaits);}
    }

#if defined(PETSC_HAVE_MPI_ALLTOALLW) && !defined(PETSC_USE_64BIT_INDICES)
    if (to->use_alltoallw && addv == INSERT_VALUES) {
      MPI_Alltoallw(xv,to->wcounts,to->wdispls,to->types,yv,from->wcounts,from->wdispls,from->types,PetscObjectComm((PetscObject)ctx));
    } else
#endif
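    /*
       The branches below choose the transport: MPI_Alltoallw directly on the
       arrays (above, when inserting), packing everything and using MPI_Alltoallv
       or one-sided MPI_Put into a window, starting all persistent sends at once,
       or the default of packing and starting one persistent send per destination
       (with on-node destinations written through shared memory instead).
    */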
    if (ctx->packtogether || to->use_alltoallv || to->use_window) {
      /* this version packs all the messages together before sending; used with -vecscatter_packtogether, alltoallv, or window */
      PETSCMAP1(Pack)(sstarts[nsends],indices,xv,svalues,bs);
      if (to->use_alltoallv) {
        MPI_Alltoallv(to->values,to->counts,to->displs,MPIU_SCALAR,from->values,from->counts,from->displs,MPIU_SCALAR,PetscObjectComm((PetscObject)ctx));
      } else if (to->use_window) {
        PetscInt cnt;

        MPI_Win_fence(0,from->window);
        for (i=0; i<nsends; i++) {
          cnt = bs*(to->starts[i+1]-to->starts[i]);
          MPI_Put(to->values+bs*to->starts[i],cnt,MPIU_SCALAR,to->procs[i],bs*to->winstarts[i],cnt,MPIU_SCALAR,from->window);
        }
      } else if (nsends) {
        MPI_Startall_isend(to->starts[to->n]*bs,nsends,swaits);
      }
    } else {
      if (to->sharedspace) {
        /* Pack the send data into my shared memory buffer --- this is the normal forward scatter */
        PETSCMAP1(Pack)(to->sharedcnt,to->sharedspaceindices,xv,to->sharedspace,bs);
      } else {
        /* Pack the send data into the receivers' shared memory buffers --- this is the normal backward scatter */
        for (i=0; i<to->msize; i++) {
          if (to->sharedspacesoffset && to->sharedspacesoffset[i] > -1) {
            PETSCMAP1(Pack)(to->sharedspacestarts[i+1] - to->sharedspacestarts[i],to->sharedspaceindices + to->sharedspacestarts[i],xv,&to->sharedspaces[i][bs*to->sharedspacesoffset[i]],bs);
          }
        }
      }
      /* this version packs and sends one message at a time */
      for (i=0; i<nsends; i++) {
        PETSCMAP1(Pack)(sstarts[i+1]-sstarts[i],indices + sstarts[i],xv,svalues + bs*sstarts[i],bs);
        MPI_Start_isend((sstarts[i+1]-sstarts[i])*bs,swaits+i);
      }
    }

    if (!from->use_readyreceiver && to->sendfirst && !to->use_alltoallv && !to->use_window) {
      /* post receives since they were not previously posted */
      if (nrecvs) {MPI_Startall_irecv(from->starts[nrecvs]*bs,nrecvs,rwaits);}
    }
  }

  /* take care of local scatters */
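  /*
     If the purely local part is a contiguous block copy and we are inserting,
     a single PetscMemcpy() suffices; otherwise an indexed scatter is used,
     restricted to the non-aliasing entries when x and y share storage.
  */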
  if (to->local.n) {
    if (to->local.is_copy && addv == INSERT_VALUES) {
      if (yv != xv || from->local.copy_start != to->local.copy_start) {
        PetscMemcpy(yv + from->local.copy_start,xv + to->local.copy_start,to->local.copy_length);
      }
    } else {
      if (xv == yv && addv == INSERT_VALUES && to->local.nonmatching_computed) {
        /* only copy entries that do not share identical memory locations */
        PETSCMAP1(Scatter)(to->local.n_nonmatching,to->local.slots_nonmatching,xv,from->local.slots_nonmatching,yv,addv,bs);
      } else {
        PETSCMAP1(Scatter)(to->local.n,to->local.vslots,xv,from->local.vslots,yv,addv,bs);
      }
    }
  }
  VecRestoreArrayRead(xin,(const PetscScalar**)&xv);
  if (xin != yin) {VecRestoreArray(yin,&yv);}
  return(0);
}
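/*
   A minimal usage sketch of the public interface these kernels implement; the
   generic VecScatterBegin()/VecScatterEnd() calls dispatch to the block-size
   specializations generated from this file.  The function name and the index
   sets are illustrative only, not part of this source.
*/
#if 0
#include <petscvec.h>

static PetscErrorCode ExampleScatter(Vec x,Vec y,IS isfrom,IS isto)
{
  VecScatter     sctx;
  PetscErrorCode ierr;

  /* describe which entries of x go to which entries of y */
  ierr = VecScatterCreate(x,isfrom,y,isto,&sctx);CHKERRQ(ierr);
  /* start the communication ... */
  ierr = VecScatterBegin(sctx,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  /* ... unrelated local work may overlap the messages here ... */
  ierr = VecScatterEnd(sctx,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&sctx);CHKERRQ(ierr);
  return 0;
}
#endif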
/* --------------------------------------------------------------------------------------*/

PetscErrorCode PETSCMAP1(VecScatterEnd)(VecScatter ctx,Vec xin,Vec yin,InsertMode addv,ScatterMode mode)
{
  VecScatter_MPI_General *to,*from;
  PetscScalar            *rvalues,*yv;
  PetscErrorCode         ierr;
  PetscInt               nrecvs,nsends,*indices,count,*rstarts,bs;
  PetscMPIInt            imdex;
  MPI_Request            *rwaits,*swaits;
  MPI_Status             xrstatus,*rstatus,*sstatus;

  if (mode & SCATTER_LOCAL) return(0);
  VecGetArray(yin,&yv);

  to      = (VecScatter_MPI_General*)ctx->todata;
  from    = (VecScatter_MPI_General*)ctx->fromdata;
  rwaits  = from->requests;
  swaits  = to->requests;
  sstatus = to->sstatus;    /* sstatus and rstatus are always stored in to */
  rstatus = to->rstatus;
  if (mode & SCATTER_REVERSE) {
    to     = (VecScatter_MPI_General*)ctx->fromdata;
    from   = (VecScatter_MPI_General*)ctx->todata;
    rwaits = from->rev_requests;
    swaits = to->rev_requests;
  }
  bs      = from->bs;
  rvalues = from->values;
  nrecvs  = from->n;
  nsends  = to->n;
  indices = from->indices;
  rstarts = from->starts;

  if (ctx->packtogether || (to->use_alltoallw && (addv != INSERT_VALUES)) || (to->use_alltoallv && !to->use_alltoallw) || to->use_window) {
    if (to->use_window) {MPI_Win_fence(0,from->window);}
    else if (nrecvs && !to->use_alltoallv) {MPI_Waitall(nrecvs,rwaits,rstatus);}
    PETSCMAP1(UnPack)(from->starts[from->n],from->values,indices,yv,addv,bs);
  } else if (!to->use_alltoallw) {
    PetscMPIInt i;
    MPI_Barrier(PetscObjectComm((PetscObject)ctx));

    /* unpack one at a time */
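    /*
       When ctx->reproduce is set the receives are waited on in a fixed (reverse)
       order instead of with MPI_Waitany(), so that ADD_VALUES contributions are
       accumulated in the same order on every run and the result is reproducible.
    */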
    count = nrecvs;
    while (count) {
      if (ctx->reproduce) {
        imdex = count - 1;
        MPI_Wait(rwaits+imdex,&xrstatus);
      } else {
        MPI_Waitany(nrecvs,rwaits,&imdex,&xrstatus);
      }
      /* unpack receives into our local space */
      PETSCMAP1(UnPack)(rstarts[imdex+1] - rstarts[imdex],rvalues + bs*rstarts[imdex],indices + rstarts[imdex],yv,addv,bs);
      count--;
    }
    /* handle processes that share the same shared memory communicator */
    if (from->sharedspace) {
      /* unpack the data from my shared memory buffer --- this is the normal backward scatter */
      PETSCMAP1(UnPack)(from->sharedcnt,from->sharedspace,from->sharedspaceindices,yv,addv,bs);
    } else {
      /* unpack the data from each of my sending partners' shared memory buffers --- this is the normal forward scatter */
      for (i=0; i<from->msize; i++) {
        if (from->sharedspacesoffset && from->sharedspacesoffset[i] > -1) {
          PETSCMAP1(UnPack)(from->sharedspacestarts[i+1] - from->sharedspacestarts[i],&from->sharedspaces[i][bs*from->sharedspacesoffset[i]],from->sharedspaceindices + from->sharedspacestarts[i],yv,addv,bs);
        }
      }
    }
    MPI_Barrier(PetscObjectComm((PetscObject)ctx));
  }
  if (from->use_readyreceiver) {
    if (nrecvs) {MPI_Startall_irecv(from->starts[nrecvs]*bs,nrecvs,rwaits);}
    MPI_Barrier(PetscObjectComm((PetscObject)ctx));
  }

  /* wait on sends */
  if (nsends && !to->use_alltoallv && !to->use_window) {MPI_Waitall(nsends,swaits,sstatus);}
  VecRestoreArray(yin,&yv);
  return(0);
}

/* -------------------------------------------------------- */
#include <../src/vec/vec/impls/node/vecnodeimpl.h>
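/*
   The *MPI3Node variants below are used with vectors of type VECNODE: ranks on
   the same compute node read and write each other's entries directly through an
   MPI-3 shared-memory window (see vecnodeimpl.h), while off-node traffic still
   uses the point-to-point path above.
*/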
PetscErrorCode PETSCMAP1(VecScatterBeginMPI3Node)(VecScatter ctx,Vec xin,Vec yin,InsertMode addv,ScatterMode mode)
{
  VecScatter_MPI_General *to,*from;
  PetscScalar            *xv,*yv,*svalues;
  MPI_Request            *rwaits,*swaits;
  PetscErrorCode         ierr;
  PetscInt               i,*indices,*sstarts,nrecvs,nsends,bs;

  if (mode & SCATTER_REVERSE) {
    to     = (VecScatter_MPI_General*)ctx->fromdata;
    from   = (VecScatter_MPI_General*)ctx->todata;
    rwaits = from->rev_requests;
    swaits = to->rev_requests;
  } else {
    to     = (VecScatter_MPI_General*)ctx->todata;
    from   = (VecScatter_MPI_General*)ctx->fromdata;
    rwaits = from->requests;
    swaits = to->requests;
  }
  bs      = to->bs;
  svalues = to->values;
  nrecvs  = from->n;
  nsends  = to->n;
  indices = to->indices;
  sstarts = to->starts;

  VecGetArrayRead(xin,(const PetscScalar**)&xv);

  if (xin != yin) {VecGetArray(yin,&yv);}
  else yv = xv;

  if (!(mode & SCATTER_LOCAL)) {
    if (!from->use_readyreceiver && !to->sendfirst && !to->use_alltoallv && !to->use_window) {
      /* post receives since they were not previously posted */
      if (nrecvs) {MPI_Startall_irecv(from->starts[nrecvs]*bs,nrecvs,rwaits);}
    }

#if defined(PETSC_HAVE_MPI_ALLTOALLW) && !defined(PETSC_USE_64BIT_INDICES)
    if (to->use_alltoallw && addv == INSERT_VALUES) {
      MPI_Alltoallw(xv,to->wcounts,to->wdispls,to->types,yv,from->wcounts,from->wdispls,from->types,PetscObjectComm((PetscObject)ctx));
    } else
#endif
    if (ctx->packtogether || to->use_alltoallv || to->use_window) {
      /* this version packs all the messages together before sending; used with -vecscatter_packtogether, alltoallv, or window */
      PETSCMAP1(Pack)(sstarts[nsends],indices,xv,svalues,bs);
      if (to->use_alltoallv) {
        MPI_Alltoallv(to->values,to->counts,to->displs,MPIU_SCALAR,from->values,from->counts,from->displs,MPIU_SCALAR,PetscObjectComm((PetscObject)ctx));
      } else if (to->use_window) {
        PetscInt cnt;

        MPI_Win_fence(0,from->window);
        for (i=0; i<nsends; i++) {
          cnt = bs*(to->starts[i+1]-to->starts[i]);
          MPI_Put(to->values+bs*to->starts[i],cnt,MPIU_SCALAR,to->procs[i],bs*to->winstarts[i],cnt,MPIU_SCALAR,from->window);
        }
      } else if (nsends) {
        MPI_Startall_isend(to->starts[to->n]*bs,nsends,swaits);
      }
    } else {
      /* this version packs and sends one message at a time */
      for (i=0; i<nsends; i++) {
        PETSCMAP1(Pack)(sstarts[i+1]-sstarts[i],indices + sstarts[i],xv,svalues + bs*sstarts[i],bs);
        MPI_Start_isend((sstarts[i+1]-sstarts[i])*bs,swaits+i);
      }
    }
    if (!from->use_readyreceiver && to->sendfirst && !to->use_alltoallv && !to->use_window) {
      /* post receives since they were not previously posted */
      if (nrecvs) {MPI_Startall_irecv(from->starts[nrecvs]*bs,nrecvs,rwaits);}
    }
  }

  /* take care of local scatters */
  if (to->local.n) {
    if (to->local.is_copy && addv == INSERT_VALUES) {
      if (yv != xv || from->local.copy_start != to->local.copy_start) {
        PetscMemcpy(yv + from->local.copy_start,xv + to->local.copy_start,to->local.copy_length);
      }
    } else {
      if (xv == yv && addv == INSERT_VALUES && to->local.nonmatching_computed) {
        /* only copy entries that do not share identical memory locations */
        PETSCMAP1(Scatter)(to->local.n_nonmatching,to->local.slots_nonmatching,xv,from->local.slots_nonmatching,yv,addv,bs);
      } else {
        PETSCMAP1(Scatter)(to->local.n,to->local.vslots,xv,from->local.vslots,yv,addv,bs);
      }
    }
  }
  VecRestoreArrayRead(xin,(const PetscScalar**)&xv);
  if (xin != yin) {VecRestoreArray(yin,&yv);}
  return(0);
}

#define PETSC_MEMSHARE_SAFE
PetscErrorCode PETSCMAP1(VecScatterEndMPI3Node)(VecScatter ctx,Vec xin,Vec yin,InsertMode addv,ScatterMode mode)
{
  VecScatter_MPI_General *to,*from;
  PetscScalar            *rvalues,*yv;
  const PetscScalar      *xv;
  PetscErrorCode         ierr;
  PetscInt               nrecvs,nsends,*indices,count,*rstarts,bs;
  PetscMPIInt            imdex;
  MPI_Request            *rwaits,*swaits;
  MPI_Status             xrstatus,*rstatus,*sstatus;
  Vec_Node               *vnode;
  PetscInt               cnt,*idx,*idy;
  MPI_Comm               comm,mscomm,veccomm;
  PetscCommShared        scomm;

  if (mode & SCATTER_LOCAL) return(0);

  PetscObjectGetComm((PetscObject)ctx,&comm);
  PetscCommSharedGet(comm,&scomm);
  PetscCommSharedGetComm(scomm,&mscomm);

  VecGetArray(yin,&yv);

  to      = (VecScatter_MPI_General*)ctx->todata;
  from    = (VecScatter_MPI_General*)ctx->fromdata;
  rwaits  = from->requests;
  swaits  = to->requests;
  sstatus = to->sstatus;    /* sstatus and rstatus are always stored in to */
  rstatus = to->rstatus;
  if (mode & SCATTER_REVERSE) {
    to     = (VecScatter_MPI_General*)ctx->fromdata;
    from   = (VecScatter_MPI_General*)ctx->todata;
    rwaits = from->rev_requests;
    swaits = to->rev_requests;
  }
  bs      = from->bs;
  rvalues = from->values;
  nrecvs  = from->n;
  nsends  = to->n;
  indices = from->indices;
  rstarts = from->starts;

  if (ctx->packtogether || (to->use_alltoallw && (addv != INSERT_VALUES)) || (to->use_alltoallv && !to->use_alltoallw) || to->use_window) {
    if (to->use_window) {MPI_Win_fence(0,from->window);}
    else if (nrecvs && !to->use_alltoallv) {MPI_Waitall(nrecvs,rwaits,rstatus);}
    PETSCMAP1(UnPack)(from->starts[from->n],from->values,indices,yv,addv,bs);
  } else if (!to->use_alltoallw) {
    PetscMPIInt i,xsize;
    PetscInt    k,k1;
    PetscScalar *sharedspace;

    /* unpack one at a time */
    count = nrecvs;
    while (count) {
      if (ctx->reproduce) {
        imdex = count - 1;
        MPI_Wait(rwaits+imdex,&xrstatus);
      } else {
        MPI_Waitany(nrecvs,rwaits,&imdex,&xrstatus);
      }
      /* unpack receives into our local space */
      PETSCMAP1(UnPack)(rstarts[imdex+1] - rstarts[imdex],rvalues + bs*rstarts[imdex],indices + rstarts[imdex],yv,addv,bs);
      count--;
    }

    /* handle processes that share the same shared memory communicator */
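    /*
       Intra-node partners are synchronized through a counter stored just before
       each shared array (index -1 of vnode->winarray[i]): the polling loops
       below only read from or write to a partner's buffer once that counter
       matches the local one, i.e. once the partner has reached the same object
       statecnt for this scatter.
    */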
#if defined(PETSC_MEMSHARE_SAFE)
    MPI_Barrier(mscomm);
#endif

    /* check if xin is sequential */
    PetscObjectGetComm((PetscObject)xin,&veccomm);
    MPI_Comm_size(veccomm,&xsize);

    if (xsize == 1 || from->sharedspace) { /* 'from->sharedspace' indicates this core's shared memory will be written */
      /* StoP: read sequential local xvalues, then write to shared yvalues */
      PetscInt notdone = to->notdone;

      vnode = (Vec_Node*)yin->data;
      if (!vnode->win) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_NULL,"vector y must have type VECNODE with shared memory");
      if (ctx->is_duplicate) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Duplicate index is not supported");
      VecGetArrayRead(xin,&xv);

      i = 0;
      while (notdone) {
        while (i < to->msize) {
          if (to->sharedspacesoffset && to->sharedspacesoffset[i] > -1) {
            cnt = to->sharedspacestarts[i+1] - to->sharedspacestarts[i];
            idx = to->sharedspaceindices + to->sharedspacestarts[i];
            idy = idx + to->sharedcnt;

            sharedspace = vnode->winarray[i];

            if (sharedspace[-1] != yv[-1]) {
              if (PetscRealPart(sharedspace[-1] - yv[-1]) > 0.0) {
                PetscMPIInt msrank;
                MPI_Comm_rank(mscomm,&msrank);
                SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"[%d] statecnt %g > [%d] my_statecnt %g",i,PetscRealPart(sharedspace[-1]),msrank,PetscRealPart(yv[-1]));
              }
              /* the i-th core has not reached the current object statecnt yet; wait ... */
              continue;
            }

            if (addv == ADD_VALUES) {
              for (k=0; k<cnt; k++) {
                for (k1=0; k1<bs; k1++) sharedspace[idy[k]+k1] += xv[idx[k]+k1];
              }
            } else if (addv == INSERT_VALUES) {
              for (k=0; k<cnt; k++) {
                for (k1=0; k1<bs; k1++) sharedspace[idy[k]+k1] = xv[idx[k]+k1];
              }
            } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot handle insert mode %D",addv);
            notdone--;
          }
          i++;
        }
      }
      VecRestoreArrayRead(xin,&xv);
    } else {
      /* PtoS: read shared xvalues, then write to sequential local yvalues */
      PetscInt notdone = from->notdone;

      vnode = (Vec_Node*)xin->data;
      if (!vnode->win && notdone) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_NULL,"vector x must have type VECNODE with shared memory");
      VecGetArrayRead(xin,&xv);

      i = 0;
      while (notdone) {
        while (i < from->msize) {
          if (from->sharedspacesoffset && from->sharedspacesoffset[i] > -1) {
            cnt = from->sharedspacestarts[i+1] - from->sharedspacestarts[i];
            idy = from->sharedspaceindices + from->sharedspacestarts[i]; /* recv local y indices */
            idx = idy + from->sharedcnt;

            sharedspace = vnode->winarray[i];

            if (sharedspace[-1] != xv[-1]) {
              if (PetscRealPart(sharedspace[-1] - xv[-1]) > 0.0) {
                PetscMPIInt msrank;
                MPI_Comm_rank(mscomm,&msrank);
                SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"[%d] statecnt %g > [%d] my_statecnt %g",i,PetscRealPart(sharedspace[-1]),msrank,PetscRealPart(xv[-1]));
              }
              /* the i-th core has not reached the current object statecnt yet; wait ... */
              continue;
            }

            if (addv == ADD_VALUES) {
              for (k=0; k<cnt; k++) {
                for (k1=0; k1<bs; k1++) yv[idy[k]+k1] += sharedspace[idx[k]+k1]; /* read x shared values */
              }
            } else if (addv == INSERT_VALUES) {
              for (k=0; k<cnt; k++) {
                for (k1=0; k1<bs; k1++) yv[idy[k]+k1] = sharedspace[idx[k]+k1]; /* read x shared values */
              }
            } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot handle insert mode %D",addv);

            notdone--;
          }
          i++;
        }
      }
      VecRestoreArrayRead(xin,&xv);
    }

    /* output y is parallel; make sure the shared-memory accesses above are complete, even though this barrier costs performance */
    MPI_Barrier(mscomm);
  }
  if (from->use_readyreceiver) {
    if (nrecvs) {MPI_Startall_irecv(from->starts[nrecvs]*bs,nrecvs,rwaits);}
    MPI_Barrier(comm);
  }

  /* wait on sends */
  if (nsends && !to->use_alltoallv && !to->use_window) {MPI_Waitall(nsends,swaits,sstatus);}
  VecRestoreArray(yin,&yv);
  return(0);
}

#undef PETSCMAP1_a
#undef PETSCMAP1_b
#undef PETSCMAP1
#undef BS
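/*
   For context, the including .c file instantiates these routines by defining BS
   and re-including this header for each block size it needs; the sketch below is
   illustrative of that pattern only (the actual list of block sizes lives in
   vpscat.c).
*/
#if 0
#define BS 1
#include <../src/vec/vec/impls/mpi/vpscat.h>
#define BS 2
#include <../src/vec/vec/impls/mpi/vpscat.h>
#endif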