Actual source code: vscat.c
petsc-3.9.4 2018-09-11
2: /*
3: Code for creating scatters between vectors. This file
4: includes the code for scattering between sequential vectors and
5: some special cases for parallel scatters.
6: */
8: #include <petsc/private/isimpl.h>
9: #include <petsc/private/vecimpl.h>
11: #if defined(PETSC_HAVE_VECCUDA)
12: PETSC_INTERN PetscErrorCode VecScatterCUDAIndicesCreate_PtoP(PetscInt,PetscInt*,PetscInt,PetscInt*,PetscCUDAIndices*);
13: PETSC_INTERN PetscErrorCode VecScatterCUDAIndicesCreate_StoS(PetscInt,PetscInt,PetscInt,PetscInt,PetscInt,PetscInt*,PetscInt*,PetscCUDAIndices*);
14: PETSC_INTERN PetscErrorCode VecScatterCUDAIndicesDestroy(PetscCUDAIndices*);
15: PETSC_INTERN PetscErrorCode VecScatterCUDA_StoS(Vec,Vec,PetscCUDAIndices,InsertMode,ScatterMode);
16: #endif
18: #if defined(PETSC_USE_DEBUG)
19: /*
20: Checks if any indices are less than zero and generates an error
21: */
22: PetscErrorCode VecScatterCheckIndices_Private(PetscInt nmax,PetscInt n,const PetscInt *idx)
23: {
24: PetscInt i;
27: for (i=0; i<n; i++) {
28: if (idx[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative index %D at %D location",idx[i],i);
29: if (idx[i] >= nmax) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index %D at %D location greater than max %D",idx[i],i,nmax);
30: }
31: return(0);
32: }
33: #endif
35: /*
36: This is special scatter code for when the entire parallel vector is copied to each processor.
38: This code was written by Cameron Cooper, Occidental College, Fall 1995,
39:    while working at ANL as a SERS student.
40: */
41: PetscErrorCode VecScatterBegin_MPI_ToAll(VecScatter ctx,Vec x,Vec y,InsertMode addv,ScatterMode mode)
42: {
44: PetscInt yy_n,xx_n;
45: PetscScalar *xv,*yv;
48: VecGetLocalSize(y,&yy_n);
49: VecGetLocalSize(x,&xx_n);
50: VecGetArrayPair(x,y,&xv,&yv);
52: if (mode & SCATTER_REVERSE) {
53: PetscScalar *xvt,*xvt2;
54: VecScatter_MPI_ToAll *scat = (VecScatter_MPI_ToAll*)ctx->todata;
55: PetscInt i;
56: PetscMPIInt *disply = scat->displx;
58: if (addv == INSERT_VALUES) {
59: PetscInt rstart,rend;
60: /*
61: copy the correct part of the local vector into the local storage of
62: the MPI one. Note: this operation only makes sense if all the local
63: vectors have the same values
64: */
65: VecGetOwnershipRange(y,&rstart,&rend);
66: PetscMemcpy(yv,xv+rstart,yy_n*sizeof(PetscScalar));
67: } else {
68: MPI_Comm comm;
69: PetscMPIInt rank;
70: PetscObjectGetComm((PetscObject)y,&comm);
71: MPI_Comm_rank(comm,&rank);
72: if (scat->work1) xvt = scat->work1;
73: else {
74: PetscMalloc1(xx_n,&xvt);
75: scat->work1 = xvt;
76: }
77: if (!rank) { /* I am the zeroth processor, values are accumulated here */
78: if (scat->work2) xvt2 = scat->work2;
79: else {
80: PetscMalloc1(xx_n,&xvt2);
81: scat->work2 = xvt2;
82: }
83: MPI_Gatherv(yv,yy_n,MPIU_SCALAR,xvt2,scat->count,disply,MPIU_SCALAR,0,PetscObjectComm((PetscObject)ctx));
84: MPI_Reduce(xv,xvt,xx_n,MPIU_SCALAR,MPIU_SUM,0,PetscObjectComm((PetscObject)ctx));
85: if (addv == ADD_VALUES) {
86: for (i=0; i<xx_n; i++) xvt[i] += xvt2[i];
87: #if !defined(PETSC_USE_COMPLEX)
88: } else if (addv == MAX_VALUES) {
89: for (i=0; i<xx_n; i++) xvt[i] = PetscMax(xvt[i],xvt2[i]);
90: #endif
91: } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_UNKNOWN_TYPE,"Wrong insert option");
92: MPI_Scatterv(xvt,scat->count,disply,MPIU_SCALAR,yv,yy_n,MPIU_SCALAR,0,PetscObjectComm((PetscObject)ctx));
93: } else {
94: MPI_Gatherv(yv,yy_n,MPIU_SCALAR,0, 0,0,MPIU_SCALAR,0,PetscObjectComm((PetscObject)ctx));
95: MPI_Reduce(xv,xvt,xx_n,MPIU_SCALAR,MPIU_SUM,0,PetscObjectComm((PetscObject)ctx));
96: MPI_Scatterv(0,scat->count,disply,MPIU_SCALAR,yv,yy_n,MPIU_SCALAR,0,PetscObjectComm((PetscObject)ctx));
97: }
98: }
99: } else {
100: PetscScalar *yvt;
101: VecScatter_MPI_ToAll *scat = (VecScatter_MPI_ToAll*)ctx->todata;
102: PetscInt i;
103: PetscMPIInt *displx = scat->displx;
105: if (addv == INSERT_VALUES) {
106: MPI_Allgatherv(xv,xx_n,MPIU_SCALAR,yv,scat->count,displx,MPIU_SCALAR,PetscObjectComm((PetscObject)ctx));
107: } else {
108: if (scat->work1) yvt = scat->work1;
109: else {
110: PetscMalloc1(yy_n,&yvt);
111: scat->work1 = yvt;
112: }
113: MPI_Allgatherv(xv,xx_n,MPIU_SCALAR,yvt,scat->count,displx,MPIU_SCALAR,PetscObjectComm((PetscObject)ctx));
114: if (addv == ADD_VALUES) {
115: for (i=0; i<yy_n; i++) yv[i] += yvt[i];
116: #if !defined(PETSC_USE_COMPLEX)
117: } else if (addv == MAX_VALUES) {
118: for (i=0; i<yy_n; i++) yv[i] = PetscMax(yv[i],yvt[i]);
119: #endif
120: } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_UNKNOWN_TYPE,"Wrong insert option");
121: }
122: }
123: VecRestoreArrayPair(x,y,&xv,&yv);
124: return(0);
125: }
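/*
   A minimal usage sketch (not part of this file, assuming only the standard public
   API) of how the ToAll scatter above is typically reached: VecScatterCreateToAll()
   builds both the scatter context and the sequential destination vector.

      VecScatter scat;
      Vec        ally;                          (one full sequential copy per process)
      VecScatterCreateToAll(x,&scat,&ally);
      VecScatterBegin(scat,x,ally,INSERT_VALUES,SCATTER_FORWARD);
      VecScatterEnd(scat,x,ally,INSERT_VALUES,SCATTER_FORWARD);
      VecScatterDestroy(&scat);
      VecDestroy(&ally);
*/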
127: PetscErrorCode VecScatterView_MPI_ToAll(VecScatter in,PetscViewer viewer)
128: {
130: PetscBool isascii;
133: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&isascii);
134: if (isascii) {
135: PetscViewerASCIIPrintf(viewer,"Entire parallel vector is copied to each process\n");
136: }
137: return(0);
138: }
140: /*
141: This is special scatter code for when the entire parallel vector is copied to processor 0.
143: */
144: PetscErrorCode VecScatterBegin_MPI_ToOne(VecScatter ctx,Vec x,Vec y,InsertMode addv,ScatterMode mode)
145: {
146: PetscErrorCode ierr;
147: PetscMPIInt rank;
148: PetscInt yy_n,xx_n;
149: PetscScalar *yv;
150: const PetscScalar *xv;
151: MPI_Comm comm;
154: VecGetLocalSize(y,&yy_n);
155: VecGetLocalSize(x,&xx_n);
156: VecGetArrayRead(x,&xv);
157: VecGetArray(y,&yv);
159: PetscObjectGetComm((PetscObject)x,&comm);
160: MPI_Comm_rank(comm,&rank);
162: /* -------- Reverse scatter; spread from processor 0 to other processors */
163: if (mode & SCATTER_REVERSE) {
164: PetscScalar *yvt;
165: VecScatter_MPI_ToAll *scat = (VecScatter_MPI_ToAll*)ctx->todata;
166: PetscInt i;
167: PetscMPIInt *disply = scat->displx;
169: if (addv == INSERT_VALUES) {
170: MPI_Scatterv((PetscScalar*)xv,scat->count,disply,MPIU_SCALAR,yv,yy_n,MPIU_SCALAR,0,PetscObjectComm((PetscObject)ctx));
171: } else {
172: if (scat->work2) yvt = scat->work2;
173: else {
174: PetscInt xx_nt;
175: MPI_Allreduce(&xx_n,&xx_nt,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)y));
176: PetscMalloc1(xx_nt,&yvt);
177: scat->work2 = yvt;
178: }
179: MPI_Scatterv((PetscScalar*)xv,scat->count,disply,MPIU_SCALAR,yvt,yy_n,MPIU_SCALAR,0,PetscObjectComm((PetscObject)ctx));
180: if (addv == ADD_VALUES) {
181: for (i=0; i<yy_n; i++) yv[i] += yvt[i];
182: #if !defined(PETSC_USE_COMPLEX)
183: } else if (addv == MAX_VALUES) {
184: for (i=0; i<yy_n; i++) yv[i] = PetscMax(yv[i],yvt[i]);
185: #endif
186: } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_UNKNOWN_TYPE,"Wrong insert option");
187: }
188: /* --------- Forward scatter; gather all values onto processor 0 */
189: } else {
190: PetscScalar *yvt = 0;
191: VecScatter_MPI_ToAll *scat = (VecScatter_MPI_ToAll*)ctx->todata;
192: PetscInt i;
193: PetscMPIInt *displx = scat->displx;
195: if (addv == INSERT_VALUES) {
196: MPI_Gatherv((PetscScalar*)xv,xx_n,MPIU_SCALAR,yv,scat->count,displx,MPIU_SCALAR,0,PetscObjectComm((PetscObject)ctx));
197: } else {
198: if (!rank) {
199: if (scat->work1) yvt = scat->work1;
200: else {
201: PetscMalloc1(yy_n,&yvt);
202: scat->work1 = yvt;
203: }
204: }
205: MPI_Gatherv((PetscScalar*)xv,xx_n,MPIU_SCALAR,yvt,scat->count,displx,MPIU_SCALAR,0,PetscObjectComm((PetscObject)ctx));
206: if (!rank) {
207: if (addv == ADD_VALUES) {
208: for (i=0; i<yy_n; i++) yv[i] += yvt[i];
209: #if !defined(PETSC_USE_COMPLEX)
210: } else if (addv == MAX_VALUES) {
211: for (i=0; i<yy_n; i++) yv[i] = PetscMax(yv[i],yvt[i]);
212: #endif
213: } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_UNKNOWN_TYPE,"Wrong insert option");
214: }
215: }
216: }
217: VecRestoreArrayRead(x,&xv);
218: VecRestoreArray(y,&yv);
219: return(0);
220: }
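/*
   A matching sketch for the ToOne case (again assuming only the standard public API):
   VecScatterCreateToZero() gathers the whole parallel vector onto rank 0, and the same
   context used with SCATTER_REVERSE spreads rank 0's values back out.

      VecScatter scat;
      Vec        y0;                            (full length on rank 0, length 0 elsewhere)
      VecScatterCreateToZero(x,&scat,&y0);
      VecScatterBegin(scat,x,y0,INSERT_VALUES,SCATTER_FORWARD);
      VecScatterEnd(scat,x,y0,INSERT_VALUES,SCATTER_FORWARD);
      VecScatterDestroy(&scat);
      VecDestroy(&y0);
*/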
222: /*
223:      The following two are used for both VecScatterBegin_MPI_ToAll() and VecScatterBegin_MPI_ToOne()
224: */
225: PetscErrorCode VecScatterDestroy_MPI_ToAll(VecScatter ctx)
226: {
227: VecScatter_MPI_ToAll *scat = (VecScatter_MPI_ToAll*)ctx->todata;
228: PetscErrorCode ierr;
231: PetscFree(scat->work1);
232: PetscFree(scat->work2);
233: PetscFree3(ctx->todata,scat->count,scat->displx);
234: return(0);
235: }
237: PetscErrorCode VecScatterDestroy_SGToSG(VecScatter ctx)
238: {
242: PetscFree2(((VecScatter_Seq_General*)ctx->todata)->vslots,((VecScatter_Seq_General*)ctx->fromdata)->vslots);
243: PetscFree2(ctx->todata,ctx->fromdata);
244: return(0);
245: }
247: PetscErrorCode VecScatterDestroy_SGToSS(VecScatter ctx)
248: {
252: PetscFree(((VecScatter_Seq_General*)ctx->fromdata)->vslots);
253: PetscFree2(ctx->todata,ctx->fromdata);
254: return(0);
255: }
257: PetscErrorCode VecScatterDestroy_SSToSG(VecScatter ctx)
258: {
262: PetscFree(((VecScatter_Seq_General*)ctx->todata)->vslots);
263: PetscFree2(ctx->todata,ctx->fromdata);
264: return(0);
265: }
267: PetscErrorCode VecScatterDestroy_SSToSS(VecScatter ctx)
268: {
272: PetscFree2(ctx->todata,ctx->fromdata);
273: return(0);
274: }
276: /* -------------------------------------------------------------------------*/
277: PetscErrorCode VecScatterCopy_MPI_ToAll(VecScatter in,VecScatter out)
278: {
279: VecScatter_MPI_ToAll *in_to = (VecScatter_MPI_ToAll*)in->todata,*sto;
280: PetscErrorCode ierr;
281: PetscMPIInt size,*count,*displx;
282: PetscInt i;
285: out->ops->begin = in->ops->begin;
286: out->ops->end = in->ops->end;
287: out->ops->copy = in->ops->copy;
288: out->ops->destroy = in->ops->destroy;
289: out->ops->view = in->ops->view;
291: MPI_Comm_size(PetscObjectComm((PetscObject)out),&size);
292: PetscMalloc3(1,&sto,size,&count,size,&displx);
293: sto->format = in_to->format;
294: sto->count = count;
295: sto->displx = displx;
296: for (i=0; i<size; i++) {
297: sto->count[i] = in_to->count[i];
298: sto->displx[i] = in_to->displx[i];
299: }
300: sto->work1 = 0;
301: sto->work2 = 0;
302: out->todata = (void*)sto;
303: out->fromdata = (void*)0;
304: return(0);
305: }
307: /* --------------------------------------------------------------------------------------*/
308: /*
309: Scatter: sequential general to sequential general
310: */
311: PetscErrorCode VecScatterBegin_SGToSG(VecScatter ctx,Vec x,Vec y,InsertMode addv,ScatterMode mode)
312: {
313: VecScatter_Seq_General *gen_to = (VecScatter_Seq_General*)ctx->todata;
314: VecScatter_Seq_General *gen_from = (VecScatter_Seq_General*)ctx->fromdata;
315: PetscErrorCode ierr;
316: PetscInt i,n = gen_from->n,*fslots,*tslots;
317: PetscScalar *xv,*yv;
318: #if defined(PETSC_HAVE_VECCUDA)
319: PetscBool is_veccuda;
320: #endif
323: #if defined(PETSC_HAVE_VECCUDA)
324: PetscObjectTypeCompareAny((PetscObject)x,&is_veccuda,VECSEQCUDA,VECMPICUDA,VECCUDA,"");
325: if (is_veccuda && x->valid_GPU_array == PETSC_OFFLOAD_GPU) {
326: /* create the scatter indices if not done already */
327: if (!ctx->spptr) {
328: PetscInt tofirst = 0,tostep = 0,fromfirst = 0,fromstep = 0;
329: fslots = gen_from->vslots;
330: tslots = gen_to->vslots;
331: VecScatterCUDAIndicesCreate_StoS(n,tofirst,fromfirst,tostep,fromstep,tslots,fslots,(PetscCUDAIndices*)&(ctx->spptr));
332: }
333: /* next do the scatter */
334: VecScatterCUDA_StoS(x,y,(PetscCUDAIndices)ctx->spptr,addv,mode);
335: return(0);
336: }
337: #endif
339: VecGetArrayPair(x,y,&xv,&yv);
340: if (mode & SCATTER_REVERSE) {
341: gen_to = (VecScatter_Seq_General*)ctx->fromdata;
342: gen_from = (VecScatter_Seq_General*)ctx->todata;
343: }
344: fslots = gen_from->vslots;
345: tslots = gen_to->vslots;
347: if (addv == INSERT_VALUES) {
348: for (i=0; i<n; i++) yv[tslots[i]] = xv[fslots[i]];
349: } else if (addv == ADD_VALUES) {
350: for (i=0; i<n; i++) yv[tslots[i]] += xv[fslots[i]];
351: #if !defined(PETSC_USE_COMPLEX)
352: } else if (addv == MAX_VALUES) {
353: for (i=0; i<n; i++) yv[tslots[i]] = PetscMax(yv[tslots[i]],xv[fslots[i]]);
354: #endif
355: } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_UNKNOWN_TYPE,"Wrong insert option");
356: VecRestoreArrayPair(x,y,&xv,&yv);
357: return(0);
358: }
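/*
   Worked example for the general-to-general loop above: with n = 2, fslots = {3,0}
   and tslots = {1,2}, a forward INSERT_VALUES scatter performs

      yv[1] = xv[3];
      yv[2] = xv[0];

   and SCATTER_REVERSE simply swaps the roles of the two slot arrays.
*/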
360: /*
361: Scatter: sequential general to sequential stride 1
362: */
363: PetscErrorCode VecScatterBegin_SGToSS_Stride1(VecScatter ctx,Vec x,Vec y,InsertMode addv,ScatterMode mode)
364: {
365: VecScatter_Seq_Stride *gen_to = (VecScatter_Seq_Stride*)ctx->todata;
366: VecScatter_Seq_General *gen_from = (VecScatter_Seq_General*)ctx->fromdata;
367: PetscInt i,n = gen_from->n,*fslots = gen_from->vslots;
368: PetscErrorCode ierr;
369: PetscInt first = gen_to->first;
370: PetscScalar *xv,*yv;
371: #if defined(PETSC_HAVE_VECCUDA)
372: PetscBool is_veccuda;
373: #endif
376: #if defined(PETSC_HAVE_VECCUDA)
377: PetscObjectTypeCompareAny((PetscObject)x,&is_veccuda,VECSEQCUDA,VECMPICUDA,VECCUDA,"");
378: if (is_veccuda && x->valid_GPU_array == PETSC_OFFLOAD_GPU) {
379: /* create the scatter indices if not done already */
380: if (!ctx->spptr) {
381: PetscInt tofirst = first,tostep = 1,fromfirst = 0,fromstep = 0;
382: PetscInt *tslots = 0;
383: VecScatterCUDAIndicesCreate_StoS(n,tofirst,fromfirst,tostep,fromstep,tslots,fslots,(PetscCUDAIndices*)&(ctx->spptr));
384: }
385: /* next do the scatter */
386: VecScatterCUDA_StoS(x,y,(PetscCUDAIndices)ctx->spptr,addv,mode);
387: return(0);
388: }
389: #endif
391: VecGetArrayPair(x,y,&xv,&yv);
392: if (mode & SCATTER_REVERSE) {
393: xv += first;
394: if (addv == INSERT_VALUES) {
395: for (i=0; i<n; i++) yv[fslots[i]] = xv[i];
396: } else if (addv == ADD_VALUES) {
397: for (i=0; i<n; i++) yv[fslots[i]] += xv[i];
398: #if !defined(PETSC_USE_COMPLEX)
399: } else if (addv == MAX_VALUES) {
400: for (i=0; i<n; i++) yv[fslots[i]] = PetscMax(yv[fslots[i]],xv[i]);
401: #endif
402: } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_UNKNOWN_TYPE,"Wrong insert option");
403: } else {
404: yv += first;
405: if (addv == INSERT_VALUES) {
406: for (i=0; i<n; i++) yv[i] = xv[fslots[i]];
407: } else if (addv == ADD_VALUES) {
408: for (i=0; i<n; i++) yv[i] += xv[fslots[i]];
409: #if !defined(PETSC_USE_COMPLEX)
410: } else if (addv == MAX_VALUES) {
411: for (i=0; i<n; i++) yv[i] = PetscMax(yv[i],xv[fslots[i]]);
412: #endif
413: } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_UNKNOWN_TYPE,"Wrong insert option");
414: }
415: VecRestoreArrayPair(x,y,&xv,&yv);
416: return(0);
417: }
419: /*
420: Scatter: sequential general to sequential stride
421: */
422: PetscErrorCode VecScatterBegin_SGToSS(VecScatter ctx,Vec x,Vec y,InsertMode addv,ScatterMode mode)
423: {
424: VecScatter_Seq_Stride *gen_to = (VecScatter_Seq_Stride*)ctx->todata;
425: VecScatter_Seq_General *gen_from = (VecScatter_Seq_General*)ctx->fromdata;
426: PetscInt i,n = gen_from->n,*fslots = gen_from->vslots;
427: PetscErrorCode ierr;
428: PetscInt first = gen_to->first,step = gen_to->step;
429: PetscScalar *xv,*yv;
430: #if defined(PETSC_HAVE_VECCUDA)
431: PetscBool is_veccuda;
432: #endif
435: #if defined(PETSC_HAVE_VECCUDA)
436: PetscObjectTypeCompareAny((PetscObject)x,&is_veccuda,VECSEQCUDA,VECMPICUDA,VECCUDA,"");
437: if (is_veccuda && x->valid_GPU_array == PETSC_OFFLOAD_GPU) {
438: /* create the scatter indices if not done already */
439: if (!ctx->spptr) {
440: PetscInt tofirst = first,tostep = step,fromfirst = 0,fromstep = 0;
441: PetscInt * tslots = 0;
442: VecScatterCUDAIndicesCreate_StoS(n,tofirst,fromfirst,tostep,fromstep,tslots,fslots,(PetscCUDAIndices*)&(ctx->spptr));
443: }
444: /* next do the scatter */
445: VecScatterCUDA_StoS(x,y,(PetscCUDAIndices)ctx->spptr,addv,mode);
446: return(0);
447: }
448: #endif
450: VecGetArrayPair(x,y,&xv,&yv);
451: if (mode & SCATTER_REVERSE) {
452: if (addv == INSERT_VALUES) {
453: for (i=0; i<n; i++) yv[fslots[i]] = xv[first + i*step];
454: } else if (addv == ADD_VALUES) {
455: for (i=0; i<n; i++) yv[fslots[i]] += xv[first + i*step];
456: #if !defined(PETSC_USE_COMPLEX)
457: } else if (addv == MAX_VALUES) {
458: for (i=0; i<n; i++) yv[fslots[i]] = PetscMax(yv[fslots[i]],xv[first + i*step]);
459: #endif
460: } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_UNKNOWN_TYPE,"Wrong insert option");
461: } else {
462: if (addv == INSERT_VALUES) {
463: for (i=0; i<n; i++) yv[first + i*step] = xv[fslots[i]];
464: } else if (addv == ADD_VALUES) {
465: for (i=0; i<n; i++) yv[first + i*step] += xv[fslots[i]];
466: #if !defined(PETSC_USE_COMPLEX)
467: } else if (addv == MAX_VALUES) {
468: for (i=0; i<n; i++) yv[first + i*step] = PetscMax(yv[first + i*step],xv[fslots[i]]);
469: #endif
470: } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_UNKNOWN_TYPE,"Wrong insert option");
471: }
472: VecRestoreArrayPair(x,y,&xv,&yv);
473: return(0);
474: }
476: /*
477: Scatter: sequential stride 1 to sequential general
478: */
479: PetscErrorCode VecScatterBegin_SSToSG_Stride1(VecScatter ctx,Vec x,Vec y,InsertMode addv,ScatterMode mode)
480: {
481: VecScatter_Seq_Stride *gen_from = (VecScatter_Seq_Stride*)ctx->fromdata;
482: VecScatter_Seq_General *gen_to = (VecScatter_Seq_General*)ctx->todata;
483: PetscInt i,n = gen_from->n,*tslots = gen_to->vslots;
484: PetscErrorCode ierr;
485: PetscInt first = gen_from->first;
486: PetscScalar *xv,*yv;
487: #if defined(PETSC_HAVE_VECCUDA)
488: PetscBool is_veccuda;
489: #endif
492: #if defined(PETSC_HAVE_VECCUDA)
493: PetscObjectTypeCompareAny((PetscObject)x,&is_veccuda,VECSEQCUDA,VECMPICUDA,VECCUDA,"");
494: if (is_veccuda && x->valid_GPU_array == PETSC_OFFLOAD_GPU) {
495: /* create the scatter indices if not done already */
496: if (!ctx->spptr) {
497: PetscInt tofirst = 0,tostep = 0,fromfirst = first,fromstep = 1;
498: PetscInt *fslots = 0;
499: VecScatterCUDAIndicesCreate_StoS(n,tofirst,fromfirst,tostep,fromstep,tslots,fslots,(PetscCUDAIndices*)&(ctx->spptr));
500: }
501: /* next do the scatter */
502: VecScatterCUDA_StoS(x,y,(PetscCUDAIndices)ctx->spptr,addv,mode);
503: return(0);
504: }
505: #endif
507: VecGetArrayPair(x,y,&xv,&yv);
508: if (mode & SCATTER_REVERSE) {
509: yv += first;
510: if (addv == INSERT_VALUES) {
511: for (i=0; i<n; i++) yv[i] = xv[tslots[i]];
512: } else if (addv == ADD_VALUES) {
513: for (i=0; i<n; i++) yv[i] += xv[tslots[i]];
514: #if !defined(PETSC_USE_COMPLEX)
515: } else if (addv == MAX_VALUES) {
516: for (i=0; i<n; i++) yv[i] = PetscMax(yv[i],xv[tslots[i]]);
517: #endif
518: } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_UNKNOWN_TYPE,"Wrong insert option");
519: } else {
520: xv += first;
521: if (addv == INSERT_VALUES) {
522: for (i=0; i<n; i++) yv[tslots[i]] = xv[i];
523: } else if (addv == ADD_VALUES) {
524: for (i=0; i<n; i++) yv[tslots[i]] += xv[i];
525: #if !defined(PETSC_USE_COMPLEX)
526: } else if (addv == MAX_VALUES) {
527: for (i=0; i<n; i++) yv[tslots[i]] = PetscMax(yv[tslots[i]],xv[i]);
528: #endif
529: } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_UNKNOWN_TYPE,"Wrong insert option");
530: }
531: VecRestoreArrayPair(x,y,&xv,&yv);
532: return(0);
533: }
535: /*
536: Scatter: sequential stride to sequential general
537: */
538: PetscErrorCode VecScatterBegin_SSToSG(VecScatter ctx,Vec x,Vec y,InsertMode addv,ScatterMode mode)
539: {
540: VecScatter_Seq_Stride *gen_from = (VecScatter_Seq_Stride*)ctx->fromdata;
541: VecScatter_Seq_General *gen_to = (VecScatter_Seq_General*)ctx->todata;
542: PetscInt i,n = gen_from->n,*tslots = gen_to->vslots;
543: PetscErrorCode ierr;
544: PetscInt first = gen_from->first,step = gen_from->step;
545: PetscScalar *xv,*yv;
546: #if defined(PETSC_HAVE_VECCUDA)
547: PetscBool is_veccuda;
548: #endif
551: #if defined(PETSC_HAVE_VECCUDA)
552: PetscObjectTypeCompareAny((PetscObject)x,&is_veccuda,VECSEQCUDA,VECMPICUDA,VECCUDA,"");
553: if (is_veccuda && x->valid_GPU_array == PETSC_OFFLOAD_GPU) {
554: /* create the scatter indices if not done already */
555: if (!ctx->spptr) {
556: PetscInt tofirst = 0,tostep = 0,fromfirst = first,fromstep = step;
557: PetscInt *fslots = 0;
558: VecScatterCUDAIndicesCreate_StoS(n,tofirst,fromfirst,tostep,fromstep,tslots,fslots,(PetscCUDAIndices*)&(ctx->spptr));
559: }
560: /* next do the scatter */
561: VecScatterCUDA_StoS(x,y,(PetscCUDAIndices)ctx->spptr,addv,mode);
562: return(0);
563: }
564: #endif
566: VecGetArrayPair(x,y,&xv,&yv);
567: if (mode & SCATTER_REVERSE) {
568: if (addv == INSERT_VALUES) {
569: for (i=0; i<n; i++) yv[first + i*step] = xv[tslots[i]];
570: } else if (addv == ADD_VALUES) {
571: for (i=0; i<n; i++) yv[first + i*step] += xv[tslots[i]];
572: #if !defined(PETSC_USE_COMPLEX)
573: } else if (addv == MAX_VALUES) {
574: for (i=0; i<n; i++) yv[first + i*step] = PetscMax(yv[first + i*step],xv[tslots[i]]);
575: #endif
576: } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_UNKNOWN_TYPE,"Wrong insert option");
577: } else {
578: if (addv == INSERT_VALUES) {
579: for (i=0; i<n; i++) yv[tslots[i]] = xv[first + i*step];
580: } else if (addv == ADD_VALUES) {
581: for (i=0; i<n; i++) yv[tslots[i]] += xv[first + i*step];
582: #if !defined(PETSC_USE_COMPLEX)
583: } else if (addv == MAX_VALUES) {
584: for (i=0; i<n; i++) yv[tslots[i]] = PetscMax(yv[tslots[i]],xv[first + i*step]);
585: #endif
586: } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_UNKNOWN_TYPE,"Wrong insert option");
587: }
588: VecRestoreArrayPair(x,y,&xv,&yv);
589: return(0);
590: }
592: PetscErrorCode VecScatterView_SSToSG(VecScatter in,PetscViewer viewer)
593: {
594: PetscErrorCode ierr;
595: VecScatter_Seq_Stride *in_from = (VecScatter_Seq_Stride*)in->fromdata;
596: VecScatter_Seq_General *in_to = (VecScatter_Seq_General*)in->todata;
597: PetscInt i;
598: PetscBool isascii;
601: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&isascii);
602: if (isascii) {
603: PetscViewerASCIIPrintf(viewer,"Sequential stride to general scatter\n");
604: for (i=0; i<in_to->n; i++) {
605: PetscViewerASCIIPrintf(viewer,"%D to %D\n",in_from->first + in_from->step*i,in_to->vslots[i]);
606: }
607: }
608: return(0);
609: }
610: /*
611: Scatter: sequential stride to sequential stride
612: */
613: PetscErrorCode VecScatterBegin_SSToSS(VecScatter ctx,Vec x,Vec y,InsertMode addv,ScatterMode mode)
614: {
615: VecScatter_Seq_Stride *gen_to = (VecScatter_Seq_Stride*)ctx->todata;
616: VecScatter_Seq_Stride *gen_from = (VecScatter_Seq_Stride*)ctx->fromdata;
617: PetscInt i,n = gen_from->n,to_first = gen_to->first,to_step = gen_to->step;
618: PetscErrorCode ierr;
619: PetscInt from_first = gen_from->first,from_step = gen_from->step;
620: PetscScalar *xv,*yv;
621: #if defined(PETSC_HAVE_VECCUDA)
622: PetscBool is_veccuda;
623: #endif
626: #if defined(PETSC_HAVE_VECCUDA)
627: PetscObjectTypeCompareAny((PetscObject)x,&is_veccuda,VECSEQCUDA,VECMPICUDA,VECCUDA,"");
628: if (is_veccuda && x->valid_GPU_array == PETSC_OFFLOAD_GPU) {
629: /* create the scatter indices if not done already */
630: if (!ctx->spptr) {
631: PetscInt *tslots = 0,*fslots = 0;
632: VecScatterCUDAIndicesCreate_StoS(n,to_first,from_first,to_step,from_step,tslots,fslots,(PetscCUDAIndices*)&(ctx->spptr));
633: }
634: /* next do the scatter */
635: VecScatterCUDA_StoS(x,y,(PetscCUDAIndices)ctx->spptr,addv,mode);
636: return(0);
637: }
638: #endif
640: VecGetArrayPair(x,y,&xv,&yv);
641: if (mode & SCATTER_REVERSE) {
642: from_first = gen_to->first;
643: to_first = gen_from->first;
644: from_step = gen_to->step;
645: to_step = gen_from->step;
646: }
648: if (addv == INSERT_VALUES) {
649: if (to_step == 1 && from_step == 1) {
650: PetscMemcpy(yv+to_first,xv+from_first,n*sizeof(PetscScalar));
651: } else {
652: for (i=0; i<n; i++) yv[to_first + i*to_step] = xv[from_first+i*from_step];
653: }
654: } else if (addv == ADD_VALUES) {
655: if (to_step == 1 && from_step == 1) {
656: yv += to_first; xv += from_first;
657: for (i=0; i<n; i++) yv[i] += xv[i];
658: } else {
659: for (i=0; i<n; i++) yv[to_first + i*to_step] += xv[from_first+i*from_step];
660: }
661: #if !defined(PETSC_USE_COMPLEX)
662: } else if (addv == MAX_VALUES) {
663: if (to_step == 1 && from_step == 1) {
664: yv += to_first; xv += from_first;
665: for (i=0; i<n; i++) yv[i] = PetscMax(yv[i],xv[i]);
666: } else {
667: for (i=0; i<n; i++) yv[to_first + i*to_step] = PetscMax(yv[to_first + i*to_step],xv[from_first+i*from_step]);
668: }
669: #endif
670: } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_UNKNOWN_TYPE,"Wrong insert option");
671: VecRestoreArrayPair(x,y,&xv,&yv);
672: return(0);
673: }
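/*
   Worked example for the stride-to-stride loops above: with n = 3, from_first = 0,
   from_step = 2, to_first = 1 and to_step = 1, a forward INSERT_VALUES scatter performs

      yv[1] = xv[0];  yv[2] = xv[2];  yv[3] = xv[4];

   and when both steps are 1 the code takes the single PetscMemcpy() fast path instead.
*/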
675: /* --------------------------------------------------------------------------*/
678: PetscErrorCode VecScatterCopy_SGToSG(VecScatter in,VecScatter out)
679: {
680: PetscErrorCode ierr;
681: VecScatter_Seq_General *in_to = (VecScatter_Seq_General*)in->todata,*out_to = NULL;
682: VecScatter_Seq_General *in_from = (VecScatter_Seq_General*)in->fromdata,*out_from = NULL;
685: out->ops->begin = in->ops->begin;
686: out->ops->end = in->ops->end;
687: out->ops->copy = in->ops->copy;
688: out->ops->destroy = in->ops->destroy;
689: out->ops->view = in->ops->view;
691: PetscMalloc2(1,&out_to,1,&out_from);
692: PetscMalloc2(in_to->n,&out_to->vslots,in_from->n,&out_from->vslots);
693: out_to->n = in_to->n;
694: out_to->format = in_to->format;
695: out_to->nonmatching_computed = PETSC_FALSE;
696: out_to->slots_nonmatching = 0;
697: out_to->is_copy = PETSC_FALSE;
698: PetscMemcpy(out_to->vslots,in_to->vslots,(out_to->n)*sizeof(PetscInt));
700: out_from->n = in_from->n;
701: out_from->format = in_from->format;
702: out_from->nonmatching_computed = PETSC_FALSE;
703: out_from->slots_nonmatching = 0;
704: out_from->is_copy = PETSC_FALSE;
705: PetscMemcpy(out_from->vslots,in_from->vslots,(out_from->n)*sizeof(PetscInt));
707: out->todata = (void*)out_to;
708: out->fromdata = (void*)out_from;
709: return(0);
710: }
712: PetscErrorCode VecScatterView_SGToSG(VecScatter in,PetscViewer viewer)
713: {
714: PetscErrorCode ierr;
715: VecScatter_Seq_General *in_to = (VecScatter_Seq_General*)in->todata;
716: VecScatter_Seq_General *in_from = (VecScatter_Seq_General*)in->fromdata;
717: PetscInt i;
718: PetscBool isascii;
721: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&isascii);
722: if (isascii) {
723: PetscViewerASCIIPrintf(viewer,"Sequential general scatter\n");
724: for (i=0; i<in_to->n; i++) {
725: PetscViewerASCIIPrintf(viewer,"%D to %D\n",in_from->vslots[i],in_to->vslots[i]);
726: }
727: }
728: return(0);
729: }
732: PetscErrorCode VecScatterCopy_SGToSS(VecScatter in,VecScatter out)
733: {
734: PetscErrorCode ierr;
735: VecScatter_Seq_Stride *in_to = (VecScatter_Seq_Stride*)in->todata,*out_to = NULL;
736: VecScatter_Seq_General *in_from = (VecScatter_Seq_General*)in->fromdata,*out_from = NULL;
739: out->ops->begin = in->ops->begin;
740: out->ops->end = in->ops->end;
741: out->ops->copy = in->ops->copy;
742: out->ops->destroy = in->ops->destroy;
743: out->ops->view = in->ops->view;
745: PetscMalloc2(1,&out_to,1,&out_from);
746: PetscMalloc1(in_from->n,&out_from->vslots);
747: out_to->n = in_to->n;
748: out_to->format = in_to->format;
749: out_to->first = in_to->first;
750: out_to->step = in_to->step;
751: out_to->format = in_to->format;
753: out_from->n = in_from->n;
754: out_from->format = in_from->format;
755: out_from->nonmatching_computed = PETSC_FALSE;
756: out_from->slots_nonmatching = 0;
757: out_from->is_copy = PETSC_FALSE;
758: PetscMemcpy(out_from->vslots,in_from->vslots,(out_from->n)*sizeof(PetscInt));
760: out->todata = (void*)out_to;
761: out->fromdata = (void*)out_from;
762: return(0);
763: }
765: PetscErrorCode VecScatterView_SGToSS(VecScatter in,PetscViewer viewer)
766: {
767: PetscErrorCode ierr;
768: VecScatter_Seq_Stride *in_to = (VecScatter_Seq_Stride*)in->todata;
769: VecScatter_Seq_General *in_from = (VecScatter_Seq_General*)in->fromdata;
770: PetscInt i;
771: PetscBool isascii;
774: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&isascii);
775: if (isascii) {
776: PetscViewerASCIIPrintf(viewer,"Sequential general scatter to stride\n");
777: for (i=0; i<in_to->n; i++) {
778: PetscViewerASCIIPrintf(viewer,"%D to %D\n",in_from->vslots[i],in_to->first + in_to->step*i);
779: }
780: }
781: return(0);
782: }
784: /* --------------------------------------------------------------------------*/
785: /*
786: Scatter: parallel to sequential vector, sequential strides for both.
787: */
788: PetscErrorCode VecScatterCopy_SSToSS(VecScatter in,VecScatter out)
789: {
790: VecScatter_Seq_Stride *in_to = (VecScatter_Seq_Stride*)in->todata,*out_to = NULL;
791: VecScatter_Seq_Stride *in_from = (VecScatter_Seq_Stride*)in->fromdata,*out_from = NULL;
792: PetscErrorCode ierr;
795: out->ops->begin = in->ops->begin;
796: out->ops->end = in->ops->end;
797: out->ops->copy = in->ops->copy;
798: out->ops->destroy = in->ops->destroy;
799: out->ops->view = in->ops->view;
801: PetscMalloc2(1,&out_to,1,&out_from);
802: out_to->n = in_to->n;
803: out_to->format = in_to->format;
804: out_to->first = in_to->first;
805: out_to->step = in_to->step;
806: out_to->format = in_to->format;
807: out_from->n = in_from->n;
808: out_from->format= in_from->format;
809: out_from->first = in_from->first;
810: out_from->step = in_from->step;
811: out_from->format= in_from->format;
812: out->todata = (void*)out_to;
813: out->fromdata = (void*)out_from;
814: return(0);
815: }
817: PetscErrorCode VecScatterView_SSToSS(VecScatter in,PetscViewer viewer)
818: {
819: VecScatter_Seq_Stride *in_to = (VecScatter_Seq_Stride*)in->todata;
820: VecScatter_Seq_Stride *in_from = (VecScatter_Seq_Stride*)in->fromdata;
821: PetscErrorCode ierr;
822: PetscBool isascii;
825: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&isascii);
826: if (isascii) {
827: PetscViewerASCIIPrintf(viewer,"Sequential stride count %D start %D step to start %D stride %D\n",in_to->n,in_to->first,in_to->step,in_from->first,in_from->step);
828: }
829: return(0);
830: }
833: #if defined(PETSC_HAVE_MPI_WIN_CREATE_FEATURE)
834: extern PetscErrorCode VecScatterCreateLocal_PtoS_MPI3(PetscInt,const PetscInt*,PetscInt,const PetscInt*,Vec,Vec,PetscInt,VecScatter);
835: extern PetscErrorCode VecScatterCreateLocal_PtoP_MPI3(PetscInt,const PetscInt*,PetscInt,const PetscInt*,Vec,Vec,PetscInt,VecScatter);
836: extern PetscErrorCode VecScatterCreateLocal_StoP_MPI3(PetscInt,const PetscInt*,PetscInt,const PetscInt*,Vec,Vec,PetscInt,VecScatter);
837: #endif
839: extern PetscErrorCode VecScatterCreateLocal_PtoS_MPI1(PetscInt,const PetscInt*,PetscInt,const PetscInt*,Vec,Vec,PetscInt,VecScatter);
840: extern PetscErrorCode VecScatterCreateLocal_PtoP_MPI1(PetscInt,const PetscInt*,PetscInt,const PetscInt*,Vec,Vec,PetscInt,VecScatter);
841: extern PetscErrorCode VecScatterCreateLocal_StoP_MPI1(PetscInt,const PetscInt*,PetscInt,const PetscInt*,Vec,Vec,PetscInt,VecScatter);
843: static PetscErrorCode GetInputISType_private(VecScatter,PetscInt,PetscInt,PetscInt*,IS*,PetscInt*,IS*);
845: /* =======================================================================*/
846: #define VEC_SEQ_ID 0
847: #define VEC_MPI_ID 1
848: #define IS_GENERAL_ID 0
849: #define IS_STRIDE_ID 1
850: #define IS_BLOCK_ID 2
852: /*
853: Blocksizes we have optimized scatters for
854: */
856: #define VecScatterOptimizedBS(mbs) (2 <= mbs)
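/*
   That is, block size 1 falls through to the general (unblocked) code paths, while any
   block size of at least 2 is handed to the blocked pack/unpack routines.
*/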
859: PetscErrorCode VecScatterCreateEmpty(MPI_Comm comm,VecScatter *newctx)
860: {
861: VecScatter ctx;
865: PetscHeaderCreate(ctx,VEC_SCATTER_CLASSID,"VecScatter","VecScatter","Vec",comm,VecScatterDestroy,VecScatterView);
866: ctx->inuse = PETSC_FALSE;
867: ctx->beginandendtogether = PETSC_FALSE;
868: PetscOptionsGetBool(NULL,NULL,"-vecscatter_merge",&ctx->beginandendtogether,NULL);
869: if (ctx->beginandendtogether) {
870: PetscInfo(ctx,"Using combined (merged) vector scatter begin and end\n");
871: }
872: ctx->packtogether = PETSC_FALSE;
873: PetscOptionsGetBool(NULL,NULL,"-vecscatter_packtogether",&ctx->packtogether,NULL);
874: if (ctx->packtogether) {
875: PetscInfo(ctx,"Pack all messages before sending\n");
876: }
877: *newctx = ctx;
878: return(0);
879: }
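/*
   Both flags read above are ordinary PETSc runtime options and can be supplied on the
   command line of any PETSc program, e.g. (hypothetical executable name)

      mpiexec -n 4 ./app -vecscatter_merge -vecscatter_packtogether
*/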
881: /* -------------------------------------------------- */
882: PetscErrorCode VecScatterCreate_Seq(VecScatter ctx)
883: {
884: PetscErrorCode ierr;
885: PetscInt ix_type=-1,iy_type=-1;
886: IS tix=NULL,tiy=NULL,ix=ctx->from_is,iy=ctx->to_is;
887: Vec xin=ctx->from_v;
890: PetscObjectChangeTypeName((PetscObject)ctx,VECSCATTERSEQ);
891: GetInputISType_private(ctx,VEC_SEQ_ID,VEC_SEQ_ID,&ix_type,&tix,&iy_type,&tiy);
892: if (tix) ix = tix;
893: if (tiy) iy = tiy;
895: if (ix_type == IS_GENERAL_ID && iy_type == IS_GENERAL_ID) {
896: PetscInt nx,ny;
897: const PetscInt *idx,*idy;
898: VecScatter_Seq_General *to = NULL,*from = NULL;
900: ISGetLocalSize(ix,&nx);
901: ISGetLocalSize(iy,&ny);
902: if (nx != ny) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local scatter sizes don't match");
903: ISGetIndices(ix,&idx);
904: ISGetIndices(iy,&idy);
905: PetscMalloc2(1,&to,1,&from);
906: PetscMalloc2(nx,&to->vslots,nx,&from->vslots);
907: to->n = nx;
908: #if defined(PETSC_USE_DEBUG)
909: VecScatterCheckIndices_Private(ctx->to_n,ny,idy);
910: #endif
911: PetscMemcpy(to->vslots,idy,nx*sizeof(PetscInt));
912: from->n = nx;
913: #if defined(PETSC_USE_DEBUG)
914: VecScatterCheckIndices_Private(ctx->from_n,nx,idx);
915: #endif
916: PetscMemcpy(from->vslots,idx,nx*sizeof(PetscInt));
917: to->format = VEC_SCATTER_SEQ_GENERAL;
918: from->format = VEC_SCATTER_SEQ_GENERAL;
919: ctx->todata = (void*)to;
920: ctx->fromdata = (void*)from;
921: ctx->ops->begin = VecScatterBegin_SGToSG;
922: ctx->ops->end = 0;
923: ctx->ops->destroy = VecScatterDestroy_SGToSG;
924: ctx->ops->copy = VecScatterCopy_SGToSG;
925: ctx->ops->view = VecScatterView_SGToSG;
926: PetscInfo(xin,"Special case: sequential vector general scatter\n");
927: goto functionend;
928: } else if (ix_type == IS_STRIDE_ID && iy_type == IS_STRIDE_ID) {
929: PetscInt nx,ny,to_first,to_step,from_first,from_step;
930: VecScatter_Seq_Stride *from8 = NULL,*to8 = NULL;
932: ISGetLocalSize(ix,&nx);
933: ISGetLocalSize(iy,&ny);
934: if (nx != ny) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local scatter sizes don't match");
935: ISStrideGetInfo(iy,&to_first,&to_step);
936: ISStrideGetInfo(ix,&from_first,&from_step);
937: PetscMalloc2(1,&to8,1,&from8);
938: to8->n = nx;
939: to8->first = to_first;
940: to8->step = to_step;
941: from8->n = nx;
942: from8->first = from_first;
943: from8->step = from_step;
944: to8->format = VEC_SCATTER_SEQ_STRIDE;
945: from8->format = VEC_SCATTER_SEQ_STRIDE;
946: ctx->todata = (void*)to8;
947: ctx->fromdata = (void*)from8;
948: ctx->ops->begin = VecScatterBegin_SSToSS;
949: ctx->ops->end = 0;
950: ctx->ops->destroy = VecScatterDestroy_SSToSS;
951: ctx->ops->copy = VecScatterCopy_SSToSS;
952: ctx->ops->view = VecScatterView_SSToSS;
953: PetscInfo(xin,"Special case: sequential vector stride to stride\n");
954: goto functionend;
955: } else if (ix_type == IS_GENERAL_ID && iy_type == IS_STRIDE_ID) {
956: PetscInt nx,ny,first,step;
957: const PetscInt *idx;
958: VecScatter_Seq_General *from9 = NULL;
959: VecScatter_Seq_Stride *to9 = NULL;
961: ISGetLocalSize(ix,&nx);
962: ISGetIndices(ix,&idx);
963: ISGetLocalSize(iy,&ny);
964: ISStrideGetInfo(iy,&first,&step);
965: if (nx != ny) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local scatter sizes don't match");
966: PetscMalloc2(1,&to9,1,&from9);
967: PetscMalloc1(nx,&from9->vslots);
968: to9->n = nx;
969: to9->first = first;
970: to9->step = step;
971: from9->n = nx;
972: #if defined(PETSC_USE_DEBUG)
973: VecScatterCheckIndices_Private(ctx->from_n,nx,idx);
974: #endif
975: PetscMemcpy(from9->vslots,idx,nx*sizeof(PetscInt));
976: ctx->todata = (void*)to9; ctx->fromdata = (void*)from9;
977: if (step == 1) ctx->ops->begin = VecScatterBegin_SGToSS_Stride1;
978: else ctx->ops->begin = VecScatterBegin_SGToSS;
979: ctx->ops->destroy = VecScatterDestroy_SGToSS;
980: ctx->ops->end = 0;
981: ctx->ops->copy = VecScatterCopy_SGToSS;
982: ctx->ops->view = VecScatterView_SGToSS;
983: to9->format = VEC_SCATTER_SEQ_STRIDE;
984: from9->format = VEC_SCATTER_SEQ_GENERAL;
985: PetscInfo(xin,"Special case: sequential vector general to stride\n");
986: goto functionend;
987: } else if (ix_type == IS_STRIDE_ID && iy_type == IS_GENERAL_ID) {
988: PetscInt nx,ny,first,step;
989: const PetscInt *idy;
990: VecScatter_Seq_General *to10 = NULL;
991: VecScatter_Seq_Stride *from10 = NULL;
993: ISGetLocalSize(ix,&nx);
994: ISGetIndices(iy,&idy);
995: ISGetLocalSize(iy,&ny);
996: ISStrideGetInfo(ix,&first,&step);
997: if (nx != ny) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local scatter sizes don't match");
998: PetscMalloc2(1,&to10,1,&from10);
999: PetscMalloc1(nx,&to10->vslots);
1000: from10->n = nx;
1001: from10->first = first;
1002: from10->step = step;
1003: to10->n = nx;
1004: #if defined(PETSC_USE_DEBUG)
1005: VecScatterCheckIndices_Private(ctx->to_n,ny,idy);
1006: #endif
1007: PetscMemcpy(to10->vslots,idy,nx*sizeof(PetscInt));
1008: ctx->todata = (void*)to10;
1009: ctx->fromdata = (void*)from10;
1010: if (step == 1) ctx->ops->begin = VecScatterBegin_SSToSG_Stride1;
1011: else ctx->ops->begin = VecScatterBegin_SSToSG;
1012: ctx->ops->destroy = VecScatterDestroy_SSToSG;
1013: ctx->ops->end = 0;
1014: ctx->ops->copy = 0;
1015: ctx->ops->view = VecScatterView_SSToSG;
1016: to10->format = VEC_SCATTER_SEQ_GENERAL;
1017: from10->format = VEC_SCATTER_SEQ_STRIDE;
1018: PetscInfo(xin,"Special case: sequential vector stride to general\n");
1019: goto functionend;
1020: } else {
1021: PetscInt nx,ny;
1022: const PetscInt *idx,*idy;
1023: VecScatter_Seq_General *to11 = NULL,*from11 = NULL;
1024: PetscBool idnx,idny;
1026: ISGetLocalSize(ix,&nx);
1027: ISGetLocalSize(iy,&ny);
1028: if (nx != ny) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local scatter sizes don't match, in %D out %D",nx,ny);
1030: ISIdentity(ix,&idnx);
1031: ISIdentity(iy,&idny);
1032: if (idnx && idny) {
1033: VecScatter_Seq_Stride *to13 = NULL,*from13 = NULL;
1034: PetscMalloc2(1,&to13,1,&from13);
1035: to13->n = nx;
1036: to13->first = 0;
1037: to13->step = 1;
1038: from13->n = nx;
1039: from13->first = 0;
1040: from13->step = 1;
1041: to13->format = VEC_SCATTER_SEQ_STRIDE;
1042: from13->format = VEC_SCATTER_SEQ_STRIDE;
1043: ctx->todata = (void*)to13;
1044: ctx->fromdata = (void*)from13;
1045: ctx->ops->begin = VecScatterBegin_SSToSS;
1046: ctx->ops->end = 0;
1047: ctx->ops->destroy = VecScatterDestroy_SSToSS;
1048: ctx->ops->copy = VecScatterCopy_SSToSS;
1049: ctx->ops->view = VecScatterView_SSToSS;
1050: PetscInfo(xin,"Special case: sequential copy\n");
1051: goto functionend;
1052: }
1054: ISGetIndices(iy,&idy);
1055: ISGetIndices(ix,&idx);
1056: PetscMalloc2(1,&to11,1,&from11);
1057: PetscMalloc2(nx,&to11->vslots,nx,&from11->vslots);
1058: to11->n = nx;
1059: #if defined(PETSC_USE_DEBUG)
1060: VecScatterCheckIndices_Private(ctx->to_n,ny,idy);
1061: #endif
1062: PetscMemcpy(to11->vslots,idy,nx*sizeof(PetscInt));
1063: from11->n = nx;
1064: #if defined(PETSC_USE_DEBUG)
1065: VecScatterCheckIndices_Private(ctx->from_n,nx,idx);
1066: #endif
1067: PetscMemcpy(from11->vslots,idx,nx*sizeof(PetscInt));
1068: to11->format = VEC_SCATTER_SEQ_GENERAL;
1069: from11->format = VEC_SCATTER_SEQ_GENERAL;
1070: ctx->todata = (void*)to11;
1071: ctx->fromdata = (void*)from11;
1072: ctx->ops->begin = VecScatterBegin_SGToSG;
1073: ctx->ops->end = 0;
1074: ctx->ops->destroy = VecScatterDestroy_SGToSG;
1075: ctx->ops->copy = VecScatterCopy_SGToSG;
1076: ctx->ops->view = VecScatterView_SGToSG;
1077: ISRestoreIndices(ix,&idx);
1078: ISRestoreIndices(iy,&idy);
1079: PetscInfo(xin,"Sequential vector scatter with block indices\n");
1080: goto functionend;
1081: }
1082: functionend:
1083: ISDestroy(&tix);
1084: ISDestroy(&tiy);
1085: VecScatterViewFromOptions(ctx,NULL,"-vecscatter_view");
1086: return(0);
1087: }
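/*
   A minimal sketch (assuming only the standard VecScatterCreate() entry point) of how
   the special cases above are selected: two stride index sets on sequential vectors,
   for instance, land in the SSToSS branch.

      IS         isx,isy;
      VecScatter scat;
      ISCreateStride(PETSC_COMM_SELF,5,0,2,&isx);      (first 0, step 2)
      ISCreateStride(PETSC_COMM_SELF,5,1,1,&isy);      (first 1, step 1)
      VecScatterCreate(x,isx,y,isy,&scat);
      VecScatterBegin(scat,x,y,INSERT_VALUES,SCATTER_FORWARD);
      VecScatterEnd(scat,x,y,INSERT_VALUES,SCATTER_FORWARD);
      VecScatterDestroy(&scat);
      ISDestroy(&isx); ISDestroy(&isy);
*/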
1089: static PetscErrorCode VecScatterCreate_PtoS(VecScatter ctx)
1090: {
1091: PetscErrorCode ierr;
1092: PetscMPIInt size;
1093: PetscInt ix_type=-1,iy_type=-1,*range;
1094: MPI_Comm comm;
1095: IS tix=NULL,tiy=NULL,ix=ctx->from_is,iy=ctx->to_is;
1096: Vec xin=ctx->from_v,yin=ctx->to_v;
1097: VecScatterType type;
1098: PetscBool vec_mpi1_flg;
1099: PetscBool totalv,ixblock,iyblock,iystride,islocal,cando;
1102: PetscObjectGetComm((PetscObject)ctx,&comm);
1103: GetInputISType_private(ctx,VEC_MPI_ID,VEC_SEQ_ID,&ix_type,&tix,&iy_type,&tiy);
1104: if (tix) ix = tix;
1105: if (tiy) iy = tiy;
1107: VecScatterGetType(ctx,&type);
1108: PetscStrcmp(type,"mpi1",&vec_mpi1_flg);
1110: islocal = PETSC_FALSE;
1111: /* special case extracting (subset of) local portion */
1112: if (ix_type == IS_STRIDE_ID && iy_type == IS_STRIDE_ID) {
1113: /* Case (2-a) */
1114: PetscInt nx,ny,to_first,to_step,from_first,from_step;
1115: PetscInt start,end,min,max;
1116: VecScatter_Seq_Stride *from12 = NULL,*to12 = NULL;
1118: VecGetOwnershipRange(xin,&start,&end);
1119: ISGetLocalSize(ix,&nx);
1120: ISStrideGetInfo(ix,&from_first,&from_step);
1121: ISGetLocalSize(iy,&ny);
1122: ISStrideGetInfo(iy,&to_first,&to_step);
1123: if (nx != ny) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local scatter sizes don't match");
1124: ISGetMinMax(ix,&min,&max);
1125: if (min >= start && max < end) islocal = PETSC_TRUE;
1126: else islocal = PETSC_FALSE;
1127: /* cannot use MPIU_Allreduce() since this call matches with the MPI_Allreduce() in the else statement below */
1128: MPI_Allreduce(&islocal,&cando,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)xin));
1129: if (cando) {
1130: PetscMalloc2(1,&to12,1,&from12);
1131: to12->n = nx;
1132: to12->first = to_first;
1133: to12->step = to_step;
1134: from12->n = nx;
1135: from12->first = from_first-start;
1136: from12->step = from_step;
1137: to12->format = VEC_SCATTER_SEQ_STRIDE;
1138: from12->format = VEC_SCATTER_SEQ_STRIDE;
1139: ctx->todata = (void*)to12;
1140: ctx->fromdata = (void*)from12;
1141: ctx->ops->begin = VecScatterBegin_SSToSS;
1142: ctx->ops->end = 0;
1143: ctx->ops->destroy = VecScatterDestroy_SSToSS;
1144: ctx->ops->copy = VecScatterCopy_SSToSS;
1145: ctx->ops->view = VecScatterView_SSToSS;
1146: PetscInfo(xin,"Special case: processors only getting local values\n");
1147: goto functionend;
1148: }
1149: } else {
1150: MPI_Allreduce(&islocal,&cando,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)xin));
1151: }
1153: /* test for special case of all processors getting entire vector */
1154: /* contains check that PetscMPIInt can handle the sizes needed */
1155: totalv = PETSC_FALSE;
1156: if (ix_type == IS_STRIDE_ID && iy_type == IS_STRIDE_ID) {
1157: /* Case (2-b) */
1158: PetscInt i,nx,ny,to_first,to_step,from_first,from_step,N;
1159: PetscMPIInt *count = NULL,*displx;
1160: VecScatter_MPI_ToAll *sto = NULL;
1162: ISGetLocalSize(ix,&nx);
1163: ISStrideGetInfo(ix,&from_first,&from_step);
1164: ISGetLocalSize(iy,&ny);
1165: ISStrideGetInfo(iy,&to_first,&to_step);
1166: if (nx != ny) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local scatter sizes don't match");
1167: VecGetSize(xin,&N);
1168: if (nx != N) totalv = PETSC_FALSE;
1169: else if (from_first == 0 && from_step == 1 && from_first == to_first && from_step == to_step) totalv = PETSC_TRUE;
1170: else totalv = PETSC_FALSE;
1171: /* cannot use MPIU_Allreduce() since this call matches with the MPI_Allreduce() in the else statement below */
1172: MPI_Allreduce(&totalv,&cando,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)xin));
1174: #if defined(PETSC_USE_64BIT_INDICES)
1175: if (cando && (yin->map->N < PETSC_MPI_INT_MAX)) {
1176: #else
1177: if (cando) {
1178: #endif
1179: MPI_Comm_size(PetscObjectComm((PetscObject)ctx),&size);
1180: PetscMalloc3(1,&sto,size,&count,size,&displx);
1181: range = xin->map->range;
1182: for (i=0; i<size; i++) {
1183: PetscMPIIntCast(range[i+1] - range[i],count+i);
1184: PetscMPIIntCast(range[i],displx+i);
1185: }
1186: sto->count = count;
1187: sto->displx = displx;
1188: sto->work1 = 0;
1189: sto->work2 = 0;
1190: sto->format = VEC_SCATTER_MPI_TOALL;
1191: ctx->todata = (void*)sto;
1192: ctx->fromdata = 0;
1193: ctx->ops->begin = VecScatterBegin_MPI_ToAll;
1194: ctx->ops->end = 0;
1195: ctx->ops->destroy = VecScatterDestroy_MPI_ToAll;
1196: ctx->ops->copy = VecScatterCopy_MPI_ToAll;
1197: ctx->ops->view = VecScatterView_MPI_ToAll;
1198: PetscInfo(xin,"Special case: all processors get entire parallel vector\n");
1199: goto functionend;
1200: }
1201: } else {
1202: MPI_Allreduce(&totalv,&cando,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)xin));
1203: }
1205: /* test for special case of processor 0 getting entire vector */
1206: /* contains check that PetscMPIInt can handle the sizes needed */
1207: totalv = PETSC_FALSE;
1208: if (ix_type == IS_STRIDE_ID && iy_type == IS_STRIDE_ID) {
1209: /* Case (2-c) */
1210: PetscInt i,nx,ny,to_first,to_step,from_first,from_step,N;
1211: PetscMPIInt rank,*count = NULL,*displx;
1212: VecScatter_MPI_ToAll *sto = NULL;
1214: PetscObjectGetComm((PetscObject)xin,&comm);
1215: MPI_Comm_rank(comm,&rank);
1216: ISGetLocalSize(ix,&nx);
1217: ISStrideGetInfo(ix,&from_first,&from_step);
1218: ISGetLocalSize(iy,&ny);
1219: ISStrideGetInfo(iy,&to_first,&to_step);
1220: if (nx != ny) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local scatter sizes don't match");
1221: if (!rank) {
1222: VecGetSize(xin,&N);
1223: if (nx != N) totalv = PETSC_FALSE;
1224: else if (from_first == 0 && from_step == 1 &&
1225: from_first == to_first && from_step == to_step) totalv = PETSC_TRUE;
1226: else totalv = PETSC_FALSE;
1227: } else {
1228: if (!nx) totalv = PETSC_TRUE;
1229: else totalv = PETSC_FALSE;
1230: }
1231: /* cannot use MPIU_Allreduce() since this call matches with the MPI_Allreduce() in the else statement below */
1232: MPI_Allreduce(&totalv,&cando,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)xin));
1234: #if defined(PETSC_USE_64BIT_INDICES)
1235: if (cando && (yin->map->N < PETSC_MPI_INT_MAX)) {
1236: #else
1237: if (cando) {
1238: #endif
1239: MPI_Comm_size(PetscObjectComm((PetscObject)ctx),&size);
1240: PetscMalloc3(1,&sto,size,&count,size,&displx);
1241: range = xin->map->range;
1242: for (i=0; i<size; i++) {
1243: PetscMPIIntCast(range[i+1] - range[i],count+i);
1244: PetscMPIIntCast(range[i],displx+i);
1245: }
1246: sto->count = count;
1247: sto->displx = displx;
1248: sto->work1 = 0;
1249: sto->work2 = 0;
1250: sto->format = VEC_SCATTER_MPI_TOONE;
1251: ctx->todata = (void*)sto;
1252: ctx->fromdata = 0;
1253: ctx->ops->begin = VecScatterBegin_MPI_ToOne;
1254: ctx->ops->end = 0;
1255: ctx->ops->destroy = VecScatterDestroy_MPI_ToAll;
1256: ctx->ops->copy = VecScatterCopy_MPI_ToAll;
1257: ctx->ops->view = VecScatterView_MPI_ToAll;
1258: PetscInfo(xin,"Special case: processor zero gets entire parallel vector, rest get none\n");
1259: goto functionend;
1260: }
1261: } else {
1262: MPI_Allreduce(&totalv,&cando,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)xin));
1263: }
1265: /* Case 2-d */
1266: PetscObjectTypeCompare((PetscObject)ix,ISBLOCK,&ixblock);
1267: PetscObjectTypeCompare((PetscObject)iy,ISBLOCK,&iyblock);
1268: PetscObjectTypeCompare((PetscObject)iy,ISSTRIDE,&iystride);
1269: if (ixblock) {
1270: /* special case block to block */
1271: if (iyblock) {
1272: PetscInt nx,ny,bsx,bsy;
1273: const PetscInt *idx,*idy;
1274: ISGetBlockSize(iy,&bsy);
1275: ISGetBlockSize(ix,&bsx);
1276: if (bsx == bsy && VecScatterOptimizedBS(bsx)) {
1277: ISBlockGetLocalSize(ix,&nx);
1278: ISBlockGetIndices(ix,&idx);
1279: ISBlockGetLocalSize(iy,&ny);
1280: ISBlockGetIndices(iy,&idy);
1281: if (nx != ny) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local scatter sizes don't match");
1282: #if defined(PETSC_HAVE_MPI_WIN_CREATE_FEATURE)
1283: if (vec_mpi1_flg) {
1284: VecScatterCreateLocal_PtoS_MPI1(nx,idx,ny,idy,xin,yin,bsx,ctx);
1285: } else {
1286: VecScatterCreateLocal_PtoS_MPI3(nx,idx,ny,idy,xin,yin,bsx,ctx);
1287: }
1288: #else
1289: VecScatterCreateLocal_PtoS_MPI1(nx,idx,ny,idy,xin,yin,bsx,ctx);
1290: #endif
1291: ISBlockRestoreIndices(ix,&idx);
1292: ISBlockRestoreIndices(iy,&idy);
1293: PetscInfo(xin,"Special case: blocked indices\n");
1294: goto functionend;
1295: }
1296: /* special case block to stride */
1297: } else if (iystride) {
1298: /* Case (2-e) */
1299: PetscInt ystart,ystride,ysize,bsx;
1300: ISStrideGetInfo(iy,&ystart,&ystride);
1301: ISGetLocalSize(iy,&ysize);
1302: ISGetBlockSize(ix,&bsx);
1303: /* see if stride index set is equivalent to block index set */
1304: if (VecScatterOptimizedBS(bsx) && ((ystart % bsx) == 0) && (ystride == 1) && ((ysize % bsx) == 0)) {
1305: PetscInt nx,il,*idy;
1306: const PetscInt *idx;
1307: ISBlockGetLocalSize(ix,&nx);
1308: ISBlockGetIndices(ix,&idx);
1309: if (ysize != bsx*nx) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local scatter sizes don't match");
1310: PetscMalloc1(nx,&idy);
1311: if (nx) {
1312: idy[0] = ystart/bsx;
1313: for (il=1; il<nx; il++) idy[il] = idy[il-1] + 1;
1314: }
1315: #if defined(PETSC_HAVE_MPI_WIN_CREATE_FEATURE)
1316: if (vec_mpi1_flg) {
1317: VecScatterCreateLocal_PtoS_MPI1(nx,idx,nx,idy,xin,yin,bsx,ctx);
1318: } else {
1319: VecScatterCreateLocal_PtoS_MPI3(nx,idx,nx,idy,xin,yin,bsx,ctx);
1320: }
1321: #else
1322: VecScatterCreateLocal_PtoS_MPI1(nx,idx,nx,idy,xin,yin,bsx,ctx);
1323: #endif
1324: PetscFree(idy);
1325: ISBlockRestoreIndices(ix,&idx);
1326: PetscInfo(xin,"Special case: blocked indices to stride\n");
1327: goto functionend;
1328: }
1329: }
1330: }
1332: /* left over general case (2-f) */
1333: {
1334: PetscInt nx,ny;
1335: const PetscInt *idx,*idy;
1336: ISGetLocalSize(ix,&nx);
1337: ISGetIndices(ix,&idx);
1338: ISGetLocalSize(iy,&ny);
1339: ISGetIndices(iy,&idy);
1340: if (nx != ny) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local scatter sizes don't match (%D %D)",nx,ny);
1341: #if defined(PETSC_HAVE_MPI_WIN_CREATE_FEATURE)
1342: if (vec_mpi1_flg) {
1343: VecScatterCreateLocal_PtoS_MPI1(nx,idx,ny,idy,xin,yin,1,ctx);
1344: } else {
1345: VecScatterCreateLocal_PtoS_MPI3(nx,idx,ny,idy,xin,yin,1,ctx);
1346: }
1347: #else
1348: VecScatterCreateLocal_PtoS_MPI1(nx,idx,ny,idy,xin,yin,1,ctx);
1349: #endif
1350: ISRestoreIndices(ix,&idx);
1351: ISRestoreIndices(iy,&idy);
1352: PetscInfo(xin,"General case: MPI to Seq\n");
1353: goto functionend;
1354: }
1355: functionend:
1356: ISDestroy(&tix);
1357: ISDestroy(&tiy);
1358: VecScatterViewFromOptions(ctx,NULL,"-vecscatter_view");
1359: return(0);
1360: }
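/*
   A minimal sketch (assuming only the standard public API) of the MPI-to-Seq case this
   routine handles: each process pulls an arbitrary set of global entries of a parallel
   vector x into a local sequential vector. The indices below are illustrative only;
   passing NULL for iy means "all of yseq", as handled by GetInputISType_private().

      IS         ix;
      Vec        yseq;
      VecScatter scat;
      PetscInt   idx[2] = {0,7};                       (global indices wanted locally)
      VecCreateSeq(PETSC_COMM_SELF,2,&yseq);
      ISCreateGeneral(PETSC_COMM_SELF,2,idx,PETSC_COPY_VALUES,&ix);
      VecScatterCreate(x,ix,yseq,NULL,&scat);
      VecScatterBegin(scat,x,yseq,INSERT_VALUES,SCATTER_FORWARD);
      VecScatterEnd(scat,x,yseq,INSERT_VALUES,SCATTER_FORWARD);
*/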
1362: static PetscErrorCode VecScatterCreate_StoP(VecScatter ctx)
1363: {
1364: PetscErrorCode ierr;
1365: PetscInt ix_type=-1,iy_type=-1;
1366: IS tix=NULL,tiy=NULL,ix=ctx->from_is,iy=ctx->to_is;
1367: Vec xin=ctx->from_v,yin=ctx->to_v;
1368: VecScatterType type;
1369: PetscBool vscat_mpi1;
1370: PetscBool islocal,cando;
1373: GetInputISType_private(ctx,VEC_MPI_ID,VEC_SEQ_ID,&ix_type,&tix,&iy_type,&tiy);
1374: if (tix) ix = tix;
1375: if (tiy) iy = tiy;
1377: VecScatterGetType(ctx,&type);
1378: PetscStrcmp(type,"mpi1",&vscat_mpi1);
1380: /* special case local copy portion */
1381: islocal = PETSC_FALSE;
1382: if (ix_type == IS_STRIDE_ID && iy_type == IS_STRIDE_ID) {
1383: PetscInt nx,ny,to_first,to_step,from_step,start,end,from_first,min,max;
1384: VecScatter_Seq_Stride *from = NULL,*to = NULL;
1386: VecGetOwnershipRange(yin,&start,&end);
1387: ISGetLocalSize(ix,&nx);
1388: ISStrideGetInfo(ix,&from_first,&from_step);
1389: ISGetLocalSize(iy,&ny);
1390: ISStrideGetInfo(iy,&to_first,&to_step);
1391: if (nx != ny) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local scatter sizes don't match");
1392: ISGetMinMax(iy,&min,&max);
1393: if (min >= start && max < end) islocal = PETSC_TRUE;
1394: else islocal = PETSC_FALSE;
1395: /* cannot use MPIU_Allreduce() since this call matches with the MPI_Allreduce() in the else statement below */
1396: MPI_Allreduce(&islocal,&cando,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)yin));
1397: if (cando) {
1398: PetscMalloc2(1,&to,1,&from);
1399: to->n = nx;
1400: to->first = to_first-start;
1401: to->step = to_step;
1402: from->n = nx;
1403: from->first = from_first;
1404: from->step = from_step;
1405: to->format = VEC_SCATTER_SEQ_STRIDE;
1406: from->format = VEC_SCATTER_SEQ_STRIDE;
1407: ctx->todata = (void*)to;
1408: ctx->fromdata = (void*)from;
1409: ctx->ops->begin = VecScatterBegin_SSToSS;
1410: ctx->ops->end = 0;
1411: ctx->ops->destroy = VecScatterDestroy_SSToSS;
1412: ctx->ops->copy = VecScatterCopy_SSToSS;
1413: ctx->ops->view = VecScatterView_SSToSS;
1414: PetscInfo(xin,"Special case: sequential stride to MPI stride\n");
1415: goto functionend;
1416: }
1417: } else {
1418: MPI_Allreduce(&islocal,&cando,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)yin));
1419: }
1420: /* special case block to stride */
1421: if (ix_type == IS_BLOCK_ID && iy_type == IS_STRIDE_ID) {
1422: PetscInt ystart,ystride,ysize,bsx;
1423: ISStrideGetInfo(iy,&ystart,&ystride);
1424: ISGetLocalSize(iy,&ysize);
1425: ISGetBlockSize(ix,&bsx);
1426: /* see if stride index set is equivalent to block index set */
1427: if (VecScatterOptimizedBS(bsx) && ((ystart % bsx) == 0) && (ystride == 1) && ((ysize % bsx) == 0)) {
1428: PetscInt nx,il,*idy;
1429: const PetscInt *idx;
1430: ISBlockGetLocalSize(ix,&nx);
1431: ISBlockGetIndices(ix,&idx);
1432: if (ysize != bsx*nx) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local scatter sizes don't match");
1433: PetscMalloc1(nx,&idy);
1434: if (nx) {
1435: idy[0] = ystart/bsx;
1436: for (il=1; il<nx; il++) idy[il] = idy[il-1] + 1;
1437: }
1438: if (vscat_mpi1) {
1439: VecScatterCreateLocal_StoP_MPI1(nx,idx,nx,idy,xin,yin,bsx,ctx);
1440: }
1441: #if defined(PETSC_HAVE_MPI_WIN_CREATE_FEATURE)
1442: else {
1443: VecScatterCreateLocal_StoP_MPI3(nx,idx,nx,idy,xin,yin,bsx,ctx);
1444: }
1445: #endif
1446: PetscFree(idy);
1447: ISBlockRestoreIndices(ix,&idx);
1448: PetscInfo(xin,"Special case: Blocked indices to stride\n");
1449: goto functionend;
1450: }
1451: }
1453: /* general case */
1454: {
1455: PetscInt nx,ny;
1456: const PetscInt *idx,*idy;
1457: ISGetLocalSize(ix,&nx);
1458: ISGetIndices(ix,&idx);
1459: ISGetLocalSize(iy,&ny);
1460: ISGetIndices(iy,&idy);
1461: if (nx != ny) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local scatter sizes don't match");
1462: if (vscat_mpi1) {
1463: VecScatterCreateLocal_StoP_MPI1(nx,idx,ny,idy,xin,yin,1,ctx);
1464: }
1465: #if defined(PETSC_HAVE_MPI_WIN_CREATE_FEATURE)
1466: else {
1467: VecScatterCreateLocal_StoP_MPI3(nx,idx,ny,idy,xin,yin,1,ctx);
1468: }
1469: #endif
1470: ISRestoreIndices(ix,&idx);
1471: ISRestoreIndices(iy,&idy);
1472: PetscInfo(xin,"General case: Seq to MPI\n");
1473: goto functionend;
1474: }
1475: functionend:
1476: ISDestroy(&tix);
1477: ISDestroy(&tiy);
1478: VecScatterViewFromOptions(ctx,NULL,"-vecscatter_view");
1479: return(0);
1480: }
1482: static PetscErrorCode VecScatterCreate_PtoP(VecScatter ctx)
1483: {
1484: PetscErrorCode ierr;
1485: PetscInt ix_type=-1,iy_type=-1;
1486: IS tix=NULL,tiy=NULL,ix=ctx->from_is,iy=ctx->to_is;
1487: Vec xin=ctx->from_v,yin=ctx->to_v;
1488: VecScatterType type;
1489: PetscBool vscat_mpi1;
1490: PetscInt nx,ny;
1491: const PetscInt *idx,*idy;
1494: GetInputISType_private(ctx,VEC_MPI_ID,VEC_MPI_ID,&ix_type,&tix,&iy_type,&tiy);
1495: if (tix) ix = tix;
1496: if (tiy) iy = tiy;
1498: VecScatterGetType(ctx,&type);
1499: PetscStrcmp(type,"mpi1",&vscat_mpi1);
1501: /* no special cases for now */
1502: ISGetLocalSize(ix,&nx);
1503: ISGetIndices(ix,&idx);
1504: ISGetLocalSize(iy,&ny);
1505: ISGetIndices(iy,&idy);
1506: if (nx != ny) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local scatter sizes don't match");
1507: if (vscat_mpi1) {
1508: VecScatterCreateLocal_PtoP_MPI1(nx,idx,ny,idy,xin,yin,1,ctx);
1509: }
1510: #if defined(PETSC_HAVE_MPI_WIN_CREATE_FEATURE)
1511: else {
1512: VecScatterCreateLocal_PtoP_MPI3(nx,idx,ny,idy,xin,yin,1,ctx);
1513: }
1514: #endif
1515: ISRestoreIndices(ix,&idx);
1516: ISRestoreIndices(iy,&idy);
1517: PetscInfo(xin,"General case: MPI to MPI\n");
1519: ISDestroy(&tix);
1520: ISDestroy(&tiy);
1521: VecScatterViewFromOptions(ctx,NULL,"-vecscatter_view");
1522: return(0);
1523: }
1525: static PetscErrorCode VecScatterGetInputVecType_private(VecScatter ctx,PetscInt *xin_type1,PetscInt *yin_type1)
1526: {
1527: PetscErrorCode ierr;
1528: MPI_Comm comm,ycomm;
1529: PetscMPIInt size;
1530: Vec xin = ctx->from_v,yin = ctx->to_v;
1531: PetscInt xin_type,yin_type;
1534: /*
1535:      Determine if the vectors are "parallel", i.e. they share a communicator with other processes, or
1536:      sequential (they do not share a comm). The difference is that parallel vectors treat the
1537:      index set as providing indices in the global parallel numbering of the vector, while
1538:      sequential vectors treat the index set as providing indices in the local sequential
1539:      numbering
1540: */
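/*
        For example: a vector of global length 8 split across 2 processes (4 entries
        each) is "parallel", so index 5 in its index set means the 2nd entry owned by
        rank 1; for a sequential vector, index 5 simply means the 6th local entry.
*/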
1541: PetscObjectGetComm((PetscObject)xin,&comm);
1542: MPI_Comm_size(comm,&size);
1543: if (size > 1) {
1544: xin_type = VEC_MPI_ID;
1545: } else xin_type = VEC_SEQ_ID;
1546: *xin_type1 = xin_type;
1548: PetscObjectGetComm((PetscObject)yin,&ycomm);
1549: MPI_Comm_size(ycomm,&size);
1550: if (size > 1) {
1551: yin_type = VEC_MPI_ID;
1552: } else yin_type = VEC_SEQ_ID;
1553: *yin_type1 = yin_type;
1554: return(0);
1555: }
1557: static PetscErrorCode GetInputISType_private(VecScatter ctx,PetscInt xin_type,PetscInt yin_type,PetscInt *ix_type1,IS *tix1,PetscInt *iy_type1,IS *tiy1)
1558: {
1559: PetscErrorCode ierr;
1560: MPI_Comm comm;
1561: Vec xin = ctx->from_v,yin = ctx->to_v;
1562: IS tix = 0,tiy = 0,ix = ctx->from_is, iy = ctx->to_is;
1563: PetscInt ix_type = IS_GENERAL_ID,iy_type = IS_GENERAL_ID;
1564: PetscBool flag;
1567: PetscObjectGetComm((PetscObject)ctx,&comm);
1569: /* if ix or iy is not provided, assume the entire vector is being scattered */
1570: if (!ix && xin_type == VEC_SEQ_ID) {
1571: ISCreateStride(comm,ctx->from_n,0,1,&ix);
1572: tix = ix;
1573: } else if (!ix && xin_type == VEC_MPI_ID) {
1574: if (yin_type == VEC_MPI_ID) {
1575: PetscInt ntmp, low;
1576: VecGetLocalSize(xin,&ntmp);
1577: VecGetOwnershipRange(xin,&low,NULL);
1578: ISCreateStride(comm,ntmp,low,1,&ix);
1579: } else {
1580: PetscInt Ntmp;
1581: VecGetSize(xin,&Ntmp);
1582: ISCreateStride(comm,Ntmp,0,1,&ix);
1583: }
1584: tix = ix;
1585: } else if (!ix) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ix not given, but not Seq or MPI vector");
1587: if (!iy && yin_type == VEC_SEQ_ID) {
1588: ISCreateStride(comm,ctx->to_n,0,1,&iy);
1589: tiy = iy;
1590: } else if (!iy && yin_type == VEC_MPI_ID) {
1591: if (xin_type == VEC_MPI_ID) {
1592: PetscInt ntmp, low;
1593: VecGetLocalSize(yin,&ntmp);
1594: VecGetOwnershipRange(yin,&low,NULL);
1595: ISCreateStride(comm,ntmp,low,1,&iy);
1596: } else {
1597: PetscInt Ntmp;
1598: VecGetSize(yin,&Ntmp);
1599: ISCreateStride(comm,Ntmp,0,1,&iy);
1600: }
1601: tiy = iy;
1602: } else if (!iy) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"iy not given, but not Seq or MPI vector");
1604: /* Determine types of index sets */
1605: PetscObjectTypeCompare((PetscObject)ix,ISBLOCK,&flag);
1606: if (flag) ix_type = IS_BLOCK_ID;
1607: PetscObjectTypeCompare((PetscObject)iy,ISBLOCK,&flag);
1608: if (flag) iy_type = IS_BLOCK_ID;
1609: PetscObjectTypeCompare((PetscObject)ix,ISSTRIDE,&flag);
1610: if (flag) ix_type = IS_STRIDE_ID;
1611: PetscObjectTypeCompare((PetscObject)iy,ISSTRIDE,&flag);
1612: if (flag) iy_type = IS_STRIDE_ID;
1614: if (ix_type1) *ix_type1 = ix_type;
1615: if (iy_type1) *iy_type1 = iy_type;
1616: if (tix1) *tix1 = tix;
1617: if (tiy1) *tiy1 = tiy;
1618: return(0);
1619: }
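/*
   Illustrative sketch (added for exposition, not part of the original source): a
   caller may pass NULL for either index set, in which case the strided defaults
   constructed above are used, i.e. the entire vector is scattered in natural order:

     Vec        x,y;
     VecScatter ctx;

     ierr = VecScatterCreate(x,NULL,y,NULL,&ctx);CHKERRQ(ierr);

   which, for two parallel vectors with identical layout, amounts to a parallel copy.
*/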
1621: static PetscErrorCode VecScatterCreate_vectype_private(VecScatter ctx)
1622: {
1623: PetscErrorCode ierr;
1624: PetscInt xin_type=-1,yin_type=-1;
1627: VecScatterGetInputVecType_private(ctx,&xin_type,&yin_type);
1628: if (xin_type == VEC_MPI_ID && yin_type == VEC_SEQ_ID) {
1629: VecScatterCreate_PtoS(ctx);
1630: } else if (xin_type == VEC_SEQ_ID && yin_type == VEC_MPI_ID) {
1631: VecScatterCreate_StoP(ctx);
1632: } else if (xin_type == VEC_MPI_ID && yin_type == VEC_MPI_ID) {
1633: VecScatterCreate_PtoP(ctx);
1634: }
1635: return(0);
1636: }
1638: PetscErrorCode VecScatterCreate_MPI1(VecScatter ctx)
1639: {
1640: PetscErrorCode ierr;
1643: /* subroutines called in VecScatterCreate_vectype_private() need scatter_type as an input */
1644: PetscObjectChangeTypeName((PetscObject)ctx,VECSCATTERMPI1);
1645: PetscInfo(ctx,"Using MPI1 for vector scatter\n");
1646: VecScatterCreate_vectype_private(ctx);
1647: return(0);
1648: }
1650: PetscErrorCode VecScatterCreate_MPI3(VecScatter ctx)
1651: {
1652: PetscErrorCode ierr;
1655: /* subroutines called in VecScatterCreate_vectype_private() need scatter_type as an input */
1656: PetscObjectChangeTypeName((PetscObject)ctx,VECSCATTERMPI3);
1657: PetscInfo(ctx,"Using MPI3 for vector scatter\n");
1658: VecScatterCreate_vectype_private(ctx);
1659: return(0);
1660: }
1662: PetscErrorCode VecScatterCreate_MPI3Node(VecScatter ctx)
1663: {
1664: PetscErrorCode ierr;
1667: /* subroutines called in VecScatterCreate_vectype_private() need scatter_type as an input */
1668: PetscObjectChangeTypeName((PetscObject)ctx,VECSCATTERMPI3NODE);
1669: PetscInfo(ctx,"Using MPI3NODE for vector scatter\n");
1670: VecScatterCreate_vectype_private(ctx);
1671: return(0);
1672: }
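/*
   Illustrative note (added for exposition, not part of the original source): the
   three constructors above are dispatched from VecScatterCreate() according to the
   scatter type name they set. Assuming the usual options mechanism that pairs with
   the -vecscatter_view option used in this file, the implementation can be picked
   at run time without code changes:

     ./app -vecscatter_type mpi1        (plain MPI-1 point-to-point scatters)
     ./app -vecscatter_type mpi3        (uses MPI-3 shared-memory windows where available)
     ./app -vecscatter_type mpi3node    (MPI-3 shared memory restricted to a node)
*/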
1674: /* ------------------------------------------------------------------*/
1675: /*@
1676: VecScatterGetMerged - Returns true if the scatter is completed in VecScatterBegin()
1677: and VecScatterEnd() does nothing
1679: Not Collective
1681: Input Parameter:
1682: . ctx - scatter context created with VecScatterCreate()
1684: Output Parameter:
1685: . flg - PETSC_TRUE if all of the work of VecScatterBegin/End() is done during VecScatterBegin()
1687: Level: developer
1689: .seealso: VecScatterCreate(), VecScatterEnd(), VecScatterBegin()
1690: @*/
1691: PetscErrorCode VecScatterGetMerged(VecScatter ctx,PetscBool *flg)
1692: {
1695: *flg = ctx->beginandendtogether;
1696: return(0);
1697: }
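/*
   Illustrative sketch (added for exposition, not part of the original source):
   VecScatterGetMerged() lets a caller decide whether there is any communication
   left to overlap with local work between the begin and end calls:

     PetscBool merged;

     ierr = VecScatterGetMerged(ctx,&merged);CHKERRQ(ierr);
     ierr = VecScatterBegin(ctx,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
     if (!merged) {
       ... purely local computation here can overlap the communication ...
     }
     ierr = VecScatterEnd(ctx,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
*/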
1699: /*@
1700: VecScatterBegin - Begins a generalized scatter from one vector to
1701: another. Complete the scattering phase with VecScatterEnd().
1703: Neighbor-wise Collective on VecScatter and Vec
1705: Input Parameters:
1706: + inctx - scatter context generated by VecScatterCreate()
1707: . x - the vector from which we scatter
1708: . y - the vector to which we scatter
1709: . addv - either ADD_VALUES or INSERT_VALUES; with INSERT_VALUES mode any location
1710: not scattered to retains its old value, i.e. the vector is NOT first zeroed.
1711: - mode - the scattering mode, usually SCATTER_FORWARD. The available modes are:
1712: SCATTER_FORWARD or SCATTER_REVERSE
1715: Level: intermediate
1717: Options Database: See VecScatterCreate()
1719: Notes:
1720: The vectors x and y need not be the same vectors used in the call
1721: to VecScatterCreate(), but x must have the same parallel data layout
1722: as that passed in as the x to VecScatterCreate(), similarly for the y.
1723: Most likely they have been obtained from VecDuplicate().
1725: You cannot change the values in the input vector between the calls to VecScatterBegin()
1726: and VecScatterEnd().
1728: If you use SCATTER_REVERSE the two arguments x and y should be reversed relative to
1729: the SCATTER_FORWARD call.
1731: y[iy[i]] = x[ix[i]], for i=0,...,ni-1
1733: This scatter is far more general than the conventional
1734: scatter, since it can be a gather or a scatter or a combination,
1735: depending on the indices ix and iy. If x is a parallel vector and y
1736: is sequential, VecScatterBegin() can serve to gather values to a
1737: single processor. Similarly, if y is parallel and x sequential, the
1738: routine can scatter from one processor to many processors.
1740: Concepts: scatter^between vectors
1741: Concepts: gather^between vectors
1743: .seealso: VecScatterCreate(), VecScatterEnd()
1744: @*/
1745: PetscErrorCode VecScatterBegin(VecScatter inctx,Vec x,Vec y,InsertMode addv,ScatterMode mode)
1746: {
1748: #if defined(PETSC_USE_DEBUG)
1749: PetscInt to_n,from_n;
1750: #endif
1755: if (inctx->inuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Scatter ctx already in use");
1757: #if defined(PETSC_USE_DEBUG)
1758: /*
1759: Error checking to make sure these vectors match the vectors used
1760: to create the vector scatter context. A value of -1 in from_n and to_n indicates the
1761: vector lengths are unknown (for example with mapped scatters) and thus
1762: no error checking is performed.
1763: */
1764: if (inctx->from_n >= 0 && inctx->to_n >= 0) {
1765: VecGetLocalSize(x,&from_n);
1766: VecGetLocalSize(y,&to_n);
1767: if (mode & SCATTER_REVERSE) {
1768: if (to_n != inctx->from_n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Vector wrong size %D for scatter %D (scatter reverse and vector to != ctx from size)",to_n,inctx->from_n);
1769: if (from_n != inctx->to_n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Vector wrong size %D for scatter %D (scatter reverse and vector from != ctx to size)",from_n,inctx->to_n);
1770: } else {
1771: if (to_n != inctx->to_n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Vector wrong size %D for scatter %D (scatter forward and vector to != ctx to size)",to_n,inctx->to_n);
1772: if (from_n != inctx->from_n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Vector wrong size %D for scatter %D (scatter forward and vector from != ctx from size)",from_n,inctx->from_n);
1773: }
1774: }
1775: #endif
1777: inctx->inuse = PETSC_TRUE;
1778: PetscLogEventBarrierBegin(VEC_ScatterBarrier,0,0,0,0,PetscObjectComm((PetscObject)inctx));
1779: (*inctx->ops->begin)(inctx,x,y,addv,mode);
1780: if (inctx->beginandendtogether && inctx->ops->end) {
1781: inctx->inuse = PETSC_FALSE;
1782: (*inctx->ops->end)(inctx,x,y,addv,mode);
1783: }
1784: PetscLogEventBarrierEnd(VEC_ScatterBarrier,0,0,0,0,PetscObjectComm((PetscObject)inctx));
1785: return(0);
1786: }
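/*
   Illustrative sketch (added for exposition, not part of the original source): the
   gather-to-every-process usage described in the notes above, written out for a
   parallel vector x of global size N (all names here are illustrative):

     Vec        x,ylocal;
     IS         ix;
     VecScatter ctx;
     PetscInt   N;

     ierr = VecGetSize(x,&N);CHKERRQ(ierr);
     ierr = VecCreateSeq(PETSC_COMM_SELF,N,&ylocal);CHKERRQ(ierr);
     ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&ix);CHKERRQ(ierr);
     ierr = VecScatterCreate(x,ix,ylocal,NULL,&ctx);CHKERRQ(ierr);
     ierr = VecScatterBegin(ctx,x,ylocal,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
     ierr = VecScatterEnd(ctx,x,ylocal,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

   After VecScatterEnd() every process holds a full copy of x in ylocal; this is
   what VecScatterCreateToAll() packages up.
*/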
1788: /* --------------------------------------------------------------------*/
1789: /*@
1790: VecScatterEnd - Ends a generalized scatter from one vector to another. Call
1791: after first calling VecScatterBegin().
1793: Neighbor-wise Collective on VecScatter and Vec
1795: Input Parameters:
1796: + ctx - scatter context generated by VecScatterCreate()
1797: . x - the vector from which we scatter
1798: . y - the vector to which we scatter
1799: . addv - either ADD_VALUES or INSERT_VALUES.
1800: - mode - the scattering mode, usually SCATTER_FORWARD. The available modes are:
1801: SCATTER_FORWARD, SCATTER_REVERSE
1803: Level: intermediate
1805: Notes:
1806: If you use SCATTER_REVERSE the arguments x and y should be reversed relative to the SCATTER_FORWARD call.
1808: y[iy[i]] = x[ix[i]], for i=0,...,ni-1
1810: .seealso: VecScatterBegin(), VecScatterCreate()
1811: @*/
1812: PetscErrorCode VecScatterEnd(VecScatter ctx,Vec x,Vec y,InsertMode addv,ScatterMode mode)
1813: {
1820: ctx->inuse = PETSC_FALSE;
1821: if (!ctx->ops->end) return(0);
1822: if (!ctx->beginandendtogether) {
1823: PetscLogEventBegin(VEC_ScatterEnd,ctx,x,y,0);
1824: (*(ctx)->ops->end)(ctx,x,y,addv,mode);
1825: PetscLogEventEnd(VEC_ScatterEnd,ctx,x,y,0);
1826: }
1827: return(0);
1828: }
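/*
   Illustrative sketch (added for exposition, not part of the original source): a
   reverse scatter swaps the roles of the two vectors, so the vector arguments are
   passed in the opposite order from the forward call:

     ierr = VecScatterBegin(ctx,x,y,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
     ierr = VecScatterEnd(ctx,x,y,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

     ierr = VecScatterBegin(ctx,y,x,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
     ierr = VecScatterEnd(ctx,y,x,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
*/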
1830: /*@C
1831: VecScatterDestroy - Destroys a scatter context created by
1832: VecScatterCreate().
1834: Collective on VecScatter
1836: Input Parameter:
1837: . ctx - the scatter context
1839: Level: intermediate
1841: .seealso: VecScatterCreate(), VecScatterCopy()
1842: @*/
1843: PetscErrorCode VecScatterDestroy(VecScatter *ctx)
1844: {
1848: if (!*ctx) return(0);
1850: if ((*ctx)->inuse && ((PetscObject)(*ctx))->refct == 1) SETERRQ(((PetscObject)(*ctx))->comm,PETSC_ERR_ARG_WRONGSTATE,"Scatter context is in use");
1851: if (--((PetscObject)(*ctx))->refct > 0) {*ctx = 0; return(0);}
1853: /* if memory was published with SAWs then destroy it */
1854: PetscObjectSAWsViewOff((PetscObject)(*ctx));
1855: if ((*ctx)->ops->destroy) {(*(*ctx)->ops->destroy)(*ctx);}
1856: #if defined(PETSC_HAVE_VECCUDA)
1857: VecScatterCUDAIndicesDestroy((PetscCUDAIndices*)&((*ctx)->spptr));
1858: #endif
1859: PetscHeaderDestroy(ctx);
1860: return(0);
1861: }
1863: /*@
1864: VecScatterCopy - Makes a copy of a scatter context.
1866: Collective on VecScatter
1868: Input Parameter:
1869: . sctx - the scatter context
1871: Output Parameter:
1872: . ctx - the context copy
1874: Level: advanced
1876: .seealso: VecScatterCreate(), VecScatterDestroy()
1877: @*/
1878: PetscErrorCode VecScatterCopy(VecScatter sctx,VecScatter *ctx)
1879: {
1881: VecScatterType type;
1886: if (!sctx->ops->copy) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot copy this type");
1887: PetscHeaderCreate(*ctx,VEC_SCATTER_CLASSID,"VecScatter","VecScatter","Vec",PetscObjectComm((PetscObject)sctx),VecScatterDestroy,VecScatterView);
1888: (*ctx)->to_n = sctx->to_n;
1889: (*ctx)->from_n = sctx->from_n;
1890: (*sctx->ops->copy)(sctx,*ctx);
1892: VecScatterGetType(sctx,&type);
1893: PetscObjectChangeTypeName((PetscObject)(*ctx),type);
1894: return(0);
1895: }
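/*
   Illustrative sketch (added for exposition, not part of the original source): a
   copied context is independent of the original, so two scatters with the same
   communication pattern can be in flight at once (a single context cannot, since
   it is marked in use between begin and end):

     VecScatter ctx2;

     ierr = VecScatterCopy(ctx,&ctx2);CHKERRQ(ierr);
     ierr = VecScatterBegin(ctx,x1,y1,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
     ierr = VecScatterBegin(ctx2,x2,y2,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
     ierr = VecScatterEnd(ctx,x1,y1,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
     ierr = VecScatterEnd(ctx2,x2,y2,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
     ierr = VecScatterDestroy(&ctx2);CHKERRQ(ierr);
*/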
1897: /* ------------------------------------------------------------------*/
1898: /*@C
1899: VecScatterView - Views a vector scatter context.
1901: Collective on VecScatter
1903: Input Parameters:
1904: + ctx - the scatter context
1905: - viewer - the viewer for displaying the context
1907: Level: intermediate
1909: @*/
1910: PetscErrorCode VecScatterView(VecScatter ctx,PetscViewer viewer)
1911: {
1916: if (!viewer) {
1917: PetscViewerASCIIGetStdout(PetscObjectComm((PetscObject)ctx),&viewer);
1918: }
1920: if (ctx->ops->view) {
1921: (*ctx->ops->view)(ctx,viewer);
1922: }
1923: return(0);
1924: }
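/*
   Illustrative sketch (added for exposition, not part of the original source):

     ierr = VecScatterView(ctx,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);

   or, without code changes, via the option handled in the create routines above:

     ./app -vecscatter_view
*/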
1926: /*@C
1927: VecScatterRemap - Remaps the "from" and "to" indices in a
1928: vector scatter context. FOR EXPERTS ONLY!
1930: Collective on VecScatter
1932: Input Parameters:
1933: + scat - vector scatter context
1934: . rto - remapping for "to" indices (may be NULL)
1935: - rfrom - remapping for "from" indices (may be NULL)
1937: Level: developer
1939: Notes: In the parallel case the todata is actually the indices
1940: from which the data is TAKEN! The from stuff is where the
1941: data is finally put. This is VERY VERY confusing!
1943: In the sequential case the todata is the indices where the
1944: data is put and the fromdata is where it is taken from.
1945: This is backwards from the parallel case! CRY! CRY! CRY!
1947: @*/
1948: PetscErrorCode VecScatterRemap(VecScatter scat,PetscInt *rto,PetscInt *rfrom)
1949: {
1950: VecScatter_Seq_General *to,*from;
1951: VecScatter_MPI_General *mto;
1952: PetscInt i;
1959: from = (VecScatter_Seq_General*)scat->fromdata;
1960: mto = (VecScatter_MPI_General*)scat->todata;
1962: if (mto->format == VEC_SCATTER_MPI_TOALL) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Not for to all scatter");
1964: if (rto) {
1965: if (mto->format == VEC_SCATTER_MPI_GENERAL) {
1966: /* handle off processor parts */
1967: for (i=0; i<mto->starts[mto->n]; i++) mto->indices[i] = rto[mto->indices[i]];
1969: /* handle local part */
1970: to = &mto->local;
1971: for (i=0; i<to->n; i++) to->vslots[i] = rto[to->vslots[i]];
1972: } else if (from->format == VEC_SCATTER_SEQ_GENERAL) {
1973: for (i=0; i<from->n; i++) from->vslots[i] = rto[from->vslots[i]];
1974: } else if (from->format == VEC_SCATTER_SEQ_STRIDE) {
1975: VecScatter_Seq_Stride *sto = (VecScatter_Seq_Stride*)from;
1977: /* if the remapping is the identity and stride is identity then skip remap */
1978: if (sto->step == 1 && sto->first == 0) {
1979: for (i=0; i<sto->n; i++) {
1980: if (rto[i] != i) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Unable to remap such scatters");
1981: }
1982: } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Unable to remap such scatters");
1983: } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Unable to remap such scatters");
1984: }
1986: if (rfrom) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Unable to remap the FROM in scatters yet");
1988: /*
1989: Mark the vector lengths as unknown because we do not know the
1990: lengths of the remapped vectors
1991: */
1992: scat->from_n = -1;
1993: scat->to_n = -1;
1994: return(0);
1995: }
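/*
   Illustrative sketch (added for exposition, not part of the original source): as
   the error below for rfrom shows, only the "to" indices can currently be remapped,
   so a typical call permutes them and passes NULL for the "from" remapping (rto
   here is an illustrative array mapping old index values to new ones):

     ierr = VecScatterRemap(scat,rto,NULL);CHKERRQ(ierr);
*/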
1997: /*
1998: VecScatterGetTypes_Private - Returns the scatter types.
2000: scatter - The scatter.
2001: from - Upon exit this contains the type of the from scatter.
2002: to - Upon exit this contains the type of the to scatter.
2003: */
2004: PetscErrorCode VecScatterGetTypes_Private(VecScatter scatter,VecScatterFormat *from,VecScatterFormat *to)
2005: {
2006: VecScatter_Common* fromdata = (VecScatter_Common*)scatter->fromdata;
2007: VecScatter_Common* todata = (VecScatter_Common*)scatter->todata;
2010: *from = fromdata->format;
2011: *to = todata->format;
2012: return(0);
2013: }
2016: /*
2017: VecScatterIsSequential_Private - Returns true if the scatter is sequential.
2019: scatter - The scatter.
2020: flag - Upon exit flag is true if the scatter is of type VecScatter_Seq_General
2021: or VecScatter_Seq_Stride; otherwise flag is false.
2022: */
2023: PetscErrorCode VecScatterIsSequential_Private(VecScatter_Common *scatter,PetscBool *flag)
2024: {
2025: VecScatterFormat scatterType = scatter->format;
2028: if (scatterType == VEC_SCATTER_SEQ_GENERAL || scatterType == VEC_SCATTER_SEQ_STRIDE) {
2029: *flag = PETSC_TRUE;
2030: } else {
2031: *flag = PETSC_FALSE;
2032: }
2033: return(0);
2034: }
2036: #if defined(PETSC_HAVE_VECCUDA)
2038: /*@C
2039: VecScatterInitializeForGPU - Initializes a generalized scatter from one vector
2040: to another for GPU based computation.
2042: Input Parameters:
2043: + inctx - scatter context generated by VecScatterCreate()
2044: . x - the vector from which we scatter
2045: - mode - the scattering mode, usually SCATTER_FORWARD. The available modes are:
2046: SCATTER_FORWARD or SCATTER_REVERSE
2048: Level: intermediate
2050: Notes:
2051: Effectively, this function creates all the necessary indexing buffers and work
2052: vectors needed to move only those data points in a vector which need to
2053: be communicated across ranks. This is done the first time this function is
2054: called. Currently, this is only used in the context of the parallel SpMV call in
2055: MatMult_MPIAIJCUSPARSE.
2057: This function is executed before the call to MatMult. This enables the memory
2058: transfers to be overlapped with the MatMult SpMV kernel call.
2060: .seealso: VecScatterFinalizeForGPU(), VecScatterCreate(), VecScatterEnd()
2061: @*/
2062: PETSC_EXTERN PetscErrorCode VecScatterInitializeForGPU(VecScatter inctx,Vec x,ScatterMode mode)
2063: {
2065: VecScatter_MPI_General *to,*from;
2066: PetscErrorCode ierr;
2067: PetscInt i,*indices,*sstartsSends,*sstartsRecvs,nrecvs,nsends,bs;
2068: PetscBool isSeq1,isSeq2;
2071: VecScatterIsSequential_Private((VecScatter_Common*)inctx->fromdata,&isSeq1);
2072: VecScatterIsSequential_Private((VecScatter_Common*)inctx->todata,&isSeq2);
2073: if (isSeq1 || isSeq2) {
2074: return(0);
2075: }
2076: if (mode & SCATTER_REVERSE) {
2077: to = (VecScatter_MPI_General*)inctx->fromdata;
2078: from = (VecScatter_MPI_General*)inctx->todata;
2079: } else {
2080: to = (VecScatter_MPI_General*)inctx->todata;
2081: from = (VecScatter_MPI_General*)inctx->fromdata;
2082: }
2083: bs = to->bs;
2084: nrecvs = from->n;
2085: nsends = to->n;
2086: indices = to->indices;
2087: sstartsSends = to->starts;
2088: sstartsRecvs = from->starts;
2089: if (x->valid_GPU_array != PETSC_OFFLOAD_UNALLOCATED && (nsends>0 || nrecvs>0)) {
2090: if (!inctx->spptr) {
2091: PetscInt k,*tindicesSends,*sindicesSends,*tindicesRecvs,*sindicesRecvs;
2092: PetscInt ns = sstartsSends[nsends],nr = sstartsRecvs[nrecvs];
2093: /* Here we create indices for both the senders and receivers. */
2094: PetscMalloc1(ns,&tindicesSends);
2095: PetscMalloc1(nr,&tindicesRecvs);
2097: PetscMemcpy(tindicesSends,indices,ns*sizeof(PetscInt));
2098: PetscMemcpy(tindicesRecvs,from->indices,nr*sizeof(PetscInt));
2100: PetscSortRemoveDupsInt(&ns,tindicesSends);
2101: PetscSortRemoveDupsInt(&nr,tindicesRecvs);
2103: PetscMalloc1(bs*ns,&sindicesSends);
2104: PetscMalloc1(from->bs*nr,&sindicesRecvs);
2106: /* sender indices */
2107: for (i=0; i<ns; i++) {
2108: for (k=0; k<bs; k++) sindicesSends[i*bs+k] = tindicesSends[i]+k;
2109: }
2110: PetscFree(tindicesSends);
2112: /* receiver indices */
2113: for (i=0; i<nr; i++) {
2114: for (k=0; k<from->bs; k++) sindicesRecvs[i*from->bs+k] = tindicesRecvs[i]+k;
2115: }
2116: PetscFree(tindicesRecvs);
2118: /* create GPU indices, work vectors, ... */
2119: VecScatterCUDAIndicesCreate_PtoP(ns*bs,sindicesSends,nr*from->bs,sindicesRecvs,(PetscCUDAIndices*)&inctx->spptr);
2120: PetscFree(sindicesSends);
2121: PetscFree(sindicesRecvs);
2122: }
2123: }
2124: return(0);
2125: }
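/*
   Illustrative sketch (added for exposition, not part of the original source): the
   intended calling pattern around a matrix-vector product, as described in the
   notes above (MatMult_MPIAIJCUSPARSE does this internally; xlocal is an
   illustrative name for the ghosted work vector):

     ierr = VecScatterInitializeForGPU(ctx,x,SCATTER_FORWARD);CHKERRQ(ierr);
     ierr = VecScatterBegin(ctx,x,xlocal,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
     ... launch the local SpMV kernel here to overlap with the transfers ...
     ierr = VecScatterEnd(ctx,x,xlocal,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
     ierr = VecScatterFinalizeForGPU(ctx);CHKERRQ(ierr);
*/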
2127: /*@C
2128: VecScatterFinalizeForGPU - Finalizes a generalized scatter from one vector to
2129: another for GPU based computation.
2131: Input Parameter:
2132: . inctx - scatter context generated by VecScatterCreate()
2134: Level: intermediate
2136: Notes:
2137: Effectively, this function resets the temporary buffer flags. Currently, this is
2138: only used in the context of the parallel SpMV call in MatMult_MPIAIJCUSP
2139: or MatMult_MPIAIJCUSPARSE. Once the MatMultAdd is finished, the GPU temporary
2140: buffers used for messaging are no longer valid.
2142: .seealso: VecScatterInitializeForGPU(), VecScatterCreate(), VecScatterEnd()
2143: @*/
2144: PETSC_EXTERN PetscErrorCode VecScatterFinalizeForGPU(VecScatter inctx)
2145: {
2147: return(0);
2148: }
2150: #endif