Actual source code: vscat.c
petsc-3.8.4 2018-03-24
2: /*
3: Code for creating scatters between vectors. This file
4: includes the code for scattering between sequential vectors and
5: some special cases for parallel scatters.
6: */
8: #include <petsc/private/isimpl.h>
9: #include <petsc/private/vecimpl.h>
11: #if defined(PETSC_HAVE_CUSP)
12: PETSC_INTERN PetscErrorCode VecScatterCUSPIndicesCreate_PtoP(PetscInt,PetscInt*,PetscInt,PetscInt*,PetscCUSPIndices*);
13: PETSC_INTERN PetscErrorCode VecScatterCUSPIndicesCreate_StoS(PetscInt,PetscInt,PetscInt,PetscInt,PetscInt,PetscInt*,PetscInt*,PetscCUSPIndices*);
14: PETSC_INTERN PetscErrorCode VecScatterCUSPIndicesDestroy(PetscCUSPIndices*);
15: PETSC_INTERN PetscErrorCode VecScatterCUSP_StoS(Vec,Vec,PetscCUSPIndices,InsertMode,ScatterMode);
16: #elif defined(PETSC_HAVE_VECCUDA)
17: PETSC_INTERN PetscErrorCode VecScatterCUDAIndicesCreate_PtoP(PetscInt,PetscInt*,PetscInt,PetscInt*,PetscCUDAIndices*);
18: PETSC_INTERN PetscErrorCode VecScatterCUDAIndicesCreate_StoS(PetscInt,PetscInt,PetscInt,PetscInt,PetscInt,PetscInt*,PetscInt*,PetscCUDAIndices*);
19: PETSC_INTERN PetscErrorCode VecScatterCUDAIndicesDestroy(PetscCUDAIndices*);
20: PETSC_INTERN PetscErrorCode VecScatterCUDA_StoS(Vec,Vec,PetscCUDAIndices,InsertMode,ScatterMode);
21: #endif
23: /* Logging support */
24: PetscClassId VEC_SCATTER_CLASSID;
26: #if defined(PETSC_USE_DEBUG)
27: /*
28: Checks if any indices are less than zero and generates an error
29: */
30: static PetscErrorCode VecScatterCheckIndices_Private(PetscInt nmax,PetscInt n,const PetscInt *idx)
31: {
32: PetscInt i;
35: for (i=0; i<n; i++) {
36: if (idx[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative index %D at %D location",idx[i],i);
37: if (idx[i] >= nmax) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index %D at %D location greater than max %D",idx[i],i,nmax);
38: }
39: return(0);
40: }
41: #endif
43: /*
44: This is special scatter code for when the entire parallel vector is copied to each processor.
46: This code was written by Cameron Cooper, Occidental College, Fall 1995,
47: will working at ANL as a SERS student.
48: */
49: PetscErrorCode VecScatterBegin_MPI_ToAll(VecScatter ctx,Vec x,Vec y,InsertMode addv,ScatterMode mode)
50: {
52: PetscInt yy_n,xx_n;
53: PetscScalar *xv,*yv;
56: VecGetLocalSize(y,&yy_n);
57: VecGetLocalSize(x,&xx_n);
58: VecGetArrayPair(x,y,&xv,&yv);
60: if (mode & SCATTER_REVERSE) {
61: PetscScalar *xvt,*xvt2;
62: VecScatter_MPI_ToAll *scat = (VecScatter_MPI_ToAll*)ctx->todata;
63: PetscInt i;
64: PetscMPIInt *disply = scat->displx;
66: if (addv == INSERT_VALUES) {
67: PetscInt rstart,rend;
68: /*
69: copy the correct part of the local vector into the local storage of
70: the MPI one. Note: this operation only makes sense if all the local
71: vectors have the same values
72: */
73: VecGetOwnershipRange(y,&rstart,&rend);
74: PetscMemcpy(yv,xv+rstart,yy_n*sizeof(PetscScalar));
75: } else {
76: MPI_Comm comm;
77: PetscMPIInt rank;
78: PetscObjectGetComm((PetscObject)y,&comm);
79: MPI_Comm_rank(comm,&rank);
80: if (scat->work1) xvt = scat->work1;
81: else {
82: PetscMalloc1(xx_n,&xvt);
83: scat->work1 = xvt;
84: }
85: if (!rank) { /* I am the zeroth processor, values are accumulated here */
86: if (scat->work2) xvt2 = scat->work2;
87: else {
88: PetscMalloc1(xx_n,&xvt2);
89: scat->work2 = xvt2;
90: }
91: MPI_Gatherv(yv,yy_n,MPIU_SCALAR,xvt2,scat->count,disply,MPIU_SCALAR,0,PetscObjectComm((PetscObject)ctx));
92: MPI_Reduce(xv,xvt,xx_n,MPIU_SCALAR,MPIU_SUM,0,PetscObjectComm((PetscObject)ctx));
93: if (addv == ADD_VALUES) {
94: for (i=0; i<xx_n; i++) xvt[i] += xvt2[i];
95: #if !defined(PETSC_USE_COMPLEX)
96: } else if (addv == MAX_VALUES) {
97: for (i=0; i<xx_n; i++) xvt[i] = PetscMax(xvt[i],xvt2[i]);
98: #endif
99: } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_UNKNOWN_TYPE,"Wrong insert option");
100: MPI_Scatterv(xvt,scat->count,disply,MPIU_SCALAR,yv,yy_n,MPIU_SCALAR,0,PetscObjectComm((PetscObject)ctx));
101: } else {
102: MPI_Gatherv(yv,yy_n,MPIU_SCALAR,0, 0,0,MPIU_SCALAR,0,PetscObjectComm((PetscObject)ctx));
103: MPI_Reduce(xv,xvt,xx_n,MPIU_SCALAR,MPIU_SUM,0,PetscObjectComm((PetscObject)ctx));
104: MPI_Scatterv(0,scat->count,disply,MPIU_SCALAR,yv,yy_n,MPIU_SCALAR,0,PetscObjectComm((PetscObject)ctx));
105: }
106: }
107: } else {
108: PetscScalar *yvt;
109: VecScatter_MPI_ToAll *scat = (VecScatter_MPI_ToAll*)ctx->todata;
110: PetscInt i;
111: PetscMPIInt *displx = scat->displx;
113: if (addv == INSERT_VALUES) {
114: MPI_Allgatherv(xv,xx_n,MPIU_SCALAR,yv,scat->count,displx,MPIU_SCALAR,PetscObjectComm((PetscObject)ctx));
115: } else {
116: if (scat->work1) yvt = scat->work1;
117: else {
118: PetscMalloc1(yy_n,&yvt);
119: scat->work1 = yvt;
120: }
121: MPI_Allgatherv(xv,xx_n,MPIU_SCALAR,yvt,scat->count,displx,MPIU_SCALAR,PetscObjectComm((PetscObject)ctx));
122: if (addv == ADD_VALUES) {
123: for (i=0; i<yy_n; i++) yv[i] += yvt[i];
124: #if !defined(PETSC_USE_COMPLEX)
125: } else if (addv == MAX_VALUES) {
126: for (i=0; i<yy_n; i++) yv[i] = PetscMax(yv[i],yvt[i]);
127: #endif
128: } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_UNKNOWN_TYPE,"Wrong insert option");
129: }
130: }
131: VecRestoreArrayPair(x,y,&xv,&yv);
132: return(0);
133: }
135: PetscErrorCode VecScatterView_MPI_ToAll(VecScatter in,PetscViewer viewer)
136: {
138: PetscBool isascii;
141: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&isascii);
142: if (isascii) {
143: PetscViewerASCIIPrintf(viewer,"Entire parallel vector is copied to each process\n");
144: }
145: return(0);
146: }
148: /*
149: This is special scatter code for when the entire parallel vector is copied to processor 0.
151: */
152: PetscErrorCode VecScatterBegin_MPI_ToOne(VecScatter ctx,Vec x,Vec y,InsertMode addv,ScatterMode mode)
153: {
154: PetscErrorCode ierr;
155: PetscMPIInt rank;
156: PetscInt yy_n,xx_n;
157: PetscScalar *yv;
158: const PetscScalar *xv;
159: MPI_Comm comm;
162: VecGetLocalSize(y,&yy_n);
163: VecGetLocalSize(x,&xx_n);
164: VecGetArrayRead(x,&xv);
165: VecGetArray(y,&yv);
167: PetscObjectGetComm((PetscObject)x,&comm);
168: MPI_Comm_rank(comm,&rank);
170: /* -------- Reverse scatter; spread from processor 0 to other processors */
171: if (mode & SCATTER_REVERSE) {
172: PetscScalar *yvt;
173: VecScatter_MPI_ToAll *scat = (VecScatter_MPI_ToAll*)ctx->todata;
174: PetscInt i;
175: PetscMPIInt *disply = scat->displx;
177: if (addv == INSERT_VALUES) {
178: MPI_Scatterv((PetscScalar*)xv,scat->count,disply,MPIU_SCALAR,yv,yy_n,MPIU_SCALAR,0,PetscObjectComm((PetscObject)ctx));
179: } else {
180: if (scat->work2) yvt = scat->work2;
181: else {
182: PetscMalloc1(xx_n,&yvt);
183: scat->work2 = yvt;
184: }
185: MPI_Scatterv((PetscScalar*)xv,scat->count,disply,MPIU_SCALAR,yvt,yy_n,MPIU_SCALAR,0,PetscObjectComm((PetscObject)ctx));
186: if (addv == ADD_VALUES) {
187: for (i=0; i<yy_n; i++) yv[i] += yvt[i];
188: #if !defined(PETSC_USE_COMPLEX)
189: } else if (addv == MAX_VALUES) {
190: for (i=0; i<yy_n; i++) yv[i] = PetscMax(yv[i],yvt[i]);
191: #endif
192: } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_UNKNOWN_TYPE,"Wrong insert option");
193: }
194: /* --------- Forward scatter; gather all values onto processor 0 */
195: } else {
196: PetscScalar *yvt = 0;
197: VecScatter_MPI_ToAll *scat = (VecScatter_MPI_ToAll*)ctx->todata;
198: PetscInt i;
199: PetscMPIInt *displx = scat->displx;
201: if (addv == INSERT_VALUES) {
202: MPI_Gatherv((PetscScalar*)xv,xx_n,MPIU_SCALAR,yv,scat->count,displx,MPIU_SCALAR,0,PetscObjectComm((PetscObject)ctx));
203: } else {
204: if (!rank) {
205: if (scat->work1) yvt = scat->work1;
206: else {
207: PetscMalloc1(yy_n,&yvt);
208: scat->work1 = yvt;
209: }
210: }
211: MPI_Gatherv((PetscScalar*)xv,xx_n,MPIU_SCALAR,yvt,scat->count,displx,MPIU_SCALAR,0,PetscObjectComm((PetscObject)ctx));
212: if (!rank) {
213: if (addv == ADD_VALUES) {
214: for (i=0; i<yy_n; i++) yv[i] += yvt[i];
215: #if !defined(PETSC_USE_COMPLEX)
216: } else if (addv == MAX_VALUES) {
217: for (i=0; i<yy_n; i++) yv[i] = PetscMax(yv[i],yvt[i]);
218: #endif
219: } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_UNKNOWN_TYPE,"Wrong insert option");
220: }
221: }
222: }
223: VecRestoreArrayRead(x,&xv);
224: VecRestoreArray(y,&yv);
225: return(0);
226: }
228: /*
229: The follow to are used for both VecScatterBegin_MPI_ToAll() and VecScatterBegin_MPI_ToOne()
230: */
231: PetscErrorCode VecScatterDestroy_MPI_ToAll(VecScatter ctx)
232: {
233: VecScatter_MPI_ToAll *scat = (VecScatter_MPI_ToAll*)ctx->todata;
234: PetscErrorCode ierr;
237: PetscFree(scat->work1);
238: PetscFree(scat->work2);
239: PetscFree3(ctx->todata,scat->count,scat->displx);
240: return(0);
241: }
243: PetscErrorCode VecScatterDestroy_SGToSG(VecScatter ctx)
244: {
248: PetscFree2(((VecScatter_Seq_General*)ctx->todata)->vslots,((VecScatter_Seq_General*)ctx->fromdata)->vslots);
249: PetscFree2(ctx->todata,ctx->fromdata);
250: return(0);
251: }
253: PetscErrorCode VecScatterDestroy_SGToSS(VecScatter ctx)
254: {
258: PetscFree(((VecScatter_Seq_General*)ctx->fromdata)->vslots);
259: PetscFree2(ctx->todata,ctx->fromdata);
260: return(0);
261: }
263: PetscErrorCode VecScatterDestroy_SSToSG(VecScatter ctx)
264: {
268: PetscFree(((VecScatter_Seq_General*)ctx->todata)->vslots);
269: PetscFree2(ctx->todata,ctx->fromdata);
270: return(0);
271: }
273: PetscErrorCode VecScatterDestroy_SSToSS(VecScatter ctx)
274: {
278: PetscFree2(ctx->todata,ctx->fromdata);
279: return(0);
280: }
282: /* -------------------------------------------------------------------------*/
283: PetscErrorCode VecScatterCopy_MPI_ToAll(VecScatter in,VecScatter out)
284: {
285: VecScatter_MPI_ToAll *in_to = (VecScatter_MPI_ToAll*)in->todata,*sto;
286: PetscErrorCode ierr;
287: PetscMPIInt size,*count,*displx;
288: PetscInt i;
291: out->ops->begin = in->ops->begin;
292: out->ops->end = in->ops->end;
293: out->ops->copy = in->ops->copy;
294: out->ops->destroy = in->ops->destroy;
295: out->ops->view = in->ops->view;
297: MPI_Comm_size(PetscObjectComm((PetscObject)out),&size);
298: PetscMalloc3(1,&sto,size,&count,size,&displx);
299: sto->type = in_to->type;
300: sto->count = count;
301: sto->displx = displx;
302: for (i=0; i<size; i++) {
303: sto->count[i] = in_to->count[i];
304: sto->displx[i] = in_to->displx[i];
305: }
306: sto->work1 = 0;
307: sto->work2 = 0;
308: out->todata = (void*)sto;
309: out->fromdata = (void*)0;
310: return(0);
311: }
313: /* --------------------------------------------------------------------------------------*/
314: /*
315: Scatter: sequential general to sequential general
316: */
317: PetscErrorCode VecScatterBegin_SGToSG(VecScatter ctx,Vec x,Vec y,InsertMode addv,ScatterMode mode)
318: {
319: VecScatter_Seq_General *gen_to = (VecScatter_Seq_General*)ctx->todata;
320: VecScatter_Seq_General *gen_from = (VecScatter_Seq_General*)ctx->fromdata;
321: PetscErrorCode ierr;
322: PetscInt i,n = gen_from->n,*fslots,*tslots;
323: PetscScalar *xv,*yv;
326: #if defined(PETSC_HAVE_CUSP)
327: if (x->valid_GPU_array == PETSC_CUSP_GPU) {
328: /* create the scatter indices if not done already */
329: if (!ctx->spptr) {
330: PetscInt tofirst = 0,tostep = 0,fromfirst = 0,fromstep = 0;
331: fslots = gen_from->vslots;
332: tslots = gen_to->vslots;
333: VecScatterCUSPIndicesCreate_StoS(n,tofirst,fromfirst,tostep,fromstep,tslots,fslots,(PetscCUSPIndices*)&(ctx->spptr));
334: }
335: /* next do the scatter */
336: VecScatterCUSP_StoS(x,y,(PetscCUSPIndices)ctx->spptr,addv,mode);
337: return(0);
338: }
339: #elif defined(PETSC_HAVE_VECCUDA)
340: if (x->valid_GPU_array == PETSC_CUDA_GPU) {
341: /* create the scatter indices if not done already */
342: if (!ctx->spptr) {
343: PetscInt tofirst = 0,tostep = 0,fromfirst = 0,fromstep = 0;
344: fslots = gen_from->vslots;
345: tslots = gen_to->vslots;
346: VecScatterCUDAIndicesCreate_StoS(n,tofirst,fromfirst,tostep,fromstep,tslots,fslots,(PetscCUDAIndices*)&(ctx->spptr));
347: }
348: /* next do the scatter */
349: VecScatterCUDA_StoS(x,y,(PetscCUDAIndices)ctx->spptr,addv,mode);
350: return(0);
351: }
352: #endif
354: VecGetArrayPair(x,y,&xv,&yv);
355: if (mode & SCATTER_REVERSE) {
356: gen_to = (VecScatter_Seq_General*)ctx->fromdata;
357: gen_from = (VecScatter_Seq_General*)ctx->todata;
358: }
359: fslots = gen_from->vslots;
360: tslots = gen_to->vslots;
362: if (addv == INSERT_VALUES) {
363: for (i=0; i<n; i++) yv[tslots[i]] = xv[fslots[i]];
364: } else if (addv == ADD_VALUES) {
365: for (i=0; i<n; i++) yv[tslots[i]] += xv[fslots[i]];
366: #if !defined(PETSC_USE_COMPLEX)
367: } else if (addv == MAX_VALUES) {
368: for (i=0; i<n; i++) yv[tslots[i]] = PetscMax(yv[tslots[i]],xv[fslots[i]]);
369: #endif
370: } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_UNKNOWN_TYPE,"Wrong insert option");
371: VecRestoreArrayPair(x,y,&xv,&yv);
372: return(0);
373: }
375: /*
376: Scatter: sequential general to sequential stride 1
377: */
378: PetscErrorCode VecScatterBegin_SGToSS_Stride1(VecScatter ctx,Vec x,Vec y,InsertMode addv,ScatterMode mode)
379: {
380: VecScatter_Seq_Stride *gen_to = (VecScatter_Seq_Stride*)ctx->todata;
381: VecScatter_Seq_General *gen_from = (VecScatter_Seq_General*)ctx->fromdata;
382: PetscInt i,n = gen_from->n,*fslots = gen_from->vslots;
383: PetscErrorCode ierr;
384: PetscInt first = gen_to->first;
385: PetscScalar *xv,*yv;
388: #if defined(PETSC_HAVE_CUSP)
389: if (x->valid_GPU_array == PETSC_CUSP_GPU) {
390: /* create the scatter indices if not done already */
391: if (!ctx->spptr) {
392: PetscInt tofirst = first,tostep = 1,fromfirst = 0,fromstep = 0;
393: PetscInt *tslots = 0;
394: VecScatterCUSPIndicesCreate_StoS(n,tofirst,fromfirst,tostep,fromstep,tslots,fslots,(PetscCUSPIndices*)&(ctx->spptr));
395: }
396: /* next do the scatter */
397: VecScatterCUSP_StoS(x,y,(PetscCUSPIndices)ctx->spptr,addv,mode);
398: return(0);
399: }
400: #elif defined(PETSC_HAVE_VECCUDA)
401: if (x->valid_GPU_array == PETSC_CUDA_GPU) {
402: /* create the scatter indices if not done already */
403: if (!ctx->spptr) {
404: PetscInt tofirst = first,tostep = 1,fromfirst = 0,fromstep = 0;
405: PetscInt *tslots = 0;
406: VecScatterCUDAIndicesCreate_StoS(n,tofirst,fromfirst,tostep,fromstep,tslots,fslots,(PetscCUDAIndices*)&(ctx->spptr));
407: }
408: /* next do the scatter */
409: VecScatterCUDA_StoS(x,y,(PetscCUDAIndices)ctx->spptr,addv,mode);
410: return(0);
411: }
412: #endif
414: VecGetArrayPair(x,y,&xv,&yv);
415: if (mode & SCATTER_REVERSE) {
416: xv += first;
417: if (addv == INSERT_VALUES) {
418: for (i=0; i<n; i++) yv[fslots[i]] = xv[i];
419: } else if (addv == ADD_VALUES) {
420: for (i=0; i<n; i++) yv[fslots[i]] += xv[i];
421: #if !defined(PETSC_USE_COMPLEX)
422: } else if (addv == MAX_VALUES) {
423: for (i=0; i<n; i++) yv[fslots[i]] = PetscMax(yv[fslots[i]],xv[i]);
424: #endif
425: } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_UNKNOWN_TYPE,"Wrong insert option");
426: } else {
427: yv += first;
428: if (addv == INSERT_VALUES) {
429: for (i=0; i<n; i++) yv[i] = xv[fslots[i]];
430: } else if (addv == ADD_VALUES) {
431: for (i=0; i<n; i++) yv[i] += xv[fslots[i]];
432: #if !defined(PETSC_USE_COMPLEX)
433: } else if (addv == MAX_VALUES) {
434: for (i=0; i<n; i++) yv[i] = PetscMax(yv[i],xv[fslots[i]]);
435: #endif
436: } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_UNKNOWN_TYPE,"Wrong insert option");
437: }
438: VecRestoreArrayPair(x,y,&xv,&yv);
439: return(0);
440: }
442: /*
443: Scatter: sequential general to sequential stride
444: */
445: PetscErrorCode VecScatterBegin_SGToSS(VecScatter ctx,Vec x,Vec y,InsertMode addv,ScatterMode mode)
446: {
447: VecScatter_Seq_Stride *gen_to = (VecScatter_Seq_Stride*)ctx->todata;
448: VecScatter_Seq_General *gen_from = (VecScatter_Seq_General*)ctx->fromdata;
449: PetscInt i,n = gen_from->n,*fslots = gen_from->vslots;
450: PetscErrorCode ierr;
451: PetscInt first = gen_to->first,step = gen_to->step;
452: PetscScalar *xv,*yv;
455: #if defined(PETSC_HAVE_CUSP)
456: if (x->valid_GPU_array == PETSC_CUSP_GPU) {
457: /* create the scatter indices if not done already */
458: if (!ctx->spptr) {
459: PetscInt tofirst = first,tostep = step,fromfirst = 0,fromstep = 0;
460: PetscInt * tslots = 0;
461: VecScatterCUSPIndicesCreate_StoS(n,tofirst,fromfirst,tostep,fromstep,tslots,fslots,(PetscCUSPIndices*)&(ctx->spptr));
462: }
463: /* next do the scatter */
464: VecScatterCUSP_StoS(x,y,(PetscCUSPIndices)ctx->spptr,addv,mode);
465: return(0);
466: }
467: #elif defined(PETSC_HAVE_VECCUDA)
468: if (x->valid_GPU_array == PETSC_CUDA_GPU) {
469: /* create the scatter indices if not done already */
470: if (!ctx->spptr) {
471: PetscInt tofirst = first,tostep = step,fromfirst = 0,fromstep = 0;
472: PetscInt * tslots = 0;
473: VecScatterCUDAIndicesCreate_StoS(n,tofirst,fromfirst,tostep,fromstep,tslots,fslots,(PetscCUDAIndices*)&(ctx->spptr));
474: }
475: /* next do the scatter */
476: VecScatterCUDA_StoS(x,y,(PetscCUDAIndices)ctx->spptr,addv,mode);
477: return(0);
478: }
479: #endif
481: VecGetArrayPair(x,y,&xv,&yv);
482: if (mode & SCATTER_REVERSE) {
483: if (addv == INSERT_VALUES) {
484: for (i=0; i<n; i++) yv[fslots[i]] = xv[first + i*step];
485: } else if (addv == ADD_VALUES) {
486: for (i=0; i<n; i++) yv[fslots[i]] += xv[first + i*step];
487: #if !defined(PETSC_USE_COMPLEX)
488: } else if (addv == MAX_VALUES) {
489: for (i=0; i<n; i++) yv[fslots[i]] = PetscMax(yv[fslots[i]],xv[first + i*step]);
490: #endif
491: } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_UNKNOWN_TYPE,"Wrong insert option");
492: } else {
493: if (addv == INSERT_VALUES) {
494: for (i=0; i<n; i++) yv[first + i*step] = xv[fslots[i]];
495: } else if (addv == ADD_VALUES) {
496: for (i=0; i<n; i++) yv[first + i*step] += xv[fslots[i]];
497: #if !defined(PETSC_USE_COMPLEX)
498: } else if (addv == MAX_VALUES) {
499: for (i=0; i<n; i++) yv[first + i*step] = PetscMax(yv[first + i*step],xv[fslots[i]]);
500: #endif
501: } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_UNKNOWN_TYPE,"Wrong insert option");
502: }
503: VecRestoreArrayPair(x,y,&xv,&yv);
504: return(0);
505: }
507: /*
508: Scatter: sequential stride 1 to sequential general
509: */
510: PetscErrorCode VecScatterBegin_SSToSG_Stride1(VecScatter ctx,Vec x,Vec y,InsertMode addv,ScatterMode mode)
511: {
512: VecScatter_Seq_Stride *gen_from = (VecScatter_Seq_Stride*)ctx->fromdata;
513: VecScatter_Seq_General *gen_to = (VecScatter_Seq_General*)ctx->todata;
514: PetscInt i,n = gen_from->n,*tslots = gen_to->vslots;
515: PetscErrorCode ierr;
516: PetscInt first = gen_from->first;
517: PetscScalar *xv,*yv;
520: #if defined(PETSC_HAVE_CUSP)
521: if (x->valid_GPU_array == PETSC_CUSP_GPU) {
522: /* create the scatter indices if not done already */
523: if (!ctx->spptr) {
524: PetscInt tofirst = 0,tostep = 0,fromfirst = first,fromstep = 1;
525: PetscInt *fslots = 0;
526: VecScatterCUSPIndicesCreate_StoS(n,tofirst,fromfirst,tostep,fromstep,tslots,fslots,(PetscCUSPIndices*)&(ctx->spptr));
527: }
528: /* next do the scatter */
529: VecScatterCUSP_StoS(x,y,(PetscCUSPIndices)ctx->spptr,addv,mode);
530: return(0);
531: }
532: #elif defined(PETSC_HAVE_VECCUDA)
533: if (x->valid_GPU_array == PETSC_CUDA_GPU) {
534: /* create the scatter indices if not done already */
535: if (!ctx->spptr) {
536: PetscInt tofirst = 0,tostep = 0,fromfirst = first,fromstep = 1;
537: PetscInt *fslots = 0;
538: VecScatterCUDAIndicesCreate_StoS(n,tofirst,fromfirst,tostep,fromstep,tslots,fslots,(PetscCUDAIndices*)&(ctx->spptr));
539: }
540: /* next do the scatter */
541: VecScatterCUDA_StoS(x,y,(PetscCUDAIndices)ctx->spptr,addv,mode);
542: return(0);
543: }
544: #endif
546: VecGetArrayPair(x,y,&xv,&yv);
547: if (mode & SCATTER_REVERSE) {
548: yv += first;
549: if (addv == INSERT_VALUES) {
550: for (i=0; i<n; i++) yv[i] = xv[tslots[i]];
551: } else if (addv == ADD_VALUES) {
552: for (i=0; i<n; i++) yv[i] += xv[tslots[i]];
553: #if !defined(PETSC_USE_COMPLEX)
554: } else if (addv == MAX_VALUES) {
555: for (i=0; i<n; i++) yv[i] = PetscMax(yv[i],xv[tslots[i]]);
556: #endif
557: } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_UNKNOWN_TYPE,"Wrong insert option");
558: } else {
559: xv += first;
560: if (addv == INSERT_VALUES) {
561: for (i=0; i<n; i++) yv[tslots[i]] = xv[i];
562: } else if (addv == ADD_VALUES) {
563: for (i=0; i<n; i++) yv[tslots[i]] += xv[i];
564: #if !defined(PETSC_USE_COMPLEX)
565: } else if (addv == MAX_VALUES) {
566: for (i=0; i<n; i++) yv[tslots[i]] = PetscMax(yv[tslots[i]],xv[i]);
567: #endif
568: } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_UNKNOWN_TYPE,"Wrong insert option");
569: }
570: VecRestoreArrayPair(x,y,&xv,&yv);
571: return(0);
572: }
574: /*
575: Scatter: sequential stride to sequential general
576: */
577: PetscErrorCode VecScatterBegin_SSToSG(VecScatter ctx,Vec x,Vec y,InsertMode addv,ScatterMode mode)
578: {
579: VecScatter_Seq_Stride *gen_from = (VecScatter_Seq_Stride*)ctx->fromdata;
580: VecScatter_Seq_General *gen_to = (VecScatter_Seq_General*)ctx->todata;
581: PetscInt i,n = gen_from->n,*tslots = gen_to->vslots;
582: PetscErrorCode ierr;
583: PetscInt first = gen_from->first,step = gen_from->step;
584: PetscScalar *xv,*yv;
587: #if defined(PETSC_HAVE_CUSP)
588: if (x->valid_GPU_array == PETSC_CUSP_GPU) {
589: /* create the scatter indices if not done already */
590: if (!ctx->spptr) {
591: PetscInt tofirst = 0,tostep = 0,fromfirst = first,fromstep = step;
592: PetscInt *fslots = 0;
593: VecScatterCUSPIndicesCreate_StoS(n,tofirst,fromfirst,tostep,fromstep,tslots,fslots,(PetscCUSPIndices*)&(ctx->spptr));
594: }
595: /* next do the scatter */
596: VecScatterCUSP_StoS(x,y,(PetscCUSPIndices)ctx->spptr,addv,mode);
597: return(0);
598: }
599: #elif defined(PETSC_HAVE_VECCUDA)
600: if (x->valid_GPU_array == PETSC_CUDA_GPU) {
601: /* create the scatter indices if not done already */
602: if (!ctx->spptr) {
603: PetscInt tofirst = 0,tostep = 0,fromfirst = first,fromstep = step;
604: PetscInt *fslots = 0;
605: VecScatterCUDAIndicesCreate_StoS(n,tofirst,fromfirst,tostep,fromstep,tslots,fslots,(PetscCUDAIndices*)&(ctx->spptr));
606: }
607: /* next do the scatter */
608: VecScatterCUDA_StoS(x,y,(PetscCUDAIndices)ctx->spptr,addv,mode);
609: return(0);
610: }
611: #endif
613: VecGetArrayPair(x,y,&xv,&yv);
614: if (mode & SCATTER_REVERSE) {
615: if (addv == INSERT_VALUES) {
616: for (i=0; i<n; i++) yv[first + i*step] = xv[tslots[i]];
617: } else if (addv == ADD_VALUES) {
618: for (i=0; i<n; i++) yv[first + i*step] += xv[tslots[i]];
619: #if !defined(PETSC_USE_COMPLEX)
620: } else if (addv == MAX_VALUES) {
621: for (i=0; i<n; i++) yv[first + i*step] = PetscMax(yv[first + i*step],xv[tslots[i]]);
622: #endif
623: } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_UNKNOWN_TYPE,"Wrong insert option");
624: } else {
625: if (addv == INSERT_VALUES) {
626: for (i=0; i<n; i++) yv[tslots[i]] = xv[first + i*step];
627: } else if (addv == ADD_VALUES) {
628: for (i=0; i<n; i++) yv[tslots[i]] += xv[first + i*step];
629: #if !defined(PETSC_USE_COMPLEX)
630: } else if (addv == MAX_VALUES) {
631: for (i=0; i<n; i++) yv[tslots[i]] = PetscMax(yv[tslots[i]],xv[first + i*step]);
632: #endif
633: } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_UNKNOWN_TYPE,"Wrong insert option");
634: }
635: VecRestoreArrayPair(x,y,&xv,&yv);
636: return(0);
637: }
639: PetscErrorCode VecScatterView_SSToSG(VecScatter in,PetscViewer viewer)
640: {
641: PetscErrorCode ierr;
642: VecScatter_Seq_Stride *in_from = (VecScatter_Seq_Stride*)in->fromdata;
643: VecScatter_Seq_General *in_to = (VecScatter_Seq_General*)in->todata;
644: PetscInt i;
645: PetscBool isascii;
648: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&isascii);
649: if (isascii) {
650: PetscViewerASCIIPrintf(viewer,"Sequential stride to general scatter\n");
651: for (i=0; i<in_to->n; i++) {
652: PetscViewerASCIIPrintf(viewer,"%D to %D\n",in_from->first + in_from->step*i,in_to->vslots[i]);
653: }
654: }
655: return(0);
656: }
657: /*
658: Scatter: sequential stride to sequential stride
659: */
660: PetscErrorCode VecScatterBegin_SSToSS(VecScatter ctx,Vec x,Vec y,InsertMode addv,ScatterMode mode)
661: {
662: VecScatter_Seq_Stride *gen_to = (VecScatter_Seq_Stride*)ctx->todata;
663: VecScatter_Seq_Stride *gen_from = (VecScatter_Seq_Stride*)ctx->fromdata;
664: PetscInt i,n = gen_from->n,to_first = gen_to->first,to_step = gen_to->step;
665: PetscErrorCode ierr;
666: PetscInt from_first = gen_from->first,from_step = gen_from->step;
667: PetscScalar *xv,*yv;
670: #if defined(PETSC_HAVE_CUSP)
671: if (x->valid_GPU_array == PETSC_CUSP_GPU) {
672: /* create the scatter indices if not done already */
673: if (!ctx->spptr) {
674: PetscInt *tslots = 0,*fslots = 0;
675: VecScatterCUSPIndicesCreate_StoS(n,to_first,from_first,to_step,from_step,tslots,fslots,(PetscCUSPIndices*)&(ctx->spptr));
676: }
677: /* next do the scatter */
678: VecScatterCUSP_StoS(x,y,(PetscCUSPIndices)ctx->spptr,addv,mode);
679: return(0);
680: }
681: #elif defined(PETSC_HAVE_VECCUDA)
682: if (x->valid_GPU_array == PETSC_CUDA_GPU) {
683: /* create the scatter indices if not done already */
684: if (!ctx->spptr) {
685: PetscInt *tslots = 0,*fslots = 0;
686: VecScatterCUDAIndicesCreate_StoS(n,to_first,from_first,to_step,from_step,tslots,fslots,(PetscCUDAIndices*)&(ctx->spptr));
687: }
688: /* next do the scatter */
689: VecScatterCUDA_StoS(x,y,(PetscCUDAIndices)ctx->spptr,addv,mode);
690: return(0);
691: }
692: #endif
694: VecGetArrayPair(x,y,&xv,&yv);
695: if (mode & SCATTER_REVERSE) {
696: from_first = gen_to->first;
697: to_first = gen_from->first;
698: from_step = gen_to->step;
699: to_step = gen_from->step;
700: }
702: if (addv == INSERT_VALUES) {
703: if (to_step == 1 && from_step == 1) {
704: PetscMemcpy(yv+to_first,xv+from_first,n*sizeof(PetscScalar));
705: } else {
706: for (i=0; i<n; i++) yv[to_first + i*to_step] = xv[from_first+i*from_step];
707: }
708: } else if (addv == ADD_VALUES) {
709: if (to_step == 1 && from_step == 1) {
710: yv += to_first; xv += from_first;
711: for (i=0; i<n; i++) yv[i] += xv[i];
712: } else {
713: for (i=0; i<n; i++) yv[to_first + i*to_step] += xv[from_first+i*from_step];
714: }
715: #if !defined(PETSC_USE_COMPLEX)
716: } else if (addv == MAX_VALUES) {
717: if (to_step == 1 && from_step == 1) {
718: yv += to_first; xv += from_first;
719: for (i=0; i<n; i++) yv[i] = PetscMax(yv[i],xv[i]);
720: } else {
721: for (i=0; i<n; i++) yv[to_first + i*to_step] = PetscMax(yv[to_first + i*to_step],xv[from_first+i*from_step]);
722: }
723: #endif
724: } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_UNKNOWN_TYPE,"Wrong insert option");
725: VecRestoreArrayPair(x,y,&xv,&yv);
726: return(0);
727: }
729: /* --------------------------------------------------------------------------*/
732: PetscErrorCode VecScatterCopy_SGToSG(VecScatter in,VecScatter out)
733: {
734: PetscErrorCode ierr;
735: VecScatter_Seq_General *in_to = (VecScatter_Seq_General*)in->todata,*out_to = NULL;
736: VecScatter_Seq_General *in_from = (VecScatter_Seq_General*)in->fromdata,*out_from = NULL;
739: out->ops->begin = in->ops->begin;
740: out->ops->end = in->ops->end;
741: out->ops->copy = in->ops->copy;
742: out->ops->destroy = in->ops->destroy;
743: out->ops->view = in->ops->view;
745: PetscMalloc2(1,&out_to,1,&out_from);
746: PetscMalloc2(in_to->n,&out_to->vslots,in_from->n,&out_from->vslots);
747: out_to->n = in_to->n;
748: out_to->type = in_to->type;
749: out_to->nonmatching_computed = PETSC_FALSE;
750: out_to->slots_nonmatching = 0;
751: out_to->is_copy = PETSC_FALSE;
752: PetscMemcpy(out_to->vslots,in_to->vslots,(out_to->n)*sizeof(PetscInt));
754: out_from->n = in_from->n;
755: out_from->type = in_from->type;
756: out_from->nonmatching_computed = PETSC_FALSE;
757: out_from->slots_nonmatching = 0;
758: out_from->is_copy = PETSC_FALSE;
759: PetscMemcpy(out_from->vslots,in_from->vslots,(out_from->n)*sizeof(PetscInt));
761: out->todata = (void*)out_to;
762: out->fromdata = (void*)out_from;
763: return(0);
764: }
766: PetscErrorCode VecScatterView_SGToSG(VecScatter in,PetscViewer viewer)
767: {
768: PetscErrorCode ierr;
769: VecScatter_Seq_General *in_to = (VecScatter_Seq_General*)in->todata;
770: VecScatter_Seq_General *in_from = (VecScatter_Seq_General*)in->fromdata;
771: PetscInt i;
772: PetscBool isascii;
775: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&isascii);
776: if (isascii) {
777: PetscViewerASCIIPrintf(viewer,"Sequential general scatter\n");
778: for (i=0; i<in_to->n; i++) {
779: PetscViewerASCIIPrintf(viewer,"%D to %D\n",in_from->vslots[i],in_to->vslots[i]);
780: }
781: }
782: return(0);
783: }
786: PetscErrorCode VecScatterCopy_SGToSS(VecScatter in,VecScatter out)
787: {
788: PetscErrorCode ierr;
789: VecScatter_Seq_Stride *in_to = (VecScatter_Seq_Stride*)in->todata,*out_to = NULL;
790: VecScatter_Seq_General *in_from = (VecScatter_Seq_General*)in->fromdata,*out_from = NULL;
793: out->ops->begin = in->ops->begin;
794: out->ops->end = in->ops->end;
795: out->ops->copy = in->ops->copy;
796: out->ops->destroy = in->ops->destroy;
797: out->ops->view = in->ops->view;
799: PetscMalloc2(1,&out_to,1,&out_from);
800: PetscMalloc1(in_from->n,&out_from->vslots);
801: out_to->n = in_to->n;
802: out_to->type = in_to->type;
803: out_to->first = in_to->first;
804: out_to->step = in_to->step;
805: out_to->type = in_to->type;
807: out_from->n = in_from->n;
808: out_from->type = in_from->type;
809: out_from->nonmatching_computed = PETSC_FALSE;
810: out_from->slots_nonmatching = 0;
811: out_from->is_copy = PETSC_FALSE;
812: PetscMemcpy(out_from->vslots,in_from->vslots,(out_from->n)*sizeof(PetscInt));
814: out->todata = (void*)out_to;
815: out->fromdata = (void*)out_from;
816: return(0);
817: }
819: PetscErrorCode VecScatterView_SGToSS(VecScatter in,PetscViewer viewer)
820: {
821: PetscErrorCode ierr;
822: VecScatter_Seq_Stride *in_to = (VecScatter_Seq_Stride*)in->todata;
823: VecScatter_Seq_General *in_from = (VecScatter_Seq_General*)in->fromdata;
824: PetscInt i;
825: PetscBool isascii;
828: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&isascii);
829: if (isascii) {
830: PetscViewerASCIIPrintf(viewer,"Sequential general scatter to stride\n");
831: for (i=0; i<in_to->n; i++) {
832: PetscViewerASCIIPrintf(viewer,"%D to %D\n",in_from->vslots[i],in_to->first + in_to->step*i);
833: }
834: }
835: return(0);
836: }
838: /* --------------------------------------------------------------------------*/
839: /*
840: Scatter: parallel to sequential vector, sequential strides for both.
841: */
842: PetscErrorCode VecScatterCopy_SSToSS(VecScatter in,VecScatter out)
843: {
844: VecScatter_Seq_Stride *in_to = (VecScatter_Seq_Stride*)in->todata,*out_to = NULL;
845: VecScatter_Seq_Stride *in_from = (VecScatter_Seq_Stride*)in->fromdata,*out_from = NULL;
846: PetscErrorCode ierr;
849: out->ops->begin = in->ops->begin;
850: out->ops->end = in->ops->end;
851: out->ops->copy = in->ops->copy;
852: out->ops->destroy = in->ops->destroy;
853: out->ops->view = in->ops->view;
855: PetscMalloc2(1,&out_to,1,&out_from);
856: out_to->n = in_to->n;
857: out_to->type = in_to->type;
858: out_to->first = in_to->first;
859: out_to->step = in_to->step;
860: out_to->type = in_to->type;
861: out_from->n = in_from->n;
862: out_from->type = in_from->type;
863: out_from->first = in_from->first;
864: out_from->step = in_from->step;
865: out_from->type = in_from->type;
866: out->todata = (void*)out_to;
867: out->fromdata = (void*)out_from;
868: return(0);
869: }
871: PetscErrorCode VecScatterView_SSToSS(VecScatter in,PetscViewer viewer)
872: {
873: VecScatter_Seq_Stride *in_to = (VecScatter_Seq_Stride*)in->todata;
874: VecScatter_Seq_Stride *in_from = (VecScatter_Seq_Stride*)in->fromdata;
875: PetscErrorCode ierr;
876: PetscBool isascii;
879: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&isascii);
880: if (isascii) {
881: PetscViewerASCIIPrintf(viewer,"Sequential stride count %D start %D step to start %D stride %D\n",in_to->n,in_to->first,in_to->step,in_from->first,in_from->step);
882: }
883: return(0);
884: }
887: extern PetscErrorCode VecScatterCreate_PtoS(PetscInt,const PetscInt*,PetscInt,const PetscInt*,Vec,Vec,PetscInt,VecScatter);
888: extern PetscErrorCode VecScatterCreate_PtoP(PetscInt,const PetscInt*,PetscInt,const PetscInt*,Vec,Vec,PetscInt,VecScatter);
889: extern PetscErrorCode VecScatterCreate_StoP(PetscInt,const PetscInt*,PetscInt,const PetscInt*,Vec,Vec,PetscInt,VecScatter);
891: /* =======================================================================*/
892: #define VEC_SEQ_ID 0
893: #define VEC_MPI_ID 1
894: #define IS_GENERAL_ID 0
895: #define IS_STRIDE_ID 1
896: #define IS_BLOCK_ID 2
898: /*
899: Blocksizes we have optimized scatters for
900: */
902: #define VecScatterOptimizedBS(mbs) (2 <= mbs)
905: PetscErrorCode VecScatterCreateEmpty(MPI_Comm comm,VecScatter *newctx)
906: {
907: VecScatter ctx;
911: PetscHeaderCreate(ctx,VEC_SCATTER_CLASSID,"VecScatter","VecScatter","Vec",comm,VecScatterDestroy,VecScatterView);
912: ctx->inuse = PETSC_FALSE;
913: ctx->beginandendtogether = PETSC_FALSE;
914: PetscOptionsGetBool(NULL,NULL,"-vecscatter_merge",&ctx->beginandendtogether,NULL);
915: if (ctx->beginandendtogether) {
916: PetscInfo(ctx,"Using combined (merged) vector scatter begin and end\n");
917: }
918: ctx->packtogether = PETSC_FALSE;
919: PetscOptionsGetBool(NULL,NULL,"-vecscatter_packtogether",&ctx->packtogether,NULL);
920: if (ctx->packtogether) {
921: PetscInfo(ctx,"Pack all messages before sending\n");
922: }
923: *newctx = ctx;
924: return(0);
925: }
927: /*@C
928: VecScatterCreate - Creates a vector scatter context.
930: Collective on Vec
932: Input Parameters:
933: + xin - a vector that defines the shape (parallel data layout of the vector)
934: of vectors from which we scatter
935: . yin - a vector that defines the shape (parallel data layout of the vector)
936: of vectors to which we scatter
937: . ix - the indices of xin to scatter (if NULL scatters all values)
938: - iy - the indices of yin to hold results (if NULL fills entire vector yin)
940: Output Parameter:
941: . newctx - location to store the new scatter context
943: Options Database Keys: (uses regular MPI_Sends by default)
944: + -vecscatter_view - Prints detail of communications
945: . -vecscatter_view ::ascii_info - Print less details about communication
946: . -vecscatter_ssend - Uses MPI_Ssend_init() instead of MPI_Send_init()
947: . -vecscatter_rsend - use ready receiver mode for MPI sends
948: . -vecscatter_merge - VecScatterBegin() handles all of the communication, VecScatterEnd() is a nop
949: eliminates the chance for overlap of computation and communication
950: . -vecscatter_sendfirst - Posts sends before receives
951: . -vecscatter_packtogether - Pack all messages before sending, receive all messages before unpacking
952: . -vecscatter_alltoall - Uses MPI all to all communication for scatter
953: . -vecscatter_window - Use MPI 2 window operations to move data
954: . -vecscatter_nopack - Avoid packing to work vector when possible (if used with -vecscatter_alltoall then will use MPI_Alltoallw()
955: - -vecscatter_reproduce - insure that the order of the communications are done the same for each scatter, this under certain circumstances
956: will make the results of scatters deterministic when otherwise they are not (it may be slower also).
958: $
959: $ --When packing is used--
960: $ MPI Datatypes (no packing) sendfirst merge packtogether persistent*
961: $ _nopack _sendfirst _merge _packtogether -vecscatter_
962: $ ----------------------------------------------------------------------------------------------------------------------------
963: $ Message passing Send p X X X always
964: $ Ssend p X X X always _ssend
965: $ Rsend p nonsense X X always _rsend
966: $ AlltoAll v or w X nonsense always X nonsense _alltoall
967: $ MPI_Win p nonsense p p nonsense _window
968: $
969: $ Since persistent sends and receives require a constant memory address they can only be used when data is packed into the work vector
970: $ because the in and out array may be different for each call to VecScatterBegin/End().
971: $
972: $ p indicates possible, but not implemented. X indicates implemented
973: $
975: Level: intermediate
977: Notes:
978: In calls to VecScatterBegin() and VecScatterEnd() you can use different vectors than the xin and
979: yin you used above; BUT they must have the same parallel data layout, for example,
980: they could be obtained from VecDuplicate().
981: A VecScatter context CANNOT be used in two or more simultaneous scatters;
982: that is you cannot call a second VecScatterBegin() with the same scatter
983: context until the VecScatterEnd() has been called on the first VecScatterBegin().
984: In this case a separate VecScatter is needed for each concurrent scatter.
986: Developer Notes:
987: Currently the MPI_Send(), MPI_Ssend() and MPI_Rsend() all use PERSISTENT versions.
988: (this unfortunately requires that the same in and out arrays be used for each use, this
989: is why when not using MPI_alltoallw() we always need to pack the input into the work array before sending
990: and unpack upon receiving instead of using MPI datatypes to avoid the packing/unpacking).
992: Both ix and iy cannot be NULL at the same time.
994: Concepts: scatter^between vectors
995: Concepts: gather^between vectors
997: .seealso: VecScatterDestroy(), VecScatterCreateToAll(), VecScatterCreateToZero()
998: @*/
999: PetscErrorCode VecScatterCreate(Vec xin,IS ix,Vec yin,IS iy,VecScatter *newctx)
1000: {
1001: VecScatter ctx;
1002: PetscErrorCode ierr;
1003: PetscMPIInt size;
1004: PetscInt xin_type = VEC_SEQ_ID,yin_type = VEC_SEQ_ID,*range;
1005: PetscInt ix_type = IS_GENERAL_ID,iy_type = IS_GENERAL_ID;
1006: MPI_Comm comm,ycomm;
1007: PetscBool totalv,ixblock,iyblock,iystride,islocal,cando,flag;
1008: IS tix = 0,tiy = 0;
1011: if (!ix && !iy) SETERRQ(PetscObjectComm((PetscObject)xin),PETSC_ERR_SUP,"Cannot pass default in for both input and output indices");
1013: /*
1014: Determine if the vectors are "parallel", ie. it shares a comm with other processors, or
1015: sequential (it does not share a comm). The difference is that parallel vectors treat the
1016: index set as providing indices in the global parallel numbering of the vector, with
1017: sequential vectors treat the index set as providing indices in the local sequential
1018: numbering
1019: */
1020: PetscObjectGetComm((PetscObject)xin,&comm);
1021: MPI_Comm_size(comm,&size);
1022: if (size > 1) xin_type = VEC_MPI_ID;
1024: PetscObjectGetComm((PetscObject)yin,&ycomm);
1025: MPI_Comm_size(ycomm,&size);
1026: if (size > 1) {comm = ycomm; yin_type = VEC_MPI_ID;}
1028: /* generate the Scatter context */
1029: PetscHeaderCreate(ctx,VEC_SCATTER_CLASSID,"VecScatter","VecScatter","Vec",comm,VecScatterDestroy,VecScatterView);
1030: ctx->inuse = PETSC_FALSE;
1032: ctx->beginandendtogether = PETSC_FALSE;
1033: PetscOptionsGetBool(NULL,NULL,"-vecscatter_merge",&ctx->beginandendtogether,NULL);
1034: if (ctx->beginandendtogether) {
1035: PetscInfo(ctx,"Using combined (merged) vector scatter begin and end\n");
1036: }
1037: ctx->packtogether = PETSC_FALSE;
1038: PetscOptionsGetBool(NULL,NULL,"-vecscatter_packtogether",&ctx->packtogether,NULL);
1039: if (ctx->packtogether) {
1040: PetscInfo(ctx,"Pack all messages before sending\n");
1041: }
1043: VecGetLocalSize(xin,&ctx->from_n);
1044: VecGetLocalSize(yin,&ctx->to_n);
1046: /*
1047: if ix or iy is not included; assume just grabbing entire vector
1048: */
1049: if (!ix && xin_type == VEC_SEQ_ID) {
1050: ISCreateStride(comm,ctx->from_n,0,1,&ix);
1051: tix = ix;
1052: } else if (!ix && xin_type == VEC_MPI_ID) {
1053: if (yin_type == VEC_MPI_ID) {
1054: PetscInt ntmp, low;
1055: VecGetLocalSize(xin,&ntmp);
1056: VecGetOwnershipRange(xin,&low,NULL);
1057: ISCreateStride(comm,ntmp,low,1,&ix);
1058: } else {
1059: PetscInt Ntmp;
1060: VecGetSize(xin,&Ntmp);
1061: ISCreateStride(comm,Ntmp,0,1,&ix);
1062: }
1063: tix = ix;
1064: } else if (!ix) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ix not given, but not Seq or MPI vector");
1066: if (!iy && yin_type == VEC_SEQ_ID) {
1067: ISCreateStride(comm,ctx->to_n,0,1,&iy);
1068: tiy = iy;
1069: } else if (!iy && yin_type == VEC_MPI_ID) {
1070: if (xin_type == VEC_MPI_ID) {
1071: PetscInt ntmp, low;
1072: VecGetLocalSize(yin,&ntmp);
1073: VecGetOwnershipRange(yin,&low,NULL);
1074: ISCreateStride(comm,ntmp,low,1,&iy);
1075: } else {
1076: PetscInt Ntmp;
1077: VecGetSize(yin,&Ntmp);
1078: ISCreateStride(comm,Ntmp,0,1,&iy);
1079: }
1080: tiy = iy;
1081: } else if (!iy) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"iy not given, but not Seq or MPI vector");
1083: /*
1084: Determine types of index sets
1085: */
1086: PetscObjectTypeCompare((PetscObject)ix,ISBLOCK,&flag);
1087: if (flag) ix_type = IS_BLOCK_ID;
1088: PetscObjectTypeCompare((PetscObject)iy,ISBLOCK,&flag);
1089: if (flag) iy_type = IS_BLOCK_ID;
1090: PetscObjectTypeCompare((PetscObject)ix,ISSTRIDE,&flag);
1091: if (flag) ix_type = IS_STRIDE_ID;
1092: PetscObjectTypeCompare((PetscObject)iy,ISSTRIDE,&flag);
1093: if (flag) iy_type = IS_STRIDE_ID;
1095: /* ===========================================================================================================
1096: Check for special cases
1097: ==========================================================================================================*/
1098: /* ---------------------------------------------------------------------------*/
1099: if (xin_type == VEC_SEQ_ID && yin_type == VEC_SEQ_ID) {
1100: if (ix_type == IS_GENERAL_ID && iy_type == IS_GENERAL_ID) {
1101: PetscInt nx,ny;
1102: const PetscInt *idx,*idy;
1103: VecScatter_Seq_General *to = NULL,*from = NULL;
1105: ISGetLocalSize(ix,&nx);
1106: ISGetLocalSize(iy,&ny);
1107: if (nx != ny) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local scatter sizes don't match");
1108: ISGetIndices(ix,&idx);
1109: ISGetIndices(iy,&idy);
1110: PetscMalloc2(1,&to,1,&from);
1111: PetscMalloc2(nx,&to->vslots,nx,&from->vslots);
1112: to->n = nx;
1113: #if defined(PETSC_USE_DEBUG)
1114: VecScatterCheckIndices_Private(ctx->to_n,ny,idy);
1115: #endif
1116: PetscMemcpy(to->vslots,idy,nx*sizeof(PetscInt));
1117: from->n = nx;
1118: #if defined(PETSC_USE_DEBUG)
1119: VecScatterCheckIndices_Private(ctx->from_n,nx,idx);
1120: #endif
1121: PetscMemcpy(from->vslots,idx,nx*sizeof(PetscInt));
1122: to->type = VEC_SCATTER_SEQ_GENERAL;
1123: from->type = VEC_SCATTER_SEQ_GENERAL;
1124: ctx->todata = (void*)to;
1125: ctx->fromdata = (void*)from;
1126: ctx->ops->begin = VecScatterBegin_SGToSG;
1127: ctx->ops->end = 0;
1128: ctx->ops->destroy = VecScatterDestroy_SGToSG;
1129: ctx->ops->copy = VecScatterCopy_SGToSG;
1130: ctx->ops->view = VecScatterView_SGToSG;
1131: PetscInfo(xin,"Special case: sequential vector general scatter\n");
1132: goto functionend;
1133: } else if (ix_type == IS_STRIDE_ID && iy_type == IS_STRIDE_ID) {
1134: PetscInt nx,ny,to_first,to_step,from_first,from_step;
1135: VecScatter_Seq_Stride *from8 = NULL,*to8 = NULL;
1137: ISGetLocalSize(ix,&nx);
1138: ISGetLocalSize(iy,&ny);
1139: if (nx != ny) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local scatter sizes don't match");
1140: ISStrideGetInfo(iy,&to_first,&to_step);
1141: ISStrideGetInfo(ix,&from_first,&from_step);
1142: PetscMalloc2(1,&to8,1,&from8);
1143: to8->n = nx;
1144: to8->first = to_first;
1145: to8->step = to_step;
1146: from8->n = nx;
1147: from8->first = from_first;
1148: from8->step = from_step;
1149: to8->type = VEC_SCATTER_SEQ_STRIDE;
1150: from8->type = VEC_SCATTER_SEQ_STRIDE;
1151: ctx->todata = (void*)to8;
1152: ctx->fromdata = (void*)from8;
1153: ctx->ops->begin = VecScatterBegin_SSToSS;
1154: ctx->ops->end = 0;
1155: ctx->ops->destroy = VecScatterDestroy_SSToSS;
1156: ctx->ops->copy = VecScatterCopy_SSToSS;
1157: ctx->ops->view = VecScatterView_SSToSS;
1158: PetscInfo(xin,"Special case: sequential vector stride to stride\n");
1159: goto functionend;
1160: } else if (ix_type == IS_GENERAL_ID && iy_type == IS_STRIDE_ID) {
1161: PetscInt nx,ny,first,step;
1162: const PetscInt *idx;
1163: VecScatter_Seq_General *from9 = NULL;
1164: VecScatter_Seq_Stride *to9 = NULL;
1166: ISGetLocalSize(ix,&nx);
1167: ISGetIndices(ix,&idx);
1168: ISGetLocalSize(iy,&ny);
1169: ISStrideGetInfo(iy,&first,&step);
1170: if (nx != ny) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local scatter sizes don't match");
1171: PetscMalloc2(1,&to9,1,&from9);
1172: PetscMalloc1(nx,&from9->vslots);
1173: to9->n = nx;
1174: to9->first = first;
1175: to9->step = step;
1176: from9->n = nx;
1177: #if defined(PETSC_USE_DEBUG)
1178: VecScatterCheckIndices_Private(ctx->from_n,nx,idx);
1179: #endif
1180: PetscMemcpy(from9->vslots,idx,nx*sizeof(PetscInt));
1181: ctx->todata = (void*)to9; ctx->fromdata = (void*)from9;
1182: if (step == 1) ctx->ops->begin = VecScatterBegin_SGToSS_Stride1;
1183: else ctx->ops->begin = VecScatterBegin_SGToSS;
1184: ctx->ops->destroy = VecScatterDestroy_SGToSS;
1185: ctx->ops->end = 0;
1186: ctx->ops->copy = VecScatterCopy_SGToSS;
1187: ctx->ops->view = VecScatterView_SGToSS;
1188: to9->type = VEC_SCATTER_SEQ_STRIDE;
1189: from9->type = VEC_SCATTER_SEQ_GENERAL;
1190: PetscInfo(xin,"Special case: sequential vector general to stride\n");
1191: goto functionend;
1192: } else if (ix_type == IS_STRIDE_ID && iy_type == IS_GENERAL_ID) {
1193: PetscInt nx,ny,first,step;
1194: const PetscInt *idy;
1195: VecScatter_Seq_General *to10 = NULL;
1196: VecScatter_Seq_Stride *from10 = NULL;
1198: ISGetLocalSize(ix,&nx);
1199: ISGetIndices(iy,&idy);
1200: ISGetLocalSize(iy,&ny);
1201: ISStrideGetInfo(ix,&first,&step);
1202: if (nx != ny) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local scatter sizes don't match");
1203: PetscMalloc2(1,&to10,1,&from10);
1204: PetscMalloc1(nx,&to10->vslots);
1205: from10->n = nx;
1206: from10->first = first;
1207: from10->step = step;
1208: to10->n = nx;
1209: #if defined(PETSC_USE_DEBUG)
1210: VecScatterCheckIndices_Private(ctx->to_n,ny,idy);
1211: #endif
1212: PetscMemcpy(to10->vslots,idy,nx*sizeof(PetscInt));
1213: ctx->todata = (void*)to10;
1214: ctx->fromdata = (void*)from10;
1215: if (step == 1) ctx->ops->begin = VecScatterBegin_SSToSG_Stride1;
1216: else ctx->ops->begin = VecScatterBegin_SSToSG;
1217: ctx->ops->destroy = VecScatterDestroy_SSToSG;
1218: ctx->ops->end = 0;
1219: ctx->ops->copy = 0;
1220: ctx->ops->view = VecScatterView_SSToSG;
1221: to10->type = VEC_SCATTER_SEQ_GENERAL;
1222: from10->type = VEC_SCATTER_SEQ_STRIDE;
1223: PetscInfo(xin,"Special case: sequential vector stride to general\n");
1224: goto functionend;
1225: } else {
1226: PetscInt nx,ny;
1227: const PetscInt *idx,*idy;
1228: VecScatter_Seq_General *to11 = NULL,*from11 = NULL;
1229: PetscBool idnx,idny;
1231: ISGetLocalSize(ix,&nx);
1232: ISGetLocalSize(iy,&ny);
1233: if (nx != ny) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local scatter sizes don't match, in %D out %D",nx,ny);
1235: ISIdentity(ix,&idnx);
1236: ISIdentity(iy,&idny);
1237: if (idnx && idny) {
1238: VecScatter_Seq_Stride *to13 = NULL,*from13 = NULL;
1239: PetscMalloc2(1,&to13,1,&from13);
1240: to13->n = nx;
1241: to13->first = 0;
1242: to13->step = 1;
1243: from13->n = nx;
1244: from13->first = 0;
1245: from13->step = 1;
1246: to13->type = VEC_SCATTER_SEQ_STRIDE;
1247: from13->type = VEC_SCATTER_SEQ_STRIDE;
1248: ctx->todata = (void*)to13;
1249: ctx->fromdata = (void*)from13;
1250: ctx->ops->begin = VecScatterBegin_SSToSS;
1251: ctx->ops->end = 0;
1252: ctx->ops->destroy = VecScatterDestroy_SSToSS;
1253: ctx->ops->copy = VecScatterCopy_SSToSS;
1254: ctx->ops->view = VecScatterView_SSToSS;
1255: PetscInfo(xin,"Special case: sequential copy\n");
1256: goto functionend;
1257: }
1259: ISGetIndices(iy,&idy);
1260: ISGetIndices(ix,&idx);
1261: PetscMalloc2(1,&to11,1,&from11);
1262: PetscMalloc2(nx,&to11->vslots,nx,&from11->vslots);
1263: to11->n = nx;
1264: #if defined(PETSC_USE_DEBUG)
1265: VecScatterCheckIndices_Private(ctx->to_n,ny,idy);
1266: #endif
1267: PetscMemcpy(to11->vslots,idy,nx*sizeof(PetscInt));
1268: from11->n = nx;
1269: #if defined(PETSC_USE_DEBUG)
1270: VecScatterCheckIndices_Private(ctx->from_n,nx,idx);
1271: #endif
1272: PetscMemcpy(from11->vslots,idx,nx*sizeof(PetscInt));
1273: to11->type = VEC_SCATTER_SEQ_GENERAL;
1274: from11->type = VEC_SCATTER_SEQ_GENERAL;
1275: ctx->todata = (void*)to11;
1276: ctx->fromdata = (void*)from11;
1277: ctx->ops->begin = VecScatterBegin_SGToSG;
1278: ctx->ops->end = 0;
1279: ctx->ops->destroy = VecScatterDestroy_SGToSG;
1280: ctx->ops->copy = VecScatterCopy_SGToSG;
1281: ctx->ops->view = VecScatterView_SGToSG;
1282: ISRestoreIndices(ix,&idx);
1283: ISRestoreIndices(iy,&idy);
1284: PetscInfo(xin,"Sequential vector scatter with block indices\n");
1285: goto functionend;
1286: }
1287: }
1288: /* ---------------------------------------------------------------------------*/
1289: if (xin_type == VEC_MPI_ID && yin_type == VEC_SEQ_ID) {
1291: /* ===========================================================================================================
1292: Check for special cases
1293: ==========================================================================================================*/
1294: islocal = PETSC_FALSE;
1295: /* special case extracting (subset of) local portion */
1296: if (ix_type == IS_STRIDE_ID && iy_type == IS_STRIDE_ID) {
1297: PetscInt nx,ny,to_first,to_step,from_first,from_step;
1298: PetscInt start,end,min,max;
1299: VecScatter_Seq_Stride *from12 = NULL,*to12 = NULL;
1301: VecGetOwnershipRange(xin,&start,&end);
1302: ISGetLocalSize(ix,&nx);
1303: ISStrideGetInfo(ix,&from_first,&from_step);
1304: ISGetLocalSize(iy,&ny);
1305: ISStrideGetInfo(iy,&to_first,&to_step);
1306: if (nx != ny) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local scatter sizes don't match");
1307: ISGetMinMax(ix,&min,&max);
1308: if (min >= start && max < end) islocal = PETSC_TRUE;
1309: else islocal = PETSC_FALSE;
1310: /* cannot use MPIU_Allreduce() since this call matches with the MPI_Allreduce() in the else statement below */
1311: MPI_Allreduce(&islocal,&cando,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)xin));
1312: if (cando) {
1313: PetscMalloc2(1,&to12,1,&from12);
1314: to12->n = nx;
1315: to12->first = to_first;
1316: to12->step = to_step;
1317: from12->n = nx;
1318: from12->first = from_first-start;
1319: from12->step = from_step;
1320: to12->type = VEC_SCATTER_SEQ_STRIDE;
1321: from12->type = VEC_SCATTER_SEQ_STRIDE;
1322: ctx->todata = (void*)to12;
1323: ctx->fromdata = (void*)from12;
1324: ctx->ops->begin = VecScatterBegin_SSToSS;
1325: ctx->ops->end = 0;
1326: ctx->ops->destroy = VecScatterDestroy_SSToSS;
1327: ctx->ops->copy = VecScatterCopy_SSToSS;
1328: ctx->ops->view = VecScatterView_SSToSS;
1329: PetscInfo(xin,"Special case: processors only getting local values\n");
1330: goto functionend;
1331: }
1332: } else {
1333: MPI_Allreduce(&islocal,&cando,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)xin));
1334: }
1336: /* test for special case of all processors getting entire vector */
1337: /* contains check that PetscMPIInt can handle the sizes needed */
1338: totalv = PETSC_FALSE;
1339: if (ix_type == IS_STRIDE_ID && iy_type == IS_STRIDE_ID) {
1340: PetscInt i,nx,ny,to_first,to_step,from_first,from_step,N;
1341: PetscMPIInt *count = NULL,*displx;
1342: VecScatter_MPI_ToAll *sto = NULL;
1344: ISGetLocalSize(ix,&nx);
1345: ISStrideGetInfo(ix,&from_first,&from_step);
1346: ISGetLocalSize(iy,&ny);
1347: ISStrideGetInfo(iy,&to_first,&to_step);
1348: if (nx != ny) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local scatter sizes don't match");
1349: VecGetSize(xin,&N);
1350: if (nx != N) totalv = PETSC_FALSE;
1351: else if (from_first == 0 && from_step == 1 && from_first == to_first && from_step == to_step) totalv = PETSC_TRUE;
1352: else totalv = PETSC_FALSE;
1353: /* cannot use MPIU_Allreduce() since this call matches with the MPI_Allreduce() in the else statement below */
1354: MPI_Allreduce(&totalv,&cando,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)xin));
1356: #if defined(PETSC_USE_64BIT_INDICES)
1357: if (cando && (yin->map->N < PETSC_MPI_INT_MAX)) {
1358: #else
1359: if (cando) {
1360: #endif
1361: MPI_Comm_size(PetscObjectComm((PetscObject)ctx),&size);
1362: PetscMalloc3(1,&sto,size,&count,size,&displx);
1363: range = xin->map->range;
1364: for (i=0; i<size; i++) {
1365: PetscMPIIntCast(range[i+1] - range[i],count+i);
1366: PetscMPIIntCast(range[i],displx+i);
1367: }
1368: sto->count = count;
1369: sto->displx = displx;
1370: sto->work1 = 0;
1371: sto->work2 = 0;
1372: sto->type = VEC_SCATTER_MPI_TOALL;
1373: ctx->todata = (void*)sto;
1374: ctx->fromdata = 0;
1375: ctx->ops->begin = VecScatterBegin_MPI_ToAll;
1376: ctx->ops->end = 0;
1377: ctx->ops->destroy = VecScatterDestroy_MPI_ToAll;
1378: ctx->ops->copy = VecScatterCopy_MPI_ToAll;
1379: ctx->ops->view = VecScatterView_MPI_ToAll;
1380: PetscInfo(xin,"Special case: all processors get entire parallel vector\n");
1381: goto functionend;
1382: }
1383: } else {
1384: MPI_Allreduce(&totalv,&cando,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)xin));
1385: }
1387: /* test for special case of processor 0 getting entire vector */
1388: /* contains check that PetscMPIInt can handle the sizes needed */
1389: totalv = PETSC_FALSE;
1390: if (ix_type == IS_STRIDE_ID && iy_type == IS_STRIDE_ID) {
1391: PetscInt i,nx,ny,to_first,to_step,from_first,from_step,N;
1392: PetscMPIInt rank,*count = NULL,*displx;
1393: VecScatter_MPI_ToAll *sto = NULL;
1395: PetscObjectGetComm((PetscObject)xin,&comm);
1396: MPI_Comm_rank(comm,&rank);
1397: ISGetLocalSize(ix,&nx);
1398: ISStrideGetInfo(ix,&from_first,&from_step);
1399: ISGetLocalSize(iy,&ny);
1400: ISStrideGetInfo(iy,&to_first,&to_step);
1401: if (nx != ny) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local scatter sizes don't match");
1402: if (!rank) {
1403: VecGetSize(xin,&N);
1404: if (nx != N) totalv = PETSC_FALSE;
1405: else if (from_first == 0 && from_step == 1 &&
1406: from_first == to_first && from_step == to_step) totalv = PETSC_TRUE;
1407: else totalv = PETSC_FALSE;
1408: } else {
1409: if (!nx) totalv = PETSC_TRUE;
1410: else totalv = PETSC_FALSE;
1411: }
1412: /* cannot use MPIU_Allreduce() since this call matches with the MPI_Allreduce() in the else statement below */
1413: MPI_Allreduce(&totalv,&cando,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)xin));
1415: #if defined(PETSC_USE_64BIT_INDICES)
1416: if (cando && (yin->map->N < PETSC_MPI_INT_MAX)) {
1417: #else
1418: if (cando) {
1419: #endif
1420: MPI_Comm_size(PetscObjectComm((PetscObject)ctx),&size);
1421: PetscMalloc3(1,&sto,size,&count,size,&displx);
1422: range = xin->map->range;
1423: for (i=0; i<size; i++) {
1424: PetscMPIIntCast(range[i+1] - range[i],count+i);
1425: PetscMPIIntCast(range[i],displx+i);
1426: }
1427: sto->count = count;
1428: sto->displx = displx;
1429: sto->work1 = 0;
1430: sto->work2 = 0;
1431: sto->type = VEC_SCATTER_MPI_TOONE;
1432: ctx->todata = (void*)sto;
1433: ctx->fromdata = 0;
1434: ctx->ops->begin = VecScatterBegin_MPI_ToOne;
1435: ctx->ops->end = 0;
1436: ctx->ops->destroy = VecScatterDestroy_MPI_ToAll;
1437: ctx->ops->copy = VecScatterCopy_MPI_ToAll;
1438: ctx->ops->view = VecScatterView_MPI_ToAll;
1439: PetscInfo(xin,"Special case: processor zero gets entire parallel vector, rest get none\n");
1440: goto functionend;
1441: }
1442: } else {
1443: MPI_Allreduce(&totalv,&cando,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)xin));
1444: }
1446: PetscObjectTypeCompare((PetscObject)ix,ISBLOCK,&ixblock);
1447: PetscObjectTypeCompare((PetscObject)iy,ISBLOCK,&iyblock);
1448: PetscObjectTypeCompare((PetscObject)iy,ISSTRIDE,&iystride);
1449: if (ixblock) {
1450: /* special case block to block */
1451: if (iyblock) {
1452: PetscInt nx,ny,bsx,bsy;
1453: const PetscInt *idx,*idy;
1454: ISGetBlockSize(iy,&bsy);
1455: ISGetBlockSize(ix,&bsx);
1456: if (bsx == bsy && VecScatterOptimizedBS(bsx)) {
1457: ISBlockGetLocalSize(ix,&nx);
1458: ISBlockGetIndices(ix,&idx);
1459: ISBlockGetLocalSize(iy,&ny);
1460: ISBlockGetIndices(iy,&idy);
1461: if (nx != ny) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local scatter sizes don't match");
1462: VecScatterCreate_PtoS(nx,idx,ny,idy,xin,yin,bsx,ctx);
1463: ISBlockRestoreIndices(ix,&idx);
1464: ISBlockRestoreIndices(iy,&idy);
1465: PetscInfo(xin,"Special case: blocked indices\n");
1466: goto functionend;
1467: }
1468: /* special case block to stride */
1469: } else if (iystride) {
1470: PetscInt ystart,ystride,ysize,bsx;
1471: ISStrideGetInfo(iy,&ystart,&ystride);
1472: ISGetLocalSize(iy,&ysize);
1473: ISGetBlockSize(ix,&bsx);
1474: /* see if stride index set is equivalent to block index set */
1475: if (VecScatterOptimizedBS(bsx) && ((ystart % bsx) == 0) && (ystride == 1) && ((ysize % bsx) == 0)) {
1476: PetscInt nx,il,*idy;
1477: const PetscInt *idx;
1478: ISBlockGetLocalSize(ix,&nx);
1479: ISBlockGetIndices(ix,&idx);
1480: if (ysize != bsx*nx) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local scatter sizes don't match");
1481: PetscMalloc1(nx,&idy);
1482: if (nx) {
1483: idy[0] = ystart/bsx;
1484: for (il=1; il<nx; il++) idy[il] = idy[il-1] + 1;
1485: }
1486: VecScatterCreate_PtoS(nx,idx,nx,idy,xin,yin,bsx,ctx);
1487: PetscFree(idy);
1488: ISBlockRestoreIndices(ix,&idx);
1489: PetscInfo(xin,"Special case: blocked indices to stride\n");
1490: goto functionend;
1491: }
1492: }
1493: }
1494: /* left over general case */
1495: {
1496: PetscInt nx,ny;
1497: const PetscInt *idx,*idy;
1498: ISGetLocalSize(ix,&nx);
1499: ISGetIndices(ix,&idx);
1500: ISGetLocalSize(iy,&ny);
1501: ISGetIndices(iy,&idy);
1502: if (nx != ny) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local scatter sizes don't match (%d %d)",nx,ny);
1503: VecScatterCreate_PtoS(nx,idx,ny,idy,xin,yin,1,ctx);
1504: ISRestoreIndices(ix,&idx);
1505: ISRestoreIndices(iy,&idy);
1506: PetscInfo(xin,"General case: MPI to Seq\n");
1507: goto functionend;
1508: }
1509: }
1510: /* ---------------------------------------------------------------------------*/
1511: if (xin_type == VEC_SEQ_ID && yin_type == VEC_MPI_ID) {
1512: /* ===========================================================================================================
1513: Check for special cases
1514: ==========================================================================================================*/
1515: /* special case local copy portion */
1516: islocal = PETSC_FALSE;
1517: if (ix_type == IS_STRIDE_ID && iy_type == IS_STRIDE_ID) {
1518: PetscInt nx,ny,to_first,to_step,from_step,start,end,from_first,min,max;
1519: VecScatter_Seq_Stride *from = NULL,*to = NULL;
1521: VecGetOwnershipRange(yin,&start,&end);
1522: ISGetLocalSize(ix,&nx);
1523: ISStrideGetInfo(ix,&from_first,&from_step);
1524: ISGetLocalSize(iy,&ny);
1525: ISStrideGetInfo(iy,&to_first,&to_step);
1526: if (nx != ny) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local scatter sizes don't match");
1527: ISGetMinMax(iy,&min,&max);
1528: if (min >= start && max < end) islocal = PETSC_TRUE;
1529: else islocal = PETSC_FALSE;
1530: /* cannot use MPIU_Allreduce() since this call matches with the MPI_Allreduce() in the else statement below */
1531: MPI_Allreduce(&islocal,&cando,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)yin));
1532: if (cando) {
1533: PetscMalloc2(1,&to,1,&from);
1534: to->n = nx;
1535: to->first = to_first-start;
1536: to->step = to_step;
1537: from->n = nx;
1538: from->first = from_first;
1539: from->step = from_step;
1540: to->type = VEC_SCATTER_SEQ_STRIDE;
1541: from->type = VEC_SCATTER_SEQ_STRIDE;
1542: ctx->todata = (void*)to;
1543: ctx->fromdata = (void*)from;
1544: ctx->ops->begin = VecScatterBegin_SSToSS;
1545: ctx->ops->end = 0;
1546: ctx->ops->destroy = VecScatterDestroy_SSToSS;
1547: ctx->ops->copy = VecScatterCopy_SSToSS;
1548: ctx->ops->view = VecScatterView_SSToSS;
1549: PetscInfo(xin,"Special case: sequential stride to MPI stride\n");
1550: goto functionend;
1551: }
1552: } else {
1553: MPI_Allreduce(&islocal,&cando,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)yin));
1554: }
1555: /* special case block to stride */
1556: if (ix_type == IS_BLOCK_ID && iy_type == IS_STRIDE_ID) {
1557: PetscInt ystart,ystride,ysize,bsx;
1558: ISStrideGetInfo(iy,&ystart,&ystride);
1559: ISGetLocalSize(iy,&ysize);
1560: ISGetBlockSize(ix,&bsx);
1561: /* see if stride index set is equivalent to block index set */
1562: if (VecScatterOptimizedBS(bsx) && ((ystart % bsx) == 0) && (ystride == 1) && ((ysize % bsx) == 0)) {
1563: PetscInt nx,il,*idy;
1564: const PetscInt *idx;
1565: ISBlockGetLocalSize(ix,&nx);
1566: ISBlockGetIndices(ix,&idx);
1567: if (ysize != bsx*nx) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local scatter sizes don't match");
1568: PetscMalloc1(nx,&idy);
1569: if (nx) {
1570: idy[0] = ystart/bsx;
1571: for (il=1; il<nx; il++) idy[il] = idy[il-1] + 1;
1572: }
1573: VecScatterCreate_StoP(nx,idx,nx,idy,xin,yin,bsx,ctx);
1574: PetscFree(idy);
1575: ISBlockRestoreIndices(ix,&idx);
1576: PetscInfo(xin,"Special case: Blocked indices to stride\n");
1577: goto functionend;
1578: }
1579: }
1581: /* general case */
1582: {
1583: PetscInt nx,ny;
1584: const PetscInt *idx,*idy;
1585: ISGetLocalSize(ix,&nx);
1586: ISGetIndices(ix,&idx);
1587: ISGetLocalSize(iy,&ny);
1588: ISGetIndices(iy,&idy);
1589: if (nx != ny) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local scatter sizes don't match");
1590: VecScatterCreate_StoP(nx,idx,ny,idy,xin,yin,1,ctx);
1591: ISRestoreIndices(ix,&idx);
1592: ISRestoreIndices(iy,&idy);
1593: PetscInfo(xin,"General case: Seq to MPI\n");
1594: goto functionend;
1595: }
1596: }
1597: /* ---------------------------------------------------------------------------*/
1598: if (xin_type == VEC_MPI_ID && yin_type == VEC_MPI_ID) {
1599: /* no special cases for now */
1600: PetscInt nx,ny;
1601: const PetscInt *idx,*idy;
1602: ISGetLocalSize(ix,&nx);
1603: ISGetIndices(ix,&idx);
1604: ISGetLocalSize(iy,&ny);
1605: ISGetIndices(iy,&idy);
1606: if (nx != ny) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local scatter sizes don't match");
1607: VecScatterCreate_PtoP(nx,idx,ny,idy,xin,yin,1,ctx);
1608: ISRestoreIndices(ix,&idx);
1609: ISRestoreIndices(iy,&idy);
1610: PetscInfo(xin,"General case: MPI to MPI\n");
1611: goto functionend;
1612: }
1614: functionend:
1615: *newctx = ctx;
1616: ISDestroy(&tix);
1617: ISDestroy(&tiy);
1618: VecScatterViewFromOptions(ctx,NULL,"-vecscatter_view");
1619: return(0);
1620: }
1622: /* ------------------------------------------------------------------*/
1623: /*@
1624: VecScatterGetMerged - Returns true if the scatter is completed in the VecScatterBegin()
1625: and the VecScatterEnd() does nothing
1627: Not Collective
1629: Input Parameter:
1630: . ctx - scatter context created with VecScatterCreate()
1632: Output Parameter:
1633: . flg - PETSC_TRUE if the VecScatterBegin/End() are all done during the VecScatterBegin()
1635: Level: developer
1637: .seealso: VecScatterCreate(), VecScatterEnd(), VecScatterBegin()
1638: @*/
1639: PetscErrorCode VecScatterGetMerged(VecScatter ctx,PetscBool *flg)
1640: {
1643: *flg = ctx->beginandendtogether;
1644: return(0);
1645: }
1647: /*@
1648: VecScatterBegin - Begins a generalized scatter from one vector to
1649: another. Complete the scattering phase with VecScatterEnd().
1651: Neighbor-wise Collective on VecScatter and Vec
1653: Input Parameters:
1654: + inctx - scatter context generated by VecScatterCreate()
1655: . x - the vector from which we scatter
1656: . y - the vector to which we scatter
1657: . addv - either ADD_VALUES or INSERT_VALUES, with INSERT_VALUES mode any location
1658: not scattered to retains its old value; i.e. the vector is NOT first zeroed.
1659: - mode - the scattering mode, usually SCATTER_FORWARD. The available modes are:
1660: SCATTER_FORWARD or SCATTER_REVERSE
1663: Level: intermediate
1665: Options Database: See VecScatterCreate()
1667: Notes:
1668: The vectors x and y need not be the same vectors used in the call
1669: to VecScatterCreate(), but x must have the same parallel data layout
1670: as that passed in as the x to VecScatterCreate(), similarly for the y.
1671: Most likely they have been obtained from VecDuplicate().
1673: You cannot change the values in the input vector between the calls to VecScatterBegin()
1674: and VecScatterEnd().
1676: If you use SCATTER_REVERSE the two arguments x and y should be reversed, from
1677: the SCATTER_FORWARD.
1679: y[iy[i]] = x[ix[i]], for i=0,...,ni-1
1681: This scatter is far more general than the conventional
1682: scatter, since it can be a gather or a scatter or a combination,
1683: depending on the indices ix and iy. If x is a parallel vector and y
1684: is sequential, VecScatterBegin() can serve to gather values to a
1685: single processor. Similarly, if y is parallel and x sequential, the
1686: routine can scatter from one processor to many processors.
1688: Concepts: scatter^between vectors
1689: Concepts: gather^between vectors
1691: .seealso: VecScatterCreate(), VecScatterEnd()
1692: @*/
1693: PetscErrorCode VecScatterBegin(VecScatter inctx,Vec x,Vec y,InsertMode addv,ScatterMode mode)
1694: {
1696: #if defined(PETSC_USE_DEBUG)
1697: PetscInt to_n,from_n;
1698: #endif
1703: if (inctx->inuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE," Scatter ctx already in use");
1705: #if defined(PETSC_USE_DEBUG)
1706: /*
1707: Error checking to make sure these vectors match the vectors used
1708: to create the vector scatter context. -1 in the from_n and to_n indicate the
1709: vector lengths are unknown (for example with mapped scatters) and thus
1710: no error checking is performed.
1711: */
1712: if (inctx->from_n >= 0 && inctx->to_n >= 0) {
1713: VecGetLocalSize(x,&from_n);
1714: VecGetLocalSize(y,&to_n);
1715: if (mode & SCATTER_REVERSE) {
1716: if (to_n != inctx->from_n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Vector wrong size %D for scatter %D (scatter reverse and vector to != ctx from size)",to_n,inctx->from_n);
1717: if (from_n != inctx->to_n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Vector wrong size %D for scatter %D (scatter reverse and vector from != ctx to size)",from_n,inctx->to_n);
1718: } else {
1719: if (to_n != inctx->to_n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Vector wrong size %D for scatter %D (scatter forward and vector to != ctx to size)",to_n,inctx->to_n);
1720: if (from_n != inctx->from_n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Vector wrong size %D for scatter %D (scatter forward and vector from != ctx from size)",from_n,inctx->from_n);
1721: }
1722: }
1723: #endif
1725: inctx->inuse = PETSC_TRUE;
1726: PetscLogEventBarrierBegin(VEC_ScatterBarrier,0,0,0,0,PetscObjectComm((PetscObject)inctx));
1727: (*inctx->ops->begin)(inctx,x,y,addv,mode);
1728: if (inctx->beginandendtogether && inctx->ops->end) {
1729: inctx->inuse = PETSC_FALSE;
1730: (*inctx->ops->end)(inctx,x,y,addv,mode);
1731: }
1732: PetscLogEventBarrierEnd(VEC_ScatterBarrier,0,0,0,0,PetscObjectComm((PetscObject)inctx));
1733: return(0);
1734: }
1736: /* --------------------------------------------------------------------*/
1737: /*@
1738: VecScatterEnd - Ends a generalized scatter from one vector to another. Call
1739: after first calling VecScatterBegin().
1741: Neighbor-wise Collective on VecScatter and Vec
1743: Input Parameters:
1744: + ctx - scatter context generated by VecScatterCreate()
1745: . x - the vector from which we scatter
1746: . y - the vector to which we scatter
1747: . addv - either ADD_VALUES or INSERT_VALUES.
1748: - mode - the scattering mode, usually SCATTER_FORWARD. The available modes are:
1749: SCATTER_FORWARD, SCATTER_REVERSE
1751: Level: intermediate
1753: Notes:
1754: If you use SCATTER_REVERSE the arguments x and y should be reversed, from the SCATTER_FORWARD.
1756: y[iy[i]] = x[ix[i]], for i=0,...,ni-1
1758: .seealso: VecScatterBegin(), VecScatterCreate()
1759: @*/
1760: PetscErrorCode VecScatterEnd(VecScatter ctx,Vec x,Vec y,InsertMode addv,ScatterMode mode)
1761: {
1768: ctx->inuse = PETSC_FALSE;
1769: if (!ctx->ops->end) return(0);
1770: if (!ctx->beginandendtogether) {
1771: PetscLogEventBegin(VEC_ScatterEnd,ctx,x,y,0);
1772: (*(ctx)->ops->end)(ctx,x,y,addv,mode);
1773: PetscLogEventEnd(VEC_ScatterEnd,ctx,x,y,0);
1774: }
1775: return(0);
1776: }
1778: /*@C
1779: VecScatterDestroy - Destroys a scatter context created by
1780: VecScatterCreate().
1782: Collective on VecScatter
1784: Input Parameter:
1785: . ctx - the scatter context
1787: Level: intermediate
1789: .seealso: VecScatterCreate(), VecScatterCopy()
1790: @*/
1791: PetscErrorCode VecScatterDestroy(VecScatter *ctx)
1792: {
1796: if (!*ctx) return(0);
1798: if ((*ctx)->inuse) SETERRQ(((PetscObject)(*ctx))->comm,PETSC_ERR_ARG_WRONGSTATE,"Scatter context is in use");
1799: if (--((PetscObject)(*ctx))->refct > 0) {*ctx = 0; return(0);}
1801: /* if memory was published with SAWs then destroy it */
1802: PetscObjectSAWsViewOff((PetscObject)(*ctx));
1803: (*(*ctx)->ops->destroy)(*ctx);
1804: #if defined(PETSC_HAVE_CUSP)
1805: VecScatterCUSPIndicesDestroy((PetscCUSPIndices*)&((*ctx)->spptr));
1806: #elif defined(PETSC_HAVE_VECCUDA)
1807: VecScatterCUDAIndicesDestroy((PetscCUDAIndices*)&((*ctx)->spptr));
1808: #endif
1809: PetscHeaderDestroy(ctx);
1810: return(0);
1811: }
1813: /*@
1814: VecScatterCopy - Makes a copy of a scatter context.
1816: Collective on VecScatter
1818: Input Parameter:
1819: . sctx - the scatter context
1821: Output Parameter:
1822: . ctx - the context copy
1824: Level: advanced
1826: .seealso: VecScatterCreate(), VecScatterDestroy()
1827: @*/
1828: PetscErrorCode VecScatterCopy(VecScatter sctx,VecScatter *ctx)
1829: {
1835: if (!sctx->ops->copy) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot copy this type");
1836: PetscHeaderCreate(*ctx,VEC_SCATTER_CLASSID,"VecScatter","VecScatter","Vec",PetscObjectComm((PetscObject)sctx),VecScatterDestroy,VecScatterView);
1837: (*ctx)->to_n = sctx->to_n;
1838: (*ctx)->from_n = sctx->from_n;
1839: (*sctx->ops->copy)(sctx,*ctx);
1840: return(0);
1841: }
1844: /* ------------------------------------------------------------------*/
1845: /*@C
1846: VecScatterView - Views a vector scatter context.
1848: Collective on VecScatter
1850: Input Parameters:
1851: + ctx - the scatter context
1852: - viewer - the viewer for displaying the context
1854: Level: intermediate
1856: C@*/
1857: PetscErrorCode VecScatterView(VecScatter ctx,PetscViewer viewer)
1858: {
1863: if (!viewer) {
1864: PetscViewerASCIIGetStdout(PetscObjectComm((PetscObject)ctx),&viewer);
1865: }
1867: if (ctx->ops->view) {
1868: (*ctx->ops->view)(ctx,viewer);
1869: }
1870: return(0);
1871: }
1873: /*@C
1874: VecScatterRemap - Remaps the "from" and "to" indices in a
1875: vector scatter context. FOR EXPERTS ONLY!
1877: Collective on VecScatter
1879: Input Parameters:
1880: + scat - vector scatter context
1881: . from - remapping for "from" indices (may be NULL)
1882: - to - remapping for "to" indices (may be NULL)
1884: Level: developer
1886: Notes: In the parallel case the todata is actually the indices
1887: from which the data is TAKEN! The from stuff is where the
1888: data is finally put. This is VERY VERY confusing!
1890: In the sequential case the todata is the indices where the
1891: data is put and the fromdata is where it is taken from.
1892: This is backwards from the paralllel case! CRY! CRY! CRY!
1894: @*/
1895: PetscErrorCode VecScatterRemap(VecScatter scat,PetscInt *rto,PetscInt *rfrom)
1896: {
1897: VecScatter_Seq_General *to,*from;
1898: VecScatter_MPI_General *mto;
1899: PetscInt i;
1906: from = (VecScatter_Seq_General*)scat->fromdata;
1907: mto = (VecScatter_MPI_General*)scat->todata;
1909: if (mto->type == VEC_SCATTER_MPI_TOALL) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Not for to all scatter");
1911: if (rto) {
1912: if (mto->type == VEC_SCATTER_MPI_GENERAL) {
1913: /* handle off processor parts */
1914: for (i=0; i<mto->starts[mto->n]; i++) mto->indices[i] = rto[mto->indices[i]];
1916: /* handle local part */
1917: to = &mto->local;
1918: for (i=0; i<to->n; i++) to->vslots[i] = rto[to->vslots[i]];
1919: } else if (from->type == VEC_SCATTER_SEQ_GENERAL) {
1920: for (i=0; i<from->n; i++) from->vslots[i] = rto[from->vslots[i]];
1921: } else if (from->type == VEC_SCATTER_SEQ_STRIDE) {
1922: VecScatter_Seq_Stride *sto = (VecScatter_Seq_Stride*)from;
1924: /* if the remapping is the identity and stride is identity then skip remap */
1925: if (sto->step == 1 && sto->first == 0) {
1926: for (i=0; i<sto->n; i++) {
1927: if (rto[i] != i) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Unable to remap such scatters");
1928: }
1929: } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Unable to remap such scatters");
1930: } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Unable to remap such scatters");
1931: }
1933: if (rfrom) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Unable to remap the FROM in scatters yet");
1935: /*
1936: Mark then vector lengths as unknown because we do not know the
1937: lengths of the remapped vectors
1938: */
1939: scat->from_n = -1;
1940: scat->to_n = -1;
1941: return(0);
1942: }
1944: /*
1945: VecScatterGetTypes_Private - Returns the scatter types.
1947: scatter - The scatter.
1948: from - Upon exit this contains the type of the from scatter.
1949: to - Upon exit this contains the type of the to scatter.
1950: */
1951: PetscErrorCode VecScatterGetTypes_Private(VecScatter scatter,VecScatterType *from,VecScatterType *to)
1952: {
1953: VecScatter_Common* fromdata = (VecScatter_Common*)scatter->fromdata;
1954: VecScatter_Common* todata = (VecScatter_Common*)scatter->todata;
1957: *from = fromdata->type;
1958: *to = todata->type;
1959: return(0);
1960: }
1963: /*
1964: VecScatterIsSequential_Private - Returns true if the scatter is sequential.
1966: scatter - The scatter.
1967: flag - Upon exit flag is true if the scatter is of type VecScatter_Seq_General
1968: or VecScatter_Seq_Stride; otherwise flag is false.
1969: */
1970: PetscErrorCode VecScatterIsSequential_Private(VecScatter_Common *scatter,PetscBool *flag)
1971: {
1972: VecScatterType scatterType = scatter->type;
1975: if (scatterType == VEC_SCATTER_SEQ_GENERAL || scatterType == VEC_SCATTER_SEQ_STRIDE) {
1976: *flag = PETSC_TRUE;
1977: } else {
1978: *flag = PETSC_FALSE;
1979: }
1980: return(0);
1981: }
1983: #if defined(PETSC_HAVE_CUSP) || defined(PETSC_HAVE_VECCUDA)
1985: /*@C
1986: VecScatterInitializeForGPU - Initializes a generalized scatter from one vector
1987: to another for GPU based computation.
1989: Input Parameters:
1990: + inctx - scatter context generated by VecScatterCreate()
1991: . x - the vector from which we scatter
1992: - mode - the scattering mode, usually SCATTER_FORWARD. The available modes are:
1993: SCATTER_FORWARD or SCATTER_REVERSE
1995: Level: intermediate
1997: Notes:
1998: Effectively, this function creates all the necessary indexing buffers and work
1999: vectors needed to move data only those data points in a vector which need to
2000: be communicated across ranks. This is done at the first time this function is
2001: called. Currently, this only used in the context of the parallel SpMV call in
2002: MatMult_MPIAIJCUSP or MatMult_MPIAIJCUSPARSE.
2004: This function is executed before the call to MatMult. This enables the memory
2005: transfers to be overlapped with the MatMult SpMV kernel call.
2007: .seealso: VecScatterFinalizeForGPU(), VecScatterCreate(), VecScatterEnd()
2008: @*/
2009: PETSC_EXTERN PetscErrorCode VecScatterInitializeForGPU(VecScatter inctx,Vec x,ScatterMode mode)
2010: {
2012: VecScatter_MPI_General *to,*from;
2013: PetscErrorCode ierr;
2014: PetscInt i,*indices,*sstartsSends,*sstartsRecvs,nrecvs,nsends,bs;
2015: PetscBool isSeq1,isSeq2;
2018: VecScatterIsSequential_Private((VecScatter_Common*)inctx->fromdata,&isSeq1);
2019: VecScatterIsSequential_Private((VecScatter_Common*)inctx->todata,&isSeq2);
2020: if (isSeq1 || isSeq2) {
2021: return(0);
2022: }
2023: if (mode & SCATTER_REVERSE) {
2024: to = (VecScatter_MPI_General*)inctx->fromdata;
2025: from = (VecScatter_MPI_General*)inctx->todata;
2026: } else {
2027: to = (VecScatter_MPI_General*)inctx->todata;
2028: from = (VecScatter_MPI_General*)inctx->fromdata;
2029: }
2030: bs = to->bs;
2031: nrecvs = from->n;
2032: nsends = to->n;
2033: indices = to->indices;
2034: sstartsSends = to->starts;
2035: sstartsRecvs = from->starts;
2036: #if defined(PETSC_HAVE_CUSP)
2037: if (x->valid_GPU_array != PETSC_CUSP_UNALLOCATED && (nsends>0 || nrecvs>0)) {
2038: #else
2039: if (x->valid_GPU_array != PETSC_CUDA_UNALLOCATED && (nsends>0 || nrecvs>0)) {
2040: #endif
2041: if (!inctx->spptr) {
2042: PetscInt k,*tindicesSends,*sindicesSends,*tindicesRecvs,*sindicesRecvs;
2043: PetscInt ns = sstartsSends[nsends],nr = sstartsRecvs[nrecvs];
2044: /* Here we create indices for both the senders and receivers. */
2045: PetscMalloc1(ns,&tindicesSends);
2046: PetscMalloc1(nr,&tindicesRecvs);
2048: PetscMemcpy(tindicesSends,indices,ns*sizeof(PetscInt));
2049: PetscMemcpy(tindicesRecvs,from->indices,nr*sizeof(PetscInt));
2051: PetscSortRemoveDupsInt(&ns,tindicesSends);
2052: PetscSortRemoveDupsInt(&nr,tindicesRecvs);
2054: PetscMalloc1(bs*ns,&sindicesSends);
2055: PetscMalloc1(from->bs*nr,&sindicesRecvs);
2057: /* sender indices */
2058: for (i=0; i<ns; i++) {
2059: for (k=0; k<bs; k++) sindicesSends[i*bs+k] = tindicesSends[i]+k;
2060: }
2061: PetscFree(tindicesSends);
2063: /* receiver indices */
2064: for (i=0; i<nr; i++) {
2065: for (k=0; k<from->bs; k++) sindicesRecvs[i*from->bs+k] = tindicesRecvs[i]+k;
2066: }
2067: PetscFree(tindicesRecvs);
2069: /* create GPU indices, work vectors, ... */
2070: #if defined(PETSC_HAVE_CUSP)
2071: VecScatterCUSPIndicesCreate_PtoP(ns*bs,sindicesSends,nr*from->bs,sindicesRecvs,(PetscCUSPIndices*)&inctx->spptr);
2072: #else
2073: VecScatterCUDAIndicesCreate_PtoP(ns*bs,sindicesSends,nr*from->bs,sindicesRecvs,(PetscCUDAIndices*)&inctx->spptr);
2074: #endif
2075: PetscFree(sindicesSends);
2076: PetscFree(sindicesRecvs);
2077: }
2078: }
2079: return(0);
2080: }
2082: /*@C
2083: VecScatterFinalizeForGPU - Finalizes a generalized scatter from one vector to
2084: another for GPU based computation.
2086: Input Parameter:
2087: + inctx - scatter context generated by VecScatterCreate()
2089: Level: intermediate
2091: Notes:
2092: Effectively, this function resets the temporary buffer flags. Currently, this
2093: only used in the context of the parallel SpMV call in in MatMult_MPIAIJCUDA
2094: or MatMult_MPIAIJCUDAARSE. Once the MatMultAdd is finished, the GPU temporary
2095: buffers used for messaging are no longer valid.
2097: .seealso: VecScatterInitializeForGPU(), VecScatterCreate(), VecScatterEnd()
2098: @*/
2099: PETSC_EXTERN PetscErrorCode VecScatterFinalizeForGPU(VecScatter inctx)
2100: {
2102: return(0);
2103: }
2105: #endif