Actual source code: vscat.c
petsc-3.10.5 2019-03-28
2: /*
3: Code for creating scatters between vectors. This file
4: includes the code for scattering between sequential vectors and
5: some special cases for parallel scatters.
6: */
8: #include <petsc/private/vecscatterimpl.h>
10: #if defined(PETSC_HAVE_VECCUDA)
11: #include <../src/vec/vec/impls/seq/seqcuda/cudavecimpl.h>
12: #endif
14: #if defined(PETSC_USE_DEBUG)
15: /*
16: Checks if any indices are less than zero and generates an error
17: */
18: PetscErrorCode VecScatterCheckIndices_Private(PetscInt nmax,PetscInt n,const PetscInt *idx)
19: {
20: PetscInt i;
23: for (i=0; i<n; i++) {
24: if (idx[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative index %D at %D location",idx[i],i);
25: if (idx[i] >= nmax) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index %D at %D location greater than max %D",idx[i],i,nmax);
26: }
27: return(0);
28: }
29: #endif
31: /*
32: This is special scatter code for when the entire parallel vector is copied to each processor.
34: This code was written by Cameron Cooper, Occidental College, Fall 1995,
35: while working at ANL as a SERS student.
36: */
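/*
   Added usage sketch (not part of the original source): this code path is
   normally reached through VecScatterCreateToAll(), e.g.

     Vec        x,xall;                          with x a parallel (MPI) vector
     VecScatter toall;
     VecScatterCreateToAll(x,&toall,&xall);
     VecScatterBegin(toall,x,xall,INSERT_VALUES,SCATTER_FORWARD);
     VecScatterEnd(toall,x,xall,INSERT_VALUES,SCATTER_FORWARD);

   after which every process holds a complete copy of x in the sequential
   vector xall; SCATTER_REVERSE drives the reverse branch below.
*/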
37: PetscErrorCode VecScatterBegin_MPI_ToAll(VecScatter ctx,Vec x,Vec y,InsertMode addv,ScatterMode mode)
38: {
40: PetscInt yy_n,xx_n;
41: PetscScalar *xv,*yv;
44: VecGetLocalSize(y,&yy_n);
45: VecGetLocalSize(x,&xx_n);
46: VecGetArrayPair(x,y,&xv,&yv);
48: if (mode & SCATTER_REVERSE) {
49: PetscScalar *xvt,*xvt2;
50: VecScatter_MPI_ToAll *scat = (VecScatter_MPI_ToAll*)ctx->todata;
51: PetscInt i;
52: PetscMPIInt *disply = scat->displx;
54: if (addv == INSERT_VALUES) {
55: PetscInt rstart,rend;
56: /*
57: copy the correct part of the local vector into the local storage of
58: the MPI one. Note: this operation only makes sense if all the local
59: vectors have the same values
60: */
61: VecGetOwnershipRange(y,&rstart,&rend);
62: PetscMemcpy(yv,xv+rstart,yy_n*sizeof(PetscScalar));
63: } else {
64: MPI_Comm comm;
65: PetscMPIInt rank;
66: PetscObjectGetComm((PetscObject)y,&comm);
67: MPI_Comm_rank(comm,&rank);
68: if (scat->work1) xvt = scat->work1;
69: else {
70: PetscMalloc1(xx_n,&xvt);
71: scat->work1 = xvt;
72: }
73: if (!rank) { /* I am the zeroth processor, values are accumulated here */
74: if (scat->work2) xvt2 = scat->work2;
75: else {
76: PetscMalloc1(xx_n,&xvt2);
77: scat->work2 = xvt2;
78: }
79: MPI_Gatherv(yv,yy_n,MPIU_SCALAR,xvt2,scat->count,disply,MPIU_SCALAR,0,PetscObjectComm((PetscObject)ctx));
80: MPI_Reduce(xv,xvt,xx_n,MPIU_SCALAR,MPIU_SUM,0,PetscObjectComm((PetscObject)ctx));
81: if (addv == ADD_VALUES) {
82: for (i=0; i<xx_n; i++) xvt[i] += xvt2[i];
83: #if !defined(PETSC_USE_COMPLEX)
84: } else if (addv == MAX_VALUES) {
85: for (i=0; i<xx_n; i++) xvt[i] = PetscMax(xvt[i],xvt2[i]);
86: #endif
87: } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_UNKNOWN_TYPE,"Wrong insert option");
88: MPI_Scatterv(xvt,scat->count,disply,MPIU_SCALAR,yv,yy_n,MPIU_SCALAR,0,PetscObjectComm((PetscObject)ctx));
89: } else {
90: MPI_Gatherv(yv,yy_n,MPIU_SCALAR,0, 0,0,MPIU_SCALAR,0,PetscObjectComm((PetscObject)ctx));
91: MPI_Reduce(xv,xvt,xx_n,MPIU_SCALAR,MPIU_SUM,0,PetscObjectComm((PetscObject)ctx));
92: MPI_Scatterv(0,scat->count,disply,MPIU_SCALAR,yv,yy_n,MPIU_SCALAR,0,PetscObjectComm((PetscObject)ctx));
93: }
94: }
95: } else {
96: PetscScalar *yvt;
97: VecScatter_MPI_ToAll *scat = (VecScatter_MPI_ToAll*)ctx->todata;
98: PetscInt i;
99: PetscMPIInt *displx = scat->displx;
101: if (addv == INSERT_VALUES) {
102: MPI_Allgatherv(xv,xx_n,MPIU_SCALAR,yv,scat->count,displx,MPIU_SCALAR,PetscObjectComm((PetscObject)ctx));
103: } else {
104: if (scat->work1) yvt = scat->work1;
105: else {
106: PetscMalloc1(yy_n,&yvt);
107: scat->work1 = yvt;
108: }
109: MPI_Allgatherv(xv,xx_n,MPIU_SCALAR,yvt,scat->count,displx,MPIU_SCALAR,PetscObjectComm((PetscObject)ctx));
110: if (addv == ADD_VALUES) {
111: for (i=0; i<yy_n; i++) yv[i] += yvt[i];
112: #if !defined(PETSC_USE_COMPLEX)
113: } else if (addv == MAX_VALUES) {
114: for (i=0; i<yy_n; i++) yv[i] = PetscMax(yv[i],yvt[i]);
115: #endif
116: } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_UNKNOWN_TYPE,"Wrong insert option");
117: }
118: }
119: VecRestoreArrayPair(x,y,&xv,&yv);
120: return(0);
121: }
123: PetscErrorCode VecScatterView_MPI_ToAll(VecScatter in,PetscViewer viewer)
124: {
126: PetscBool isascii;
129: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&isascii);
130: if (isascii) {
131: PetscViewerASCIIPrintf(viewer,"Entire parallel vector is copied to each process\n");
132: }
133: return(0);
134: }
136: /*
137: This is special scatter code for when the entire parallel vector is copied to processor 0.
139: */
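/*
   Added usage sketch (not part of the original source): this code path is
   normally reached through VecScatterCreateToZero(), e.g.

     Vec        x,x0;                            with x a parallel (MPI) vector
     VecScatter tozero;
     VecScatterCreateToZero(x,&tozero,&x0);
     VecScatterBegin(tozero,x,x0,INSERT_VALUES,SCATTER_FORWARD);
     VecScatterEnd(tozero,x,x0,INSERT_VALUES,SCATTER_FORWARD);

   where x0 has the global length of x on rank 0 and length zero elsewhere.
*/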
140: PetscErrorCode VecScatterBegin_MPI_ToOne(VecScatter ctx,Vec x,Vec y,InsertMode addv,ScatterMode mode)
141: {
142: PetscErrorCode ierr;
143: PetscMPIInt rank;
144: PetscInt yy_n,xx_n;
145: PetscScalar *yv;
146: const PetscScalar *xv;
147: MPI_Comm comm;
150: VecGetLocalSize(y,&yy_n);
151: VecGetLocalSize(x,&xx_n);
152: VecGetArrayRead(x,&xv);
153: VecGetArray(y,&yv);
155: PetscObjectGetComm((PetscObject)x,&comm);
156: MPI_Comm_rank(comm,&rank);
158: /* -------- Reverse scatter; spread from processor 0 to other processors */
159: if (mode & SCATTER_REVERSE) {
160: PetscScalar *yvt;
161: VecScatter_MPI_ToAll *scat = (VecScatter_MPI_ToAll*)ctx->todata;
162: PetscInt i;
163: PetscMPIInt *disply = scat->displx;
165: if (addv == INSERT_VALUES) {
166: MPI_Scatterv((PetscScalar*)xv,scat->count,disply,MPIU_SCALAR,yv,yy_n,MPIU_SCALAR,0,PetscObjectComm((PetscObject)ctx));
167: } else {
168: if (scat->work2) yvt = scat->work2;
169: else {
170: PetscInt xx_nt;
171: MPI_Allreduce(&xx_n,&xx_nt,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)y));
172: PetscMalloc1(xx_nt,&yvt);
173: scat->work2 = yvt;
174: }
175: MPI_Scatterv((PetscScalar*)xv,scat->count,disply,MPIU_SCALAR,yvt,yy_n,MPIU_SCALAR,0,PetscObjectComm((PetscObject)ctx));
176: if (addv == ADD_VALUES) {
177: for (i=0; i<yy_n; i++) yv[i] += yvt[i];
178: #if !defined(PETSC_USE_COMPLEX)
179: } else if (addv == MAX_VALUES) {
180: for (i=0; i<yy_n; i++) yv[i] = PetscMax(yv[i],yvt[i]);
181: #endif
182: } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_UNKNOWN_TYPE,"Wrong insert option");
183: }
184: /* --------- Forward scatter; gather all values onto processor 0 */
185: } else {
186: PetscScalar *yvt = 0;
187: VecScatter_MPI_ToAll *scat = (VecScatter_MPI_ToAll*)ctx->todata;
188: PetscInt i;
189: PetscMPIInt *displx = scat->displx;
191: if (addv == INSERT_VALUES) {
192: MPI_Gatherv((PetscScalar*)xv,xx_n,MPIU_SCALAR,yv,scat->count,displx,MPIU_SCALAR,0,PetscObjectComm((PetscObject)ctx));
193: } else {
194: if (!rank) {
195: if (scat->work1) yvt = scat->work1;
196: else {
197: PetscMalloc1(yy_n,&yvt);
198: scat->work1 = yvt;
199: }
200: }
201: MPI_Gatherv((PetscScalar*)xv,xx_n,MPIU_SCALAR,yvt,scat->count,displx,MPIU_SCALAR,0,PetscObjectComm((PetscObject)ctx));
202: if (!rank) {
203: if (addv == ADD_VALUES) {
204: for (i=0; i<yy_n; i++) yv[i] += yvt[i];
205: #if !defined(PETSC_USE_COMPLEX)
206: } else if (addv == MAX_VALUES) {
207: for (i=0; i<yy_n; i++) yv[i] = PetscMax(yv[i],yvt[i]);
208: #endif
209: } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_UNKNOWN_TYPE,"Wrong insert option");
210: }
211: }
212: }
213: VecRestoreArrayRead(x,&xv);
214: VecRestoreArray(y,&yv);
215: return(0);
216: }
218: /*
219: The following routines are used by both VecScatterBegin_MPI_ToAll() and VecScatterBegin_MPI_ToOne()
220: */
221: PetscErrorCode VecScatterDestroy_MPI_ToAll(VecScatter ctx)
222: {
223: VecScatter_MPI_ToAll *scat = (VecScatter_MPI_ToAll*)ctx->todata;
224: PetscErrorCode ierr;
227: PetscFree(scat->work1);
228: PetscFree(scat->work2);
229: PetscFree3(ctx->todata,scat->count,scat->displx);
230: return(0);
231: }
233: PetscErrorCode VecScatterDestroy_SGToSG(VecScatter ctx)
234: {
238: PetscFree2(((VecScatter_Seq_General*)ctx->todata)->vslots,((VecScatter_Seq_General*)ctx->fromdata)->vslots);
239: VecScatterMemcpyPlanDestroy(&((VecScatter_Seq_General*)ctx->fromdata)->memcpy_plan);
240: VecScatterMemcpyPlanDestroy(&((VecScatter_Seq_General*)ctx->todata)->memcpy_plan);
241: PetscFree2(ctx->todata,ctx->fromdata);
242: return(0);
243: }
245: PetscErrorCode VecScatterDestroy_SGToSS(VecScatter ctx)
246: {
250: PetscFree(((VecScatter_Seq_General*)ctx->fromdata)->vslots);
251: VecScatterMemcpyPlanDestroy(&((VecScatter_Seq_General*)ctx->fromdata)->memcpy_plan);
252: PetscFree2(ctx->todata,ctx->fromdata);
253: return(0);
254: }
256: PetscErrorCode VecScatterDestroy_SSToSG(VecScatter ctx)
257: {
261: PetscFree(((VecScatter_Seq_General*)ctx->todata)->vslots);
262: VecScatterMemcpyPlanDestroy(&((VecScatter_Seq_General*)ctx->todata)->memcpy_plan);
263: PetscFree2(ctx->todata,ctx->fromdata);
264: return(0);
265: }
267: PetscErrorCode VecScatterDestroy_SSToSS(VecScatter ctx)
268: {
272: PetscFree2(ctx->todata,ctx->fromdata);
273: return(0);
274: }
276: /* -------------------------------------------------------------------------*/
277: PetscErrorCode VecScatterCopy_MPI_ToAll(VecScatter in,VecScatter out)
278: {
279: VecScatter_MPI_ToAll *in_to = (VecScatter_MPI_ToAll*)in->todata,*sto;
280: PetscErrorCode ierr;
281: PetscMPIInt size,*count,*displx;
282: PetscInt i;
285: out->ops->begin = in->ops->begin;
286: out->ops->end = in->ops->end;
287: out->ops->copy = in->ops->copy;
288: out->ops->destroy = in->ops->destroy;
289: out->ops->view = in->ops->view;
291: MPI_Comm_size(PetscObjectComm((PetscObject)out),&size);
292: PetscMalloc3(1,&sto,size,&count,size,&displx);
293: sto->format = in_to->format;
294: sto->count = count;
295: sto->displx = displx;
296: for (i=0; i<size; i++) {
297: sto->count[i] = in_to->count[i];
298: sto->displx[i] = in_to->displx[i];
299: }
300: sto->work1 = 0;
301: sto->work2 = 0;
302: out->todata = (void*)sto;
303: out->fromdata = (void*)0;
304: return(0);
305: }
307: /* --------------------------------------------------------------------------------------*/
308: /*
309: Scatter: sequential general to sequential general
310: */
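/*
   Added usage sketch (not part of the original source): a scatter of this
   flavor results when both index sets are general and both vectors are
   sequential, e.g. (x and y assumed to be already-created sequential Vecs)

     PetscInt   idx[3] = {0,3,5},idy[3] = {2,1,4};
     IS         ix,iy;
     VecScatter sg;
     ISCreateGeneral(PETSC_COMM_SELF,3,idx,PETSC_COPY_VALUES,&ix);
     ISCreateGeneral(PETSC_COMM_SELF,3,idy,PETSC_COPY_VALUES,&iy);
     VecScatterCreate(x,ix,y,iy,&sg);

   which, in the forward direction, copies x[0],x[3],x[5] into y[2],y[1],y[4].
*/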
311: PetscErrorCode VecScatterBegin_SGToSG(VecScatter ctx,Vec x,Vec y,InsertMode addv,ScatterMode mode)
312: {
313: VecScatter_Seq_General *gen_to = (VecScatter_Seq_General*)ctx->todata;
314: VecScatter_Seq_General *gen_from = (VecScatter_Seq_General*)ctx->fromdata;
315: PetscErrorCode ierr;
316: PetscInt i,n = gen_from->n,*fslots,*tslots;
317: PetscScalar *xv,*yv;
318: #if defined(PETSC_HAVE_VECCUDA)
319: PetscBool is_veccuda;
320: #endif
323: #if defined(PETSC_HAVE_VECCUDA)
324: PetscObjectTypeCompareAny((PetscObject)x,&is_veccuda,VECSEQCUDA,VECMPICUDA,VECCUDA,"");
325: if (is_veccuda && x->valid_GPU_array == PETSC_OFFLOAD_GPU) {
326: /* create the scatter indices if not done already */
327: if (!ctx->spptr) {
328: PetscInt tofirst = 0,tostep = 0,fromfirst = 0,fromstep = 0;
329: fslots = gen_from->vslots;
330: tslots = gen_to->vslots;
331: VecScatterCUDAIndicesCreate_StoS(n,tofirst,fromfirst,tostep,fromstep,tslots,fslots,(PetscCUDAIndices*)&(ctx->spptr));
332: }
333: /* next do the scatter */
334: VecScatterCUDA_StoS(x,y,(PetscCUDAIndices)ctx->spptr,addv,mode);
335: return(0);
336: }
337: #endif
339: VecGetArrayPair(x,y,&xv,&yv);
340: if (mode & SCATTER_REVERSE) {
341: gen_to = (VecScatter_Seq_General*)ctx->fromdata;
342: gen_from = (VecScatter_Seq_General*)ctx->todata;
343: }
344: fslots = gen_from->vslots;
345: tslots = gen_to->vslots;
347: if (gen_from->memcpy_plan.optimized[0]) { VecScatterMemcpyPlanExecute_Scatter(0,xv,&gen_from->memcpy_plan,yv,&gen_to->memcpy_plan,addv); }
348: else if (addv == INSERT_VALUES) { for (i=0; i<n; i++) yv[tslots[i]] = xv[fslots[i]]; }
349: else if (addv == ADD_VALUES) { for (i=0; i<n; i++) yv[tslots[i]] += xv[fslots[i]]; }
350: #if !defined(PETSC_USE_COMPLEX)
351: else if (addv == MAX_VALUES) { for (i=0; i<n; i++) yv[tslots[i]] = PetscMax(yv[tslots[i]],xv[fslots[i]]); }
352: #endif
353: else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_UNKNOWN_TYPE,"Wrong insert option");
354: VecRestoreArrayPair(x,y,&xv,&yv);
355: return(0);
356: }
358: /*
359: Scatter: sequential general to sequential stride 1
360: */
361: PetscErrorCode VecScatterBegin_SGToSS_Stride1(VecScatter ctx,Vec x,Vec y,InsertMode addv,ScatterMode mode)
362: {
363: VecScatter_Seq_Stride *gen_to = (VecScatter_Seq_Stride*)ctx->todata;
364: VecScatter_Seq_General *gen_from = (VecScatter_Seq_General*)ctx->fromdata;
365: PetscInt i,n = gen_from->n,*fslots = gen_from->vslots;
366: PetscErrorCode ierr;
367: PetscInt first = gen_to->first;
368: PetscScalar *xv,*yv;
369: #if defined(PETSC_HAVE_VECCUDA)
370: PetscBool is_veccuda;
371: #endif
374: #if defined(PETSC_HAVE_VECCUDA)
375: PetscObjectTypeCompareAny((PetscObject)x,&is_veccuda,VECSEQCUDA,VECMPICUDA,VECCUDA,"");
376: if (is_veccuda && x->valid_GPU_array == PETSC_OFFLOAD_GPU) {
377: /* create the scatter indices if not done already */
378: if (!ctx->spptr) {
379: PetscInt tofirst = first,tostep = 1,fromfirst = 0,fromstep = 0;
380: PetscInt *tslots = 0;
381: VecScatterCUDAIndicesCreate_StoS(n,tofirst,fromfirst,tostep,fromstep,tslots,fslots,(PetscCUDAIndices*)&(ctx->spptr));
382: }
383: /* next do the scatter */
384: VecScatterCUDA_StoS(x,y,(PetscCUDAIndices)ctx->spptr,addv,mode);
385: return(0);
386: }
387: #endif
389: VecGetArrayPair(x,y,&xv,&yv);
390: if (mode & SCATTER_REVERSE) {
391: PetscScalar *xxv = xv + first;
392: if (gen_from->memcpy_plan.optimized[0]) { VecScatterMemcpyPlanExecute_Unpack(0,xxv,yv,&gen_from->memcpy_plan,addv,1); }
393: else if (addv == INSERT_VALUES) { for (i=0; i<n; i++) yv[fslots[i]] = xxv[i]; }
394: else if (addv == ADD_VALUES) { for (i=0; i<n; i++) yv[fslots[i]] += xxv[i]; }
395: #if !defined(PETSC_USE_COMPLEX)
396: else if (addv == MAX_VALUES) { for (i=0; i<n; i++) yv[fslots[i]] = PetscMax(yv[fslots[i]],xxv[i]); }
397: #endif
398: else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_UNKNOWN_TYPE,"Wrong insert option");
399: } else {
400: PetscScalar *yyv = yv + first;
401: if (gen_from->memcpy_plan.optimized[0]) { VecScatterMemcpyPlanExecute_Pack(0,xv,&gen_from->memcpy_plan,yyv,addv,1); }
402: else if (addv == INSERT_VALUES) { for (i=0; i<n; i++) yyv[i] = xv[fslots[i]]; }
403: else if (addv == ADD_VALUES) { for (i=0; i<n; i++) yyv[i] += xv[fslots[i]]; }
404: #if !defined(PETSC_USE_COMPLEX)
405: else if (addv == MAX_VALUES) { for (i=0; i<n; i++) yyv[i] = PetscMax(yyv[i],xv[fslots[i]]); }
406: #endif
407: else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_UNKNOWN_TYPE,"Wrong insert option");
408: }
409: VecRestoreArrayPair(x,y,&xv,&yv);
410: return(0);
411: }
413: /*
414: Scatter: sequential general to sequential stride
415: */
416: PetscErrorCode VecScatterBegin_SGToSS(VecScatter ctx,Vec x,Vec y,InsertMode addv,ScatterMode mode)
417: {
418: VecScatter_Seq_Stride *gen_to = (VecScatter_Seq_Stride*)ctx->todata;
419: VecScatter_Seq_General *gen_from = (VecScatter_Seq_General*)ctx->fromdata;
420: PetscInt i,n = gen_from->n,*fslots = gen_from->vslots;
421: PetscErrorCode ierr;
422: PetscInt first = gen_to->first,step = gen_to->step;
423: PetscScalar *xv,*yv;
424: #if defined(PETSC_HAVE_VECCUDA)
425: PetscBool is_veccuda;
426: #endif
429: #if defined(PETSC_HAVE_VECCUDA)
430: PetscObjectTypeCompareAny((PetscObject)x,&is_veccuda,VECSEQCUDA,VECMPICUDA,VECCUDA,"");
431: if (is_veccuda && x->valid_GPU_array == PETSC_OFFLOAD_GPU) {
432: /* create the scatter indices if not done already */
433: if (!ctx->spptr) {
434: PetscInt tofirst = first,tostep = step,fromfirst = 0,fromstep = 0;
435: PetscInt * tslots = 0;
436: VecScatterCUDAIndicesCreate_StoS(n,tofirst,fromfirst,tostep,fromstep,tslots,fslots,(PetscCUDAIndices*)&(ctx->spptr));
437: }
438: /* next do the scatter */
439: VecScatterCUDA_StoS(x,y,(PetscCUDAIndices)ctx->spptr,addv,mode);
440: return(0);
441: }
442: #endif
444: VecGetArrayPair(x,y,&xv,&yv);
445: if (mode & SCATTER_REVERSE) {
446: if (addv == INSERT_VALUES) {
447: for (i=0; i<n; i++) yv[fslots[i]] = xv[first + i*step];
448: } else if (addv == ADD_VALUES) {
449: for (i=0; i<n; i++) yv[fslots[i]] += xv[first + i*step];
450: #if !defined(PETSC_USE_COMPLEX)
451: } else if (addv == MAX_VALUES) {
452: for (i=0; i<n; i++) yv[fslots[i]] = PetscMax(yv[fslots[i]],xv[first + i*step]);
453: #endif
454: } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_UNKNOWN_TYPE,"Wrong insert option");
455: } else {
456: if (addv == INSERT_VALUES) {
457: for (i=0; i<n; i++) yv[first + i*step] = xv[fslots[i]];
458: } else if (addv == ADD_VALUES) {
459: for (i=0; i<n; i++) yv[first + i*step] += xv[fslots[i]];
460: #if !defined(PETSC_USE_COMPLEX)
461: } else if (addv == MAX_VALUES) {
462: for (i=0; i<n; i++) yv[first + i*step] = PetscMax(yv[first + i*step],xv[fslots[i]]);
463: #endif
464: } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_UNKNOWN_TYPE,"Wrong insert option");
465: }
466: VecRestoreArrayPair(x,y,&xv,&yv);
467: return(0);
468: }
470: /*
471: Scatter: sequential stride 1 to sequential general
472: */
473: PetscErrorCode VecScatterBegin_SSToSG_Stride1(VecScatter ctx,Vec x,Vec y,InsertMode addv,ScatterMode mode)
474: {
475: VecScatter_Seq_Stride *gen_from = (VecScatter_Seq_Stride*)ctx->fromdata;
476: VecScatter_Seq_General *gen_to = (VecScatter_Seq_General*)ctx->todata;
477: PetscInt i,n = gen_from->n,*tslots = gen_to->vslots;
478: PetscErrorCode ierr;
479: PetscInt first = gen_from->first;
480: PetscScalar *xv,*yv;
481: #if defined(PETSC_HAVE_VECCUDA)
482: PetscBool is_veccuda;
483: #endif
486: #if defined(PETSC_HAVE_VECCUDA)
487: PetscObjectTypeCompareAny((PetscObject)x,&is_veccuda,VECSEQCUDA,VECMPICUDA,VECCUDA,"");
488: if (is_veccuda && x->valid_GPU_array == PETSC_OFFLOAD_GPU) {
489: /* create the scatter indices if not done already */
490: if (!ctx->spptr) {
491: PetscInt tofirst = 0,tostep = 0,fromfirst = first,fromstep = 1;
492: PetscInt *fslots = 0;
493: VecScatterCUDAIndicesCreate_StoS(n,tofirst,fromfirst,tostep,fromstep,tslots,fslots,(PetscCUDAIndices*)&(ctx->spptr));
494: }
495: /* next do the scatter */
496: VecScatterCUDA_StoS(x,y,(PetscCUDAIndices)ctx->spptr,addv,mode);
497: return(0);
498: }
499: #endif
501: VecGetArrayPair(x,y,&xv,&yv);
502: if (mode & SCATTER_REVERSE) {
503: PetscScalar *yyv = yv + first;
504: if (gen_to->memcpy_plan.optimized[0]) { VecScatterMemcpyPlanExecute_Pack(0,xv,&gen_to->memcpy_plan,yyv,addv,1); }
505: else if (addv == INSERT_VALUES) { for (i=0; i<n; i++) yyv[i] = xv[tslots[i]]; }
506: else if (addv == ADD_VALUES) { for (i=0; i<n; i++) yyv[i] += xv[tslots[i]]; }
507: #if !defined(PETSC_USE_COMPLEX)
508: else if (addv == MAX_VALUES) { for (i=0; i<n; i++) yyv[i] = PetscMax(yyv[i],xv[tslots[i]]); }
509: #endif
510: else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_UNKNOWN_TYPE,"Wrong insert option");
511: } else {
512: PetscScalar *xxv = xv + first;
513: if (gen_to->memcpy_plan.optimized[0]) { VecScatterMemcpyPlanExecute_Unpack(0,xxv,yv,&gen_to->memcpy_plan,addv,1); }
514: else if (addv == INSERT_VALUES) { for (i=0; i<n; i++) yv[tslots[i]] = xxv[i]; }
515: else if (addv == ADD_VALUES) { for (i=0; i<n; i++) yv[tslots[i]] += xxv[i]; }
516: #if !defined(PETSC_USE_COMPLEX)
517: else if (addv == MAX_VALUES) { for (i=0; i<n; i++) yv[tslots[i]] = PetscMax(yv[tslots[i]],xxv[i]); }
518: #endif
519: else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_UNKNOWN_TYPE,"Wrong insert option");
520: }
521: VecRestoreArrayPair(x,y,&xv,&yv);
522: return(0);
523: }
525: /*
526: Scatter: sequential stride to sequential general
527: */
528: PetscErrorCode VecScatterBegin_SSToSG(VecScatter ctx,Vec x,Vec y,InsertMode addv,ScatterMode mode)
529: {
530: VecScatter_Seq_Stride *gen_from = (VecScatter_Seq_Stride*)ctx->fromdata;
531: VecScatter_Seq_General *gen_to = (VecScatter_Seq_General*)ctx->todata;
532: PetscInt i,n = gen_from->n,*tslots = gen_to->vslots;
533: PetscErrorCode ierr;
534: PetscInt first = gen_from->first,step = gen_from->step;
535: PetscScalar *xv,*yv;
536: #if defined(PETSC_HAVE_VECCUDA)
537: PetscBool is_veccuda;
538: #endif
541: #if defined(PETSC_HAVE_VECCUDA)
542: PetscObjectTypeCompareAny((PetscObject)x,&is_veccuda,VECSEQCUDA,VECMPICUDA,VECCUDA,"");
543: if (is_veccuda && x->valid_GPU_array == PETSC_OFFLOAD_GPU) {
544: /* create the scatter indices if not done already */
545: if (!ctx->spptr) {
546: PetscInt tofirst = 0,tostep = 0,fromfirst = first,fromstep = step;
547: PetscInt *fslots = 0;
548: VecScatterCUDAIndicesCreate_StoS(n,tofirst,fromfirst,tostep,fromstep,tslots,fslots,(PetscCUDAIndices*)&(ctx->spptr));
549: }
550: /* next do the scatter */
551: VecScatterCUDA_StoS(x,y,(PetscCUDAIndices)ctx->spptr,addv,mode);
552: return(0);
553: }
554: #endif
556: VecGetArrayPair(x,y,&xv,&yv);
557: if (mode & SCATTER_REVERSE) {
558: if (addv == INSERT_VALUES) {
559: for (i=0; i<n; i++) yv[first + i*step] = xv[tslots[i]];
560: } else if (addv == ADD_VALUES) {
561: for (i=0; i<n; i++) yv[first + i*step] += xv[tslots[i]];
562: #if !defined(PETSC_USE_COMPLEX)
563: } else if (addv == MAX_VALUES) {
564: for (i=0; i<n; i++) yv[first + i*step] = PetscMax(yv[first + i*step],xv[tslots[i]]);
565: #endif
566: } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_UNKNOWN_TYPE,"Wrong insert option");
567: } else {
568: if (addv == INSERT_VALUES) {
569: for (i=0; i<n; i++) yv[tslots[i]] = xv[first + i*step];
570: } else if (addv == ADD_VALUES) {
571: for (i=0; i<n; i++) yv[tslots[i]] += xv[first + i*step];
572: #if !defined(PETSC_USE_COMPLEX)
573: } else if (addv == MAX_VALUES) {
574: for (i=0; i<n; i++) yv[tslots[i]] = PetscMax(yv[tslots[i]],xv[first + i*step]);
575: #endif
576: } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_UNKNOWN_TYPE,"Wrong insert option");
577: }
578: VecRestoreArrayPair(x,y,&xv,&yv);
579: return(0);
580: }
582: PetscErrorCode VecScatterView_SSToSG(VecScatter in,PetscViewer viewer)
583: {
584: PetscErrorCode ierr;
585: VecScatter_Seq_Stride *in_from = (VecScatter_Seq_Stride*)in->fromdata;
586: VecScatter_Seq_General *in_to = (VecScatter_Seq_General*)in->todata;
587: PetscInt i;
588: PetscBool isascii;
591: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&isascii);
592: if (isascii) {
593: PetscViewerASCIIPrintf(viewer,"Sequential stride to general scatter\n");
594: for (i=0; i<in_to->n; i++) {
595: PetscViewerASCIIPrintf(viewer,"%D to %D\n",in_from->first + in_from->step*i,in_to->vslots[i]);
596: }
597: if (in_to->memcpy_plan.optimized[0]) {
598: PetscViewerASCIIPrintf(viewer,"This stride1 to general scatter is made of %D copies\n",in_to->memcpy_plan.copy_offsets[1]);
599: }
600: }
601: return(0);
602: }
603: /*
604: Scatter: sequential stride to sequential stride
605: */
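/*
   Added usage sketch (not part of the original source): stride-to-stride
   scatters come from ISCreateStride() index sets, e.g. gathering every other
   entry of a sequential vector x of length 2n into a vector y of length n:

     IS         ix,iy;
     VecScatter ss;
     ISCreateStride(PETSC_COMM_SELF,n,0,2,&ix);   indices 0,2,4,...
     ISCreateStride(PETSC_COMM_SELF,n,0,1,&iy);   indices 0,1,2,...
     VecScatterCreate(x,ix,y,iy,&ss);

   When both strides are 1, the code below collapses to a single PetscMemcpy().
*/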
606: PetscErrorCode VecScatterBegin_SSToSS(VecScatter ctx,Vec x,Vec y,InsertMode addv,ScatterMode mode)
607: {
608: VecScatter_Seq_Stride *gen_to = (VecScatter_Seq_Stride*)ctx->todata;
609: VecScatter_Seq_Stride *gen_from = (VecScatter_Seq_Stride*)ctx->fromdata;
610: PetscInt i,n = gen_from->n,to_first = gen_to->first,to_step = gen_to->step;
611: PetscErrorCode ierr;
612: PetscInt from_first = gen_from->first,from_step = gen_from->step;
613: PetscScalar *xv,*yv;
614: #if defined(PETSC_HAVE_VECCUDA)
615: PetscBool is_veccuda;
616: #endif
619: #if defined(PETSC_HAVE_VECCUDA)
620: PetscObjectTypeCompareAny((PetscObject)x,&is_veccuda,VECSEQCUDA,VECMPICUDA,VECCUDA,"");
621: if (is_veccuda && x->valid_GPU_array == PETSC_OFFLOAD_GPU) {
622: /* create the scatter indices if not done already */
623: if (!ctx->spptr) {
624: PetscInt *tslots = 0,*fslots = 0;
625: VecScatterCUDAIndicesCreate_StoS(n,to_first,from_first,to_step,from_step,tslots,fslots,(PetscCUDAIndices*)&(ctx->spptr));
626: }
627: /* next do the scatter */
628: VecScatterCUDA_StoS(x,y,(PetscCUDAIndices)ctx->spptr,addv,mode);
629: return(0);
630: }
631: #endif
633: VecGetArrayPair(x,y,&xv,&yv);
634: if (mode & SCATTER_REVERSE) {
635: from_first = gen_to->first;
636: to_first = gen_from->first;
637: from_step = gen_to->step;
638: to_step = gen_from->step;
639: }
641: if (addv == INSERT_VALUES) {
642: if (to_step == 1 && from_step == 1) {
643: PetscMemcpy(yv+to_first,xv+from_first,n*sizeof(PetscScalar));
644: } else {
645: for (i=0; i<n; i++) yv[to_first + i*to_step] = xv[from_first+i*from_step];
646: }
647: } else if (addv == ADD_VALUES) {
648: if (to_step == 1 && from_step == 1) {
649: PetscScalar *yyv = yv + to_first, *xxv = xv + from_first;
650: for (i=0; i<n; i++) yyv[i] += xxv[i];
651: } else {
652: for (i=0; i<n; i++) yv[to_first + i*to_step] += xv[from_first+i*from_step];
653: }
654: #if !defined(PETSC_USE_COMPLEX)
655: } else if (addv == MAX_VALUES) {
656: if (to_step == 1 && from_step == 1) {
657: PetscScalar *yyv = yv + to_first, *xxv = xv + from_first;
658: for (i=0; i<n; i++) yyv[i] = PetscMax(yyv[i],xxv[i]);
659: } else {
660: for (i=0; i<n; i++) yv[to_first + i*to_step] = PetscMax(yv[to_first + i*to_step],xv[from_first+i*from_step]);
661: }
662: #endif
663: } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_UNKNOWN_TYPE,"Wrong insert option");
664: VecRestoreArrayPair(x,y,&xv,&yv);
665: return(0);
666: }
668: /* --------------------------------------------------------------------------*/
671: PetscErrorCode VecScatterCopy_SGToSG(VecScatter in,VecScatter out)
672: {
673: PetscErrorCode ierr;
674: VecScatter_Seq_General *in_to = (VecScatter_Seq_General*)in->todata,*out_to = NULL;
675: VecScatter_Seq_General *in_from = (VecScatter_Seq_General*)in->fromdata,*out_from = NULL;
678: out->ops->begin = in->ops->begin;
679: out->ops->end = in->ops->end;
680: out->ops->copy = in->ops->copy;
681: out->ops->destroy = in->ops->destroy;
682: out->ops->view = in->ops->view;
684: PetscMalloc2(1,&out_to,1,&out_from);
685: PetscMalloc2(in_to->n,&out_to->vslots,in_from->n,&out_from->vslots);
686: out_to->n = in_to->n;
687: out_to->format = in_to->format;
688: out_to->nonmatching_computed = PETSC_FALSE;
689: out_to->slots_nonmatching = 0;
690: PetscMemcpy(out_to->vslots,in_to->vslots,(out_to->n)*sizeof(PetscInt));
691: VecScatterMemcpyPlanCopy(&in_to->memcpy_plan,&out_to->memcpy_plan);
693: out_from->n = in_from->n;
694: out_from->format = in_from->format;
695: out_from->nonmatching_computed = PETSC_FALSE;
696: out_from->slots_nonmatching = 0;
697: PetscMemcpy(out_from->vslots,in_from->vslots,(out_from->n)*sizeof(PetscInt));
698: VecScatterMemcpyPlanCopy(&in_from->memcpy_plan,&out_from->memcpy_plan);
700: out->todata = (void*)out_to;
701: out->fromdata = (void*)out_from;
702: return(0);
703: }
705: PetscErrorCode VecScatterView_SGToSG(VecScatter in,PetscViewer viewer)
706: {
707: PetscErrorCode ierr;
708: VecScatter_Seq_General *in_to = (VecScatter_Seq_General*)in->todata;
709: VecScatter_Seq_General *in_from = (VecScatter_Seq_General*)in->fromdata;
710: PetscInt i;
711: PetscBool isascii;
714: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&isascii);
715: if (isascii) {
716: PetscViewerASCIIPrintf(viewer,"Sequential general scatter\n");
717: for (i=0; i<in_to->n; i++) {
718: PetscViewerASCIIPrintf(viewer,"%D to %D\n",in_from->vslots[i],in_to->vslots[i]);
719: }
720: if (in_from->memcpy_plan.optimized[0]) {
721: PetscViewerASCIIPrintf(viewer,"This general to general scatter is made of %D copies\n",in_from->memcpy_plan.copy_offsets[1]);
722: }
723: }
724: return(0);
725: }
728: PetscErrorCode VecScatterCopy_SGToSS(VecScatter in,VecScatter out)
729: {
730: PetscErrorCode ierr;
731: VecScatter_Seq_Stride *in_to = (VecScatter_Seq_Stride*)in->todata,*out_to = NULL;
732: VecScatter_Seq_General *in_from = (VecScatter_Seq_General*)in->fromdata,*out_from = NULL;
735: out->ops->begin = in->ops->begin;
736: out->ops->end = in->ops->end;
737: out->ops->copy = in->ops->copy;
738: out->ops->destroy = in->ops->destroy;
739: out->ops->view = in->ops->view;
741: PetscMalloc2(1,&out_to,1,&out_from);
742: PetscMalloc1(in_from->n,&out_from->vslots);
743: out_to->n = in_to->n;
744: out_to->format = in_to->format;
745: out_to->first = in_to->first;
746: out_to->step = in_to->step;
749: out_from->n = in_from->n;
750: out_from->format = in_from->format;
751: out_from->nonmatching_computed = PETSC_FALSE;
752: out_from->slots_nonmatching = 0;
753: PetscMemcpy(out_from->vslots,in_from->vslots,(out_from->n)*sizeof(PetscInt));
754: VecScatterMemcpyPlanCopy(&in_from->memcpy_plan,&out_from->memcpy_plan);
756: out->todata = (void*)out_to;
757: out->fromdata = (void*)out_from;
758: return(0);
759: }
761: PetscErrorCode VecScatterView_SGToSS(VecScatter in,PetscViewer viewer)
762: {
763: PetscErrorCode ierr;
764: VecScatter_Seq_Stride *in_to = (VecScatter_Seq_Stride*)in->todata;
765: VecScatter_Seq_General *in_from = (VecScatter_Seq_General*)in->fromdata;
766: PetscInt i;
767: PetscBool isascii;
770: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&isascii);
771: if (isascii) {
772: PetscViewerASCIIPrintf(viewer,"Sequential general scatter to stride\n");
773: for (i=0; i<in_to->n; i++) {
774: PetscViewerASCIIPrintf(viewer,"%D to %D\n",in_from->vslots[i],in_to->first + in_to->step*i);
775: }
776: if (in_from->memcpy_plan.optimized[0]) {
777: PetscViewerASCIIPrintf(viewer,"This general to stride1 scatter is made of %D copies\n",in_from->memcpy_plan.copy_offsets[1]);
778: }
779: }
780: return(0);
781: }
783: /* --------------------------------------------------------------------------*/
784: /*
785: Scatter: parallel to sequential vector, sequential strides for both.
786: */
787: PetscErrorCode VecScatterCopy_SSToSS(VecScatter in,VecScatter out)
788: {
789: VecScatter_Seq_Stride *in_to = (VecScatter_Seq_Stride*)in->todata,*out_to = NULL;
790: VecScatter_Seq_Stride *in_from = (VecScatter_Seq_Stride*)in->fromdata,*out_from = NULL;
791: PetscErrorCode ierr;
794: out->ops->begin = in->ops->begin;
795: out->ops->end = in->ops->end;
796: out->ops->copy = in->ops->copy;
797: out->ops->destroy = in->ops->destroy;
798: out->ops->view = in->ops->view;
800: PetscMalloc2(1,&out_to,1,&out_from);
801: out_to->n = in_to->n;
802: out_to->format = in_to->format;
803: out_to->first = in_to->first;
804: out_to->step = in_to->step;
806: out_from->n = in_from->n;
807: out_from->format = in_from->format;
808: out_from->first = in_from->first;
809: out_from->step = in_from->step;
811: out->todata = (void*)out_to;
812: out->fromdata = (void*)out_from;
813: return(0);
814: }
816: PetscErrorCode VecScatterView_SSToSS(VecScatter in,PetscViewer viewer)
817: {
818: VecScatter_Seq_Stride *in_to = (VecScatter_Seq_Stride*)in->todata;
819: VecScatter_Seq_Stride *in_from = (VecScatter_Seq_Stride*)in->fromdata;
820: PetscErrorCode ierr;
821: PetscBool isascii;
824: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&isascii);
825: if (isascii) {
826: PetscViewerASCIIPrintf(viewer,"Sequential stride count %D start %D step %D to start %D stride %D\n",in_to->n,in_to->first,in_to->step,in_from->first,in_from->step);
827: }
828: return(0);
829: }
831: /*
832: Create memcpy optimization plans based on indices of vector entries we want to scatter
834: Input Parameters:
835: + n - number of target processors
836: . starts - [n+1] for the i-th processor, its associated indices are indices[starts[i], starts[i+1])
837: . indices - [] array storing indices. Its length is starts[n]
838: - bs - block size
840: Output Parameters:
841: + plan - the memcpy plan
842: */
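/*
   Added worked example (not part of the original source), with n = 1, bs = 1
   and assuming an 8-byte PetscScalar: for

     starts  = {0,128}
     indices = {0,1,...,63, 1000,1001,...,1063}

   the scan below finds my_copies = 2 contiguous runs averaging
   64*sizeof(PetscScalar) = 512 bytes (>= 256), so the part is marked
   optimized and the plan records

     copy_starts  = {0,1000}
     copy_lengths = {512,512}       (in bytes)
     copy_offsets = {0,2}
*/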
843: PetscErrorCode VecScatterMemcpyPlanCreate_Index(PetscInt n,const PetscInt *starts,const PetscInt *indices,PetscInt bs,VecScatterMemcpyPlan *plan)
844: {
846: PetscInt i,j,k,my_copies,n_copies=0,step;
847: PetscBool strided,has_strided;
850: PetscMemzero(plan,sizeof(VecScatterMemcpyPlan));
851: plan->n = n;
852: PetscMalloc2(n,&plan->optimized,n+1,&plan->copy_offsets);
854: /* check if each remote part of the scatter is made of contiguous copies, and count the total number of copies */
855: for (i=0; i<n; i++) { /* for each target processor procs[i] */
856: my_copies = 1; /* count num. of copies for procs[i] */
857: for (j=starts[i]; j<starts[i+1]-1; j++) { /* go through indices from the first to the second to last */
858: if (indices[j]+bs != indices[j+1]) my_copies++;
859: }
860: if (bs*(starts[i+1]-starts[i])*sizeof(PetscScalar)/my_copies >= 256) { /* worth using memcpy? */
861: plan->optimized[i] = PETSC_TRUE;
862: n_copies += my_copies;
863: } else {
864: plan->optimized[i] = PETSC_FALSE;
865: }
866: }
868: /* do malloc with the recently known n_copies */
869: PetscMalloc2(n_copies,&plan->copy_starts,n_copies,&plan->copy_lengths);
871: /* analyze the n_copies copies one by one */
872: k = 0; /* k-th copy */
873: plan->copy_offsets[0] = 0;
874: for (i=0; i<n; i++) { /* for each target processor procs[i] */
875: if (plan->optimized[i]) {
876: my_copies = 1;
877: plan->copy_starts[k] = indices[starts[i]];
878: for (j=starts[i]; j<starts[i+1]-1; j++) {
879: if (indices[j]+bs != indices[j+1]) { /* reached the end of a copy (and a next copy must exist) */
880: my_copies++;
881: plan->copy_starts[k+1] = indices[j+1];
882: plan->copy_lengths[k] = sizeof(PetscScalar)*(indices[j]+bs-plan->copy_starts[k]);
883: k++;
884: }
885: }
886: /* set copy length of the last copy for this remote proc */
887: plan->copy_lengths[k] = sizeof(PetscScalar)*(indices[j]+bs-plan->copy_starts[k]);
888: k++;
889: }
891: /* set offset for next proc. When optimized[i] is false, copy_offsets[i] = copy_offsets[i+1] */
892: plan->copy_offsets[i+1] = k;
893: }
895: /* try one last optimization: if a part of the scatter is not made of memory copies, check whether it is strided */
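/*
   Added illustration (not part of the original source): indices such as
   {7,12,17,...,7+5*31} (32 entries, constant gap 5) fail the contiguity
   test above but are recognized here with stride_first = 7, stride_step = 5
   and stride_n = 32.
*/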
896: has_strided = PETSC_FALSE;
897: PetscMalloc3(n,&plan->stride_first,n,&plan->stride_step,n,&plan->stride_n);
898: for (i=0; i<n; i++) { /* for each target processor procs[i] */
899: if (!plan->optimized[i] && starts[i+1] - starts[i] >= 16) { /* few indices (<16) are not worth striding */
900: strided = PETSC_TRUE;
901: step = indices[starts[i]+1] - indices[starts[i]];
902: for (j=starts[i]; j<starts[i+1]-1; j++) {
903: if (indices[j]+step != indices[j+1]) { strided = PETSC_FALSE; break; }
904: }
905: if (strided) {
906: plan->optimized[i] = PETSC_TRUE;
907: plan->stride_first[i] = indices[starts[i]];
908: plan->stride_step[i] = step;
909: plan->stride_n[i] = starts[i+1] - starts[i];
910: has_strided = PETSC_TRUE;
911: }
912: }
913: }
914: /* if none is strided, free the arrays to save memory here and also in plan copying */
915: if (!has_strided) { PetscFree3(plan->stride_first,plan->stride_step,plan->stride_n); }
916: return(0);
917: }
919: /* --------------------------------------------------------------------------------------*/
920: /* Create a memcpy plan for a sequential general (SG) to SG scatter */
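/*
   Added worked example (not part of the original source), with bs = 1 and
   assuming an 8-byte PetscScalar: for

     to_slots   = {100,101,...,163}
     from_slots = {0,1,...,63}

   each slot array is one contiguous run, so n_copies = 1 and the average
   copy size 64*8 = 512 bytes clears the 256-byte threshold; the forward
   scatter then becomes a single 512-byte memcpy from xv+0 to yv+100, with
   same_copy_starts = PETSC_FALSE since the two start offsets differ.
*/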
921: PetscErrorCode VecScatterMemcpyPlanCreate_SGToSG(PetscInt bs,VecScatter_Seq_General *to,VecScatter_Seq_General *from)
922: {
923: PetscInt n = to->n,i,*to_slots = to->vslots,*from_slots = from->vslots;
924: PetscInt j,n_copies;
926: PetscBool same_copy_starts;
929: PetscMemzero(&to->memcpy_plan,sizeof(VecScatterMemcpyPlan));
930: PetscMemzero(&from->memcpy_plan,sizeof(VecScatterMemcpyPlan));
931: to->memcpy_plan.n = 1;
932: from->memcpy_plan.n = 1;
934: /* malloc and init the two fields to false and zero */
935: PetscCalloc2(1,&to->memcpy_plan.optimized,2,&to->memcpy_plan.copy_offsets);
936: PetscCalloc2(1,&from->memcpy_plan.optimized,2,&from->memcpy_plan.copy_offsets);
938: /* count number of copies, which runs from 1 to n */
939: n_copies = 1;
940: for (i=0; i<n-1; i++) {
941: if (to_slots[i]+bs != to_slots[i+1] || from_slots[i]+bs != from_slots[i+1]) n_copies++;
942: }
944: /* if average copy size >= 256 bytes, use memcpy instead of load/store */
945: if (bs*n*sizeof(PetscScalar)/n_copies >= 256) {
946: PetscMalloc2(n_copies,&to->memcpy_plan.copy_starts,n_copies,&to->memcpy_plan.copy_lengths);
947: PetscMalloc2(n_copies,&from->memcpy_plan.copy_starts,n_copies,&from->memcpy_plan.copy_lengths);
949: /* set up copy_starts[] & copy_lengths[] of to and from */
950: to->memcpy_plan.copy_starts[0] = to_slots[0];
951: from->memcpy_plan.copy_starts[0] = from_slots[0];
953: if (n_copies != 1) { /* one copy is trivial and we can save some work */
954: j = 0; /* j-th copy */
955: for (i=0; i<n-1; i++) {
956: if (to_slots[i]+bs != to_slots[i+1] || from_slots[i]+bs != from_slots[i+1]) {
957: to->memcpy_plan.copy_lengths[j] = sizeof(PetscScalar)*(to_slots[i]+bs-to->memcpy_plan.copy_starts[j]);
958: from->memcpy_plan.copy_lengths[j] = sizeof(PetscScalar)*(from_slots[i]+bs-from->memcpy_plan.copy_starts[j]);
959: to->memcpy_plan.copy_starts[j+1] = to_slots[i+1];
960: from->memcpy_plan.copy_starts[j+1] = from_slots[i+1];
961: j++;
962: }
963: }
964: }
966: /* set up copy_lengths[] of the last copy */
967: to->memcpy_plan.copy_lengths[n_copies-1] = sizeof(PetscScalar)*(to_slots[n-1]+bs-to->memcpy_plan.copy_starts[n_copies-1]);
968: from->memcpy_plan.copy_lengths[n_copies-1] = sizeof(PetscScalar)*(from_slots[n-1]+bs-from->memcpy_plan.copy_starts[n_copies-1]);
970: /* check if to and from have the same copy_starts[] values */
971: same_copy_starts = PETSC_TRUE;
972: for (i=0; i<n_copies; i++) {
973: if (to->memcpy_plan.copy_starts[i] != from->memcpy_plan.copy_starts[i]) { same_copy_starts = PETSC_FALSE; break; }
974: }
976: to->memcpy_plan.optimized[0] = PETSC_TRUE;
977: from->memcpy_plan.optimized[0] = PETSC_TRUE;
978: to->memcpy_plan.copy_offsets[1] = n_copies;
979: from->memcpy_plan.copy_offsets[1] = n_copies;
980: to->memcpy_plan.same_copy_starts = same_copy_starts;
981: from->memcpy_plan.same_copy_starts = same_copy_starts;
982: }
984: /* we do not do stride optimization for this kind of scatter since the chance is rare. All related fields are zeroed out */
985: return(0);
986: }
988: /* Copy the memcpy plan from in to out */
989: PetscErrorCode VecScatterMemcpyPlanCopy(const VecScatterMemcpyPlan *in,VecScatterMemcpyPlan *out)
990: {
994: PetscMemzero(out,sizeof(VecScatterMemcpyPlan));
995: out->n = in->n;
996: PetscMalloc2(in->n,&out->optimized,in->n+1,&out->copy_offsets);
997: PetscMalloc2(in->copy_offsets[in->n],&out->copy_starts,in->copy_offsets[in->n],&out->copy_lengths);
998: PetscMemcpy(out->optimized,in->optimized,sizeof(PetscBool)*in->n);
999: PetscMemcpy(out->copy_offsets,in->copy_offsets,sizeof(PetscInt)*(in->n+1));
1000: PetscMemcpy(out->copy_starts,in->copy_starts,sizeof(PetscInt)*in->copy_offsets[in->n]);
1001: PetscMemcpy(out->copy_lengths,in->copy_lengths,sizeof(PetscInt)*in->copy_offsets[in->n]);
1002: if (in->stride_first) {
1003: PetscMalloc3(in->n,&out->stride_first,in->n,&out->stride_step,in->n,&out->stride_n);
1004: PetscMemcpy(out->stride_first,in->stride_first,sizeof(PetscInt)*in->n);
1005: PetscMemcpy(out->stride_step,in->stride_step,sizeof(PetscInt)*in->n);
1006: PetscMemcpy(out->stride_n,in->stride_n,sizeof(PetscInt)*in->n);
1007: }
1008: return(0);
1009: }
1011: /* Destroy the vecscatter memcpy plan */
1012: PetscErrorCode VecScatterMemcpyPlanDestroy(VecScatterMemcpyPlan *plan)
1013: {
1017: PetscFree2(plan->optimized,plan->copy_offsets);
1018: PetscFree2(plan->copy_starts,plan->copy_lengths);
1019: PetscFree3(plan->stride_first,plan->stride_step,plan->stride_n);
1020: return(0);
1021: }
1023: #if defined(PETSC_HAVE_MPI_WIN_CREATE_FEATURE)
1024: extern PetscErrorCode VecScatterCreateLocal_PtoS_MPI3(PetscInt,const PetscInt*,PetscInt,const PetscInt*,Vec,Vec,PetscInt,VecScatter);
1025: extern PetscErrorCode VecScatterCreateLocal_PtoP_MPI3(PetscInt,const PetscInt*,PetscInt,const PetscInt*,Vec,Vec,PetscInt,VecScatter);
1026: extern PetscErrorCode VecScatterCreateLocal_StoP_MPI3(PetscInt,const PetscInt*,PetscInt,const PetscInt*,Vec,Vec,PetscInt,VecScatter);
1027: #endif
1029: extern PetscErrorCode VecScatterCreateLocal_PtoS_MPI1(PetscInt,const PetscInt*,PetscInt,const PetscInt*,Vec,Vec,PetscInt,VecScatter);
1030: extern PetscErrorCode VecScatterCreateLocal_PtoP_MPI1(PetscInt,const PetscInt*,PetscInt,const PetscInt*,Vec,Vec,PetscInt,VecScatter);
1031: extern PetscErrorCode VecScatterCreateLocal_StoP_MPI1(PetscInt,const PetscInt*,PetscInt,const PetscInt*,Vec,Vec,PetscInt,VecScatter);
1033: static PetscErrorCode GetInputISType_private(VecScatter,PetscInt,PetscInt,PetscInt*,IS*,PetscInt*,IS*);
1035: /* =======================================================================*/
1036: #define VEC_SEQ_ID 0
1037: #define VEC_MPI_ID 1
1038: #define IS_GENERAL_ID 0
1039: #define IS_STRIDE_ID 1
1040: #define IS_BLOCK_ID 2
1042: /*
1043: Blocksizes we have optimized scatters for
1044: */
1046: #define VecScatterOptimizedBS(mbs) (2 <= mbs)
1049: PetscErrorCode VecScatterCreateEmpty(MPI_Comm comm,VecScatter *newctx)
1050: {
1051: VecScatter ctx;
1055: PetscHeaderCreate(ctx,VEC_SCATTER_CLASSID,"VecScatter","VecScatter","Vec",comm,VecScatterDestroy,VecScatterView);
1056: ctx->inuse = PETSC_FALSE;
1057: ctx->beginandendtogether = PETSC_FALSE;
1058: PetscOptionsGetBool(NULL,NULL,"-vecscatter_merge",&ctx->beginandendtogether,NULL);
1059: if (ctx->beginandendtogether) {
1060: PetscInfo(ctx,"Using combined (merged) vector scatter begin and end\n");
1061: }
1062: *newctx = ctx;
1063: return(0);
1064: }
1066: /* -------------------------------------------------- */
1067: PetscErrorCode VecScatterCreate_Seq(VecScatter ctx)
1068: {
1069: PetscErrorCode ierr;
1070: PetscInt ix_type=-1,iy_type=-1;
1071: IS tix=NULL,tiy=NULL,ix=ctx->from_is,iy=ctx->to_is;
1072: Vec xin=ctx->from_v;
1075: PetscObjectChangeTypeName((PetscObject)ctx,VECSCATTERSEQ);
1076: GetInputISType_private(ctx,VEC_SEQ_ID,VEC_SEQ_ID,&ix_type,&tix,&iy_type,&tiy);
1077: if (tix) ix = tix;
1078: if (tiy) iy = tiy;
1080: if (ix_type == IS_GENERAL_ID && iy_type == IS_GENERAL_ID) {
1081: PetscInt nx,ny;
1082: const PetscInt *idx,*idy;
1083: VecScatter_Seq_General *to = NULL,*from = NULL;
1085: ISGetLocalSize(ix,&nx);
1086: ISGetLocalSize(iy,&ny);
1087: if (nx != ny) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local scatter sizes don't match");
1088: ISGetIndices(ix,&idx);
1089: ISGetIndices(iy,&idy);
1090: PetscMalloc2(1,&to,1,&from);
1091: PetscMalloc2(nx,&to->vslots,nx,&from->vslots);
1092: to->n = nx;
1093: #if defined(PETSC_USE_DEBUG)
1094: VecScatterCheckIndices_Private(ctx->to_n,ny,idy);
1095: #endif
1096: PetscMemcpy(to->vslots,idy,nx*sizeof(PetscInt));
1097: from->n = nx;
1098: #if defined(PETSC_USE_DEBUG)
1099: VecScatterCheckIndices_Private(ctx->from_n,nx,idx);
1100: #endif
1101: PetscMemcpy(from->vslots,idx,nx*sizeof(PetscInt));
1102: to->format = VEC_SCATTER_SEQ_GENERAL;
1103: from->format = VEC_SCATTER_SEQ_GENERAL;
1104: ctx->todata = (void*)to;
1105: ctx->fromdata = (void*)from;
1106: VecScatterMemcpyPlanCreate_SGToSG(1,to,from);
1107: ctx->ops->begin = VecScatterBegin_SGToSG;
1108: ctx->ops->end = 0;
1109: ctx->ops->destroy = VecScatterDestroy_SGToSG;
1110: ctx->ops->copy = VecScatterCopy_SGToSG;
1111: ctx->ops->view = VecScatterView_SGToSG;
1112: PetscInfo(xin,"Special case: sequential vector general scatter\n");
1113: goto functionend;
1114: } else if (ix_type == IS_STRIDE_ID && iy_type == IS_STRIDE_ID) {
1115: PetscInt nx,ny,to_first,to_step,from_first,from_step;
1116: VecScatter_Seq_Stride *from8 = NULL,*to8 = NULL;
1118: ISGetLocalSize(ix,&nx);
1119: ISGetLocalSize(iy,&ny);
1120: if (nx != ny) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local scatter sizes don't match");
1121: ISStrideGetInfo(iy,&to_first,&to_step);
1122: ISStrideGetInfo(ix,&from_first,&from_step);
1123: PetscMalloc2(1,&to8,1,&from8);
1124: to8->n = nx;
1125: to8->first = to_first;
1126: to8->step = to_step;
1127: from8->n = nx;
1128: from8->first = from_first;
1129: from8->step = from_step;
1130: to8->format = VEC_SCATTER_SEQ_STRIDE;
1131: from8->format = VEC_SCATTER_SEQ_STRIDE;
1132: ctx->todata = (void*)to8;
1133: ctx->fromdata = (void*)from8;
1134: ctx->ops->begin = VecScatterBegin_SSToSS;
1135: ctx->ops->end = 0;
1136: ctx->ops->destroy = VecScatterDestroy_SSToSS;
1137: ctx->ops->copy = VecScatterCopy_SSToSS;
1138: ctx->ops->view = VecScatterView_SSToSS;
1139: PetscInfo(xin,"Special case: sequential vector stride to stride\n");
1140: goto functionend;
1141: } else if (ix_type == IS_GENERAL_ID && iy_type == IS_STRIDE_ID) {
1142: PetscInt nx,ny,first,step;
1143: const PetscInt *idx;
1144: VecScatter_Seq_General *from9 = NULL;
1145: VecScatter_Seq_Stride *to9 = NULL;
1147: ISGetLocalSize(ix,&nx);
1148: ISGetIndices(ix,&idx);
1149: ISGetLocalSize(iy,&ny);
1150: ISStrideGetInfo(iy,&first,&step);
1151: if (nx != ny) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local scatter sizes don't match");
1152: PetscMalloc2(1,&to9,1,&from9);
1153: PetscMemzero(&from9->memcpy_plan,sizeof(VecScatterMemcpyPlan));
1154: PetscMalloc1(nx,&from9->vslots);
1155: to9->n = nx;
1156: to9->first = first;
1157: to9->step = step;
1158: from9->n = nx;
1159: #if defined(PETSC_USE_DEBUG)
1160: VecScatterCheckIndices_Private(ctx->from_n,nx,idx);
1161: #endif
1162: PetscMemcpy(from9->vslots,idx,nx*sizeof(PetscInt));
1163: ctx->todata = (void*)to9; ctx->fromdata = (void*)from9;
1164: if (step == 1) {
1165: PetscInt tmp[2];
1166: tmp[0] = 0; tmp[1] = nx;
1167: VecScatterMemcpyPlanCreate_Index(1,tmp,from9->vslots,1,&from9->memcpy_plan);
1168: ctx->ops->begin = VecScatterBegin_SGToSS_Stride1;
1169: } else {
1170: ctx->ops->begin = VecScatterBegin_SGToSS;
1171: }
1172: ctx->ops->destroy = VecScatterDestroy_SGToSS;
1173: ctx->ops->end = 0;
1174: ctx->ops->copy = VecScatterCopy_SGToSS;
1175: ctx->ops->view = VecScatterView_SGToSS;
1176: to9->format = VEC_SCATTER_SEQ_STRIDE;
1177: from9->format = VEC_SCATTER_SEQ_GENERAL;
1178: PetscInfo(xin,"Special case: sequential vector general to stride\n");
1179: goto functionend;
1180: } else if (ix_type == IS_STRIDE_ID && iy_type == IS_GENERAL_ID) {
1181: PetscInt nx,ny,first,step;
1182: const PetscInt *idy;
1183: VecScatter_Seq_General *to10 = NULL;
1184: VecScatter_Seq_Stride *from10 = NULL;
1186: ISGetLocalSize(ix,&nx);
1187: ISGetIndices(iy,&idy);
1188: ISGetLocalSize(iy,&ny);
1189: ISStrideGetInfo(ix,&first,&step);
1190: if (nx != ny) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local scatter sizes don't match");
1191: PetscMalloc2(1,&to10,1,&from10);
1192: PetscMemzero(&to10->memcpy_plan,sizeof(VecScatterMemcpyPlan));
1193: PetscMalloc1(nx,&to10->vslots);
1194: from10->n = nx;
1195: from10->first = first;
1196: from10->step = step;
1197: to10->n = nx;
1198: #if defined(PETSC_USE_DEBUG)
1199: VecScatterCheckIndices_Private(ctx->to_n,ny,idy);
1200: #endif
1201: PetscMemcpy(to10->vslots,idy,nx*sizeof(PetscInt));
1202: ctx->todata = (void*)to10;
1203: ctx->fromdata = (void*)from10;
1204: if (step == 1) {
1205: PetscInt tmp[2];
1206: tmp[0] = 0; tmp[1] = nx;
1207: VecScatterMemcpyPlanCreate_Index(1,tmp,to10->vslots,1,&to10->memcpy_plan);
1208: ctx->ops->begin = VecScatterBegin_SSToSG_Stride1;
1209: } else {
1210: ctx->ops->begin = VecScatterBegin_SSToSG;
1211: }
1212: ctx->ops->destroy = VecScatterDestroy_SSToSG;
1213: ctx->ops->end = 0;
1214: ctx->ops->copy = 0;
1215: ctx->ops->view = VecScatterView_SSToSG;
1216: to10->format = VEC_SCATTER_SEQ_GENERAL;
1217: from10->format = VEC_SCATTER_SEQ_STRIDE;
1218: PetscInfo(xin,"Special case: sequential vector stride to general\n");
1219: goto functionend;
1220: } else {
1221: PetscInt nx,ny;
1222: const PetscInt *idx,*idy;
1223: VecScatter_Seq_General *to11 = NULL,*from11 = NULL;
1224: PetscBool idnx,idny;
1226: ISGetLocalSize(ix,&nx);
1227: ISGetLocalSize(iy,&ny);
1228: if (nx != ny) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local scatter sizes don't match, in %D out %D",nx,ny);
1230: ISIdentity(ix,&idnx);
1231: ISIdentity(iy,&idny);
1232: if (idnx && idny) {
1233: VecScatter_Seq_Stride *to13 = NULL,*from13 = NULL;
1234: PetscMalloc2(1,&to13,1,&from13);
1235: to13->n = nx;
1236: to13->first = 0;
1237: to13->step = 1;
1238: from13->n = nx;
1239: from13->first = 0;
1240: from13->step = 1;
1241: to13->format = VEC_SCATTER_SEQ_STRIDE;
1242: from13->format = VEC_SCATTER_SEQ_STRIDE;
1243: ctx->todata = (void*)to13;
1244: ctx->fromdata = (void*)from13;
1245: ctx->ops->begin = VecScatterBegin_SSToSS;
1246: ctx->ops->end = 0;
1247: ctx->ops->destroy = VecScatterDestroy_SSToSS;
1248: ctx->ops->copy = VecScatterCopy_SSToSS;
1249: ctx->ops->view = VecScatterView_SSToSS;
1250: PetscInfo(xin,"Special case: sequential copy\n");
1251: goto functionend;
1252: }
1254: ISGetIndices(iy,&idy);
1255: ISGetIndices(ix,&idx);
1256: PetscMalloc2(1,&to11,1,&from11);
1257: PetscMalloc2(nx,&to11->vslots,nx,&from11->vslots);
1258: to11->n = nx;
1259: #if defined(PETSC_USE_DEBUG)
1260: VecScatterCheckIndices_Private(ctx->to_n,ny,idy);
1261: #endif
1262: PetscMemcpy(to11->vslots,idy,nx*sizeof(PetscInt));
1263: from11->n = nx;
1264: #if defined(PETSC_USE_DEBUG)
1265: VecScatterCheckIndices_Private(ctx->from_n,nx,idx);
1266: #endif
1267: PetscMemcpy(from11->vslots,idx,nx*sizeof(PetscInt));
1268: to11->format = VEC_SCATTER_SEQ_GENERAL;
1269: from11->format = VEC_SCATTER_SEQ_GENERAL;
1270: ctx->todata = (void*)to11;
1271: ctx->fromdata = (void*)from11;
1272: ctx->ops->begin = VecScatterBegin_SGToSG;
1273: ctx->ops->end = 0;
1274: ctx->ops->destroy = VecScatterDestroy_SGToSG;
1275: ctx->ops->copy = VecScatterCopy_SGToSG;
1276: ctx->ops->view = VecScatterView_SGToSG;
1277: VecScatterMemcpyPlanCreate_SGToSG(1,to11,from11);
1278: ISRestoreIndices(ix,&idx);
1279: ISRestoreIndices(iy,&idy);
1280: PetscInfo(xin,"Sequential vector scatter with block indices\n");
1281: goto functionend;
1282: }
1283: functionend:
1284: ISDestroy(&tix);
1285: ISDestroy(&tiy);
1286: VecScatterViewFromOptions(ctx,NULL,"-vecscatter_view");
1287: return(0);
1288: }
1290: static PetscErrorCode VecScatterCreate_PtoS(VecScatter ctx)
1291: {
1292: PetscErrorCode ierr;
1293: PetscMPIInt size;
1294: PetscInt ix_type=-1,iy_type=-1,*range;
1295: MPI_Comm comm;
1296: IS tix=NULL,tiy=NULL,ix=ctx->from_is,iy=ctx->to_is;
1297: Vec xin=ctx->from_v,yin=ctx->to_v;
1298: VecScatterType type;
1299: PetscBool vec_mpi1_flg;
1300: PetscBool totalv,ixblock,iyblock,iystride,islocal,cando;
1303: PetscObjectGetComm((PetscObject)ctx,&comm);
1304: GetInputISType_private(ctx,VEC_MPI_ID,VEC_SEQ_ID,&ix_type,&tix,&iy_type,&tiy);
1305: if (tix) ix = tix;
1306: if (tiy) iy = tiy;
1308: VecScatterGetType(ctx,&type);
1309: PetscStrcmp(type,"mpi1",&vec_mpi1_flg);
1311: islocal = PETSC_FALSE;
1312: /* special case extracting (subset of) local portion */
1313: if (ix_type == IS_STRIDE_ID && iy_type == IS_STRIDE_ID) {
1314: /* Case (2-a) */
1315: PetscInt nx,ny,to_first,to_step,from_first,from_step;
1316: PetscInt start,end,min,max;
1317: VecScatter_Seq_Stride *from12 = NULL,*to12 = NULL;
1319: VecGetOwnershipRange(xin,&start,&end);
1320: ISGetLocalSize(ix,&nx);
1321: ISStrideGetInfo(ix,&from_first,&from_step);
1322: ISGetLocalSize(iy,&ny);
1323: ISStrideGetInfo(iy,&to_first,&to_step);
1324: if (nx != ny) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local scatter sizes don't match");
1325: ISGetMinMax(ix,&min,&max);
1326: if (min >= start && max < end) islocal = PETSC_TRUE;
1327: else islocal = PETSC_FALSE;
1328: /* cannot use MPIU_Allreduce() since this call matches with the MPI_Allreduce() in the else statement below */
1329: MPI_Allreduce(&islocal,&cando,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)xin));
1330: if (cando) {
1331: PetscMalloc2(1,&to12,1,&from12);
1332: to12->n = nx;
1333: to12->first = to_first;
1334: to12->step = to_step;
1335: from12->n = nx;
1336: from12->first = from_first-start;
1337: from12->step = from_step;
1338: to12->format = VEC_SCATTER_SEQ_STRIDE;
1339: from12->format = VEC_SCATTER_SEQ_STRIDE;
1340: ctx->todata = (void*)to12;
1341: ctx->fromdata = (void*)from12;
1342: ctx->ops->begin = VecScatterBegin_SSToSS;
1343: ctx->ops->end = 0;
1344: ctx->ops->destroy = VecScatterDestroy_SSToSS;
1345: ctx->ops->copy = VecScatterCopy_SSToSS;
1346: ctx->ops->view = VecScatterView_SSToSS;
1347: PetscInfo(xin,"Special case: processors only getting local values\n");
1348: goto functionend;
1349: }
1350: } else {
1351: MPI_Allreduce(&islocal,&cando,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)xin));
1352: }
1354: /* test for special case of all processors getting entire vector */
1355: /* contains check that PetscMPIInt can handle the sizes needed */
1356: totalv = PETSC_FALSE;
1357: if (ix_type == IS_STRIDE_ID && iy_type == IS_STRIDE_ID) {
1358: /* Case (2-b) */
1359: PetscInt i,nx,ny,to_first,to_step,from_first,from_step,N;
1360: PetscMPIInt *count = NULL,*displx;
1361: VecScatter_MPI_ToAll *sto = NULL;
1363: ISGetLocalSize(ix,&nx);
1364: ISStrideGetInfo(ix,&from_first,&from_step);
1365: ISGetLocalSize(iy,&ny);
1366: ISStrideGetInfo(iy,&to_first,&to_step);
1367: if (nx != ny) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local scatter sizes don't match");
1368: VecGetSize(xin,&N);
1369: if (nx != N) totalv = PETSC_FALSE;
1370: else if (from_first == 0 && from_step == 1 && from_first == to_first && from_step == to_step) totalv = PETSC_TRUE;
1371: else totalv = PETSC_FALSE;
1372: /* cannot use MPIU_Allreduce() since this call matches with the MPI_Allreduce() in the else statement below */
1373: MPI_Allreduce(&totalv,&cando,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)xin));
1375: #if defined(PETSC_USE_64BIT_INDICES)
1376: if (cando && (yin->map->N < PETSC_MPI_INT_MAX)) {
1377: #else
1378: if (cando) {
1379: #endif
1380: MPI_Comm_size(PetscObjectComm((PetscObject)ctx),&size);
1381: PetscMalloc3(1,&sto,size,&count,size,&displx);
1382: range = xin->map->range;
1383: for (i=0; i<size; i++) {
1384: PetscMPIIntCast(range[i+1] - range[i],count+i);
1385: PetscMPIIntCast(range[i],displx+i);
1386: }
1387: sto->count = count;
1388: sto->displx = displx;
1389: sto->work1 = 0;
1390: sto->work2 = 0;
1391: sto->format = VEC_SCATTER_MPI_TOALL;
1392: ctx->todata = (void*)sto;
1393: ctx->fromdata = 0;
1394: ctx->ops->begin = VecScatterBegin_MPI_ToAll;
1395: ctx->ops->end = 0;
1396: ctx->ops->destroy = VecScatterDestroy_MPI_ToAll;
1397: ctx->ops->copy = VecScatterCopy_MPI_ToAll;
1398: ctx->ops->view = VecScatterView_MPI_ToAll;
1399: PetscInfo(xin,"Special case: all processors get entire parallel vector\n");
1400: goto functionend;
1401: }
1402: } else {
1403: MPI_Allreduce(&totalv,&cando,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)xin));
1404: }
1406: /* test for special case of processor 0 getting entire vector */
1407: /* contains check that PetscMPIInt can handle the sizes needed */
1408: totalv = PETSC_FALSE;
1409: if (ix_type == IS_STRIDE_ID && iy_type == IS_STRIDE_ID) {
1410: /* Case (2-c) */
1411: PetscInt i,nx,ny,to_first,to_step,from_first,from_step,N;
1412: PetscMPIInt rank,*count = NULL,*displx;
1413: VecScatter_MPI_ToAll *sto = NULL;
1415: PetscObjectGetComm((PetscObject)xin,&comm);
1416: MPI_Comm_rank(comm,&rank);
1417: ISGetLocalSize(ix,&nx);
1418: ISStrideGetInfo(ix,&from_first,&from_step);
1419: ISGetLocalSize(iy,&ny);
1420: ISStrideGetInfo(iy,&to_first,&to_step);
1421: if (nx != ny) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local scatter sizes don't match");
1422: if (!rank) {
1423: VecGetSize(xin,&N);
1424: if (nx != N) totalv = PETSC_FALSE;
1425: else if (from_first == 0 && from_step == 1 &&
1426: from_first == to_first && from_step == to_step) totalv = PETSC_TRUE;
1427: else totalv = PETSC_FALSE;
1428: } else {
1429: if (!nx) totalv = PETSC_TRUE;
1430: else totalv = PETSC_FALSE;
1431: }
1432: /* cannot use MPIU_Allreduce() since this call matches with the MPI_Allreduce() in the else statement below */
1433: MPI_Allreduce(&totalv,&cando,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)xin));
1435: #if defined(PETSC_USE_64BIT_INDICES)
1436: if (cando && (yin->map->N < PETSC_MPI_INT_MAX)) {
1437: #else
1438: if (cando) {
1439: #endif
1440: MPI_Comm_size(PetscObjectComm((PetscObject)ctx),&size);
1441: PetscMalloc3(1,&sto,size,&count,size,&displx);
1442: range = xin->map->range;
1443: for (i=0; i<size; i++) {
1444: PetscMPIIntCast(range[i+1] - range[i],count+i);
1445: PetscMPIIntCast(range[i],displx+i);
1446: }
1447: sto->count = count;
1448: sto->displx = displx;
1449: sto->work1 = 0;
1450: sto->work2 = 0;
1451: sto->format = VEC_SCATTER_MPI_TOONE;
1452: ctx->todata = (void*)sto;
1453: ctx->fromdata = 0;
1454: ctx->ops->begin = VecScatterBegin_MPI_ToOne;
1455: ctx->ops->end = 0;
1456: ctx->ops->destroy = VecScatterDestroy_MPI_ToAll;
1457: ctx->ops->copy = VecScatterCopy_MPI_ToAll;
1458: ctx->ops->view = VecScatterView_MPI_ToAll;
1459: PetscInfo(xin,"Special case: processor zero gets entire parallel vector, rest get none\n");
1460: goto functionend;
1461: }
1462: } else {
1463: MPI_Allreduce(&totalv,&cando,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)xin));
1464: }
1466: /* Case 2-d */
1467: PetscObjectTypeCompare((PetscObject)ix,ISBLOCK,&ixblock);
1468: PetscObjectTypeCompare((PetscObject)iy,ISBLOCK,&iyblock);
1469: PetscObjectTypeCompare((PetscObject)iy,ISSTRIDE,&iystride);
1470: if (ixblock) {
1471: /* special case block to block */
1472: if (iyblock) {
1473: PetscInt nx,ny,bsx,bsy;
1474: const PetscInt *idx,*idy;
1475: ISGetBlockSize(iy,&bsy);
1476: ISGetBlockSize(ix,&bsx);
1477: if (bsx == bsy && VecScatterOptimizedBS(bsx)) {
1478: ISBlockGetLocalSize(ix,&nx);
1479: ISBlockGetIndices(ix,&idx);
1480: ISBlockGetLocalSize(iy,&ny);
1481: ISBlockGetIndices(iy,&idy);
1482: if (nx != ny) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local scatter sizes don't match");
1483: #if defined(PETSC_HAVE_MPI_WIN_CREATE_FEATURE)
1484: if (vec_mpi1_flg) {
1485: VecScatterCreateLocal_PtoS_MPI1(nx,idx,ny,idy,xin,yin,bsx,ctx);
1486: } else {
1487: VecScatterCreateLocal_PtoS_MPI3(nx,idx,ny,idy,xin,yin,bsx,ctx);
1488: }
1489: #else
1490: VecScatterCreateLocal_PtoS_MPI1(nx,idx,ny,idy,xin,yin,bsx,ctx);
1491: #endif
1492: ISBlockRestoreIndices(ix,&idx);
1493: ISBlockRestoreIndices(iy,&idy);
1494: PetscInfo(xin,"Special case: blocked indices\n");
1495: goto functionend;
1496: }
1497: /* special case block to stride */
1498: } else if (iystride) {
1499: /* Case (2-e) */
1500: PetscInt ystart,ystride,ysize,bsx;
1501: ISStrideGetInfo(iy,&ystart,&ystride);
1502: ISGetLocalSize(iy,&ysize);
1503: ISGetBlockSize(ix,&bsx);
1504: /* see if stride index set is equivalent to block index set */
1505: if (VecScatterOptimizedBS(bsx) && ((ystart % bsx) == 0) && (ystride == 1) && ((ysize % bsx) == 0)) {
1506: PetscInt nx,il,*idy;
1507: const PetscInt *idx;
1508: ISBlockGetLocalSize(ix,&nx);
1509: ISBlockGetIndices(ix,&idx);
1510: if (ysize != bsx*nx) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local scatter sizes don't match");
1511: PetscMalloc1(nx,&idy);
1512: if (nx) {
1513: idy[0] = ystart/bsx;
1514: for (il=1; il<nx; il++) idy[il] = idy[il-1] + 1;
1515: }
1516: #if defined(PETSC_HAVE_MPI_WIN_CREATE_FEATURE)
1517: if (vec_mpi1_flg) {
1518: VecScatterCreateLocal_PtoS_MPI1(nx,idx,nx,idy,xin,yin,bsx,ctx);
1519: } else {
1520: VecScatterCreateLocal_PtoS_MPI3(nx,idx,nx,idy,xin,yin,bsx,ctx);
1521: }
1522: #else
1523: VecScatterCreateLocal_PtoS_MPI1(nx,idx,nx,idy,xin,yin,bsx,ctx);
1524: #endif
1525: PetscFree(idy);
1526: ISBlockRestoreIndices(ix,&idx);
1527: PetscInfo(xin,"Special case: blocked indices to stride\n");
1528: goto functionend;
1529: }
1530: }
1531: }
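/*
   Editor's sketch (not part of the library source): the blocked fast paths above
   are reached when the caller builds its index sets with ISCreateBlock(); names
   and sizes below are illustrative only.

     IS       ix;
     PetscInt blocks[3] = {0,3,5};      block indices: entries 0..3, 12..15, 20..23 for bs = 4
     ISCreateBlock(PETSC_COMM_WORLD,4,3,blocks,PETSC_COPY_VALUES,&ix);
     ...
     ISDestroy(&ix);
*/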
1533: /* leftover general case (2-f) */
1534: {
1535: PetscInt nx,ny;
1536: const PetscInt *idx,*idy;
1537: ISGetLocalSize(ix,&nx);
1538: ISGetIndices(ix,&idx);
1539: ISGetLocalSize(iy,&ny);
1540: ISGetIndices(iy,&idy);
1541: if (nx != ny) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local scatter sizes don't match (%D %D)",nx,ny);
1542: #if defined(PETSC_HAVE_MPI_WIN_CREATE_FEATURE)
1543: if (vec_mpi1_flg) {
1544: VecScatterCreateLocal_PtoS_MPI1(nx,idx,ny,idy,xin,yin,1,ctx);
1545: } else {
1546: VecScatterCreateLocal_PtoS_MPI3(nx,idx,ny,idy,xin,yin,1,ctx);
1547: }
1548: #else
1549: VecScatterCreateLocal_PtoS_MPI1(nx,idx,ny,idy,xin,yin,1,ctx);
1550: #endif
1551: ISRestoreIndices(ix,&idx);
1552: ISRestoreIndices(iy,&idy);
1553: PetscInfo(xin,"General case: MPI to Seq\n");
1554: goto functionend;
1555: }
1556: functionend:
1557: ISDestroy(&tix);
1558: ISDestroy(&tiy);
1559: VecScatterViewFromOptions(ctx,NULL,"-vecscatter_view");
1560: return(0);
1561: }
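/*
   Editor's sketch (not part of the library source): the parallel-to-sequential
   paths above are reached through VecScatterCreate() with an MPI source vector
   and a sequential destination; names below are illustrative only.

     Vec        x;               parallel source
     Vec        y;               sequential destination, one per process
     IS         ix,iy;           ix in the global numbering of x, iy in the local numbering of y
     VecScatter scat;
     VecScatterCreate(x,ix,y,iy,&scat);
     VecScatterBegin(scat,x,y,INSERT_VALUES,SCATTER_FORWARD);
     VecScatterEnd(scat,x,y,INSERT_VALUES,SCATTER_FORWARD);
     VecScatterDestroy(&scat);
*/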
1563: static PetscErrorCode VecScatterCreate_StoP(VecScatter ctx)
1564: {
1565: PetscErrorCode ierr;
1566: PetscInt ix_type=-1,iy_type=-1;
1567: IS tix=NULL,tiy=NULL,ix=ctx->from_is,iy=ctx->to_is;
1568: Vec xin=ctx->from_v,yin=ctx->to_v;
1569: VecScatterType type;
1570: PetscBool vscat_mpi1;
1571: PetscBool islocal,cando;
1574: GetInputISType_private(ctx,VEC_MPI_ID,VEC_SEQ_ID,&ix_type,&tix,&iy_type,&tiy);
1575: if (tix) ix = tix;
1576: if (tiy) iy = tiy;
1578: VecScatterGetType(ctx,&type);
1579: PetscStrcmp(type,"mpi1",&vscat_mpi1);
1581: /* special case: local copy portion */
1582: islocal = PETSC_FALSE;
1583: if (ix_type == IS_STRIDE_ID && iy_type == IS_STRIDE_ID) {
1584: PetscInt nx,ny,to_first,to_step,from_step,start,end,from_first,min,max;
1585: VecScatter_Seq_Stride *from = NULL,*to = NULL;
1587: VecGetOwnershipRange(yin,&start,&end);
1588: ISGetLocalSize(ix,&nx);
1589: ISStrideGetInfo(ix,&from_first,&from_step);
1590: ISGetLocalSize(iy,&ny);
1591: ISStrideGetInfo(iy,&to_first,&to_step);
1592: if (nx != ny) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local scatter sizes don't match");
1593: ISGetMinMax(iy,&min,&max);
1594: if (min >= start && max < end) islocal = PETSC_TRUE;
1595: else islocal = PETSC_FALSE;
1596: /* cannot use MPIU_Allreduce() since this call matches the MPI_Allreduce() in the else branch below */
1597: MPI_Allreduce(&islocal,&cando,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)yin));
1598: if (cando) {
1599: PetscMalloc2(1,&to,1,&from);
1600: to->n = nx;
1601: to->first = to_first-start;
1602: to->step = to_step;
1603: from->n = nx;
1604: from->first = from_first;
1605: from->step = from_step;
1606: to->format = VEC_SCATTER_SEQ_STRIDE;
1607: from->format = VEC_SCATTER_SEQ_STRIDE;
1608: ctx->todata = (void*)to;
1609: ctx->fromdata = (void*)from;
1610: ctx->ops->begin = VecScatterBegin_SSToSS;
1611: ctx->ops->end = 0;
1612: ctx->ops->destroy = VecScatterDestroy_SSToSS;
1613: ctx->ops->copy = VecScatterCopy_SSToSS;
1614: ctx->ops->view = VecScatterView_SSToSS;
1615: PetscInfo(xin,"Special case: sequential stride to MPI stride\n");
1616: goto functionend;
1617: }
1618: } else {
1619: MPI_Allreduce(&islocal,&cando,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)yin));
1620: }
1621: /* special case block to stride */
1622: if (ix_type == IS_BLOCK_ID && iy_type == IS_STRIDE_ID) {
1623: PetscInt ystart,ystride,ysize,bsx;
1624: ISStrideGetInfo(iy,&ystart,&ystride);
1625: ISGetLocalSize(iy,&ysize);
1626: ISGetBlockSize(ix,&bsx);
1627: /* see if stride index set is equivalent to block index set */
1628: if (VecScatterOptimizedBS(bsx) && ((ystart % bsx) == 0) && (ystride == 1) && ((ysize % bsx) == 0)) {
1629: PetscInt nx,il,*idy;
1630: const PetscInt *idx;
1631: ISBlockGetLocalSize(ix,&nx);
1632: ISBlockGetIndices(ix,&idx);
1633: if (ysize != bsx*nx) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local scatter sizes don't match");
1634: PetscMalloc1(nx,&idy);
1635: if (nx) {
1636: idy[0] = ystart/bsx;
1637: for (il=1; il<nx; il++) idy[il] = idy[il-1] + 1;
1638: }
1639: if (vscat_mpi1) {
1640: VecScatterCreateLocal_StoP_MPI1(nx,idx,nx,idy,xin,yin,bsx,ctx);
1641: }
1642: #if defined(PETSC_HAVE_MPI_WIN_CREATE_FEATURE)
1643: else {
1644: VecScatterCreateLocal_StoP_MPI3(nx,idx,nx,idy,xin,yin,bsx,ctx);
1645: }
1646: #endif
1647: PetscFree(idy);
1648: ISBlockRestoreIndices(ix,&idx);
1649: PetscInfo(xin,"Special case: blocked indices to stride\n");
1650: goto functionend;
1651: }
1652: }
1654: /* general case */
1655: {
1656: PetscInt nx,ny;
1657: const PetscInt *idx,*idy;
1658: ISGetLocalSize(ix,&nx);
1659: ISGetIndices(ix,&idx);
1660: ISGetLocalSize(iy,&ny);
1661: ISGetIndices(iy,&idy);
1662: if (nx != ny) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local scatter sizes don't match");
1663: if (vscat_mpi1) {
1664: VecScatterCreateLocal_StoP_MPI1(nx,idx,ny,idy,xin,yin,1,ctx);
1665: }
1666: #if defined(PETSC_HAVE_MPI_WIN_CREATE_FEATURE)
1667: else {
1668: VecScatterCreateLocal_StoP_MPI3(nx,idx,ny,idy,xin,yin,1,ctx);
1669: }
1670: #endif
1671: ISRestoreIndices(ix,&idx);
1672: ISRestoreIndices(iy,&idy);
1673: PetscInfo(xin,"General case: Seq to MPI\n");
1674: goto functionend;
1675: }
1676: functionend:
1677: ISDestroy(&tix);
1678: ISDestroy(&tiy);
1679: VecScatterViewFromOptions(ctx,NULL,"-vecscatter_view");
1680: return(0);
1681: }
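/*
   Editor's sketch (not part of the library source): the sequential-to-parallel
   paths above arise when the source vector is sequential and the destination is
   an MPI vector, e.g. accumulating locally computed values into a global vector;
   names below are illustrative only.

     VecScatterCreate(yloc,ix,yglobal,iy,&scat);
     VecScatterBegin(scat,yloc,yglobal,ADD_VALUES,SCATTER_FORWARD);
     VecScatterEnd(scat,yloc,yglobal,ADD_VALUES,SCATTER_FORWARD);
*/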
1683: static PetscErrorCode VecScatterCreate_PtoP(VecScatter ctx)
1684: {
1685: PetscErrorCode ierr;
1686: PetscInt ix_type=-1,iy_type=-1;
1687: IS tix=NULL,tiy=NULL,ix=ctx->from_is,iy=ctx->to_is;
1688: Vec xin=ctx->from_v,yin=ctx->to_v;
1689: VecScatterType type;
1690: PetscBool vscat_mpi1;
1691: PetscInt nx,ny;
1692: const PetscInt *idx,*idy;
1695: GetInputISType_private(ctx,VEC_MPI_ID,VEC_MPI_ID,&ix_type,&tix,&iy_type,&tiy);
1696: if (tix) ix = tix;
1697: if (tiy) iy = tiy;
1699: VecScatterGetType(ctx,&type);
1700: PetscStrcmp(type,"mpi1",&vscat_mpi1);
1702: /* no special cases for now */
1703: ISGetLocalSize(ix,&nx);
1704: ISGetIndices(ix,&idx);
1705: ISGetLocalSize(iy,&ny);
1706: ISGetIndices(iy,&idy);
1707: if (nx != ny) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local scatter sizes don't match");
1708: if (vscat_mpi1) {
1709: VecScatterCreateLocal_PtoP_MPI1(nx,idx,ny,idy,xin,yin,1,ctx);
1710: }
1711: #if defined(PETSC_HAVE_MPI_WIN_CREATE_FEATURE)
1712: else {
1713: VecScatterCreateLocal_PtoP_MPI3(nx,idx,ny,idy,xin,yin,1,ctx);
1714: }
1715: #endif
1716: ISRestoreIndices(ix,&idx);
1717: ISRestoreIndices(iy,&idy);
1718: PetscInfo(xin,"General case: MPI to MPI\n");
1720: ISDestroy(&tix);
1721: ISDestroy(&tiy);
1722: VecScatterViewFromOptions(ctx,NULL,"-vecscatter_view");
1723: return(0);
1724: }
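/*
   Editor's sketch (not part of the library source): the parallel-to-parallel
   case above covers scatters between two MPI vectors, e.g. applying a global
   permutation; names below are illustrative only. A NULL iy means all of ynew
   in natural order (see GetInputISType_private() below).

     IS perm;                    one global source index per locally owned entry of ynew
     VecScatterCreate(xold,perm,ynew,NULL,&scat);
     VecScatterBegin(scat,xold,ynew,INSERT_VALUES,SCATTER_FORWARD);
     VecScatterEnd(scat,xold,ynew,INSERT_VALUES,SCATTER_FORWARD);
*/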
1726: static PetscErrorCode VecScatterGetInputVecType_private(VecScatter ctx,PetscInt *xin_type1,PetscInt *yin_type1)
1727: {
1728: PetscErrorCode ierr;
1729: MPI_Comm comm,ycomm;
1730: PetscMPIInt size;
1731: Vec xin = ctx->from_v,yin = ctx->to_v;
1732: PetscInt xin_type,yin_type;
1735: /*
1736: Determine whether the vectors are "parallel", i.e., they share a communicator with other
1737: processes, or sequential (they do not share one). The difference is that parallel vectors
1738: treat the index set as providing indices in the global parallel numbering of the vector,
1739: while sequential vectors treat the index set as providing indices in the local sequential
1740: numbering.
1741: */
1742: PetscObjectGetComm((PetscObject)xin,&comm);
1743: MPI_Comm_size(comm,&size);
1744: if (size > 1) {
1745: xin_type = VEC_MPI_ID;
1746: } else xin_type = VEC_SEQ_ID;
1747: *xin_type1 = xin_type;
1749: PetscObjectGetComm((PetscObject)yin,&ycomm);
1750: MPI_Comm_size(ycomm,&size);
1751: if (size > 1) {
1752: yin_type = VEC_MPI_ID;
1753: } else yin_type = VEC_SEQ_ID;
1754: *yin_type1 = yin_type;
1755: return(0);
1756: }
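/*
   Editor's note (illustrative, not part of the library source): under the rule
   above a vector counts as parallel exactly when its communicator has size > 1, e.g.

     VecCreateMPI(PETSC_COMM_WORLD,n,PETSC_DECIDE,&x);     VEC_MPI_ID when run on more than one process
     VecCreateSeq(PETSC_COMM_SELF,n,&y);                   always VEC_SEQ_ID

   so index sets paired with x are read in global numbering and those paired
   with y in local numbering.
*/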
1758: static PetscErrorCode GetInputISType_private(VecScatter ctx,PetscInt xin_type,PetscInt yin_type,PetscInt *ix_type1,IS *tix1,PetscInt *iy_type1,IS *tiy1)
1759: {
1760: PetscErrorCode ierr;
1761: MPI_Comm comm;
1762: Vec xin = ctx->from_v,yin = ctx->to_v;
1763: IS tix = 0,tiy = 0,ix = ctx->from_is, iy = ctx->to_is;
1764: PetscInt ix_type = IS_GENERAL_ID,iy_type = IS_GENERAL_ID;
1765: PetscBool flag;
1768: PetscObjectGetComm((PetscObject)ctx,&comm);
1770: /* if ix or iy is not included, assume just grabbing the entire vector */
1771: if (!ix && xin_type == VEC_SEQ_ID) {
1772: ISCreateStride(comm,ctx->from_n,0,1,&ix);
1773: tix = ix;
1774: } else if (!ix && xin_type == VEC_MPI_ID) {
1775: if (yin_type == VEC_MPI_ID) {
1776: PetscInt ntmp, low;
1777: VecGetLocalSize(xin,&ntmp);
1778: VecGetOwnershipRange(xin,&low,NULL);
1779: ISCreateStride(comm,ntmp,low,1,&ix);
1780: } else {
1781: PetscInt Ntmp;
1782: VecGetSize(xin,&Ntmp);
1783: ISCreateStride(comm,Ntmp,0,1,&ix);
1784: }
1785: tix = ix;
1786: } else if (!ix) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ix not given, but not Seq or MPI vector");
1788: if (!iy && yin_type == VEC_SEQ_ID) {
1789: ISCreateStride(comm,ctx->to_n,0,1,&iy);
1790: tiy = iy;
1791: } else if (!iy && yin_type == VEC_MPI_ID) {
1792: if (xin_type == VEC_MPI_ID) {
1793: PetscInt ntmp, low;
1794: VecGetLocalSize(yin,&ntmp);
1795: VecGetOwnershipRange(yin,&low,NULL);
1796: ISCreateStride(comm,ntmp,low,1,&iy);
1797: } else {
1798: PetscInt Ntmp;
1799: VecGetSize(yin,&Ntmp);
1800: ISCreateStride(comm,Ntmp,0,1,&iy);
1801: }
1802: tiy = iy;
1803: } else if (!iy) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"iy not given, but not Seq or MPI vector");
1805: /* Determine types of index sets */
1806: PetscObjectTypeCompare((PetscObject)ix,ISBLOCK,&flag);
1807: if (flag) ix_type = IS_BLOCK_ID;
1808: PetscObjectTypeCompare((PetscObject)iy,ISBLOCK,&flag);
1809: if (flag) iy_type = IS_BLOCK_ID;
1810: PetscObjectTypeCompare((PetscObject)ix,ISSTRIDE,&flag);
1811: if (flag) ix_type = IS_STRIDE_ID;
1812: PetscObjectTypeCompare((PetscObject)iy,ISSTRIDE,&flag);
1813: if (flag) iy_type = IS_STRIDE_ID;
1815: if (ix_type1) *ix_type1 = ix_type;
1816: if (iy_type1) *iy_type1 = iy_type;
1817: if (tix1) *tix1 = tix;
1818: if (tiy1) *tiy1 = tiy;
1819: return(0);
1820: }
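/*
   Editor's sketch (not part of the library source): the defaults built above are
   what allow callers to pass NULL index sets to VecScatterCreate() to mean "all
   entries in the natural order", e.g.

     VecScatterCreate(x,NULL,y,NULL,&scat);
*/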
1822: static PetscErrorCode VecScatterCreate_vectype_private(VecScatter ctx)
1823: {
1824: PetscErrorCode ierr;
1825: PetscInt xin_type=-1,yin_type=-1;
1828: VecScatterGetInputVecType_private(ctx,&xin_type,&yin_type);
1829: if (xin_type == VEC_MPI_ID && yin_type == VEC_SEQ_ID) {
1830: VecScatterCreate_PtoS(ctx);
1831: } else if (xin_type == VEC_SEQ_ID && yin_type == VEC_MPI_ID) {
1832: VecScatterCreate_StoP(ctx);
1833: } else if (xin_type == VEC_MPI_ID && yin_type == VEC_MPI_ID) {
1834: VecScatterCreate_PtoP(ctx);
1835: }
1836: return(0);
1837: }
1839: PetscErrorCode VecScatterCreate_MPI1(VecScatter ctx)
1840: {
1841: PetscErrorCode ierr;
1844: /* subroutines called in VecScatterCreate_vectype_private() need scatter_type as an input */
1845: PetscObjectChangeTypeName((PetscObject)ctx,VECSCATTERMPI1);
1846: PetscInfo(ctx,"Using MPI1 for vector scatter\n");
1847: VecScatterCreate_vectype_private(ctx);
1848: return(0);
1849: }
1851: PetscErrorCode VecScatterCreate_MPI3(VecScatter ctx)
1852: {
1853: PetscErrorCode ierr;
1856: /* subroutines called in VecScatterCreate_vectype_private() need scatter_type as an input */
1857: PetscObjectChangeTypeName((PetscObject)ctx,VECSCATTERMPI3);
1858: PetscInfo(ctx,"Using MPI3 for vector scatter\n");
1859: VecScatterCreate_vectype_private(ctx);
1860: return(0);
1861: }
1863: PetscErrorCode VecScatterCreate_MPI3Node(VecScatter ctx)
1864: {
1865: PetscErrorCode ierr;
1868: /* subroutines called in VecScatterCreate_vectype_private() need scatter_type as an input */
1869: PetscObjectChangeTypeName((PetscObject)ctx,VECSCATTERMPI3NODE);
1870: PetscInfo(ctx,"Using MPI3NODE for vector scatter\n");
1871: VecScatterCreate_vectype_private(ctx);
1872: return(0);
1873: }
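/*
   Editor's sketch (not part of the library source): the three constructors above
   are normally reached through the VecScatter type mechanism rather than called
   directly, e.g. (option name as used for the type in this release):

     VecScatterCreate(x,ix,y,iy,&scat);
     VecScatterSetType(scat,VECSCATTERMPI3);       or -vecscatter_type mpi3

   selecting among the VECSCATTERMPI1, VECSCATTERMPI3 and VECSCATTERMPI3NODE
   implementations registered here.
*/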