Actual source code: seqvscat.c
petsc-3.14.6 2021-03-30
2: /*
3: Code for creating scatters between vectors. This file
4: includes the code for scattering between sequential vectors and
5: some special cases for parallel scatters.
6: */
8: #include <petsc/private/vecscatterimpl.h>
10: #if defined(PETSC_HAVE_CUDA)
11: #include <petsc/private/cudavecimpl.h>
12: #endif
14: /*
15: Checks if any indices are less than zero and generates an error
16: */
17: static PetscErrorCode VecScatterCheckIndices_Private(PetscInt nmax,PetscInt n,const PetscInt *idx)
18: {
19: PetscInt i;
22: if (PetscDefined(USE_DEBUG)) {
23: for (i=0; i<n; i++) {
24: if (idx[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative index %D at %D location",idx[i],i);
25: if (idx[i] >= nmax) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index %D at %D location greater than max %D",idx[i],i,nmax);
26: }
27: }
28: return(0);
29: }
31: PetscErrorCode VecScatterDestroy_SGToSG(VecScatter ctx)
32: {
36: PetscFree2(((VecScatter_Seq_General*)ctx->todata)->vslots,((VecScatter_Seq_General*)ctx->fromdata)->vslots);
37: VecScatterMemcpyPlanDestroy(&((VecScatter_Seq_General*)ctx->fromdata)->memcpy_plan);
38: VecScatterMemcpyPlanDestroy(&((VecScatter_Seq_General*)ctx->todata)->memcpy_plan);
39: PetscFree2(ctx->todata,ctx->fromdata);
40: return(0);
41: }
43: PetscErrorCode VecScatterDestroy_SGToSS(VecScatter ctx)
44: {
48: PetscFree(((VecScatter_Seq_General*)ctx->fromdata)->vslots);
49: VecScatterMemcpyPlanDestroy(&((VecScatter_Seq_General*)ctx->fromdata)->memcpy_plan);
50: PetscFree2(ctx->todata,ctx->fromdata);
51: return(0);
52: }
54: PetscErrorCode VecScatterDestroy_SSToSG(VecScatter ctx)
55: {
59: PetscFree(((VecScatter_Seq_General*)ctx->todata)->vslots);
60: VecScatterMemcpyPlanDestroy(&((VecScatter_Seq_General*)ctx->todata)->memcpy_plan);
61: PetscFree2(ctx->todata,ctx->fromdata);
62: return(0);
63: }
65: PetscErrorCode VecScatterDestroy_SSToSS(VecScatter ctx)
66: {
70: PetscFree2(ctx->todata,ctx->fromdata);
71: return(0);
72: }
74: /* --------------------------------------------------------------------------------------*/
75: /*
76: Scatter: sequential general to sequential general
77: */
78: PetscErrorCode VecScatterBegin_SGToSG(VecScatter ctx,Vec x,Vec y,InsertMode addv,ScatterMode mode)
79: {
80: VecScatter_Seq_General *gen_to = (VecScatter_Seq_General*)ctx->todata;
81: VecScatter_Seq_General *gen_from = (VecScatter_Seq_General*)ctx->fromdata;
82: PetscErrorCode ierr;
83: PetscInt i,n = gen_from->n,*fslots,*tslots;
84: PetscScalar *xv,*yv;
85: #if defined(PETSC_HAVE_CUDA)
86: PetscBool is_veccuda,isy_veccuda;
87: #endif
90: #if defined(PETSC_HAVE_CUDA)
91: PetscObjectTypeCompareAny((PetscObject)x,&is_veccuda,VECSEQCUDA,VECMPICUDA,VECCUDA,"");
92: PetscObjectTypeCompareAny((PetscObject)y,&isy_veccuda,VECSEQCUDA,VECMPICUDA,VECCUDA,"");
93: if (is_veccuda && isy_veccuda && x->offloadmask == PETSC_OFFLOAD_GPU) {
94: /* create the scatter indices if not done already */
95: if (!ctx->spptr) {
96: PetscInt tofirst = 0,tostep = 0,fromfirst = 0,fromstep = 0;
97: fslots = gen_from->vslots;
98: tslots = gen_to->vslots;
99: VecScatterCUDAIndicesCreate_StoS(n,tofirst,fromfirst,tostep,fromstep,tslots,fslots,(PetscCUDAIndices*)&(ctx->spptr));
100: }
101: /* next do the scatter */
102: VecScatterCUDA_StoS(x,y,(PetscCUDAIndices)ctx->spptr,addv,mode);
103: return(0);
104: }
105: #endif
107: VecGetArrayPair(x,y,&xv,&yv);
108: if (mode & SCATTER_REVERSE) {
109: gen_to = (VecScatter_Seq_General*)ctx->fromdata;
110: gen_from = (VecScatter_Seq_General*)ctx->todata;
111: }
112: fslots = gen_from->vslots;
113: tslots = gen_to->vslots;
115: if (gen_from->memcpy_plan.optimized[0]) { VecScatterMemcpyPlanExecute_Scatter(0,xv,&gen_from->memcpy_plan,yv,&gen_to->memcpy_plan,addv); }
116: else if (addv == INSERT_VALUES) { for (i=0; i<n; i++) yv[tslots[i]] = xv[fslots[i]]; }
117: else if (addv == ADD_VALUES) { for (i=0; i<n; i++) yv[tslots[i]] += xv[fslots[i]]; }
118: #if !defined(PETSC_USE_COMPLEX)
119: else if (addv == MAX_VALUES) { for (i=0; i<n; i++) yv[tslots[i]] = PetscMax(yv[tslots[i]],xv[fslots[i]]); }
120: #endif
121: else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_UNKNOWN_TYPE,"Wrong insert option");
122: VecRestoreArrayPair(x,y,&xv,&yv);
123: return(0);
124: }
126: /*
127: Scatter: sequential general to sequential stride 1
128: */
129: PetscErrorCode VecScatterBegin_SGToSS_Stride1(VecScatter ctx,Vec x,Vec y,InsertMode addv,ScatterMode mode)
130: {
131: VecScatter_Seq_Stride *gen_to = (VecScatter_Seq_Stride*)ctx->todata;
132: VecScatter_Seq_General *gen_from = (VecScatter_Seq_General*)ctx->fromdata;
133: PetscInt i,n = gen_from->n,*fslots = gen_from->vslots;
134: PetscErrorCode ierr;
135: PetscInt first = gen_to->first;
136: PetscScalar *xv,*yv;
137: #if defined(PETSC_HAVE_CUDA)
138: PetscBool is_veccuda,isy_veccuda;
139: #endif
142: #if defined(PETSC_HAVE_CUDA)
143: PetscObjectTypeCompareAny((PetscObject)x,&is_veccuda,VECSEQCUDA,VECMPICUDA,VECCUDA,"");
144: PetscObjectTypeCompareAny((PetscObject)y,&isy_veccuda,VECSEQCUDA,VECMPICUDA,VECCUDA,"");
145: if (is_veccuda && isy_veccuda && x->offloadmask == PETSC_OFFLOAD_GPU) {
146: /* create the scatter indices if not done already */
147: if (!ctx->spptr) {
148: PetscInt tofirst = first,tostep = 1,fromfirst = 0,fromstep = 0;
149: PetscInt *tslots = 0;
150: VecScatterCUDAIndicesCreate_StoS(n,tofirst,fromfirst,tostep,fromstep,tslots,fslots,(PetscCUDAIndices*)&(ctx->spptr));
151: }
152: /* next do the scatter */
153: VecScatterCUDA_StoS(x,y,(PetscCUDAIndices)ctx->spptr,addv,mode);
154: return(0);
155: }
156: #endif
158: VecGetArrayPair(x,y,&xv,&yv);
159: if (mode & SCATTER_REVERSE) {
160: PetscScalar *xxv = xv + first;
161: if (gen_from->memcpy_plan.optimized[0]) { VecScatterMemcpyPlanExecute_Unpack(0,xxv,yv,&gen_from->memcpy_plan,addv,1); }
162: else if (addv == INSERT_VALUES) { for (i=0; i<n; i++) yv[fslots[i]] = xxv[i]; }
163: else if (addv == ADD_VALUES) { for (i=0; i<n; i++) yv[fslots[i]] += xxv[i]; }
164: #if !defined(PETSC_USE_COMPLEX)
165: else if (addv == MAX_VALUES) { for (i=0; i<n; i++) yv[fslots[i]] = PetscMax(yv[fslots[i]],xxv[i]); }
166: #endif
167: else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_UNKNOWN_TYPE,"Wrong insert option");
168: } else {
169: PetscScalar *yyv = yv + first;
170: if (gen_from->memcpy_plan.optimized[0]) { VecScatterMemcpyPlanExecute_Pack(0,xv,&gen_from->memcpy_plan,yyv,addv,1); }
171: else if (addv == INSERT_VALUES) { for (i=0; i<n; i++) yyv[i] = xv[fslots[i]]; }
172: else if (addv == ADD_VALUES) { for (i=0; i<n; i++) yyv[i] += xv[fslots[i]]; }
173: #if !defined(PETSC_USE_COMPLEX)
174: else if (addv == MAX_VALUES) { for (i=0; i<n; i++) yyv[i] = PetscMax(yyv[i],xv[fslots[i]]); }
175: #endif
176: else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_UNKNOWN_TYPE,"Wrong insert option");
177: }
178: VecRestoreArrayPair(x,y,&xv,&yv);
179: return(0);
180: }
182: /*
183: Scatter: sequential general to sequential stride
184: */
185: PetscErrorCode VecScatterBegin_SGToSS(VecScatter ctx,Vec x,Vec y,InsertMode addv,ScatterMode mode)
186: {
187: VecScatter_Seq_Stride *gen_to = (VecScatter_Seq_Stride*)ctx->todata;
188: VecScatter_Seq_General *gen_from = (VecScatter_Seq_General*)ctx->fromdata;
189: PetscInt i,n = gen_from->n,*fslots = gen_from->vslots;
190: PetscErrorCode ierr;
191: PetscInt first = gen_to->first,step = gen_to->step;
192: PetscScalar *xv,*yv;
193: #if defined(PETSC_HAVE_CUDA)
194: PetscBool is_veccuda,isy_veccuda;
195: #endif
198: #if defined(PETSC_HAVE_CUDA)
199: PetscObjectTypeCompareAny((PetscObject)x,&is_veccuda,VECSEQCUDA,VECMPICUDA,VECCUDA,"");
200: PetscObjectTypeCompareAny((PetscObject)y,&isy_veccuda,VECSEQCUDA,VECMPICUDA,VECCUDA,"");
201: if (is_veccuda && isy_veccuda && x->offloadmask == PETSC_OFFLOAD_GPU) {
202: /* create the scatter indices if not done already */
203: if (!ctx->spptr) {
204: PetscInt tofirst = first,tostep = step,fromfirst = 0,fromstep = 0;
205: PetscInt * tslots = 0;
206: VecScatterCUDAIndicesCreate_StoS(n,tofirst,fromfirst,tostep,fromstep,tslots,fslots,(PetscCUDAIndices*)&(ctx->spptr));
207: }
208: /* next do the scatter */
209: VecScatterCUDA_StoS(x,y,(PetscCUDAIndices)ctx->spptr,addv,mode);
210: return(0);
211: }
212: #endif
214: VecGetArrayPair(x,y,&xv,&yv);
215: if (mode & SCATTER_REVERSE) {
216: if (addv == INSERT_VALUES) {
217: for (i=0; i<n; i++) yv[fslots[i]] = xv[first + i*step];
218: } else if (addv == ADD_VALUES) {
219: for (i=0; i<n; i++) yv[fslots[i]] += xv[first + i*step];
220: #if !defined(PETSC_USE_COMPLEX)
221: } else if (addv == MAX_VALUES) {
222: for (i=0; i<n; i++) yv[fslots[i]] = PetscMax(yv[fslots[i]],xv[first + i*step]);
223: #endif
224: } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_UNKNOWN_TYPE,"Wrong insert option");
225: } else {
226: if (addv == INSERT_VALUES) {
227: for (i=0; i<n; i++) yv[first + i*step] = xv[fslots[i]];
228: } else if (addv == ADD_VALUES) {
229: for (i=0; i<n; i++) yv[first + i*step] += xv[fslots[i]];
230: #if !defined(PETSC_USE_COMPLEX)
231: } else if (addv == MAX_VALUES) {
232: for (i=0; i<n; i++) yv[first + i*step] = PetscMax(yv[first + i*step],xv[fslots[i]]);
233: #endif
234: } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_UNKNOWN_TYPE,"Wrong insert option");
235: }
236: VecRestoreArrayPair(x,y,&xv,&yv);
237: return(0);
238: }
240: /*
241: Scatter: sequential stride 1 to sequential general
242: */
243: PetscErrorCode VecScatterBegin_SSToSG_Stride1(VecScatter ctx,Vec x,Vec y,InsertMode addv,ScatterMode mode)
244: {
245: VecScatter_Seq_Stride *gen_from = (VecScatter_Seq_Stride*)ctx->fromdata;
246: VecScatter_Seq_General *gen_to = (VecScatter_Seq_General*)ctx->todata;
247: PetscInt i,n = gen_from->n,*tslots = gen_to->vslots;
248: PetscErrorCode ierr;
249: PetscInt first = gen_from->first;
250: PetscScalar *xv,*yv;
251: #if defined(PETSC_HAVE_CUDA)
252: PetscBool is_veccuda,isy_veccuda;
253: #endif
256: #if defined(PETSC_HAVE_CUDA)
257: PetscObjectTypeCompareAny((PetscObject)x,&is_veccuda,VECSEQCUDA,VECMPICUDA,VECCUDA,"");
258: PetscObjectTypeCompareAny((PetscObject)y,&isy_veccuda,VECSEQCUDA,VECMPICUDA,VECCUDA,"");
259: if (is_veccuda && isy_veccuda && x->offloadmask == PETSC_OFFLOAD_GPU) {
260: /* create the scatter indices if not done already */
261: if (!ctx->spptr) {
262: PetscInt tofirst = 0,tostep = 0,fromfirst = first,fromstep = 1;
263: PetscInt *fslots = 0;
264: VecScatterCUDAIndicesCreate_StoS(n,tofirst,fromfirst,tostep,fromstep,tslots,fslots,(PetscCUDAIndices*)&(ctx->spptr));
265: }
266: /* next do the scatter */
267: VecScatterCUDA_StoS(x,y,(PetscCUDAIndices)ctx->spptr,addv,mode);
268: return(0);
269: }
270: #endif
272: VecGetArrayPair(x,y,&xv,&yv);
273: if (mode & SCATTER_REVERSE) {
274: PetscScalar *yyv = yv + first;
275: if (gen_to->memcpy_plan.optimized[0]) { VecScatterMemcpyPlanExecute_Pack(0,xv,&gen_to->memcpy_plan,yyv,addv,1); }
276: else if (addv == INSERT_VALUES) { for (i=0; i<n; i++) yyv[i] = xv[tslots[i]]; }
277: else if (addv == ADD_VALUES) { for (i=0; i<n; i++) yyv[i] += xv[tslots[i]]; }
278: #if !defined(PETSC_USE_COMPLEX)
279: else if (addv == MAX_VALUES) { for (i=0; i<n; i++) yyv[i] = PetscMax(yyv[i],xv[tslots[i]]); }
280: #endif
281: else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_UNKNOWN_TYPE,"Wrong insert option");
282: } else {
283: PetscScalar *xxv = xv + first;
284: if (gen_to->memcpy_plan.optimized[0]) { VecScatterMemcpyPlanExecute_Unpack(0,xxv,yv,&gen_to->memcpy_plan,addv,1); }
285: else if (addv == INSERT_VALUES) { for (i=0; i<n; i++) yv[tslots[i]] = xxv[i]; }
286: else if (addv == ADD_VALUES) { for (i=0; i<n; i++) yv[tslots[i]] += xxv[i]; }
287: #if !defined(PETSC_USE_COMPLEX)
288: else if (addv == MAX_VALUES) { for (i=0; i<n; i++) yv[tslots[i]] = PetscMax(yv[tslots[i]],xxv[i]); }
289: #endif
290: else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_UNKNOWN_TYPE,"Wrong insert option");
291: }
292: VecRestoreArrayPair(x,y,&xv,&yv);
293: return(0);
294: }
296: /*
297: Scatter: sequential stride to sequential general
298: */
299: PetscErrorCode VecScatterBegin_SSToSG(VecScatter ctx,Vec x,Vec y,InsertMode addv,ScatterMode mode)
300: {
301: VecScatter_Seq_Stride *gen_from = (VecScatter_Seq_Stride*)ctx->fromdata;
302: VecScatter_Seq_General *gen_to = (VecScatter_Seq_General*)ctx->todata;
303: PetscInt i,n = gen_from->n,*tslots = gen_to->vslots;
304: PetscErrorCode ierr;
305: PetscInt first = gen_from->first,step = gen_from->step;
306: PetscScalar *xv,*yv;
307: #if defined(PETSC_HAVE_CUDA)
308: PetscBool is_veccuda,isy_veccuda;
309: #endif
312: #if defined(PETSC_HAVE_CUDA)
313: PetscObjectTypeCompareAny((PetscObject)x,&is_veccuda,VECSEQCUDA,VECMPICUDA,VECCUDA,"");
314: PetscObjectTypeCompareAny((PetscObject)y,&isy_veccuda,VECSEQCUDA,VECMPICUDA,VECCUDA,"");
315: if (is_veccuda && isy_veccuda && x->offloadmask == PETSC_OFFLOAD_GPU) {
316: /* create the scatter indices if not done already */
317: if (!ctx->spptr) {
318: PetscInt tofirst = 0,tostep = 0,fromfirst = first,fromstep = step;
319: PetscInt *fslots = 0;
320: VecScatterCUDAIndicesCreate_StoS(n,tofirst,fromfirst,tostep,fromstep,tslots,fslots,(PetscCUDAIndices*)&(ctx->spptr));
321: }
322: /* next do the scatter */
323: VecScatterCUDA_StoS(x,y,(PetscCUDAIndices)ctx->spptr,addv,mode);
324: return(0);
325: }
326: #endif
328: VecGetArrayPair(x,y,&xv,&yv);
329: if (mode & SCATTER_REVERSE) {
330: if (addv == INSERT_VALUES) {
331: for (i=0; i<n; i++) yv[first + i*step] = xv[tslots[i]];
332: } else if (addv == ADD_VALUES) {
333: for (i=0; i<n; i++) yv[first + i*step] += xv[tslots[i]];
334: #if !defined(PETSC_USE_COMPLEX)
335: } else if (addv == MAX_VALUES) {
336: for (i=0; i<n; i++) yv[first + i*step] = PetscMax(yv[first + i*step],xv[tslots[i]]);
337: #endif
338: } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_UNKNOWN_TYPE,"Wrong insert option");
339: } else {
340: if (addv == INSERT_VALUES) {
341: for (i=0; i<n; i++) yv[tslots[i]] = xv[first + i*step];
342: } else if (addv == ADD_VALUES) {
343: for (i=0; i<n; i++) yv[tslots[i]] += xv[first + i*step];
344: #if !defined(PETSC_USE_COMPLEX)
345: } else if (addv == MAX_VALUES) {
346: for (i=0; i<n; i++) yv[tslots[i]] = PetscMax(yv[tslots[i]],xv[first + i*step]);
347: #endif
348: } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_UNKNOWN_TYPE,"Wrong insert option");
349: }
350: VecRestoreArrayPair(x,y,&xv,&yv);
351: return(0);
352: }
354: PetscErrorCode VecScatterView_SSToSG(VecScatter in,PetscViewer viewer)
355: {
356: PetscErrorCode ierr;
357: VecScatter_Seq_Stride *in_from = (VecScatter_Seq_Stride*)in->fromdata;
358: VecScatter_Seq_General *in_to = (VecScatter_Seq_General*)in->todata;
359: PetscInt i;
360: PetscBool isascii;
363: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&isascii);
364: if (isascii) {
365: PetscViewerASCIIPrintf(viewer,"Sequential stride to general scatter\n");
366: for (i=0; i<in_to->n; i++) {
367: PetscViewerASCIIPrintf(viewer,"%D to %D\n",in_from->first + in_from->step*i,in_to->vslots[i]);
368: }
369: if (in_to->memcpy_plan.optimized[0]) {
370: PetscViewerASCIIPrintf(viewer,"This stride1 to general scatter is made of %D copies\n",in_to->memcpy_plan.copy_offsets[1]);
371: }
372: }
373: return(0);
374: }
375: /*
376: Scatter: sequential stride to sequential stride
377: */
378: PetscErrorCode VecScatterBegin_SSToSS(VecScatter ctx,Vec x,Vec y,InsertMode addv,ScatterMode mode)
379: {
380: VecScatter_Seq_Stride *gen_to = (VecScatter_Seq_Stride*)ctx->todata;
381: VecScatter_Seq_Stride *gen_from = (VecScatter_Seq_Stride*)ctx->fromdata;
382: PetscInt i,n = gen_from->n,to_first = gen_to->first,to_step = gen_to->step;
383: PetscErrorCode ierr;
384: PetscInt from_first = gen_from->first,from_step = gen_from->step;
385: PetscScalar *xv,*yv;
386: #if defined(PETSC_HAVE_CUDA)
387: PetscBool is_veccuda,isy_veccuda;
388: #endif
391: #if defined(PETSC_HAVE_CUDA)
392: PetscObjectTypeCompareAny((PetscObject)x,&is_veccuda,VECSEQCUDA,VECMPICUDA,VECCUDA,"");
393: PetscObjectTypeCompareAny((PetscObject)y,&isy_veccuda,VECSEQCUDA,VECMPICUDA,VECCUDA,"");
394: if (is_veccuda && isy_veccuda && x->offloadmask == PETSC_OFFLOAD_GPU) {
395: /* create the scatter indices if not done already */
396: if (!ctx->spptr) {
397: PetscInt *tslots = 0,*fslots = 0;
398: VecScatterCUDAIndicesCreate_StoS(n,to_first,from_first,to_step,from_step,tslots,fslots,(PetscCUDAIndices*)&(ctx->spptr));
399: }
400: /* next do the scatter */
401: VecScatterCUDA_StoS(x,y,(PetscCUDAIndices)ctx->spptr,addv,mode);
402: return(0);
403: }
404: #endif
406: VecGetArrayPair(x,y,&xv,&yv);
407: if (mode & SCATTER_REVERSE) {
408: from_first = gen_to->first;
409: to_first = gen_from->first;
410: from_step = gen_to->step;
411: to_step = gen_from->step;
412: }
414: if (addv == INSERT_VALUES) {
415: if (to_step == 1 && from_step == 1) {
416: PetscArraycpy(yv+to_first,xv+from_first,n);
417: } else {
418: for (i=0; i<n; i++) yv[to_first + i*to_step] = xv[from_first+i*from_step];
419: }
420: } else if (addv == ADD_VALUES) {
421: if (to_step == 1 && from_step == 1) {
422: PetscScalar *yyv = yv + to_first, *xxv = xv + from_first;
423: for (i=0; i<n; i++) yyv[i] += xxv[i];
424: } else {
425: for (i=0; i<n; i++) yv[to_first + i*to_step] += xv[from_first+i*from_step];
426: }
427: #if !defined(PETSC_USE_COMPLEX)
428: } else if (addv == MAX_VALUES) {
429: if (to_step == 1 && from_step == 1) {
430: PetscScalar *yyv = yv + to_first, *xxv = xv + from_first;
431: for (i=0; i<n; i++) yyv[i] = PetscMax(yyv[i],xxv[i]);
432: } else {
433: for (i=0; i<n; i++) yv[to_first + i*to_step] = PetscMax(yv[to_first + i*to_step],xv[from_first+i*from_step]);
434: }
435: #endif
436: } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_UNKNOWN_TYPE,"Wrong insert option");
437: VecRestoreArrayPair(x,y,&xv,&yv);
438: return(0);
439: }
441: /* --------------------------------------------------------------------------*/
444: PetscErrorCode VecScatterCopy_SGToSG(VecScatter in,VecScatter out)
445: {
446: PetscErrorCode ierr;
447: VecScatter_Seq_General *in_to = (VecScatter_Seq_General*)in->todata,*out_to = NULL;
448: VecScatter_Seq_General *in_from = (VecScatter_Seq_General*)in->fromdata,*out_from = NULL;
451: out->ops->begin = in->ops->begin;
452: out->ops->end = in->ops->end;
453: out->ops->copy = in->ops->copy;
454: out->ops->destroy = in->ops->destroy;
455: out->ops->view = in->ops->view;
457: PetscMalloc2(1,&out_to,1,&out_from);
458: PetscMalloc2(in_to->n,&out_to->vslots,in_from->n,&out_from->vslots);
459: out_to->n = in_to->n;
460: out_to->format = in_to->format;
461: out_to->nonmatching_computed = PETSC_FALSE;
462: out_to->slots_nonmatching = NULL;
463: PetscArraycpy(out_to->vslots,in_to->vslots,out_to->n);
464: VecScatterMemcpyPlanCopy(&in_to->memcpy_plan,&out_to->memcpy_plan);
466: out_from->n = in_from->n;
467: out_from->format = in_from->format;
468: out_from->nonmatching_computed = PETSC_FALSE;
469: out_from->slots_nonmatching = NULL;
470: PetscArraycpy(out_from->vslots,in_from->vslots,out_from->n);
471: VecScatterMemcpyPlanCopy(&in_from->memcpy_plan,&out_from->memcpy_plan);
473: out->todata = (void*)out_to;
474: out->fromdata = (void*)out_from;
475: return(0);
476: }
478: PetscErrorCode VecScatterView_SGToSG(VecScatter in,PetscViewer viewer)
479: {
480: PetscErrorCode ierr;
481: VecScatter_Seq_General *in_to = (VecScatter_Seq_General*)in->todata;
482: VecScatter_Seq_General *in_from = (VecScatter_Seq_General*)in->fromdata;
483: PetscInt i;
484: PetscBool isascii;
487: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&isascii);
488: if (isascii) {
489: PetscViewerASCIIPrintf(viewer,"Sequential general scatter\n");
490: for (i=0; i<in_to->n; i++) {
491: PetscViewerASCIIPrintf(viewer,"%D to %D\n",in_from->vslots[i],in_to->vslots[i]);
492: }
493: if (in_from->memcpy_plan.optimized[0]) {
494: PetscViewerASCIIPrintf(viewer,"This general to general scatter is made of %D copies\n",in_from->memcpy_plan.copy_offsets[1]);
495: }
496: }
497: return(0);
498: }
501: PetscErrorCode VecScatterCopy_SGToSS(VecScatter in,VecScatter out)
502: {
503: PetscErrorCode ierr;
504: VecScatter_Seq_Stride *in_to = (VecScatter_Seq_Stride*)in->todata,*out_to = NULL;
505: VecScatter_Seq_General *in_from = (VecScatter_Seq_General*)in->fromdata,*out_from = NULL;
508: out->ops->begin = in->ops->begin;
509: out->ops->end = in->ops->end;
510: out->ops->copy = in->ops->copy;
511: out->ops->destroy = in->ops->destroy;
512: out->ops->view = in->ops->view;
514: PetscMalloc2(1,&out_to,1,&out_from);
515: PetscMalloc1(in_from->n,&out_from->vslots);
516: out_to->n = in_to->n;
517: out_to->format = in_to->format;
518: out_to->first = in_to->first;
519: out_to->step = in_to->step;
520: out_to->format = in_to->format;
522: out_from->n = in_from->n;
523: out_from->format = in_from->format;
524: out_from->nonmatching_computed = PETSC_FALSE;
525: out_from->slots_nonmatching = NULL;
526: PetscArraycpy(out_from->vslots,in_from->vslots,out_from->n);
527: VecScatterMemcpyPlanCopy(&in_from->memcpy_plan,&out_from->memcpy_plan);
529: out->todata = (void*)out_to;
530: out->fromdata = (void*)out_from;
531: return(0);
532: }
534: PetscErrorCode VecScatterView_SGToSS(VecScatter in,PetscViewer viewer)
535: {
536: PetscErrorCode ierr;
537: VecScatter_Seq_Stride *in_to = (VecScatter_Seq_Stride*)in->todata;
538: VecScatter_Seq_General *in_from = (VecScatter_Seq_General*)in->fromdata;
539: PetscInt i;
540: PetscBool isascii;
543: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&isascii);
544: if (isascii) {
545: PetscViewerASCIIPrintf(viewer,"Sequential general scatter to stride\n");
546: for (i=0; i<in_to->n; i++) {
547: PetscViewerASCIIPrintf(viewer,"%D to %D\n",in_from->vslots[i],in_to->first + in_to->step*i);
548: }
549: if (in_from->memcpy_plan.optimized[0]) {
550: PetscViewerASCIIPrintf(viewer,"This general to stride1 scatter is made of %D copies\n",in_from->memcpy_plan.copy_offsets[1]);
551: }
552: }
553: return(0);
554: }
556: /* --------------------------------------------------------------------------*/
557: /*
558: Scatter: parallel to sequential vector, sequential strides for both.
559: */
560: PetscErrorCode VecScatterCopy_SSToSS(VecScatter in,VecScatter out)
561: {
562: VecScatter_Seq_Stride *in_to = (VecScatter_Seq_Stride*)in->todata,*out_to = NULL;
563: VecScatter_Seq_Stride *in_from = (VecScatter_Seq_Stride*)in->fromdata,*out_from = NULL;
564: PetscErrorCode ierr;
567: out->ops->begin = in->ops->begin;
568: out->ops->end = in->ops->end;
569: out->ops->copy = in->ops->copy;
570: out->ops->destroy = in->ops->destroy;
571: out->ops->view = in->ops->view;
573: PetscMalloc2(1,&out_to,1,&out_from);
574: out_to->n = in_to->n;
575: out_to->format = in_to->format;
576: out_to->first = in_to->first;
577: out_to->step = in_to->step;
578: out_to->format = in_to->format;
579: out_from->n = in_from->n;
580: out_from->format= in_from->format;
581: out_from->first = in_from->first;
582: out_from->step = in_from->step;
583: out_from->format= in_from->format;
584: out->todata = (void*)out_to;
585: out->fromdata = (void*)out_from;
586: return(0);
587: }
589: PetscErrorCode VecScatterView_SSToSS(VecScatter in,PetscViewer viewer)
590: {
591: VecScatter_Seq_Stride *in_to = (VecScatter_Seq_Stride*)in->todata;
592: VecScatter_Seq_Stride *in_from = (VecScatter_Seq_Stride*)in->fromdata;
593: PetscErrorCode ierr;
594: PetscBool isascii;
597: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&isascii);
598: if (isascii) {
599: PetscViewerASCIIPrintf(viewer,"Sequential stride count %D start %D step to start %D stride %D\n",in_to->n,in_to->first,in_to->step,in_from->first,in_from->step);
600: }
601: return(0);
602: }
604: /* --------------------------------------------------------------------------------------*/
605: /* Create a memcpy plan for a sequential general (SG) to SG scatter */
606: PetscErrorCode VecScatterMemcpyPlanCreate_SGToSG(PetscInt bs,VecScatter_Seq_General *to,VecScatter_Seq_General *from)
607: {
608: PetscInt n = to->n,i,*to_slots = to->vslots,*from_slots = from->vslots;
609: PetscInt j,n_copies;
611: PetscBool same_copy_starts;
614: PetscMemzero(&to->memcpy_plan,sizeof(VecScatterMemcpyPlan));
615: PetscMemzero(&from->memcpy_plan,sizeof(VecScatterMemcpyPlan));
616: to->memcpy_plan.n = 1;
617: from->memcpy_plan.n = 1;
619: /* malloc and init the two fields to false and zero */
620: PetscCalloc2(1,&to->memcpy_plan.optimized,2,&to->memcpy_plan.copy_offsets);
621: PetscCalloc2(1,&from->memcpy_plan.optimized,2,&from->memcpy_plan.copy_offsets);
623: /* count number of copies, which runs from 1 to n */
624: n_copies = 1;
625: for (i=0; i<n-1; i++) {
626: if (to_slots[i]+bs != to_slots[i+1] || from_slots[i]+bs != from_slots[i+1]) n_copies++;
627: }
629: /* if average copy size >= 256 bytes, use memcpy instead of load/store */
630: if (bs*n*sizeof(PetscScalar)/n_copies >= 256) {
631: PetscMalloc2(n_copies,&to->memcpy_plan.copy_starts,n_copies,&to->memcpy_plan.copy_lengths);
632: PetscMalloc2(n_copies,&from->memcpy_plan.copy_starts,n_copies,&from->memcpy_plan.copy_lengths);
634: /* set up copy_starts[] & copy_lenghts[] of to and from */
635: to->memcpy_plan.copy_starts[0] = to_slots[0];
636: from->memcpy_plan.copy_starts[0] = from_slots[0];
638: if (n_copies != 1) { /* one copy is trival and we can save some work */
639: j = 0; /* j-th copy */
640: for (i=0; i<n-1; i++) {
641: if (to_slots[i]+bs != to_slots[i+1] || from_slots[i]+bs != from_slots[i+1]) {
642: to->memcpy_plan.copy_lengths[j] = to_slots[i]+bs-to->memcpy_plan.copy_starts[j];
643: from->memcpy_plan.copy_lengths[j] = from_slots[i]+bs-from->memcpy_plan.copy_starts[j];
644: to->memcpy_plan.copy_starts[j+1] = to_slots[i+1];
645: from->memcpy_plan.copy_starts[j+1] = from_slots[i+1];
646: j++;
647: }
648: }
649: }
651: /* set up copy_lengths[] of the last copy */
652: to->memcpy_plan.copy_lengths[n_copies-1] = to_slots[n-1]+bs-to->memcpy_plan.copy_starts[n_copies-1];
653: from->memcpy_plan.copy_lengths[n_copies-1] = from_slots[n-1]+bs-from->memcpy_plan.copy_starts[n_copies-1];
655: /* check if to and from have the same copy_starts[] values */
656: same_copy_starts = PETSC_TRUE;
657: for (i=0; i<n_copies; i++) {
658: if (to->memcpy_plan.copy_starts[i] != from->memcpy_plan.copy_starts[i]) { same_copy_starts = PETSC_FALSE; break; }
659: }
661: to->memcpy_plan.optimized[0] = PETSC_TRUE;
662: from->memcpy_plan.optimized[0] = PETSC_TRUE;
663: to->memcpy_plan.copy_offsets[1] = n_copies;
664: from->memcpy_plan.copy_offsets[1] = n_copies;
665: to->memcpy_plan.same_copy_starts = same_copy_starts;
666: from->memcpy_plan.same_copy_starts = same_copy_starts;
667: }
669: /* we do not do stride optimzation for this kind of scatter since the chance is rare. All related fields are zeroed out */
670: return(0);
671: }
673: /* -------------------------------------------------- */
674: PetscErrorCode VecScatterSetUp_Seq(VecScatter ctx)
675: {
676: PetscErrorCode ierr;
677: PetscInt ix_type=-1,iy_type=-1;
678: IS tix = NULL,tiy = NULL,ix=ctx->from_is,iy=ctx->to_is;
681: GetInputISType_private(ctx,VEC_SEQ_ID,VEC_SEQ_ID,&ix_type,&tix,&iy_type,&tiy);
682: if (tix) ix = tix;
683: if (tiy) iy = tiy;
685: if (ix_type == IS_GENERAL_ID && iy_type == IS_GENERAL_ID) {
686: PetscInt nx,ny;
687: const PetscInt *idx,*idy;
688: VecScatter_Seq_General *to = NULL,*from = NULL;
690: ISGetLocalSize(ix,&nx);
691: ISGetLocalSize(iy,&ny);
692: if (nx != ny) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local scatter sizes don't match");
693: ISGetIndices(ix,&idx);
694: ISGetIndices(iy,&idy);
695: PetscMalloc2(1,&to,1,&from);
696: PetscMalloc2(nx,&to->vslots,nx,&from->vslots);
697: to->n = nx;
698: VecScatterCheckIndices_Private(ctx->to_n,ny,idy);
699: PetscArraycpy(to->vslots,idy,nx);
700: from->n = nx;
701: VecScatterCheckIndices_Private(ctx->from_n,nx,idx);
702: PetscArraycpy(from->vslots,idx,nx);
703: to->format = VEC_SCATTER_SEQ_GENERAL;
704: from->format = VEC_SCATTER_SEQ_GENERAL;
705: ctx->todata = (void*)to;
706: ctx->fromdata = (void*)from;
707: VecScatterMemcpyPlanCreate_SGToSG(1,to,from);
708: ctx->ops->begin = VecScatterBegin_SGToSG;
709: ctx->ops->end = NULL;
710: ctx->ops->destroy = VecScatterDestroy_SGToSG;
711: ctx->ops->copy = VecScatterCopy_SGToSG;
712: ctx->ops->view = VecScatterView_SGToSG;
713: PetscInfo(ctx->from_v,"Special case: sequential vector general scatter\n");
714: goto functionend;
715: } else if (ix_type == IS_STRIDE_ID && iy_type == IS_STRIDE_ID) {
716: PetscInt nx,ny,to_first,to_step,from_first,from_step;
717: VecScatter_Seq_Stride *from8 = NULL,*to8 = NULL;
719: ISGetLocalSize(ix,&nx);
720: ISGetLocalSize(iy,&ny);
721: if (nx != ny) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local scatter sizes don't match");
722: ISStrideGetInfo(iy,&to_first,&to_step);
723: ISStrideGetInfo(ix,&from_first,&from_step);
724: PetscMalloc2(1,&to8,1,&from8);
725: to8->n = nx;
726: to8->first = to_first;
727: to8->step = to_step;
728: from8->n = nx;
729: from8->first = from_first;
730: from8->step = from_step;
731: to8->format = VEC_SCATTER_SEQ_STRIDE;
732: from8->format = VEC_SCATTER_SEQ_STRIDE;
733: ctx->todata = (void*)to8;
734: ctx->fromdata = (void*)from8;
735: ctx->ops->begin = VecScatterBegin_SSToSS;
736: ctx->ops->end = NULL;
737: ctx->ops->destroy = VecScatterDestroy_SSToSS;
738: ctx->ops->copy = VecScatterCopy_SSToSS;
739: ctx->ops->view = VecScatterView_SSToSS;
740: PetscInfo(ctx->from_v,"Special case: sequential vector stride to stride\n");
741: goto functionend;
742: } else if (ix_type == IS_GENERAL_ID && iy_type == IS_STRIDE_ID) {
743: PetscInt nx,ny,first,step;
744: const PetscInt *idx;
745: VecScatter_Seq_General *from9 = NULL;
746: VecScatter_Seq_Stride *to9 = NULL;
748: ISGetLocalSize(ix,&nx);
749: ISGetIndices(ix,&idx);
750: ISGetLocalSize(iy,&ny);
751: ISStrideGetInfo(iy,&first,&step);
752: if (nx != ny) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local scatter sizes don't match");
753: PetscMalloc2(1,&to9,1,&from9);
754: PetscMemzero(&from9->memcpy_plan,sizeof(VecScatterMemcpyPlan));
755: PetscMalloc1(nx,&from9->vslots);
756: to9->n = nx;
757: to9->first = first;
758: to9->step = step;
759: from9->n = nx;
760: VecScatterCheckIndices_Private(ctx->from_n,nx,idx);
761: PetscArraycpy(from9->vslots,idx,nx);
762: ctx->todata = (void*)to9; ctx->fromdata = (void*)from9;
763: if (step == 1) {
764: PetscInt tmp[2];
765: tmp[0] = 0; tmp[1] = nx;
766: VecScatterMemcpyPlanCreate_Index(1,tmp,from9->vslots,1,&from9->memcpy_plan);
767: ctx->ops->begin = VecScatterBegin_SGToSS_Stride1;
768: } else {
769: ctx->ops->begin = VecScatterBegin_SGToSS;
770: }
771: ctx->ops->destroy = VecScatterDestroy_SGToSS;
772: ctx->ops->end = NULL;
773: ctx->ops->copy = VecScatterCopy_SGToSS;
774: ctx->ops->view = VecScatterView_SGToSS;
775: to9->format = VEC_SCATTER_SEQ_STRIDE;
776: from9->format = VEC_SCATTER_SEQ_GENERAL;
777: PetscInfo(ctx->from_v,"Special case: sequential vector general to stride\n");
778: goto functionend;
779: } else if (ix_type == IS_STRIDE_ID && iy_type == IS_GENERAL_ID) {
780: PetscInt nx,ny,first,step;
781: const PetscInt *idy;
782: VecScatter_Seq_General *to10 = NULL;
783: VecScatter_Seq_Stride *from10 = NULL;
785: ISGetLocalSize(ix,&nx);
786: ISGetIndices(iy,&idy);
787: ISGetLocalSize(iy,&ny);
788: ISStrideGetInfo(ix,&first,&step);
789: if (nx != ny) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local scatter sizes don't match");
790: PetscMalloc2(1,&to10,1,&from10);
791: PetscMemzero(&to10->memcpy_plan,sizeof(VecScatterMemcpyPlan));
792: PetscMalloc1(nx,&to10->vslots);
793: from10->n = nx;
794: from10->first = first;
795: from10->step = step;
796: to10->n = nx;
797: VecScatterCheckIndices_Private(ctx->to_n,ny,idy);
798: PetscArraycpy(to10->vslots,idy,nx);
799: ctx->todata = (void*)to10;
800: ctx->fromdata = (void*)from10;
801: if (step == 1) {
802: PetscInt tmp[2];
803: tmp[0] = 0; tmp[1] = nx;
804: VecScatterMemcpyPlanCreate_Index(1,tmp,to10->vslots,1,&to10->memcpy_plan);
805: ctx->ops->begin = VecScatterBegin_SSToSG_Stride1;
806: } else {
807: ctx->ops->begin = VecScatterBegin_SSToSG;
808: }
809: ctx->ops->destroy = VecScatterDestroy_SSToSG;
810: ctx->ops->end = NULL;
811: ctx->ops->copy = NULL;
812: ctx->ops->view = VecScatterView_SSToSG;
813: to10->format = VEC_SCATTER_SEQ_GENERAL;
814: from10->format = VEC_SCATTER_SEQ_STRIDE;
815: PetscInfo(ctx->from_v,"Special case: sequential vector stride to general\n");
816: goto functionend;
817: } else {
818: PetscInt nx,ny;
819: const PetscInt *idx,*idy;
820: VecScatter_Seq_General *to11 = NULL,*from11 = NULL;
821: PetscBool idnx,idny;
823: ISGetLocalSize(ix,&nx);
824: ISGetLocalSize(iy,&ny);
825: if (nx != ny) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local scatter sizes don't match, in %D out %D",nx,ny);
827: ISIdentity(ix,&idnx);
828: ISIdentity(iy,&idny);
829: if (idnx && idny) {
830: VecScatter_Seq_Stride *to13 = NULL,*from13 = NULL;
831: PetscMalloc2(1,&to13,1,&from13);
832: to13->n = nx;
833: to13->first = 0;
834: to13->step = 1;
835: from13->n = nx;
836: from13->first = 0;
837: from13->step = 1;
838: to13->format = VEC_SCATTER_SEQ_STRIDE;
839: from13->format = VEC_SCATTER_SEQ_STRIDE;
840: ctx->todata = (void*)to13;
841: ctx->fromdata = (void*)from13;
842: ctx->ops->begin = VecScatterBegin_SSToSS;
843: ctx->ops->end = NULL;
844: ctx->ops->destroy = VecScatterDestroy_SSToSS;
845: ctx->ops->copy = VecScatterCopy_SSToSS;
846: ctx->ops->view = VecScatterView_SSToSS;
847: PetscInfo(ctx->from_v,"Special case: sequential copy\n");
848: goto functionend;
849: }
851: ISGetIndices(iy,&idy);
852: ISGetIndices(ix,&idx);
853: PetscMalloc2(1,&to11,1,&from11);
854: PetscMalloc2(nx,&to11->vslots,nx,&from11->vslots);
855: to11->n = nx;
856: VecScatterCheckIndices_Private(ctx->to_n,ny,idy);
857: PetscArraycpy(to11->vslots,idy,nx);
858: from11->n = nx;
859: VecScatterCheckIndices_Private(ctx->from_n,nx,idx);
860: PetscArraycpy(from11->vslots,idx,nx);
861: to11->format = VEC_SCATTER_SEQ_GENERAL;
862: from11->format = VEC_SCATTER_SEQ_GENERAL;
863: ctx->todata = (void*)to11;
864: ctx->fromdata = (void*)from11;
865: ctx->ops->begin = VecScatterBegin_SGToSG;
866: ctx->ops->end = NULL;
867: ctx->ops->destroy = VecScatterDestroy_SGToSG;
868: ctx->ops->copy = VecScatterCopy_SGToSG;
869: ctx->ops->view = VecScatterView_SGToSG;
870: VecScatterMemcpyPlanCreate_SGToSG(1,to11,from11);
871: ISRestoreIndices(ix,&idx);
872: ISRestoreIndices(iy,&idy);
873: PetscInfo(ctx->from_v,"Sequential vector scatter with block indices\n");
874: goto functionend;
875: }
876: functionend:
877: ISDestroy(&tix);
878: ISDestroy(&tiy);
879: VecScatterViewFromOptions(ctx,NULL,"-vecscatter_view");
880: return(0);
881: }
883: PetscErrorCode VecScatterCreate_Seq(VecScatter ctx)
884: {
885: PetscErrorCode ierr;
888: ctx->ops->setup = VecScatterSetUp_Seq;
889: PetscObjectChangeTypeName((PetscObject)ctx,VECSCATTERSEQ);
890: return(0);
891: }