Actual source code: seqvscat.c
petsc-3.11.4 2019-09-28
2: /*
3: Code for creating scatters between vectors. This file
4: includes the code for scattering between sequential vectors and
5: some special cases for parallel scatters.
6: */
8: #include <petsc/private/vecscatterimpl.h>
10: #if defined(PETSC_HAVE_CUDA)
11: #include <../src/vec/vec/impls/seq/seqcuda/cudavecimpl.h>
12: #endif
14: #if defined(PETSC_USE_DEBUG)
15: /*
16: Checks if any indices are less than zero and generates an error
17: */
18: static PetscErrorCode VecScatterCheckIndices_Private(PetscInt nmax,PetscInt n,const PetscInt *idx)
19: {
20: PetscInt i;
23: for (i=0; i<n; i++) {
24: if (idx[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative index %D at %D location",idx[i],i);
25: if (idx[i] >= nmax) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index %D at %D location greater than max %D",idx[i],i,nmax);
26: }
27: return(0);
28: }
29: #endif
31: PetscErrorCode VecScatterDestroy_SGToSG(VecScatter ctx)
32: {
36: PetscFree2(((VecScatter_Seq_General*)ctx->todata)->vslots,((VecScatter_Seq_General*)ctx->fromdata)->vslots);
37: VecScatterMemcpyPlanDestroy(&((VecScatter_Seq_General*)ctx->fromdata)->memcpy_plan);;
38: VecScatterMemcpyPlanDestroy(&((VecScatter_Seq_General*)ctx->todata)->memcpy_plan);;
39: PetscFree2(ctx->todata,ctx->fromdata);
40: return(0);
41: }
43: PetscErrorCode VecScatterDestroy_SGToSS(VecScatter ctx)
44: {
48: PetscFree(((VecScatter_Seq_General*)ctx->fromdata)->vslots);
49: VecScatterMemcpyPlanDestroy(&((VecScatter_Seq_General*)ctx->fromdata)->memcpy_plan);;
50: PetscFree2(ctx->todata,ctx->fromdata);
51: return(0);
52: }
54: PetscErrorCode VecScatterDestroy_SSToSG(VecScatter ctx)
55: {
59: PetscFree(((VecScatter_Seq_General*)ctx->todata)->vslots);
60: VecScatterMemcpyPlanDestroy(&((VecScatter_Seq_General*)ctx->todata)->memcpy_plan);;
61: PetscFree2(ctx->todata,ctx->fromdata);
62: return(0);
63: }
65: PetscErrorCode VecScatterDestroy_SSToSS(VecScatter ctx)
66: {
70: PetscFree2(ctx->todata,ctx->fromdata);
71: return(0);
72: }
74: /* --------------------------------------------------------------------------------------*/
75: /*
76: Scatter: sequential general to sequential general
77: */
78: PetscErrorCode VecScatterBegin_SGToSG(VecScatter ctx,Vec x,Vec y,InsertMode addv,ScatterMode mode)
79: {
80: VecScatter_Seq_General *gen_to = (VecScatter_Seq_General*)ctx->todata;
81: VecScatter_Seq_General *gen_from = (VecScatter_Seq_General*)ctx->fromdata;
82: PetscErrorCode ierr;
83: PetscInt i,n = gen_from->n,*fslots,*tslots;
84: PetscScalar *xv,*yv;
85: #if defined(PETSC_HAVE_CUDA)
86: PetscBool is_veccuda;
87: #endif
90: #if defined(PETSC_HAVE_CUDA)
91: PetscObjectTypeCompareAny((PetscObject)x,&is_veccuda,VECSEQCUDA,VECMPICUDA,VECCUDA,"");
92: if (is_veccuda && x->valid_GPU_array == PETSC_OFFLOAD_GPU) {
93: /* create the scatter indices if not done already */
94: if (!ctx->spptr) {
95: PetscInt tofirst = 0,tostep = 0,fromfirst = 0,fromstep = 0;
96: fslots = gen_from->vslots;
97: tslots = gen_to->vslots;
98: VecScatterCUDAIndicesCreate_StoS(n,tofirst,fromfirst,tostep,fromstep,tslots,fslots,(PetscCUDAIndices*)&(ctx->spptr));
99: }
100: /* next do the scatter */
101: VecScatterCUDA_StoS(x,y,(PetscCUDAIndices)ctx->spptr,addv,mode);
102: return(0);
103: }
104: #endif
106: VecGetArrayPair(x,y,&xv,&yv);
107: if (mode & SCATTER_REVERSE) {
108: gen_to = (VecScatter_Seq_General*)ctx->fromdata;
109: gen_from = (VecScatter_Seq_General*)ctx->todata;
110: }
111: fslots = gen_from->vslots;
112: tslots = gen_to->vslots;
114: if (gen_from->memcpy_plan.optimized[0]) { VecScatterMemcpyPlanExecute_Scatter(0,xv,&gen_from->memcpy_plan,yv,&gen_to->memcpy_plan,addv); }
115: else if (addv == INSERT_VALUES) { for (i=0; i<n; i++) yv[tslots[i]] = xv[fslots[i]]; }
116: else if (addv == ADD_VALUES) { for (i=0; i<n; i++) yv[tslots[i]] += xv[fslots[i]]; }
117: #if !defined(PETSC_USE_COMPLEX)
118: else if (addv == MAX_VALUES) { for (i=0; i<n; i++) yv[tslots[i]] = PetscMax(yv[tslots[i]],xv[fslots[i]]); }
119: #endif
120: else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_UNKNOWN_TYPE,"Wrong insert option");
121: VecRestoreArrayPair(x,y,&xv,&yv);
122: return(0);
123: }
125: /*
126: Scatter: sequential general to sequential stride 1
127: */
128: PetscErrorCode VecScatterBegin_SGToSS_Stride1(VecScatter ctx,Vec x,Vec y,InsertMode addv,ScatterMode mode)
129: {
130: VecScatter_Seq_Stride *gen_to = (VecScatter_Seq_Stride*)ctx->todata;
131: VecScatter_Seq_General *gen_from = (VecScatter_Seq_General*)ctx->fromdata;
132: PetscInt i,n = gen_from->n,*fslots = gen_from->vslots;
133: PetscErrorCode ierr;
134: PetscInt first = gen_to->first;
135: PetscScalar *xv,*yv;
136: #if defined(PETSC_HAVE_CUDA)
137: PetscBool is_veccuda;
138: #endif
141: #if defined(PETSC_HAVE_CUDA)
142: PetscObjectTypeCompareAny((PetscObject)x,&is_veccuda,VECSEQCUDA,VECMPICUDA,VECCUDA,"");
143: if (is_veccuda && x->valid_GPU_array == PETSC_OFFLOAD_GPU) {
144: /* create the scatter indices if not done already */
145: if (!ctx->spptr) {
146: PetscInt tofirst = first,tostep = 1,fromfirst = 0,fromstep = 0;
147: PetscInt *tslots = 0;
148: VecScatterCUDAIndicesCreate_StoS(n,tofirst,fromfirst,tostep,fromstep,tslots,fslots,(PetscCUDAIndices*)&(ctx->spptr));
149: }
150: /* next do the scatter */
151: VecScatterCUDA_StoS(x,y,(PetscCUDAIndices)ctx->spptr,addv,mode);
152: return(0);
153: }
154: #endif
156: VecGetArrayPair(x,y,&xv,&yv);
157: if (mode & SCATTER_REVERSE) {
158: PetscScalar *xxv = xv + first;
159: if (gen_from->memcpy_plan.optimized[0]) { VecScatterMemcpyPlanExecute_Unpack(0,xxv,yv,&gen_from->memcpy_plan,addv,1); }
160: else if (addv == INSERT_VALUES) { for (i=0; i<n; i++) yv[fslots[i]] = xxv[i]; }
161: else if (addv == ADD_VALUES) { for (i=0; i<n; i++) yv[fslots[i]] += xxv[i]; }
162: #if !defined(PETSC_USE_COMPLEX)
163: else if (addv == MAX_VALUES) { for (i=0; i<n; i++) yv[fslots[i]] = PetscMax(yv[fslots[i]],xxv[i]); }
164: #endif
165: else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_UNKNOWN_TYPE,"Wrong insert option");
166: } else {
167: PetscScalar *yyv = yv + first;
168: if (gen_from->memcpy_plan.optimized[0]) { VecScatterMemcpyPlanExecute_Pack(0,xv,&gen_from->memcpy_plan,yyv,addv,1); }
169: else if (addv == INSERT_VALUES) { for (i=0; i<n; i++) yyv[i] = xv[fslots[i]]; }
170: else if (addv == ADD_VALUES) { for (i=0; i<n; i++) yyv[i] += xv[fslots[i]]; }
171: #if !defined(PETSC_USE_COMPLEX)
172: else if (addv == MAX_VALUES) { for (i=0; i<n; i++) yyv[i] = PetscMax(yyv[i],xv[fslots[i]]); }
173: #endif
174: else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_UNKNOWN_TYPE,"Wrong insert option");
175: }
176: VecRestoreArrayPair(x,y,&xv,&yv);
177: return(0);
178: }
180: /*
181: Scatter: sequential general to sequential stride
182: */
183: PetscErrorCode VecScatterBegin_SGToSS(VecScatter ctx,Vec x,Vec y,InsertMode addv,ScatterMode mode)
184: {
185: VecScatter_Seq_Stride *gen_to = (VecScatter_Seq_Stride*)ctx->todata;
186: VecScatter_Seq_General *gen_from = (VecScatter_Seq_General*)ctx->fromdata;
187: PetscInt i,n = gen_from->n,*fslots = gen_from->vslots;
188: PetscErrorCode ierr;
189: PetscInt first = gen_to->first,step = gen_to->step;
190: PetscScalar *xv,*yv;
191: #if defined(PETSC_HAVE_CUDA)
192: PetscBool is_veccuda;
193: #endif
196: #if defined(PETSC_HAVE_CUDA)
197: PetscObjectTypeCompareAny((PetscObject)x,&is_veccuda,VECSEQCUDA,VECMPICUDA,VECCUDA,"");
198: if (is_veccuda && x->valid_GPU_array == PETSC_OFFLOAD_GPU) {
199: /* create the scatter indices if not done already */
200: if (!ctx->spptr) {
201: PetscInt tofirst = first,tostep = step,fromfirst = 0,fromstep = 0;
202: PetscInt * tslots = 0;
203: VecScatterCUDAIndicesCreate_StoS(n,tofirst,fromfirst,tostep,fromstep,tslots,fslots,(PetscCUDAIndices*)&(ctx->spptr));
204: }
205: /* next do the scatter */
206: VecScatterCUDA_StoS(x,y,(PetscCUDAIndices)ctx->spptr,addv,mode);
207: return(0);
208: }
209: #endif
211: VecGetArrayPair(x,y,&xv,&yv);
212: if (mode & SCATTER_REVERSE) {
213: if (addv == INSERT_VALUES) {
214: for (i=0; i<n; i++) yv[fslots[i]] = xv[first + i*step];
215: } else if (addv == ADD_VALUES) {
216: for (i=0; i<n; i++) yv[fslots[i]] += xv[first + i*step];
217: #if !defined(PETSC_USE_COMPLEX)
218: } else if (addv == MAX_VALUES) {
219: for (i=0; i<n; i++) yv[fslots[i]] = PetscMax(yv[fslots[i]],xv[first + i*step]);
220: #endif
221: } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_UNKNOWN_TYPE,"Wrong insert option");
222: } else {
223: if (addv == INSERT_VALUES) {
224: for (i=0; i<n; i++) yv[first + i*step] = xv[fslots[i]];
225: } else if (addv == ADD_VALUES) {
226: for (i=0; i<n; i++) yv[first + i*step] += xv[fslots[i]];
227: #if !defined(PETSC_USE_COMPLEX)
228: } else if (addv == MAX_VALUES) {
229: for (i=0; i<n; i++) yv[first + i*step] = PetscMax(yv[first + i*step],xv[fslots[i]]);
230: #endif
231: } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_UNKNOWN_TYPE,"Wrong insert option");
232: }
233: VecRestoreArrayPair(x,y,&xv,&yv);
234: return(0);
235: }
237: /*
238: Scatter: sequential stride 1 to sequential general
239: */
240: PetscErrorCode VecScatterBegin_SSToSG_Stride1(VecScatter ctx,Vec x,Vec y,InsertMode addv,ScatterMode mode)
241: {
242: VecScatter_Seq_Stride *gen_from = (VecScatter_Seq_Stride*)ctx->fromdata;
243: VecScatter_Seq_General *gen_to = (VecScatter_Seq_General*)ctx->todata;
244: PetscInt i,n = gen_from->n,*tslots = gen_to->vslots;
245: PetscErrorCode ierr;
246: PetscInt first = gen_from->first;
247: PetscScalar *xv,*yv;
248: #if defined(PETSC_HAVE_CUDA)
249: PetscBool is_veccuda;
250: #endif
253: #if defined(PETSC_HAVE_CUDA)
254: PetscObjectTypeCompareAny((PetscObject)x,&is_veccuda,VECSEQCUDA,VECMPICUDA,VECCUDA,"");
255: if (is_veccuda && x->valid_GPU_array == PETSC_OFFLOAD_GPU) {
256: /* create the scatter indices if not done already */
257: if (!ctx->spptr) {
258: PetscInt tofirst = 0,tostep = 0,fromfirst = first,fromstep = 1;
259: PetscInt *fslots = 0;
260: VecScatterCUDAIndicesCreate_StoS(n,tofirst,fromfirst,tostep,fromstep,tslots,fslots,(PetscCUDAIndices*)&(ctx->spptr));
261: }
262: /* next do the scatter */
263: VecScatterCUDA_StoS(x,y,(PetscCUDAIndices)ctx->spptr,addv,mode);
264: return(0);
265: }
266: #endif
268: VecGetArrayPair(x,y,&xv,&yv);
269: if (mode & SCATTER_REVERSE) {
270: PetscScalar *yyv = yv + first;
271: if (gen_to->memcpy_plan.optimized[0]) { VecScatterMemcpyPlanExecute_Pack(0,xv,&gen_to->memcpy_plan,yyv,addv,1); }
272: else if (addv == INSERT_VALUES) { for (i=0; i<n; i++) yyv[i] = xv[tslots[i]]; }
273: else if (addv == ADD_VALUES) { for (i=0; i<n; i++) yyv[i] += xv[tslots[i]]; }
274: #if !defined(PETSC_USE_COMPLEX)
275: else if (addv == MAX_VALUES) { for (i=0; i<n; i++) yyv[i] = PetscMax(yyv[i],xv[tslots[i]]); }
276: #endif
277: else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_UNKNOWN_TYPE,"Wrong insert option");
278: } else {
279: PetscScalar *xxv = xv + first;
280: if (gen_to->memcpy_plan.optimized[0]) { VecScatterMemcpyPlanExecute_Unpack(0,xxv,yv,&gen_to->memcpy_plan,addv,1); }
281: else if (addv == INSERT_VALUES) { for (i=0; i<n; i++) yv[tslots[i]] = xxv[i]; }
282: else if (addv == ADD_VALUES) { for (i=0; i<n; i++) yv[tslots[i]] += xxv[i]; }
283: #if !defined(PETSC_USE_COMPLEX)
284: else if (addv == MAX_VALUES) { for (i=0; i<n; i++) yv[tslots[i]] = PetscMax(yv[tslots[i]],xxv[i]); }
285: #endif
286: else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_UNKNOWN_TYPE,"Wrong insert option");
287: }
288: VecRestoreArrayPair(x,y,&xv,&yv);
289: return(0);
290: }
292: /*
293: Scatter: sequential stride to sequential general
294: */
295: PetscErrorCode VecScatterBegin_SSToSG(VecScatter ctx,Vec x,Vec y,InsertMode addv,ScatterMode mode)
296: {
297: VecScatter_Seq_Stride *gen_from = (VecScatter_Seq_Stride*)ctx->fromdata;
298: VecScatter_Seq_General *gen_to = (VecScatter_Seq_General*)ctx->todata;
299: PetscInt i,n = gen_from->n,*tslots = gen_to->vslots;
300: PetscErrorCode ierr;
301: PetscInt first = gen_from->first,step = gen_from->step;
302: PetscScalar *xv,*yv;
303: #if defined(PETSC_HAVE_CUDA)
304: PetscBool is_veccuda;
305: #endif
308: #if defined(PETSC_HAVE_CUDA)
309: PetscObjectTypeCompareAny((PetscObject)x,&is_veccuda,VECSEQCUDA,VECMPICUDA,VECCUDA,"");
310: if (is_veccuda && x->valid_GPU_array == PETSC_OFFLOAD_GPU) {
311: /* create the scatter indices if not done already */
312: if (!ctx->spptr) {
313: PetscInt tofirst = 0,tostep = 0,fromfirst = first,fromstep = step;
314: PetscInt *fslots = 0;
315: VecScatterCUDAIndicesCreate_StoS(n,tofirst,fromfirst,tostep,fromstep,tslots,fslots,(PetscCUDAIndices*)&(ctx->spptr));
316: }
317: /* next do the scatter */
318: VecScatterCUDA_StoS(x,y,(PetscCUDAIndices)ctx->spptr,addv,mode);
319: return(0);
320: }
321: #endif
323: VecGetArrayPair(x,y,&xv,&yv);
324: if (mode & SCATTER_REVERSE) {
325: if (addv == INSERT_VALUES) {
326: for (i=0; i<n; i++) yv[first + i*step] = xv[tslots[i]];
327: } else if (addv == ADD_VALUES) {
328: for (i=0; i<n; i++) yv[first + i*step] += xv[tslots[i]];
329: #if !defined(PETSC_USE_COMPLEX)
330: } else if (addv == MAX_VALUES) {
331: for (i=0; i<n; i++) yv[first + i*step] = PetscMax(yv[first + i*step],xv[tslots[i]]);
332: #endif
333: } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_UNKNOWN_TYPE,"Wrong insert option");
334: } else {
335: if (addv == INSERT_VALUES) {
336: for (i=0; i<n; i++) yv[tslots[i]] = xv[first + i*step];
337: } else if (addv == ADD_VALUES) {
338: for (i=0; i<n; i++) yv[tslots[i]] += xv[first + i*step];
339: #if !defined(PETSC_USE_COMPLEX)
340: } else if (addv == MAX_VALUES) {
341: for (i=0; i<n; i++) yv[tslots[i]] = PetscMax(yv[tslots[i]],xv[first + i*step]);
342: #endif
343: } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_UNKNOWN_TYPE,"Wrong insert option");
344: }
345: VecRestoreArrayPair(x,y,&xv,&yv);
346: return(0);
347: }
349: PetscErrorCode VecScatterView_SSToSG(VecScatter in,PetscViewer viewer)
350: {
351: PetscErrorCode ierr;
352: VecScatter_Seq_Stride *in_from = (VecScatter_Seq_Stride*)in->fromdata;
353: VecScatter_Seq_General *in_to = (VecScatter_Seq_General*)in->todata;
354: PetscInt i;
355: PetscBool isascii;
358: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&isascii);
359: if (isascii) {
360: PetscViewerASCIIPrintf(viewer,"Sequential stride to general scatter\n");
361: for (i=0; i<in_to->n; i++) {
362: PetscViewerASCIIPrintf(viewer,"%D to %D\n",in_from->first + in_from->step*i,in_to->vslots[i]);
363: }
364: if (in_to->memcpy_plan.optimized[0]) {
365: PetscViewerASCIIPrintf(viewer,"This stride1 to general scatter is made of %D copies\n",in_to->memcpy_plan.copy_offsets[1]);
366: }
367: }
368: return(0);
369: }
370: /*
371: Scatter: sequential stride to sequential stride
372: */
373: PetscErrorCode VecScatterBegin_SSToSS(VecScatter ctx,Vec x,Vec y,InsertMode addv,ScatterMode mode)
374: {
375: VecScatter_Seq_Stride *gen_to = (VecScatter_Seq_Stride*)ctx->todata;
376: VecScatter_Seq_Stride *gen_from = (VecScatter_Seq_Stride*)ctx->fromdata;
377: PetscInt i,n = gen_from->n,to_first = gen_to->first,to_step = gen_to->step;
378: PetscErrorCode ierr;
379: PetscInt from_first = gen_from->first,from_step = gen_from->step;
380: PetscScalar *xv,*yv;
381: #if defined(PETSC_HAVE_CUDA)
382: PetscBool is_veccuda;
383: #endif
386: #if defined(PETSC_HAVE_CUDA)
387: PetscObjectTypeCompareAny((PetscObject)x,&is_veccuda,VECSEQCUDA,VECMPICUDA,VECCUDA,"");
388: if (is_veccuda && x->valid_GPU_array == PETSC_OFFLOAD_GPU) {
389: /* create the scatter indices if not done already */
390: if (!ctx->spptr) {
391: PetscInt *tslots = 0,*fslots = 0;
392: VecScatterCUDAIndicesCreate_StoS(n,to_first,from_first,to_step,from_step,tslots,fslots,(PetscCUDAIndices*)&(ctx->spptr));
393: }
394: /* next do the scatter */
395: VecScatterCUDA_StoS(x,y,(PetscCUDAIndices)ctx->spptr,addv,mode);
396: return(0);
397: }
398: #endif
400: VecGetArrayPair(x,y,&xv,&yv);
401: if (mode & SCATTER_REVERSE) {
402: from_first = gen_to->first;
403: to_first = gen_from->first;
404: from_step = gen_to->step;
405: to_step = gen_from->step;
406: }
408: if (addv == INSERT_VALUES) {
409: if (to_step == 1 && from_step == 1) {
410: PetscMemcpy(yv+to_first,xv+from_first,n*sizeof(PetscScalar));
411: } else {
412: for (i=0; i<n; i++) yv[to_first + i*to_step] = xv[from_first+i*from_step];
413: }
414: } else if (addv == ADD_VALUES) {
415: if (to_step == 1 && from_step == 1) {
416: PetscScalar *yyv = yv + to_first, *xxv = xv + from_first;
417: for (i=0; i<n; i++) yyv[i] += xxv[i];
418: } else {
419: for (i=0; i<n; i++) yv[to_first + i*to_step] += xv[from_first+i*from_step];
420: }
421: #if !defined(PETSC_USE_COMPLEX)
422: } else if (addv == MAX_VALUES) {
423: if (to_step == 1 && from_step == 1) {
424: PetscScalar *yyv = yv + to_first, *xxv = xv + from_first;
425: for (i=0; i<n; i++) yyv[i] = PetscMax(yyv[i],xxv[i]);
426: } else {
427: for (i=0; i<n; i++) yv[to_first + i*to_step] = PetscMax(yv[to_first + i*to_step],xv[from_first+i*from_step]);
428: }
429: #endif
430: } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_UNKNOWN_TYPE,"Wrong insert option");
431: VecRestoreArrayPair(x,y,&xv,&yv);
432: return(0);
433: }
435: /* --------------------------------------------------------------------------*/
438: PetscErrorCode VecScatterCopy_SGToSG(VecScatter in,VecScatter out)
439: {
440: PetscErrorCode ierr;
441: VecScatter_Seq_General *in_to = (VecScatter_Seq_General*)in->todata,*out_to = NULL;
442: VecScatter_Seq_General *in_from = (VecScatter_Seq_General*)in->fromdata,*out_from = NULL;
445: out->ops->begin = in->ops->begin;
446: out->ops->end = in->ops->end;
447: out->ops->copy = in->ops->copy;
448: out->ops->destroy = in->ops->destroy;
449: out->ops->view = in->ops->view;
451: PetscMalloc2(1,&out_to,1,&out_from);
452: PetscMalloc2(in_to->n,&out_to->vslots,in_from->n,&out_from->vslots);
453: out_to->n = in_to->n;
454: out_to->format = in_to->format;
455: out_to->nonmatching_computed = PETSC_FALSE;
456: out_to->slots_nonmatching = 0;
457: PetscMemcpy(out_to->vslots,in_to->vslots,(out_to->n)*sizeof(PetscInt));
458: VecScatterMemcpyPlanCopy(&in_to->memcpy_plan,&out_to->memcpy_plan);
460: out_from->n = in_from->n;
461: out_from->format = in_from->format;
462: out_from->nonmatching_computed = PETSC_FALSE;
463: out_from->slots_nonmatching = 0;
464: PetscMemcpy(out_from->vslots,in_from->vslots,(out_from->n)*sizeof(PetscInt));
465: VecScatterMemcpyPlanCopy(&in_from->memcpy_plan,&out_from->memcpy_plan);
467: out->todata = (void*)out_to;
468: out->fromdata = (void*)out_from;
469: return(0);
470: }
472: PetscErrorCode VecScatterView_SGToSG(VecScatter in,PetscViewer viewer)
473: {
474: PetscErrorCode ierr;
475: VecScatter_Seq_General *in_to = (VecScatter_Seq_General*)in->todata;
476: VecScatter_Seq_General *in_from = (VecScatter_Seq_General*)in->fromdata;
477: PetscInt i;
478: PetscBool isascii;
481: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&isascii);
482: if (isascii) {
483: PetscViewerASCIIPrintf(viewer,"Sequential general scatter\n");
484: for (i=0; i<in_to->n; i++) {
485: PetscViewerASCIIPrintf(viewer,"%D to %D\n",in_from->vslots[i],in_to->vslots[i]);
486: }
487: if (in_from->memcpy_plan.optimized[0]) {
488: PetscViewerASCIIPrintf(viewer,"This general to general scatter is made of %D copies\n",in_from->memcpy_plan.copy_offsets[1]);
489: }
490: }
491: return(0);
492: }
495: PetscErrorCode VecScatterCopy_SGToSS(VecScatter in,VecScatter out)
496: {
497: PetscErrorCode ierr;
498: VecScatter_Seq_Stride *in_to = (VecScatter_Seq_Stride*)in->todata,*out_to = NULL;
499: VecScatter_Seq_General *in_from = (VecScatter_Seq_General*)in->fromdata,*out_from = NULL;
502: out->ops->begin = in->ops->begin;
503: out->ops->end = in->ops->end;
504: out->ops->copy = in->ops->copy;
505: out->ops->destroy = in->ops->destroy;
506: out->ops->view = in->ops->view;
508: PetscMalloc2(1,&out_to,1,&out_from);
509: PetscMalloc1(in_from->n,&out_from->vslots);
510: out_to->n = in_to->n;
511: out_to->format = in_to->format;
512: out_to->first = in_to->first;
513: out_to->step = in_to->step;
514: out_to->format = in_to->format;
516: out_from->n = in_from->n;
517: out_from->format = in_from->format;
518: out_from->nonmatching_computed = PETSC_FALSE;
519: out_from->slots_nonmatching = 0;
520: PetscMemcpy(out_from->vslots,in_from->vslots,(out_from->n)*sizeof(PetscInt));
521: VecScatterMemcpyPlanCopy(&in_from->memcpy_plan,&out_from->memcpy_plan);
523: out->todata = (void*)out_to;
524: out->fromdata = (void*)out_from;
525: return(0);
526: }
528: PetscErrorCode VecScatterView_SGToSS(VecScatter in,PetscViewer viewer)
529: {
530: PetscErrorCode ierr;
531: VecScatter_Seq_Stride *in_to = (VecScatter_Seq_Stride*)in->todata;
532: VecScatter_Seq_General *in_from = (VecScatter_Seq_General*)in->fromdata;
533: PetscInt i;
534: PetscBool isascii;
537: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&isascii);
538: if (isascii) {
539: PetscViewerASCIIPrintf(viewer,"Sequential general scatter to stride\n");
540: for (i=0; i<in_to->n; i++) {
541: PetscViewerASCIIPrintf(viewer,"%D to %D\n",in_from->vslots[i],in_to->first + in_to->step*i);
542: }
543: if (in_from->memcpy_plan.optimized[0]) {
544: PetscViewerASCIIPrintf(viewer,"This general to stride1 scatter is made of %D copies\n",in_from->memcpy_plan.copy_offsets[1]);
545: }
546: }
547: return(0);
548: }
550: /* --------------------------------------------------------------------------*/
551: /*
552: Scatter: parallel to sequential vector, sequential strides for both.
553: */
554: PetscErrorCode VecScatterCopy_SSToSS(VecScatter in,VecScatter out)
555: {
556: VecScatter_Seq_Stride *in_to = (VecScatter_Seq_Stride*)in->todata,*out_to = NULL;
557: VecScatter_Seq_Stride *in_from = (VecScatter_Seq_Stride*)in->fromdata,*out_from = NULL;
558: PetscErrorCode ierr;
561: out->ops->begin = in->ops->begin;
562: out->ops->end = in->ops->end;
563: out->ops->copy = in->ops->copy;
564: out->ops->destroy = in->ops->destroy;
565: out->ops->view = in->ops->view;
567: PetscMalloc2(1,&out_to,1,&out_from);
568: out_to->n = in_to->n;
569: out_to->format = in_to->format;
570: out_to->first = in_to->first;
571: out_to->step = in_to->step;
572: out_to->format = in_to->format;
573: out_from->n = in_from->n;
574: out_from->format= in_from->format;
575: out_from->first = in_from->first;
576: out_from->step = in_from->step;
577: out_from->format= in_from->format;
578: out->todata = (void*)out_to;
579: out->fromdata = (void*)out_from;
580: return(0);
581: }
583: PetscErrorCode VecScatterView_SSToSS(VecScatter in,PetscViewer viewer)
584: {
585: VecScatter_Seq_Stride *in_to = (VecScatter_Seq_Stride*)in->todata;
586: VecScatter_Seq_Stride *in_from = (VecScatter_Seq_Stride*)in->fromdata;
587: PetscErrorCode ierr;
588: PetscBool isascii;
591: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&isascii);
592: if (isascii) {
593: PetscViewerASCIIPrintf(viewer,"Sequential stride count %D start %D step to start %D stride %D\n",in_to->n,in_to->first,in_to->step,in_from->first,in_from->step);
594: }
595: return(0);
596: }
598: /* --------------------------------------------------------------------------------------*/
599: /* Create a memcpy plan for a sequential general (SG) to SG scatter */
600: PetscErrorCode VecScatterMemcpyPlanCreate_SGToSG(PetscInt bs,VecScatter_Seq_General *to,VecScatter_Seq_General *from)
601: {
602: PetscInt n = to->n,i,*to_slots = to->vslots,*from_slots = from->vslots;
603: PetscInt j,n_copies;
605: PetscBool same_copy_starts;
608: PetscMemzero(&to->memcpy_plan,sizeof(VecScatterMemcpyPlan));
609: PetscMemzero(&from->memcpy_plan,sizeof(VecScatterMemcpyPlan));
610: to->memcpy_plan.n = 1;
611: from->memcpy_plan.n = 1;
613: /* malloc and init the two fields to false and zero */
614: PetscCalloc2(1,&to->memcpy_plan.optimized,2,&to->memcpy_plan.copy_offsets);
615: PetscCalloc2(1,&from->memcpy_plan.optimized,2,&from->memcpy_plan.copy_offsets);
617: /* count number of copies, which runs from 1 to n */
618: n_copies = 1;
619: for (i=0; i<n-1; i++) {
620: if (to_slots[i]+bs != to_slots[i+1] || from_slots[i]+bs != from_slots[i+1]) n_copies++;
621: }
623: /* if average copy size >= 256 bytes, use memcpy instead of load/store */
624: if (bs*n*sizeof(PetscScalar)/n_copies >= 256) {
625: PetscMalloc2(n_copies,&to->memcpy_plan.copy_starts,n_copies,&to->memcpy_plan.copy_lengths);
626: PetscMalloc2(n_copies,&from->memcpy_plan.copy_starts,n_copies,&from->memcpy_plan.copy_lengths);
628: /* set up copy_starts[] & copy_lenghts[] of to and from */
629: to->memcpy_plan.copy_starts[0] = to_slots[0];
630: from->memcpy_plan.copy_starts[0] = from_slots[0];
632: if (n_copies != 1) { /* one copy is trival and we can save some work */
633: j = 0; /* j-th copy */
634: for (i=0; i<n-1; i++) {
635: if (to_slots[i]+bs != to_slots[i+1] || from_slots[i]+bs != from_slots[i+1]) {
636: to->memcpy_plan.copy_lengths[j] = sizeof(PetscScalar)*(to_slots[i]+bs-to->memcpy_plan.copy_starts[j]);
637: from->memcpy_plan.copy_lengths[j] = sizeof(PetscScalar)*(from_slots[i]+bs-from->memcpy_plan.copy_starts[j]);
638: to->memcpy_plan.copy_starts[j+1] = to_slots[i+1];
639: from->memcpy_plan.copy_starts[j+1] = from_slots[i+1];
640: j++;
641: }
642: }
643: }
645: /* set up copy_lengths[] of the last copy */
646: to->memcpy_plan.copy_lengths[n_copies-1] = sizeof(PetscScalar)*(to_slots[n-1]+bs-to->memcpy_plan.copy_starts[n_copies-1]);
647: from->memcpy_plan.copy_lengths[n_copies-1] = sizeof(PetscScalar)*(from_slots[n-1]+bs-from->memcpy_plan.copy_starts[n_copies-1]);
649: /* check if to and from have the same copy_starts[] values */
650: same_copy_starts = PETSC_TRUE;
651: for (i=0; i<n_copies; i++) {
652: if (to->memcpy_plan.copy_starts[i] != from->memcpy_plan.copy_starts[i]) { same_copy_starts = PETSC_FALSE; break; }
653: }
655: to->memcpy_plan.optimized[0] = PETSC_TRUE;
656: from->memcpy_plan.optimized[0] = PETSC_TRUE;
657: to->memcpy_plan.copy_offsets[1] = n_copies;
658: from->memcpy_plan.copy_offsets[1] = n_copies;
659: to->memcpy_plan.same_copy_starts = same_copy_starts;
660: from->memcpy_plan.same_copy_starts = same_copy_starts;
661: }
663: /* we do not do stride optimzation for this kind of scatter since the chance is rare. All related fields are zeroed out */
664: return(0);
665: }
667: /* -------------------------------------------------- */
668: PetscErrorCode VecScatterSetUp_Seq(VecScatter ctx)
669: {
670: PetscErrorCode ierr;
671: PetscInt ix_type=-1,iy_type=-1;
672: IS tix = NULL,tiy = NULL,ix=ctx->from_is,iy=ctx->to_is;
673: Vec xin=ctx->from_v;
676: GetInputISType_private(ctx,VEC_SEQ_ID,VEC_SEQ_ID,&ix_type,&tix,&iy_type,&tiy);
677: if (tix) ix = tix;
678: if (tiy) iy = tiy;
680: if (ix_type == IS_GENERAL_ID && iy_type == IS_GENERAL_ID) {
681: PetscInt nx,ny;
682: const PetscInt *idx,*idy;
683: VecScatter_Seq_General *to = NULL,*from = NULL;
685: ISGetLocalSize(ix,&nx);
686: ISGetLocalSize(iy,&ny);
687: if (nx != ny) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local scatter sizes don't match");
688: ISGetIndices(ix,&idx);
689: ISGetIndices(iy,&idy);
690: PetscMalloc2(1,&to,1,&from);
691: PetscMalloc2(nx,&to->vslots,nx,&from->vslots);
692: to->n = nx;
693: #if defined(PETSC_USE_DEBUG)
694: VecScatterCheckIndices_Private(ctx->to_n,ny,idy);
695: #endif
696: PetscMemcpy(to->vslots,idy,nx*sizeof(PetscInt));
697: from->n = nx;
698: #if defined(PETSC_USE_DEBUG)
699: VecScatterCheckIndices_Private(ctx->from_n,nx,idx);
700: #endif
701: PetscMemcpy(from->vslots,idx,nx*sizeof(PetscInt));
702: to->format = VEC_SCATTER_SEQ_GENERAL;
703: from->format = VEC_SCATTER_SEQ_GENERAL;
704: ctx->todata = (void*)to;
705: ctx->fromdata = (void*)from;
706: VecScatterMemcpyPlanCreate_SGToSG(1,to,from);
707: ctx->ops->begin = VecScatterBegin_SGToSG;
708: ctx->ops->end = 0;
709: ctx->ops->destroy = VecScatterDestroy_SGToSG;
710: ctx->ops->copy = VecScatterCopy_SGToSG;
711: ctx->ops->view = VecScatterView_SGToSG;
712: PetscInfo(xin,"Special case: sequential vector general scatter\n");
713: goto functionend;
714: } else if (ix_type == IS_STRIDE_ID && iy_type == IS_STRIDE_ID) {
715: PetscInt nx,ny,to_first,to_step,from_first,from_step;
716: VecScatter_Seq_Stride *from8 = NULL,*to8 = NULL;
718: ISGetLocalSize(ix,&nx);
719: ISGetLocalSize(iy,&ny);
720: if (nx != ny) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local scatter sizes don't match");
721: ISStrideGetInfo(iy,&to_first,&to_step);
722: ISStrideGetInfo(ix,&from_first,&from_step);
723: PetscMalloc2(1,&to8,1,&from8);
724: to8->n = nx;
725: to8->first = to_first;
726: to8->step = to_step;
727: from8->n = nx;
728: from8->first = from_first;
729: from8->step = from_step;
730: to8->format = VEC_SCATTER_SEQ_STRIDE;
731: from8->format = VEC_SCATTER_SEQ_STRIDE;
732: ctx->todata = (void*)to8;
733: ctx->fromdata = (void*)from8;
734: ctx->ops->begin = VecScatterBegin_SSToSS;
735: ctx->ops->end = 0;
736: ctx->ops->destroy = VecScatterDestroy_SSToSS;
737: ctx->ops->copy = VecScatterCopy_SSToSS;
738: ctx->ops->view = VecScatterView_SSToSS;
739: PetscInfo(xin,"Special case: sequential vector stride to stride\n");
740: goto functionend;
741: } else if (ix_type == IS_GENERAL_ID && iy_type == IS_STRIDE_ID) {
742: PetscInt nx,ny,first,step;
743: const PetscInt *idx;
744: VecScatter_Seq_General *from9 = NULL;
745: VecScatter_Seq_Stride *to9 = NULL;
747: ISGetLocalSize(ix,&nx);
748: ISGetIndices(ix,&idx);
749: ISGetLocalSize(iy,&ny);
750: ISStrideGetInfo(iy,&first,&step);
751: if (nx != ny) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local scatter sizes don't match");
752: PetscMalloc2(1,&to9,1,&from9);
753: PetscMemzero(&from9->memcpy_plan,sizeof(VecScatterMemcpyPlan));
754: PetscMalloc1(nx,&from9->vslots);
755: to9->n = nx;
756: to9->first = first;
757: to9->step = step;
758: from9->n = nx;
759: #if defined(PETSC_USE_DEBUG)
760: VecScatterCheckIndices_Private(ctx->from_n,nx,idx);
761: #endif
762: PetscMemcpy(from9->vslots,idx,nx*sizeof(PetscInt));
763: ctx->todata = (void*)to9; ctx->fromdata = (void*)from9;
764: if (step == 1) {
765: PetscInt tmp[2];
766: tmp[0] = 0; tmp[1] = nx;
767: VecScatterMemcpyPlanCreate_Index(1,tmp,from9->vslots,1,&from9->memcpy_plan);
768: ctx->ops->begin = VecScatterBegin_SGToSS_Stride1;
769: } else {
770: ctx->ops->begin = VecScatterBegin_SGToSS;
771: }
772: ctx->ops->destroy = VecScatterDestroy_SGToSS;
773: ctx->ops->end = 0;
774: ctx->ops->copy = VecScatterCopy_SGToSS;
775: ctx->ops->view = VecScatterView_SGToSS;
776: to9->format = VEC_SCATTER_SEQ_STRIDE;
777: from9->format = VEC_SCATTER_SEQ_GENERAL;
778: PetscInfo(xin,"Special case: sequential vector general to stride\n");
779: goto functionend;
780: } else if (ix_type == IS_STRIDE_ID && iy_type == IS_GENERAL_ID) {
781: PetscInt nx,ny,first,step;
782: const PetscInt *idy;
783: VecScatter_Seq_General *to10 = NULL;
784: VecScatter_Seq_Stride *from10 = NULL;
786: ISGetLocalSize(ix,&nx);
787: ISGetIndices(iy,&idy);
788: ISGetLocalSize(iy,&ny);
789: ISStrideGetInfo(ix,&first,&step);
790: if (nx != ny) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local scatter sizes don't match");
791: PetscMalloc2(1,&to10,1,&from10);
792: PetscMemzero(&to10->memcpy_plan,sizeof(VecScatterMemcpyPlan));
793: PetscMalloc1(nx,&to10->vslots);
794: from10->n = nx;
795: from10->first = first;
796: from10->step = step;
797: to10->n = nx;
798: #if defined(PETSC_USE_DEBUG)
799: VecScatterCheckIndices_Private(ctx->to_n,ny,idy);
800: #endif
801: PetscMemcpy(to10->vslots,idy,nx*sizeof(PetscInt));
802: ctx->todata = (void*)to10;
803: ctx->fromdata = (void*)from10;
804: if (step == 1) {
805: PetscInt tmp[2];
806: tmp[0] = 0; tmp[1] = nx;
807: VecScatterMemcpyPlanCreate_Index(1,tmp,to10->vslots,1,&to10->memcpy_plan);
808: ctx->ops->begin = VecScatterBegin_SSToSG_Stride1;
809: } else {
810: ctx->ops->begin = VecScatterBegin_SSToSG;
811: }
812: ctx->ops->destroy = VecScatterDestroy_SSToSG;
813: ctx->ops->end = 0;
814: ctx->ops->copy = 0;
815: ctx->ops->view = VecScatterView_SSToSG;
816: to10->format = VEC_SCATTER_SEQ_GENERAL;
817: from10->format = VEC_SCATTER_SEQ_STRIDE;
818: PetscInfo(xin,"Special case: sequential vector stride to general\n");
819: goto functionend;
820: } else {
821: PetscInt nx,ny;
822: const PetscInt *idx,*idy;
823: VecScatter_Seq_General *to11 = NULL,*from11 = NULL;
824: PetscBool idnx,idny;
826: ISGetLocalSize(ix,&nx);
827: ISGetLocalSize(iy,&ny);
828: if (nx != ny) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local scatter sizes don't match, in %D out %D",nx,ny);
830: ISIdentity(ix,&idnx);
831: ISIdentity(iy,&idny);
832: if (idnx && idny) {
833: VecScatter_Seq_Stride *to13 = NULL,*from13 = NULL;
834: PetscMalloc2(1,&to13,1,&from13);
835: to13->n = nx;
836: to13->first = 0;
837: to13->step = 1;
838: from13->n = nx;
839: from13->first = 0;
840: from13->step = 1;
841: to13->format = VEC_SCATTER_SEQ_STRIDE;
842: from13->format = VEC_SCATTER_SEQ_STRIDE;
843: ctx->todata = (void*)to13;
844: ctx->fromdata = (void*)from13;
845: ctx->ops->begin = VecScatterBegin_SSToSS;
846: ctx->ops->end = 0;
847: ctx->ops->destroy = VecScatterDestroy_SSToSS;
848: ctx->ops->copy = VecScatterCopy_SSToSS;
849: ctx->ops->view = VecScatterView_SSToSS;
850: PetscInfo(xin,"Special case: sequential copy\n");
851: goto functionend;
852: }
854: ISGetIndices(iy,&idy);
855: ISGetIndices(ix,&idx);
856: PetscMalloc2(1,&to11,1,&from11);
857: PetscMalloc2(nx,&to11->vslots,nx,&from11->vslots);
858: to11->n = nx;
859: #if defined(PETSC_USE_DEBUG)
860: VecScatterCheckIndices_Private(ctx->to_n,ny,idy);
861: #endif
862: PetscMemcpy(to11->vslots,idy,nx*sizeof(PetscInt));
863: from11->n = nx;
864: #if defined(PETSC_USE_DEBUG)
865: VecScatterCheckIndices_Private(ctx->from_n,nx,idx);
866: #endif
867: PetscMemcpy(from11->vslots,idx,nx*sizeof(PetscInt));
868: to11->format = VEC_SCATTER_SEQ_GENERAL;
869: from11->format = VEC_SCATTER_SEQ_GENERAL;
870: ctx->todata = (void*)to11;
871: ctx->fromdata = (void*)from11;
872: ctx->ops->begin = VecScatterBegin_SGToSG;
873: ctx->ops->end = 0;
874: ctx->ops->destroy = VecScatterDestroy_SGToSG;
875: ctx->ops->copy = VecScatterCopy_SGToSG;
876: ctx->ops->view = VecScatterView_SGToSG;
877: VecScatterMemcpyPlanCreate_SGToSG(1,to11,from11);
878: ISRestoreIndices(ix,&idx);
879: ISRestoreIndices(iy,&idy);
880: PetscInfo(xin,"Sequential vector scatter with block indices\n");
881: goto functionend;
882: }
883: functionend:
884: ISDestroy(&tix);
885: ISDestroy(&tiy);
886: VecScatterViewFromOptions(ctx,NULL,"-vecscatter_view");
887: return(0);
888: }
890: PetscErrorCode VecScatterCreate_Seq(VecScatter ctx)
891: {
892: PetscErrorCode ierr;
895: ctx->ops->setup = VecScatterSetUp_Seq;
896: PetscObjectChangeTypeName((PetscObject)ctx,VECSCATTERSEQ);
897: return(0);
898: }