Actual source code: vscat.c
petsc-3.14.6 2021-03-30
2: /*
3: Higher level code for creating scatters between vectors called by some of the implementations.
5: The routines check for special cases and then call the implementation function for the general cases
6: */
8: #include <petsc/private/vecscatterimpl.h>
10: #if defined(PETSC_HAVE_CUDA)
11: #include <petsc/private/cudavecimpl.h>
12: #endif
14: /*
15: This is special scatter code for when the entire parallel vector is copied to each processor.
17: This code was written by Cameron Cooper, Occidental College, Fall 1995,
18: while working at ANL as a SERS student.
19: */
20: static PetscErrorCode VecScatterBegin_MPI_ToAll(VecScatter ctx,Vec x,Vec y,InsertMode addv,ScatterMode mode)
21: {
23: PetscInt yy_n,xx_n;
24: PetscScalar *xv,*yv;
27: VecGetLocalSize(y,&yy_n);
28: VecGetLocalSize(x,&xx_n);
29: VecGetArrayPair(x,y,&xv,&yv);
31: if (mode & SCATTER_REVERSE) {
32: PetscScalar *xvt,*xvt2;
33: VecScatter_MPI_ToAll *scat = (VecScatter_MPI_ToAll*)ctx->todata;
34: PetscInt i;
35: PetscMPIInt *disply = scat->displx;
37: if (addv == INSERT_VALUES) {
38: PetscInt rstart,rend;
39: /*
40: copy the correct part of the local vector into the local storage of
41: the MPI one. Note: this operation only makes sense if all the local
42: vectors have the same values
43: */
44: VecGetOwnershipRange(y,&rstart,&rend);
45: PetscArraycpy(yv,xv+rstart,yy_n);
46: } else {
47: MPI_Comm comm;
48: PetscMPIInt rank;
49: PetscObjectGetComm((PetscObject)y,&comm);
50: MPI_Comm_rank(comm,&rank);
51: if (scat->work1) xvt = scat->work1;
52: else {
53: PetscMalloc1(xx_n,&xvt);
54: scat->work1 = xvt;
55: }
56: if (!rank) { /* I am the zeroth processor, values are accumulated here */
57: if (scat->work2) xvt2 = scat->work2;
58: else {
59: PetscMalloc1(xx_n,&xvt2);
60: scat->work2 = xvt2;
61: }
62: MPI_Gatherv(yv,yy_n,MPIU_SCALAR,xvt2,scat->count,disply,MPIU_SCALAR,0,PetscObjectComm((PetscObject)ctx));
63: MPI_Reduce(xv,xvt,xx_n,MPIU_SCALAR,MPIU_SUM,0,PetscObjectComm((PetscObject)ctx));
64: if (addv == ADD_VALUES) {
65: for (i=0; i<xx_n; i++) xvt[i] += xvt2[i];
66: #if !defined(PETSC_USE_COMPLEX)
67: } else if (addv == MAX_VALUES) {
68: for (i=0; i<xx_n; i++) xvt[i] = PetscMax(xvt[i],xvt2[i]);
69: #endif
70: } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_UNKNOWN_TYPE,"Wrong insert option");
71: MPI_Scatterv(xvt,scat->count,disply,MPIU_SCALAR,yv,yy_n,MPIU_SCALAR,0,PetscObjectComm((PetscObject)ctx));
72: } else {
73: MPI_Gatherv(yv,yy_n,MPIU_SCALAR,NULL,NULL,NULL,MPIU_SCALAR,0,PetscObjectComm((PetscObject)ctx));
74: MPI_Reduce(xv,xvt,xx_n,MPIU_SCALAR,MPIU_SUM,0,PetscObjectComm((PetscObject)ctx));
75: MPI_Scatterv(NULL,scat->count,disply,MPIU_SCALAR,yv,yy_n,MPIU_SCALAR,0,PetscObjectComm((PetscObject)ctx));
76: }
77: }
78: } else {
79: PetscScalar *yvt;
80: VecScatter_MPI_ToAll *scat = (VecScatter_MPI_ToAll*)ctx->todata;
81: PetscInt i;
82: PetscMPIInt *displx = scat->displx;
84: if (addv == INSERT_VALUES) {
85: MPI_Allgatherv(xv,xx_n,MPIU_SCALAR,yv,scat->count,displx,MPIU_SCALAR,PetscObjectComm((PetscObject)ctx));
86: } else {
87: if (scat->work1) yvt = scat->work1;
88: else {
89: PetscMalloc1(yy_n,&yvt);
90: scat->work1 = yvt;
91: }
92: MPI_Allgatherv(xv,xx_n,MPIU_SCALAR,yvt,scat->count,displx,MPIU_SCALAR,PetscObjectComm((PetscObject)ctx));
93: if (addv == ADD_VALUES) {
94: for (i=0; i<yy_n; i++) yv[i] += yvt[i];
95: #if !defined(PETSC_USE_COMPLEX)
96: } else if (addv == MAX_VALUES) {
97: for (i=0; i<yy_n; i++) yv[i] = PetscMax(yv[i],yvt[i]);
98: #endif
99: } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_UNKNOWN_TYPE,"Wrong insert option");
100: }
101: }
102: VecRestoreArrayPair(x,y,&xv,&yv);
103: return(0);
104: }
106: PetscErrorCode VecScatterView_MPI_ToAll(VecScatter in,PetscViewer viewer)
107: {
109: PetscBool isascii;
112: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&isascii);
113: if (isascii) {
114: PetscViewerASCIIPrintf(viewer,"Entire parallel vector is copied to each process\n");
115: }
116: return(0);
117: }
119: /*
120: This is special scatter code for when the entire parallel vector is copied to processor 0.
122: */
/*
   Scatter for the case in which the entire parallel vector is gathered onto
   process 0 (forward mode), or spread from process 0 back to all processes
   (reverse mode).  Shares the VecScatter_MPI_ToAll context layout.
*/
static PetscErrorCode VecScatterBegin_MPI_ToOne(VecScatter ctx,Vec x,Vec y,InsertMode addv,ScatterMode mode)
{
  PetscErrorCode    ierr;
  PetscMPIInt       rank;
  PetscInt          yy_n,xx_n;   /* local lengths of y and x */
  PetscScalar       *yv;
  const PetscScalar *xv;
  MPI_Comm          comm;

  VecGetLocalSize(y,&yy_n);
  VecGetLocalSize(x,&xx_n);
  VecGetArrayRead(x,&xv);
  VecGetArray(y,&yv);

  PetscObjectGetComm((PetscObject)x,&comm);
  MPI_Comm_rank(comm,&rank);

  /* -------- Reverse scatter; spread from processor 0 to other processors */
  if (mode & SCATTER_REVERSE) {
    PetscScalar          *yvt;
    VecScatter_MPI_ToAll *scat = (VecScatter_MPI_ToAll*)ctx->todata;
    PetscInt             i;
    PetscMPIInt          *disply = scat->displx;

    if (addv == INSERT_VALUES) {
      MPI_Scatterv((PetscScalar*)xv,scat->count,disply,MPIU_SCALAR,yv,yy_n,MPIU_SCALAR,0,PetscObjectComm((PetscObject)ctx));
    } else {
      /* scatter into a work buffer first, then combine into y according to addv */
      if (scat->work2) yvt = scat->work2;
      else {
        PetscInt xx_nt;
        /* size the cached buffer for the largest local piece on any process */
        MPI_Allreduce(&xx_n,&xx_nt,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)y));
        PetscMalloc1(xx_nt,&yvt);
        scat->work2 = yvt;
      }
      MPI_Scatterv((PetscScalar*)xv,scat->count,disply,MPIU_SCALAR,yvt,yy_n,MPIU_SCALAR,0,PetscObjectComm((PetscObject)ctx));
      if (addv == ADD_VALUES) {
        for (i=0; i<yy_n; i++) yv[i] += yvt[i];
#if !defined(PETSC_USE_COMPLEX)
      } else if (addv == MAX_VALUES) {
        for (i=0; i<yy_n; i++) yv[i] = PetscMax(yv[i],yvt[i]);
#endif
      } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_UNKNOWN_TYPE,"Wrong insert option");
    }
  /* --------- Forward scatter; gather all values onto processor 0 */
  } else {
    PetscScalar          *yvt = NULL;
    VecScatter_MPI_ToAll *scat = (VecScatter_MPI_ToAll*)ctx->todata;
    PetscInt             i;
    PetscMPIInt          *displx = scat->displx;

    if (addv == INSERT_VALUES) {
      MPI_Gatherv((PetscScalar*)xv,xx_n,MPIU_SCALAR,yv,scat->count,displx,MPIU_SCALAR,0,PetscObjectComm((PetscObject)ctx));
    } else {
      /* only rank 0 needs the gather buffer; other ranks pass yvt = NULL */
      if (!rank) {
        if (scat->work1) yvt = scat->work1;
        else {
          PetscMalloc1(yy_n,&yvt);
          scat->work1 = yvt;
        }
      }
      MPI_Gatherv((PetscScalar*)xv,xx_n,MPIU_SCALAR,yvt,scat->count,displx,MPIU_SCALAR,0,PetscObjectComm((PetscObject)ctx));
      if (!rank) { /* only the root combines the gathered values into y */
        if (addv == ADD_VALUES) {
          for (i=0; i<yy_n; i++) yv[i] += yvt[i];
#if !defined(PETSC_USE_COMPLEX)
        } else if (addv == MAX_VALUES) {
          for (i=0; i<yy_n; i++) yv[i] = PetscMax(yv[i],yvt[i]);
#endif
        } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_UNKNOWN_TYPE,"Wrong insert option");
      }
    }
  }
  VecRestoreArrayRead(x,&xv);
  VecRestoreArray(y,&yv);
  return(0);
}
201: /*
202: The following two are used by both VecScatterBegin_MPI_ToAll() and VecScatterBegin_MPI_ToOne()
203: */
204: static PetscErrorCode VecScatterDestroy_MPI_ToAll(VecScatter ctx)
205: {
206: VecScatter_MPI_ToAll *scat = (VecScatter_MPI_ToAll*)ctx->todata;
207: PetscErrorCode ierr;
210: PetscFree(scat->work1);
211: PetscFree(scat->work2);
212: PetscFree3(ctx->todata,scat->count,scat->displx);
213: return(0);
214: }
215: /* -------------------------------------------------------------------------*/
216: static PetscErrorCode VecScatterCopy_MPI_ToAll(VecScatter in,VecScatter out)
217: {
218: VecScatter_MPI_ToAll *in_to = (VecScatter_MPI_ToAll*)in->todata,*sto;
219: PetscErrorCode ierr;
220: PetscMPIInt size,*count,*displx;
221: PetscInt i;
224: out->ops->begin = in->ops->begin;
225: out->ops->end = in->ops->end;
226: out->ops->copy = in->ops->copy;
227: out->ops->destroy = in->ops->destroy;
228: out->ops->view = in->ops->view;
230: MPI_Comm_size(PetscObjectComm((PetscObject)out),&size);
231: PetscMalloc3(1,&sto,size,&count,size,&displx);
232: sto->format = in_to->format;
233: sto->count = count;
234: sto->displx = displx;
235: for (i=0; i<size; i++) {
236: sto->count[i] = in_to->count[i];
237: sto->displx[i] = in_to->displx[i];
238: }
239: sto->work1 = NULL;
240: sto->work2 = NULL;
241: out->todata = (void*)sto;
242: out->fromdata = (void*)0;
243: return(0);
244: }
246: /*
247: Create memcpy optimization plans based on indices of vector entries we want to scatter
249: Input Parameters:
250: + n - number of target processors
251: . starts - [n+1] for the i-th processor, its associated indices are indices[starts[i], starts[i+1])
252: . indices - [] array storing indices. Its length is starts[n+1]
253: - bs - block size
255: Output Parameters:
256: + plan - the memcpy plan
257: */
PetscErrorCode VecScatterMemcpyPlanCreate_Index(PetscInt n,const PetscInt *starts,const PetscInt *indices,PetscInt bs,VecScatterMemcpyPlan *plan)
{
  PetscInt  i,j,k,my_copies,n_copies=0,step;
  PetscBool strided,has_strided;

  PetscMemzero(plan,sizeof(VecScatterMemcpyPlan));
  plan->n = n;
  PetscMalloc2(n,&plan->optimized,n+1,&plan->copy_offsets);

  /* check if each remote part of the scatter is made of copies, and count total_copies */
  for (i=0; i<n; i++) { /* for each target processor procs[i] */
    my_copies = 1; /* count num. of copies for procs[i] */
    for (j=starts[i]; j<starts[i+1]-1; j++) { /* go through indices from the first to the second to last */
      if (indices[j]+bs != indices[j+1]) my_copies++; /* a gap ends the current contiguous run */
    }
    /* optimize only when the average contiguous run is at least 256 bytes */
    if (bs*(starts[i+1]-starts[i])*sizeof(PetscScalar)/my_copies >= 256) { /* worth using memcpy? */
      plan->optimized[i] = PETSC_TRUE;
      n_copies += my_copies;
    } else {
      plan->optimized[i] = PETSC_FALSE;
    }
  }

  /* do malloc with the recently known n_copies */
  PetscMalloc2(n_copies,&plan->copy_starts,n_copies,&plan->copy_lengths);

  /* analyze the total_copies one by one */
  k = 0; /* k-th copy */
  plan->copy_offsets[0] = 0;
  for (i=0; i<n; i++) { /* for each target processor procs[i] */
    if (plan->optimized[i]) {
      my_copies = 1;
      plan->copy_starts[k] = indices[starts[i]];
      for (j=starts[i]; j<starts[i+1]-1; j++) {
        if (indices[j]+bs != indices[j+1]) { /* meet end of a copy (and next copy must exist) */
          my_copies++;
          plan->copy_starts[k+1] = indices[j+1];
          plan->copy_lengths[k] = indices[j]+bs-plan->copy_starts[k];
          k++;
        }
      }
      /* set copy length of the last copy for this remote proc */
      plan->copy_lengths[k] = indices[j]+bs-plan->copy_starts[k];
      k++;
    }

    /* set offset for next proc. When optimized[i] is false, copy_offsets[i] = copy_offsets[i+1] */
    plan->copy_offsets[i+1] = k;
  }

  /* try the last chance to optimize. If a scatter is not memory copies, then is it strided? */
  has_strided = PETSC_FALSE;
  PetscMalloc3(n,&plan->stride_first,n,&plan->stride_step,n,&plan->stride_n);
  for (i=0; i<n; i++) { /* for each target processor procs[i] */
    if (!plan->optimized[i] && starts[i+1] - starts[i] >= 16) { /* few indices (<16) are not worth striding */
      strided = PETSC_TRUE;
      step = indices[starts[i]+1] - indices[starts[i]]; /* candidate stride, taken from the first pair */
      for (j=starts[i]; j<starts[i+1]-1; j++) {
        if (indices[j]+step != indices[j+1]) { strided = PETSC_FALSE; break; }
      }
      if (strided) {
        plan->optimized[i]    = PETSC_TRUE;
        plan->stride_first[i] = indices[starts[i]];
        plan->stride_step[i]  = step;
        plan->stride_n[i]     = starts[i+1] - starts[i];
        has_strided           = PETSC_TRUE;
      }
    }
  }
  /* if none is strided, free the arrays to save memory here and also in plan copying */
  if (!has_strided) { PetscFree3(plan->stride_first,plan->stride_step,plan->stride_n); }
  return(0);
}
334: /* Copy the memcpy plan from in to out */
335: PetscErrorCode VecScatterMemcpyPlanCopy(const VecScatterMemcpyPlan *in,VecScatterMemcpyPlan *out)
336: {
340: PetscMemzero(out,sizeof(VecScatterMemcpyPlan));
341: out->n = in->n;
342: PetscMalloc2(in->n,&out->optimized,in->n+1,&out->copy_offsets);
343: PetscMalloc2(in->copy_offsets[in->n],&out->copy_starts,in->copy_offsets[in->n],&out->copy_lengths);
344: PetscArraycpy(out->optimized,in->optimized,in->n);
345: PetscArraycpy(out->copy_offsets,in->copy_offsets,in->n+1);
346: PetscArraycpy(out->copy_starts,in->copy_starts,in->copy_offsets[in->n]);
347: PetscArraycpy(out->copy_lengths,in->copy_lengths,in->copy_offsets[in->n]);
348: if (in->stride_first) {
349: PetscMalloc3(in->n,&out->stride_first,in->n,&out->stride_step,in->n,&out->stride_n);
350: PetscArraycpy(out->stride_first,in->stride_first,in->n);
351: PetscArraycpy(out->stride_step,in->stride_step,in->n);
352: PetscArraycpy(out->stride_n,in->stride_n,in->n);
353: }
354: return(0);
355: }
357: /* Destroy the vecscatter memcpy plan */
358: PetscErrorCode VecScatterMemcpyPlanDestroy(VecScatterMemcpyPlan *plan)
359: {
363: PetscFree2(plan->optimized,plan->copy_offsets);
364: PetscFree2(plan->copy_starts,plan->copy_lengths);
365: PetscFree3(plan->stride_first,plan->stride_step,plan->stride_n);
366: return(0);
367: }
369: /* =======================================================================*/
/*
   Blocksizes we have optimized scatters for
*/
/* True when block size mbs is at least 2, i.e. when the blocked scatter path applies */
#define VecScatterOptimizedBS(mbs) (2 <= mbs)
/*
   Create a parallel (MPI) to sequential vector scatter.  A sequence of
   special cases is tested first -- each test ends in a collective reduction
   that must be matched on every rank, hence the paired MPI_Allreduce() calls
   in the else branches.  If no special case applies, the general creation
   routine vecscattercreatelocal_ptos is used.
*/
static PetscErrorCode VecScatterCreate_PtoS(VecScatter ctx,PetscErrorCode (*vecscattercreatelocal_ptos)(PetscInt,const PetscInt*,PetscInt,const PetscInt*,Vec,Vec,PetscInt,VecScatter))
{
  PetscErrorCode ierr;
  PetscMPIInt    size;
  PetscInt       ix_type=-1,iy_type=-1,*range;
  MPI_Comm       comm;
  IS             tix=NULL,tiy=NULL,ix=ctx->from_is,iy=ctx->to_is;
  Vec            xin=ctx->from_v,yin=ctx->to_v;
  PetscBool      totalv,ixblock,iyblock,iystride,islocal,cando;

  PetscObjectGetComm((PetscObject)ctx,&comm);
  GetInputISType_private(ctx,VEC_MPI_ID,VEC_SEQ_ID,&ix_type,&tix,&iy_type,&tiy);
  if (tix) ix = tix;
  if (tiy) iy = tiy;

  islocal = PETSC_FALSE;
  /* special case extracting (subset of) local portion */
  if (ix_type == IS_STRIDE_ID && iy_type == IS_STRIDE_ID) {
    /* Case (2-a) */
    PetscInt              nx,ny,to_first,to_step,from_first,from_step;
    PetscInt              start,end,min,max;
    VecScatter_Seq_Stride *from12 = NULL,*to12 = NULL;

    VecGetOwnershipRange(xin,&start,&end);
    ISGetLocalSize(ix,&nx);
    ISStrideGetInfo(ix,&from_first,&from_step);
    ISGetLocalSize(iy,&ny);
    ISStrideGetInfo(iy,&to_first,&to_step);
    if (nx != ny) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local scatter sizes don't match");
    ISGetMinMax(ix,&min,&max);
    /* local only when every requested source index lies in this rank's ownership range */
    if (min >= start && max < end) islocal = PETSC_TRUE;
    else islocal = PETSC_FALSE;
    /* cannot use MPIU_Allreduce() since this call matches with the MPI_Allreduce() in the else statement below */
    MPI_Allreduce(&islocal,&cando,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)xin));
    if (cando) {
      PetscMalloc2(1,&to12,1,&from12);
      to12->n        = nx;
      to12->first    = to_first;
      to12->step     = to_step;
      from12->n      = nx;
      from12->first  = from_first-start; /* shift to local numbering */
      from12->step   = from_step;
      to12->format   = VEC_SCATTER_SEQ_STRIDE;
      from12->format = VEC_SCATTER_SEQ_STRIDE;
      ctx->todata    = (void*)to12;
      ctx->fromdata  = (void*)from12;
      ctx->ops->begin   = VecScatterBegin_SSToSS;
      ctx->ops->end     = NULL;
      ctx->ops->destroy = VecScatterDestroy_SSToSS;
      ctx->ops->copy    = VecScatterCopy_SSToSS;
      ctx->ops->view    = VecScatterView_SSToSS;
      PetscInfo(xin,"Special case: processors only getting local values\n");
      goto functionend;
    }
  } else {
    MPI_Allreduce(&islocal,&cando,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)xin));
  }

  /* test for special case of all processors getting entire vector */
  /* contains check that PetscMPIInt can handle the sizes needed */
  totalv = PETSC_FALSE;
  if (ix_type == IS_STRIDE_ID && iy_type == IS_STRIDE_ID) {
    /* Case (2-b) */
    PetscInt             i,nx,ny,to_first,to_step,from_first,from_step,N;
    PetscMPIInt          *count = NULL,*displx;
    VecScatter_MPI_ToAll *sto = NULL;

    ISGetLocalSize(ix,&nx);
    ISStrideGetInfo(ix,&from_first,&from_step);
    ISGetLocalSize(iy,&ny);
    ISStrideGetInfo(iy,&to_first,&to_step);
    if (nx != ny) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local scatter sizes don't match");
    VecGetSize(xin,&N);
    /* "entire vector" means an identity stride (first 0, step 1) covering all N entries */
    if (nx != N) totalv = PETSC_FALSE;
    else if (from_first == 0 && from_step == 1 && from_first == to_first && from_step == to_step) totalv = PETSC_TRUE;
    else totalv = PETSC_FALSE;
    /* cannot use MPIU_Allreduce() since this call matches with the MPI_Allreduce() in the else statement below */
    MPI_Allreduce(&totalv,&cando,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)xin));

#if defined(PETSC_USE_64BIT_INDICES)
    if (cando && (yin->map->N < PETSC_MPI_INT_MAX)) {
#else
    if (cando) {
#endif
      MPI_Comm_size(PetscObjectComm((PetscObject)ctx),&size);
      PetscMalloc3(1,&sto,size,&count,size,&displx);
      range = xin->map->range;
      /* per-rank counts and displacements for the gather/scatter collectives */
      for (i=0; i<size; i++) {
        PetscMPIIntCast(range[i+1] - range[i],count+i);
        PetscMPIIntCast(range[i],displx+i);
      }
      sto->count  = count;
      sto->displx = displx;
      sto->work1  = NULL;
      sto->work2  = NULL;
      sto->format = VEC_SCATTER_MPI_TOALL;
      ctx->todata   = (void*)sto;
      ctx->fromdata = NULL;
      ctx->ops->begin   = VecScatterBegin_MPI_ToAll;
      ctx->ops->end     = NULL;
      ctx->ops->destroy = VecScatterDestroy_MPI_ToAll;
      ctx->ops->copy    = VecScatterCopy_MPI_ToAll;
      ctx->ops->view    = VecScatterView_MPI_ToAll;
      PetscInfo(xin,"Special case: all processors get entire parallel vector\n");
      goto functionend;
    }
  } else {
    MPI_Allreduce(&totalv,&cando,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)xin));
  }

  /* test for special case of processor 0 getting entire vector */
  /* contains check that PetscMPIInt can handle the sizes needed */
  totalv = PETSC_FALSE;
  if (ix_type == IS_STRIDE_ID && iy_type == IS_STRIDE_ID) {
    /* Case (2-c) */
    PetscInt             i,nx,ny,to_first,to_step,from_first,from_step,N;
    PetscMPIInt          rank,*count = NULL,*displx;
    VecScatter_MPI_ToAll *sto = NULL;

    PetscObjectGetComm((PetscObject)xin,&comm);
    MPI_Comm_rank(comm,&rank);
    ISGetLocalSize(ix,&nx);
    ISStrideGetInfo(ix,&from_first,&from_step);
    ISGetLocalSize(iy,&ny);
    ISStrideGetInfo(iy,&to_first,&to_step);
    if (nx != ny) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local scatter sizes don't match");
    if (!rank) {
      /* rank 0 must request the identity stride over the whole vector ... */
      VecGetSize(xin,&N);
      if (nx != N) totalv = PETSC_FALSE;
      else if (from_first == 0 && from_step == 1 &&
               from_first == to_first && from_step == to_step) totalv = PETSC_TRUE;
      else totalv = PETSC_FALSE;
    } else {
      /* ... while every other rank must request nothing */
      if (!nx) totalv = PETSC_TRUE;
      else totalv = PETSC_FALSE;
    }
    /* cannot use MPIU_Allreduce() since this call matches with the MPI_Allreduce() in the else statement below */
    MPI_Allreduce(&totalv,&cando,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)xin));

#if defined(PETSC_USE_64BIT_INDICES)
    if (cando && (yin->map->N < PETSC_MPI_INT_MAX)) {
#else
    if (cando) {
#endif
      MPI_Comm_size(PetscObjectComm((PetscObject)ctx),&size);
      PetscMalloc3(1,&sto,size,&count,size,&displx);
      range = xin->map->range;
      for (i=0; i<size; i++) {
        PetscMPIIntCast(range[i+1] - range[i],count+i);
        PetscMPIIntCast(range[i],displx+i);
      }
      sto->count  = count;
      sto->displx = displx;
      sto->work1  = NULL;
      sto->work2  = NULL;
      sto->format = VEC_SCATTER_MPI_TOONE;
      ctx->todata   = (void*)sto;
      ctx->fromdata = NULL;
      ctx->ops->begin   = VecScatterBegin_MPI_ToOne;
      ctx->ops->end     = NULL;
      ctx->ops->destroy = VecScatterDestroy_MPI_ToAll;
      ctx->ops->copy    = VecScatterCopy_MPI_ToAll;
      ctx->ops->view    = VecScatterView_MPI_ToAll;
      PetscInfo(xin,"Special case: processor zero gets entire parallel vector, rest get none\n");
      goto functionend;
    }
  } else {
    MPI_Allreduce(&totalv,&cando,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)xin));
  }

  /* Case 2-d */
  PetscObjectTypeCompare((PetscObject)ix,ISBLOCK,&ixblock);
  PetscObjectTypeCompare((PetscObject)iy,ISBLOCK,&iyblock);
  PetscObjectTypeCompare((PetscObject)iy,ISSTRIDE,&iystride);
  if (ixblock) {
    /* special case block to block */
    if (iyblock) {
      PetscInt       nx,ny,bsx,bsy,min,max;
      const PetscInt *idx,*idy;

      ISGetBlockSize(iy,&bsy);
      ISGetBlockSize(ix,&bsx);
      min = PetscMin(bsx,bsy);
      max = PetscMax(bsx,bsy);
      /* all ranks must agree on a single common block size before taking the blocked path */
      MPIU_Allreduce(MPI_IN_PLACE,&min,1,MPIU_INT,MPI_MIN,comm);
      MPIU_Allreduce(MPI_IN_PLACE,&max,1,MPIU_INT,MPI_MAX,comm);
      if (min == max && VecScatterOptimizedBS(bsx)) {
        ISBlockGetLocalSize(ix,&nx);
        ISBlockGetIndices(ix,&idx);
        ISBlockGetLocalSize(iy,&ny);
        ISBlockGetIndices(iy,&idy);
        if (nx != ny) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local scatter sizes don't match");
        (*vecscattercreatelocal_ptos)(nx,idx,ny,idy,xin,yin,bsx,ctx);
        ISBlockRestoreIndices(ix,&idx);
        ISBlockRestoreIndices(iy,&idy);
        PetscInfo(xin,"Special case: blocked indices\n");
        goto functionend;
      }
      /* special case block to stride */
    } else if (iystride) {
      /* Case (2-e) */
      PetscInt ystart,ystride,ysize,bsx;

      ISStrideGetInfo(iy,&ystart,&ystride);
      ISGetLocalSize(iy,&ysize);
      ISGetBlockSize(ix,&bsx);
      /* see if stride index set is equivalent to block index set */
      if (VecScatterOptimizedBS(bsx) && ((ystart % bsx) == 0) && (ystride == 1) && ((ysize % bsx) == 0)) {
        PetscInt       nx,il,*idy;
        const PetscInt *idx;

        ISBlockGetLocalSize(ix,&nx);
        ISBlockGetIndices(ix,&idx);
        if (ysize != bsx*nx) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local scatter sizes don't match");
        PetscMalloc1(nx,&idy);
        if (nx) {
          /* build the consecutive block indices equivalent to the stride */
          idy[0] = ystart/bsx;
          for (il=1; il<nx; il++) idy[il] = idy[il-1] + 1;
        }
        (*vecscattercreatelocal_ptos)(nx,idx,nx,idy,xin,yin,bsx,ctx);
        PetscFree(idy);
        ISBlockRestoreIndices(ix,&idx);
        PetscInfo(xin,"Special case: blocked indices to stride\n");
        goto functionend;
      }
    }
  }

  /* left over general case (2-f) */
  {
    PetscInt       nx,ny;
    const PetscInt *idx,*idy;

    ISGetLocalSize(ix,&nx);
    ISGetIndices(ix,&idx);
    ISGetLocalSize(iy,&ny);
    ISGetIndices(iy,&idy);
    if (nx != ny) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local scatter sizes don't match (%D %D)",nx,ny);
    (*vecscattercreatelocal_ptos)(nx,idx,ny,idy,xin,yin,1,ctx);
    ISRestoreIndices(ix,&idx);
    ISRestoreIndices(iy,&idy);
    PetscInfo(xin,"General case: MPI to Seq\n");
    goto functionend;
  }
  functionend:
  ISDestroy(&tix);
  ISDestroy(&tiy);
  VecScatterViewFromOptions(ctx,NULL,"-vecscatter_view");
  return(0);
}
/*
   Create a sequential to parallel (MPI) vector scatter.  Two special cases
   are tried (purely local stride-to-stride, and blocked indices targeting an
   equivalent stride); otherwise the general routine
   vecscattercreatelocal_stop is used.  As above, the special-case tests end
   in collective reductions that must be matched in the else branches.
*/
static PetscErrorCode VecScatterCreate_StoP(VecScatter ctx,PetscErrorCode (*vecscattercreatelocal_stop)(PetscInt,const PetscInt*,PetscInt,const PetscInt*,Vec,Vec,PetscInt,VecScatter))
{
  PetscErrorCode ierr;
  PetscInt       ix_type=-1,iy_type=-1;
  IS             tix=NULL,tiy=NULL,ix=ctx->from_is,iy=ctx->to_is;
  Vec            xin=ctx->from_v,yin=ctx->to_v;
  PetscBool      islocal,cando;

  GetInputISType_private(ctx,VEC_MPI_ID,VEC_SEQ_ID,&ix_type,&tix,&iy_type,&tiy);
  if (tix) ix = tix;
  if (tiy) iy = tiy;

  /* special case local copy portion */
  islocal = PETSC_FALSE;
  if (ix_type == IS_STRIDE_ID && iy_type == IS_STRIDE_ID) {
    PetscInt              nx,ny,to_first,to_step,from_step,start,end,from_first,min,max;
    VecScatter_Seq_Stride *from = NULL,*to = NULL;

    VecGetOwnershipRange(yin,&start,&end);
    ISGetLocalSize(ix,&nx);
    ISStrideGetInfo(ix,&from_first,&from_step);
    ISGetLocalSize(iy,&ny);
    ISStrideGetInfo(iy,&to_first,&to_step);
    if (nx != ny) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local scatter sizes don't match");
    ISGetMinMax(iy,&min,&max);
    /* local only when every destination index lies in this rank's ownership range of y */
    if (min >= start && max < end) islocal = PETSC_TRUE;
    else islocal = PETSC_FALSE;
    /* cannot use MPIU_Allreduce() since this call matches with the MPI_Allreduce() in the else statement below */
    MPI_Allreduce(&islocal,&cando,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)yin));
    if (cando) {
      PetscMalloc2(1,&to,1,&from);
      to->n        = nx;
      to->first    = to_first-start; /* shift to local numbering */
      to->step     = to_step;
      from->n      = nx;
      from->first  = from_first;
      from->step   = from_step;
      to->format   = VEC_SCATTER_SEQ_STRIDE;
      from->format = VEC_SCATTER_SEQ_STRIDE;
      ctx->todata   = (void*)to;
      ctx->fromdata = (void*)from;
      ctx->ops->begin   = VecScatterBegin_SSToSS;
      ctx->ops->end     = NULL;
      ctx->ops->destroy = VecScatterDestroy_SSToSS;
      ctx->ops->copy    = VecScatterCopy_SSToSS;
      ctx->ops->view    = VecScatterView_SSToSS;
      PetscInfo(xin,"Special case: sequential stride to MPI stride\n");
      goto functionend;
    }
  } else {
    MPI_Allreduce(&islocal,&cando,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)yin));
  }
  /* special case block to stride */
  if (ix_type == IS_BLOCK_ID && iy_type == IS_STRIDE_ID) {
    PetscInt ystart,ystride,ysize,bsx;

    ISStrideGetInfo(iy,&ystart,&ystride);
    ISGetLocalSize(iy,&ysize);
    ISGetBlockSize(ix,&bsx);
    /* see if stride index set is equivalent to block index set */
    if (VecScatterOptimizedBS(bsx) && ((ystart % bsx) == 0) && (ystride == 1) && ((ysize % bsx) == 0)) {
      PetscInt       nx,il,*idy;
      const PetscInt *idx;

      ISBlockGetLocalSize(ix,&nx);
      ISBlockGetIndices(ix,&idx);
      if (ysize != bsx*nx) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local scatter sizes don't match");
      PetscMalloc1(nx,&idy);
      if (nx) {
        /* build the consecutive block indices equivalent to the stride */
        idy[0] = ystart/bsx;
        for (il=1; il<nx; il++) idy[il] = idy[il-1] + 1;
      }
      (*vecscattercreatelocal_stop)(nx,idx,nx,idy,xin,yin,bsx,ctx);
      PetscFree(idy);
      ISBlockRestoreIndices(ix,&idx);
      PetscInfo(xin,"Special case: Blocked indices to stride\n");
      goto functionend;
    }
  }

  /* general case */
  {
    PetscInt       nx,ny;
    const PetscInt *idx,*idy;

    ISGetLocalSize(ix,&nx);
    ISGetIndices(ix,&idx);
    ISGetLocalSize(iy,&ny);
    ISGetIndices(iy,&idy);
    if (nx != ny) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local scatter sizes don't match");
    (*vecscattercreatelocal_stop)(nx,idx,ny,idy,xin,yin,1,ctx);
    ISRestoreIndices(ix,&idx);
    ISRestoreIndices(iy,&idy);
    PetscInfo(xin,"General case: Seq to MPI\n");
    goto functionend;
  }
  functionend:
  ISDestroy(&tix);
  ISDestroy(&tiy);
  VecScatterViewFromOptions(ctx,NULL,"-vecscatter_view");
  return(0);
}
726: static PetscErrorCode VecScatterCreate_PtoP(VecScatter ctx,PetscErrorCode (*vecscattercreatelocal_ptop)(PetscInt,const PetscInt*,PetscInt,const PetscInt*,Vec,Vec,PetscInt,VecScatter))
727: {
728: PetscErrorCode ierr;
729: PetscInt ix_type=-1,iy_type=-1;
730: IS tix=NULL,tiy=NULL,ix=ctx->from_is,iy=ctx->to_is;
731: Vec xin=ctx->from_v,yin=ctx->to_v;
732: PetscInt nx,ny;
733: const PetscInt *idx,*idy;
736: GetInputISType_private(ctx,VEC_MPI_ID,VEC_MPI_ID,&ix_type,&tix,&iy_type,&tiy);
737: if (tix) ix = tix;
738: if (tiy) iy = tiy;
740: /* no special cases for now */
741: ISGetLocalSize(ix,&nx);
742: ISGetIndices(ix,&idx);
743: ISGetLocalSize(iy,&ny);
744: ISGetIndices(iy,&idy);
745: if (nx != ny) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local scatter sizes don't match");
746: (*vecscattercreatelocal_ptop)(nx,idx,ny,idy,xin,yin,1,ctx);
747: ISRestoreIndices(ix,&idx);
748: ISRestoreIndices(iy,&idy);
749: PetscInfo(xin,"General case: MPI to MPI\n");
751: ISDestroy(&tix);
752: ISDestroy(&tiy);
753: VecScatterViewFromOptions(ctx,NULL,"-vecscatter_view");
754: return(0);
755: }
757: static PetscErrorCode VecScatterGetInputVecType_private(VecScatter ctx,PetscInt *xin_type1,PetscInt *yin_type1)
758: {
759: PetscErrorCode ierr;
760: MPI_Comm comm,ycomm;
761: PetscMPIInt size;
762: Vec xin = ctx->from_v,yin = ctx->to_v;
763: PetscInt xin_type,yin_type;
766: /*
767: Determine if the vectors are "parallel", ie. it shares a comm with other processors, or
768: sequential (it does not share a comm). The difference is that parallel vectors treat the
769: index set as providing indices in the global parallel numbering of the vector, with
770: sequential vectors treat the index set as providing indices in the local sequential
771: numbering
772: */
773: PetscObjectGetComm((PetscObject)xin,&comm);
774: MPI_Comm_size(comm,&size);
775: if (size > 1) {
776: xin_type = VEC_MPI_ID;
777: } else xin_type = VEC_SEQ_ID;
778: *xin_type1 = xin_type;
780: PetscObjectGetComm((PetscObject)yin,&ycomm);
781: MPI_Comm_size(ycomm,&size);
782: if (size > 1) {
783: yin_type = VEC_MPI_ID;
784: } else yin_type = VEC_SEQ_ID;
785: *yin_type1 = yin_type;
786: return(0);
787: }
/*
   Determine the kinds (general/block/stride) of the from/to index sets of a
   scatter context, creating default stride index sets when ix or iy was not
   supplied (meaning "grab the relevant part of the vector").

   Outputs: *ix_type1/*iy_type1 receive IS_GENERAL_ID, IS_BLOCK_ID or
   IS_STRIDE_ID; *tix1/*tiy1 receive the temporarily created index sets
   (NULL when the caller supplied one) -- the caller must destroy them.
*/
PetscErrorCode GetInputISType_private(VecScatter ctx,PetscInt xin_type,PetscInt yin_type,PetscInt *ix_type1,IS *tix1,PetscInt *iy_type1,IS *tiy1)
{
  PetscErrorCode ierr;
  MPI_Comm       comm;
  Vec            xin = ctx->from_v,yin = ctx->to_v;
  IS             tix = NULL,tiy = NULL,ix = ctx->from_is, iy = ctx->to_is;
  PetscInt       ix_type = IS_GENERAL_ID,iy_type = IS_GENERAL_ID;
  PetscBool      flag;

  PetscObjectGetComm((PetscObject)ctx,&comm);

  /* if ix or iy is not included; assume just grabbing entire vector */
  if (!ix && xin_type == VEC_SEQ_ID) {
    ISCreateStride(comm,ctx->from_n,0,1,&ix);
    tix = ix;
  } else if (!ix && xin_type == VEC_MPI_ID) {
    if (yin_type == VEC_MPI_ID) {
      PetscInt ntmp, low;
      /* parallel to parallel: default to this rank's locally owned part of xin */
      VecGetLocalSize(xin,&ntmp);
      VecGetOwnershipRange(xin,&low,NULL);
      ISCreateStride(comm,ntmp,low,1,&ix);
    } else {
      PetscInt Ntmp;
      /* parallel to sequential: default to the whole global vector */
      VecGetSize(xin,&Ntmp);
      ISCreateStride(comm,Ntmp,0,1,&ix);
    }
    tix = ix;
  } else if (!ix) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ix not given, but not Seq or MPI vector");

  if (!iy && yin_type == VEC_SEQ_ID) {
    ISCreateStride(comm,ctx->to_n,0,1,&iy);
    tiy = iy;
  } else if (!iy && yin_type == VEC_MPI_ID) {
    if (xin_type == VEC_MPI_ID) {
      PetscInt ntmp, low;
      VecGetLocalSize(yin,&ntmp);
      VecGetOwnershipRange(yin,&low,NULL);
      ISCreateStride(comm,ntmp,low,1,&iy);
    } else {
      PetscInt Ntmp;
      VecGetSize(yin,&Ntmp);
      ISCreateStride(comm,Ntmp,0,1,&iy);
    }
    tiy = iy;
  } else if (!iy) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"iy not given, but not Seq or MPI vector");

  /* Determine types of index sets; the stride checks run last, so stride
     wins over block if an IS matches both */
  PetscObjectTypeCompare((PetscObject)ix,ISBLOCK,&flag);
  if (flag) ix_type = IS_BLOCK_ID;
  PetscObjectTypeCompare((PetscObject)iy,ISBLOCK,&flag);
  if (flag) iy_type = IS_BLOCK_ID;
  PetscObjectTypeCompare((PetscObject)ix,ISSTRIDE,&flag);
  if (flag) ix_type = IS_STRIDE_ID;
  PetscObjectTypeCompare((PetscObject)iy,ISSTRIDE,&flag);
  if (flag) iy_type = IS_STRIDE_ID;

  if (ix_type1) *ix_type1 = ix_type;
  if (iy_type1) *iy_type1 = iy_type;
  if (tix1) *tix1 = tix;
  if (tiy1) *tiy1 = tiy;
  return(0);
}
853: PetscErrorCode VecScatterSetUp_vectype_private(VecScatter ctx,PetscErrorCode (*vecscattercreatelocal_ptos)(PetscInt,const PetscInt*,PetscInt,const PetscInt*,Vec,Vec,PetscInt,VecScatter),PetscErrorCode (*vecscattercreatelocal_stop)(PetscInt,const PetscInt*,PetscInt,const PetscInt*,Vec,Vec,PetscInt,VecScatter),PetscErrorCode (*vecscattercreatelocal_ptop)(PetscInt,const PetscInt*,PetscInt,const PetscInt*,Vec,Vec,PetscInt,VecScatter))
854: {
855: PetscErrorCode ierr;
856: PetscInt xin_type=-1,yin_type=-1;
859: VecScatterGetInputVecType_private(ctx,&xin_type,&yin_type);
860: if (xin_type == VEC_MPI_ID && yin_type == VEC_SEQ_ID) {
861: VecScatterCreate_PtoS(ctx,vecscattercreatelocal_ptos);
862: } else if (xin_type == VEC_SEQ_ID && yin_type == VEC_MPI_ID) {
863: VecScatterCreate_StoP(ctx,vecscattercreatelocal_stop);
864: } else if (xin_type == VEC_MPI_ID && yin_type == VEC_MPI_ID) {
865: VecScatterCreate_PtoP(ctx,vecscattercreatelocal_ptop);
866: }
867: return(0);
868: }