Actual source code: vscatfce.c
petsc-3.10.5 2019-03-28
1: #include <petsc/private/vecscatterimpl.h>
2: #if defined(PETSC_HAVE_VECCUDA)
3: #include <../src/vec/vec/impls/seq/seqcuda/cudavecimpl.h>
4: #endif
5: /* ------------------------------------------------------------------*/
6: /*@
7: VecScatterGetMerged - Returns true if the scatter is completed in the VecScatterBegin()
8: and the VecScatterEnd() does nothing
10: Not Collective
12: Input Parameter:
13: . ctx - scatter context created with VecScatterCreate()
15: Output Parameter:
16: . flg - PETSC_TRUE if the VecScatterBegin/End() are all done during the VecScatterBegin()
18: Level: developer
20: .seealso: VecScatterCreate(), VecScatterEnd(), VecScatterBegin()
21: @*/
22: PetscErrorCode VecScatterGetMerged(VecScatter ctx,PetscBool *flg)
23: {
26: *flg = ctx->beginandendtogether;
27: return(0);
28: }
30: /*@
31: VecScatterBegin - Begins a generalized scatter from one vector to
32: another. Complete the scattering phase with VecScatterEnd().
34: Neighbor-wise Collective on VecScatter and Vec
36: Input Parameters:
37: + ctx - scatter context generated by VecScatterCreate()
38: . x - the vector from which we scatter
39: . y - the vector to which we scatter
40: . addv - either ADD_VALUES or INSERT_VALUES, with INSERT_VALUES mode any location
41: not scattered to retains its old value; i.e. the vector is NOT first zeroed.
42: - mode - the scattering mode, usually SCATTER_FORWARD. The available modes are:
43: SCATTER_FORWARD or SCATTER_REVERSE
46: Level: intermediate
48: Options Database: See VecScatterCreate()
50: Notes:
51: The vectors x and y need not be the same vectors used in the call
52: to VecScatterCreate(), but x must have the same parallel data layout
53: as that passed in as the x to VecScatterCreate(), similarly for the y.
54: Most likely they have been obtained from VecDuplicate().
56: You cannot change the values in the input vector between the calls to VecScatterBegin()
57: and VecScatterEnd().
59: If you use SCATTER_REVERSE the two arguments x and y should be reversed, from
60: the SCATTER_FORWARD.
62: y[iy[i]] = x[ix[i]], for i=0,...,ni-1
64: This scatter is far more general than the conventional
65: scatter, since it can be a gather or a scatter or a combination,
66: depending on the indices ix and iy. If x is a parallel vector and y
67: is sequential, VecScatterBegin() can serve to gather values to a
68: single processor. Similarly, if y is parallel and x sequential, the
69: routine can scatter from one processor to many processors.
71: Concepts: scatter^between vectors
72: Concepts: gather^between vectors
74: .seealso: VecScatterCreate(), VecScatterEnd()
75: @*/
76: PetscErrorCode VecScatterBegin(VecScatter ctx,Vec x,Vec y,InsertMode addv,ScatterMode mode)
77: {
79: #if defined(PETSC_USE_DEBUG)
80: PetscInt to_n,from_n;
81: #endif
86: if (ctx->inuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE," Scatter ctx already in use");
88: #if defined(PETSC_USE_DEBUG)
89: /*
90: Error checking to make sure these vectors match the vectors used
91: to create the vector scatter context. -1 in the from_n and to_n indicate the
92: vector lengths are unknown (for example with mapped scatters) and thus
93: no error checking is performed.
94: */
95: if (ctx->from_n >= 0 && ctx->to_n >= 0) {
96: VecGetLocalSize(x,&from_n);
97: VecGetLocalSize(y,&to_n);
98: if (mode & SCATTER_REVERSE) {
99: if (to_n != ctx->from_n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Vector wrong size %D for scatter %D (scatter reverse and vector to != ctx from size)",to_n,ctx->from_n);
100: if (from_n != ctx->to_n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Vector wrong size %D for scatter %D (scatter reverse and vector from != ctx to size)",from_n,ctx->to_n);
101: } else {
102: if (to_n != ctx->to_n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Vector wrong size %D for scatter %D (scatter forward and vector to != ctx to size)",to_n,ctx->to_n);
103: if (from_n != ctx->from_n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Vector wrong size %D for scatter %D (scatter forward and vector from != ctx from size)",from_n,ctx->from_n);
104: }
105: }
106: #endif
108: ctx->inuse = PETSC_TRUE;
109: PetscLogEventBegin(VEC_ScatterBegin,ctx,x,y,0);
110: (*ctx->ops->begin)(ctx,x,y,addv,mode);
111: if (ctx->beginandendtogether && ctx->ops->end) {
112: ctx->inuse = PETSC_FALSE;
113: (*ctx->ops->end)(ctx,x,y,addv,mode);
114: }
115: PetscLogEventEnd(VEC_ScatterBegin,ctx,x,y,0);
116: return(0);
117: }
119: /* --------------------------------------------------------------------*/
120: /*@
121: VecScatterEnd - Ends a generalized scatter from one vector to another. Call
122: after first calling VecScatterBegin().
124: Neighbor-wise Collective on VecScatter and Vec
126: Input Parameters:
127: + ctx - scatter context generated by VecScatterCreate()
128: . x - the vector from which we scatter
129: . y - the vector to which we scatter
130: . addv - either ADD_VALUES or INSERT_VALUES.
131: - mode - the scattering mode, usually SCATTER_FORWARD. The available modes are:
132: SCATTER_FORWARD, SCATTER_REVERSE
134: Level: intermediate
136: Notes:
137: If you use SCATTER_REVERSE the arguments x and y should be reversed, from the SCATTER_FORWARD.
139: y[iy[i]] = x[ix[i]], for i=0,...,ni-1
141: .seealso: VecScatterBegin(), VecScatterCreate()
142: @*/
143: PetscErrorCode VecScatterEnd(VecScatter ctx,Vec x,Vec y,InsertMode addv,ScatterMode mode)
144: {
151: ctx->inuse = PETSC_FALSE;
152: if (!ctx->ops->end) return(0);
153: if (!ctx->beginandendtogether) {
154: PetscLogEventBegin(VEC_ScatterEnd,ctx,x,y,0);
155: (*(ctx)->ops->end)(ctx,x,y,addv,mode);
156: PetscLogEventEnd(VEC_ScatterEnd,ctx,x,y,0);
157: }
158: return(0);
159: }
161: /*@
162: VecScatterDestroy - Destroys a scatter context created by
163: VecScatterCreate().
165: Collective on VecScatter
167: Input Parameter:
168: . ctx - the scatter context
170: Level: intermediate
172: .seealso: VecScatterCreate(), VecScatterCopy()
173: @*/
174: PetscErrorCode VecScatterDestroy(VecScatter *ctx)
175: {
179: if (!*ctx) return(0);
181: if ((*ctx)->inuse && ((PetscObject)(*ctx))->refct == 1) SETERRQ(((PetscObject)(*ctx))->comm,PETSC_ERR_ARG_WRONGSTATE,"Scatter context is in use");
182: if (--((PetscObject)(*ctx))->refct > 0) {*ctx = 0; return(0);}
184: /* if memory was published with SAWs then destroy it */
185: PetscObjectSAWsViewOff((PetscObject)(*ctx));
186: if ((*ctx)->ops->destroy) {(*(*ctx)->ops->destroy)(*ctx);}
187: #if defined(PETSC_HAVE_VECCUDA)
188: VecScatterCUDAIndicesDestroy((PetscCUDAIndices*)&((*ctx)->spptr));
189: #endif
190: PetscHeaderDestroy(ctx);
191: return(0);
192: }
194: /*@
195: VecScatterCopy - Makes a copy of a scatter context.
197: Collective on VecScatter
199: Input Parameter:
200: . sctx - the scatter context
202: Output Parameter:
203: . ctx - the context copy
205: Level: advanced
207: .seealso: VecScatterCreate(), VecScatterDestroy()
208: @*/
209: PetscErrorCode VecScatterCopy(VecScatter sctx,VecScatter *ctx)
210: {
212: VecScatterType type;
217: if (!sctx->ops->copy) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot copy this type");
218: PetscHeaderCreate(*ctx,VEC_SCATTER_CLASSID,"VecScatter","VecScatter","Vec",PetscObjectComm((PetscObject)sctx),VecScatterDestroy,VecScatterView);
219: (*ctx)->to_n = sctx->to_n;
220: (*ctx)->from_n = sctx->from_n;
221: (*sctx->ops->copy)(sctx,*ctx);
223: VecScatterGetType(sctx,&type);
224: PetscObjectChangeTypeName((PetscObject)(*ctx),type);
225: return(0);
226: }
228: /* ------------------------------------------------------------------*/
229: /*@C
230: VecScatterView - Views a vector scatter context.
232: Collective on VecScatter
234: Input Parameters:
235: + ctx - the scatter context
236: - viewer - the viewer for displaying the context
238: Level: intermediate
240: @*/
241: PetscErrorCode VecScatterView(VecScatter ctx,PetscViewer viewer)
242: {
247: if (!viewer) {
248: PetscViewerASCIIGetStdout(PetscObjectComm((PetscObject)ctx),&viewer);
249: }
251: if (ctx->ops->view) {
252: (*ctx->ops->view)(ctx,viewer);
253: }
254: return(0);
255: }
257: /*@C
258: VecScatterRemap - Remaps the "from" and "to" indices in a
259: vector scatter context. FOR EXPERTS ONLY!
261: Collective on VecScatter
263: Input Parameters:
264: + scat - vector scatter context
265: . tomap - remapping plan for "to" indices (may be NULL).
266: - frommap - remapping plan for "from" indices (may be NULL)
268: Level: developer
270: Notes:
271: In the parallel case the todata contains indices from where the data is taken
272: (and then sent to others)! The fromdata contains indices from where the received
273: data is finally put locally.
275: In the sequential case the todata contains indices from where the data is put
276: and the fromdata contains indices from where the data is taken from.
277: This is backwards from the parallel case!
279: @*/
280: PetscErrorCode VecScatterRemap(VecScatter scat,PetscInt tomap[],PetscInt frommap[])
281: {
282: VecScatter_MPI_General *to,*from;
283: VecScatter_Seq_General *sgto,*sgfrom;
284: VecScatter_Seq_Stride *ssto;
285: PetscInt i,ierr;
292: to = (VecScatter_MPI_General*)scat->todata;
293: from = (VecScatter_MPI_General*)scat->fromdata;
294: ssto = (VecScatter_Seq_Stride*)scat->todata;
295: sgto = (VecScatter_Seq_General*)scat->todata;
296: sgfrom = (VecScatter_Seq_General*)scat->fromdata;
298: /* remap indices from where we take/read data */
299: if (tomap) {
300: if (to->format == VEC_SCATTER_MPI_TOALL) {
301: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Not for to all scatter");
302: } else if (to->format == VEC_SCATTER_MPI_GENERAL) {
303: /* handle off processor parts */
304: for (i=0; i<to->starts[to->n]; i++) to->indices[i] = tomap[to->indices[i]];
306: /* handle local part */
307: for (i=0; i<to->local.n; i++) to->local.vslots[i] = tomap[to->local.vslots[i]];
309: /* the memcpy optimizations in vecscatter was based on index patterns it has.
310: They need to be recalculated when indices are changed (remapped).
311: */
312: VecScatterMemcpyPlanDestroy_PtoP(to,from);
313: VecScatterMemcpyPlanCreate_PtoP(to,from);
314: } else if (sgfrom->format == VEC_SCATTER_SEQ_GENERAL) {
315: /* remap indices*/
316: for (i=0; i<sgfrom->n; i++) sgfrom->vslots[i] = tomap[sgfrom->vslots[i]];
317: /* update optimizations, which happen when it is a Stride1toSG, SGtoStride1 or SGToSG vecscatter */
318: if (ssto->format == VEC_SCATTER_SEQ_STRIDE && ssto->step == 1) {
319: PetscInt tmp[2];
320: tmp[0] = 0; tmp[1] = sgfrom->n;
321: VecScatterMemcpyPlanDestroy(&sgfrom->memcpy_plan);
322: VecScatterMemcpyPlanCreate_Index(1,tmp,sgfrom->vslots,1/*bs*/,&sgfrom->memcpy_plan);
323: } else if (sgto->format == VEC_SCATTER_SEQ_GENERAL) {
324: VecScatterMemcpyPlanDestroy(&sgto->memcpy_plan);;
325: VecScatterMemcpyPlanDestroy(&sgfrom->memcpy_plan);
326: VecScatterMemcpyPlanCreate_SGToSG(1/*bs*/,sgto,sgfrom);
327: }
328: } else if (sgfrom->format == VEC_SCATTER_SEQ_STRIDE) {
329: VecScatter_Seq_Stride *ssto = (VecScatter_Seq_Stride*)sgfrom;
331: /* if the remapping is the identity and stride is identity then skip remap */
332: if (ssto->step == 1 && ssto->first == 0) {
333: for (i=0; i<ssto->n; i++) {
334: if (tomap[i] != i) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Unable to remap such scatters");
335: }
336: } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Unable to remap such scatters");
337: } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Unable to remap such scatters");
338: }
340: if (frommap) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Unable to remap the FROM in scatters yet");
342: /*
343: Mark then vector lengths as unknown because we do not know the
344: lengths of the remapped vectors
345: */
346: scat->from_n = -1;
347: scat->to_n = -1;
348: return(0);
349: }
351: /*
352: VecScatterGetTypes_Private - Returns the scatter types.
354: scatter - The scatter.
355: from - Upon exit this contains the type of the from scatter.
356: to - Upon exit this contains the type of the to scatter.
357: */
358: PetscErrorCode VecScatterGetTypes_Private(VecScatter scatter,VecScatterFormat *from,VecScatterFormat *to)
359: {
360: VecScatter_Common* fromdata = (VecScatter_Common*)scatter->fromdata;
361: VecScatter_Common* todata = (VecScatter_Common*)scatter->todata;
364: *from = fromdata->format;
365: *to = todata->format;
366: return(0);
367: }
370: /*
371: VecScatterIsSequential_Private - Returns true if the scatter is sequential.
373: scatter - The scatter.
374: flag - Upon exit flag is true if the scatter is of type VecScatter_Seq_General
375: or VecScatter_Seq_Stride; otherwise flag is false.
376: */
377: PetscErrorCode VecScatterIsSequential_Private(VecScatter_Common *scatter,PetscBool *flag)
378: {
379: VecScatterFormat scatterType = scatter->format;
382: if (scatterType == VEC_SCATTER_SEQ_GENERAL || scatterType == VEC_SCATTER_SEQ_STRIDE) {
383: *flag = PETSC_TRUE;
384: } else {
385: *flag = PETSC_FALSE;
386: }
387: return(0);
388: }
390: #if defined(PETSC_HAVE_VECCUDA)
392: /*@C
393: VecScatterInitializeForGPU - Initializes a generalized scatter from one vector
394: to another for GPU based computation.
396: Input Parameters:
397: + inctx - scatter context generated by VecScatterCreate()
398: - x - the vector from which we scatter
401: Level: intermediate
403: Notes:
404: Effectively, this function creates all the necessary indexing buffers and work
405: vectors needed to move only those data points in a vector which need to
406: be communicated across ranks. This is done the first time this function is
407: called. Currently, this is only used in the context of the parallel SpMV call in
408: MatMult_MPIAIJCUSPARSE.
410: This function is executed before the call to MatMult. This enables the memory
411: transfers to be overlapped with the MatMult SpMV kernel call.
413: .seealso: VecScatterFinalizeForGPU(), VecScatterCreate(), VecScatterEnd()
414: @*/
415: PETSC_EXTERN PetscErrorCode VecScatterInitializeForGPU(VecScatter inctx,Vec x)
416: {
418: VecScatter_MPI_General *to,*from;
419: PetscErrorCode ierr;
420: PetscInt i,*indices,*sstartsSends,*sstartsRecvs,nrecvs,nsends,bs;
421: PetscBool isSeq1,isSeq2;
424: VecScatterIsSequential_Private((VecScatter_Common*)inctx->fromdata,&isSeq1);
425: VecScatterIsSequential_Private((VecScatter_Common*)inctx->todata,&isSeq2);
426: if (isSeq1 || isSeq2) {
427: return(0);
428: }
430: to = (VecScatter_MPI_General*)inctx->todata;
431: from = (VecScatter_MPI_General*)inctx->fromdata;
432: bs = to->bs;
433: nrecvs = from->n;
434: nsends = to->n;
435: indices = to->indices;
436: sstartsSends = to->starts;
437: sstartsRecvs = from->starts;
438: if (x->valid_GPU_array != PETSC_OFFLOAD_UNALLOCATED && (nsends>0 || nrecvs>0)) {
439: if (!inctx->spptr) {
440: PetscInt k,*tindicesSends,*sindicesSends,*tindicesRecvs,*sindicesRecvs;
441: PetscInt ns = sstartsSends[nsends],nr = sstartsRecvs[nrecvs];
442: /* Here we create indices for both the senders and receivers. */
443: PetscMalloc1(ns,&tindicesSends);
444: PetscMalloc1(nr,&tindicesRecvs);
446: PetscMemcpy(tindicesSends,indices,ns*sizeof(PetscInt));
447: PetscMemcpy(tindicesRecvs,from->indices,nr*sizeof(PetscInt));
449: PetscSortRemoveDupsInt(&ns,tindicesSends);
450: PetscSortRemoveDupsInt(&nr,tindicesRecvs);
452: PetscMalloc1(bs*ns,&sindicesSends);
453: PetscMalloc1(from->bs*nr,&sindicesRecvs);
455: /* sender indices */
456: for (i=0; i<ns; i++) {
457: for (k=0; k<bs; k++) sindicesSends[i*bs+k] = tindicesSends[i]+k;
458: }
459: PetscFree(tindicesSends);
461: /* receiver indices */
462: for (i=0; i<nr; i++) {
463: for (k=0; k<from->bs; k++) sindicesRecvs[i*from->bs+k] = tindicesRecvs[i]+k;
464: }
465: PetscFree(tindicesRecvs);
467: /* create GPU indices, work vectors, ... */
468: VecScatterCUDAIndicesCreate_PtoP(ns*bs,sindicesSends,nr*from->bs,sindicesRecvs,(PetscCUDAIndices*)&inctx->spptr);
469: PetscFree(sindicesSends);
470: PetscFree(sindicesRecvs);
471: }
472: }
473: return(0);
474: }
476: /*@C
477: VecScatterFinalizeForGPU - Finalizes a generalized scatter from one vector to
478: another for GPU based computation.
480: Input Parameter:
481: . inctx - scatter context generated by VecScatterCreate()
483: Level: intermediate
485: Notes:
486: Effectively, this function resets the temporary buffer flags. Currently, this
487: is only used in the context of the parallel SpMV call in MatMult_MPIAIJCUDA
488: or MatMult_MPIAIJCUSPARSE. Once the MatMultAdd is finished, the GPU temporary
489: buffers used for messaging are no longer valid.
491: .seealso: VecScatterInitializeForGPU(), VecScatterCreate(), VecScatterEnd()
492: @*/
493: PETSC_EXTERN PetscErrorCode VecScatterFinalizeForGPU(VecScatter inctx)
494: {
496: return(0);
497: }
499: #endif