Actual source code: vscatfce.c
petsc-3.14.6 2021-03-30
1: #include <petsc/private/vecscatterimpl.h>
2: #if defined(PETSC_HAVE_CUDA)
3: #include <petsc/private/cudavecimpl.h>
4: #endif
5: /* ------------------------------------------------------------------*/
6: /*@
7: VecScatterGetMerged - Returns true if the scatter is completed in the VecScatterBegin()
8: and the VecScatterEnd() does nothing
10: Not Collective
12: Input Parameter:
13: . ctx - scatter context created with VecScatterCreate()
15: Output Parameter:
16: . flg - PETSC_TRUE if the VecScatterBegin/End() are all done during the VecScatterBegin()
18: Level: developer
20: .seealso: VecScatterCreate(), VecScatterEnd(), VecScatterBegin()
21: @*/
22: PetscErrorCode VecScatterGetMerged(VecScatter ctx,PetscBool *flg)
23: {
26: *flg = ctx->beginandendtogether;
27: return(0);
28: }
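/* ---------------------------------------------------------------------------
   Editorial usage sketch (not part of petsc-3.14.6 vscatfce.c): a minimal
   illustration of how a caller might use VecScatterGetMerged() to decide
   whether any communication is left to overlap with local work between
   VecScatterBegin() and VecScatterEnd(). The function name is hypothetical;
   ctx, x and y are assumed to come from a matching VecScatterCreate().
   --------------------------------------------------------------------------- */
static PetscErrorCode VecScatterMergedQuery_Sketch(VecScatter ctx,Vec x,Vec y)
{
  PetscErrorCode ierr;
  PetscBool      merged;

  ierr = VecScatterGetMerged(ctx,&merged);CHKERRQ(ierr);
  ierr = VecScatterBegin(ctx,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  if (!merged) {
    /* communication may still be in flight here; unrelated local work could overlap it */
  }
  /* still called unconditionally: when merged is PETSC_TRUE it simply does nothing */
  ierr = VecScatterEnd(ctx,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  return(0);
}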
30: /*@
31: VecScatterBegin - Begins a generalized scatter from one vector to
32: another. Complete the scattering phase with VecScatterEnd().
34: Neighbor-wise Collective on VecScatter
36: Input Parameters:
37: + ctx - scatter context generated by VecScatterCreate()
38: . x - the vector from which we scatter
39: . y - the vector to which we scatter
40: . addv - either ADD_VALUES, MAX_VALUES, MIN_VALUES or INSERT_VALUES; with INSERT_VALUES mode any location
41: not scattered to retains its old value, i.e., the vector is NOT first zeroed.
42: - mode - the scattering mode, usually SCATTER_FORWARD. The available modes are:
43: SCATTER_FORWARD or SCATTER_REVERSE
46: Level: intermediate
48: Options Database: See VecScatterCreate()
50: Notes:
51: The vectors x and y need not be the same vectors used in the call
52: to VecScatterCreate(), but x must have the same parallel data layout
53: as that passed in as the x to VecScatterCreate(), similarly for the y.
54: Most likely they have been obtained from VecDuplicate().
56: You cannot change the values in the input vector between the calls to VecScatterBegin()
57: and VecScatterEnd().
59: If you use SCATTER_REVERSE the two arguments x and y should be reversed relative to
60: the SCATTER_FORWARD case (the usage sketch after this routine illustrates this).
62: y[iy[i]] = x[ix[i]], for i=0,...,ni-1
64: This scatter is far more general than the conventional
65: scatter, since it can be a gather or a scatter or a combination,
66: depending on the indices ix and iy. If x is a parallel vector and y
67: is sequential, VecScatterBegin() can serve to gather values to a
68: single processor. Similarly, if y is parallel and x sequential, the
69: routine can scatter from one processor to many processors.
72: .seealso: VecScatterCreate(), VecScatterEnd()
73: @*/
74: PetscErrorCode VecScatterBegin(VecScatter ctx,Vec x,Vec y,InsertMode addv,ScatterMode mode)
75: {
77: PetscInt to_n,from_n;
83: if (ctx->inuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE," Scatter ctx already in use");
85: if (PetscDefined(USE_DEBUG)) {
86: /*
87: Error checking to make sure these vectors match the vectors used
88: to create the vector scatter context. -1 in the from_n and to_n indicate the
89: vector lengths are unknown (for example with mapped scatters) and thus
90: no error checking is performed.
91: */
92: if (ctx->from_n >= 0 && ctx->to_n >= 0) {
93: VecGetLocalSize(x,&from_n);
94: VecGetLocalSize(y,&to_n);
95: if (mode & SCATTER_REVERSE) {
96: if (to_n != ctx->from_n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Vector wrong size %D for scatter %D (scatter reverse and vector to != ctx from size)",to_n,ctx->from_n);
97: if (from_n != ctx->to_n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Vector wrong size %D for scatter %D (scatter reverse and vector from != ctx to size)",from_n,ctx->to_n);
98: } else {
99: if (to_n != ctx->to_n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Vector wrong size %D for scatter %D (scatter forward and vector to != ctx to size)",to_n,ctx->to_n);
100: if (from_n != ctx->from_n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Vector wrong size %D for scatter %D (scatter forward and vector from != ctx from size)",from_n,ctx->from_n);
101: }
102: }
103: }
105: ctx->inuse = PETSC_TRUE;
106: PetscLogEventBegin(VEC_ScatterBegin,ctx,x,y,0);
107: (*ctx->ops->begin)(ctx,x,y,addv,mode);
108: if (ctx->beginandendtogether && ctx->ops->end) {
109: ctx->inuse = PETSC_FALSE;
110: (*ctx->ops->end)(ctx,x,y,addv,mode);
111: }
112: PetscLogEventEnd(VEC_ScatterBegin,ctx,x,y,0);
113: return(0);
114: }
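/* ---------------------------------------------------------------------------
   Editorial usage sketch (not part of petsc-3.14.6 vscatfce.c): a deliberately
   simple scatter in which each process gathers its own locally owned entries
   of a parallel vector into a sequential work vector, shown only to illustrate
   the calling sequence, the index semantics y[iy[i]] = x[ix[i]], and the swap
   of x and y under SCATTER_REVERSE. All names and sizes are arbitrary.
   --------------------------------------------------------------------------- */
static PetscErrorCode VecScatterForwardReverse_Sketch(void)
{
  PetscErrorCode ierr;
  Vec            x,y;
  IS             ix,iy;
  VecScatter     ctx;
  PetscInt       rstart,rend,nloc,N = 32;

  ierr = VecCreateMPI(PETSC_COMM_WORLD,PETSC_DECIDE,N,&x);CHKERRQ(ierr);
  ierr = VecSet(x,1.0);CHKERRQ(ierr);
  ierr = VecGetOwnershipRange(x,&rstart,&rend);CHKERRQ(ierr);
  nloc = rend - rstart;
  ierr = VecCreateSeq(PETSC_COMM_SELF,nloc,&y);CHKERRQ(ierr);
  ierr = ISCreateStride(PETSC_COMM_SELF,nloc,rstart,1,&ix);CHKERRQ(ierr); /* global indices of x to read */
  ierr = ISCreateStride(PETSC_COMM_SELF,nloc,0,1,&iy);CHKERRQ(ierr);      /* local indices of y to write */
  ierr = VecScatterCreate(x,ix,y,iy,&ctx);CHKERRQ(ierr);

  /* forward: y[iy[i]] = x[ix[i]] */
  ierr = VecScatterBegin(ctx,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(ctx,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

  /* reverse: x and y trade places relative to the forward call */
  ierr = VecScatterBegin(ctx,y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = VecScatterEnd(ctx,y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);

  ierr = VecScatterDestroy(&ctx);CHKERRQ(ierr);
  ierr = ISDestroy(&ix);CHKERRQ(ierr);
  ierr = ISDestroy(&iy);CHKERRQ(ierr);
  ierr = VecDestroy(&x);CHKERRQ(ierr);
  ierr = VecDestroy(&y);CHKERRQ(ierr);
  return(0);
}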
116: /* --------------------------------------------------------------------*/
117: /*@
118: VecScatterEnd - Ends a generalized scatter from one vector to another. Call
119: after first calling VecScatterBegin().
121: Neighbor-wise Collective on VecScatter
123: Input Parameters:
124: + ctx - scatter context generated by VecScatterCreate()
125: . x - the vector from which we scatter
126: . y - the vector to which we scatter
127: . addv - one of ADD_VALUES, MAX_VALUES, MIN_VALUES or INSERT_VALUES
128: - mode - the scattering mode, usually SCATTER_FORWARD. The available modes are:
129: SCATTER_FORWARD, SCATTER_REVERSE
131: Level: intermediate
133: Notes:
134: If you use SCATTER_REVERSE the arguments x and y should be reversed relative to the SCATTER_FORWARD case.
136: y[iy[i]] = x[ix[i]], for i=0,...,ni-1
138: .seealso: VecScatterBegin(), VecScatterCreate()
139: @*/
140: PetscErrorCode VecScatterEnd(VecScatter ctx,Vec x,Vec y,InsertMode addv,ScatterMode mode)
141: {
148: ctx->inuse = PETSC_FALSE;
149: if (!ctx->ops->end) return(0);
150: if (!ctx->beginandendtogether) {
151: PetscLogEventBegin(VEC_ScatterEnd,ctx,x,y,0);
152: (*(ctx)->ops->end)(ctx,x,y,addv,mode);
153: PetscLogEventEnd(VEC_ScatterEnd,ctx,x,y,0);
154: }
155: return(0);
156: }
158: /*@
159: VecScatterDestroy - Destroys a scatter context created by VecScatterCreate()
161: Collective on VecScatter
163: Input Parameter:
164: . ctx - the scatter context
166: Level: intermediate
168: .seealso: VecScatterCreate(), VecScatterCopy()
169: @*/
170: PetscErrorCode VecScatterDestroy(VecScatter *ctx)
171: {
175: if (!*ctx) return(0);
177: if ((*ctx)->inuse && ((PetscObject)(*ctx))->refct == 1) SETERRQ(((PetscObject)(*ctx))->comm,PETSC_ERR_ARG_WRONGSTATE,"Scatter context is in use");
178: if (--((PetscObject)(*ctx))->refct > 0) {*ctx = NULL; return(0);}
180: /* if memory was published with SAWs then destroy it */
181: PetscObjectSAWsViewOff((PetscObject)(*ctx));
182: if ((*ctx)->ops->destroy) {(*(*ctx)->ops->destroy)(*ctx);}
183: #if defined(PETSC_HAVE_CUDA)
184: VecScatterCUDAIndicesDestroy((PetscCUDAIndices*)&((*ctx)->spptr));
185: #endif
186: PetscHeaderDestroy(ctx);
187: return(0);
188: }
190: /*@
191: VecScatterSetUp - Sets up the VecScatter to be able to actually scatter information between vectors
193: Collective on VecScatter
195: Input Parameter:
196: . ctx - the scatter context
198: Level: intermediate
200: .seealso: VecScatterCreate(), VecScatterCopy()
201: @*/
202: PetscErrorCode VecScatterSetUp(VecScatter ctx)
203: {
208: (*ctx->ops->setup)(ctx);
209: return(0);
210: }
212: /*@
213: VecScatterCopy - Makes a copy of a scatter context.
215: Collective on VecScatter
217: Input Parameter:
218: . sctx - the scatter context
220: Output Parameter:
221: . ctx - the context copy
223: Level: advanced
225: .seealso: VecScatterCreate(), VecScatterDestroy()
226: @*/
227: PetscErrorCode VecScatterCopy(VecScatter sctx,VecScatter *ctx)
228: {
230: VecScatterType type;
235: if (!sctx->ops->copy) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot copy this type");
236: PetscHeaderCreate(*ctx,VEC_SCATTER_CLASSID,"VecScatter","VecScatter","Vec",PetscObjectComm((PetscObject)sctx),VecScatterDestroy,VecScatterView);
237: (*ctx)->to_n = sctx->to_n;
238: (*ctx)->from_n = sctx->from_n;
239: (*sctx->ops->copy)(sctx,*ctx);
241: VecScatterGetType(sctx,&type);
242: PetscObjectChangeTypeName((PetscObject)(*ctx),type);
243: return(0);
244: }
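/* ---------------------------------------------------------------------------
   Editorial usage sketch (not part of petsc-3.14.6 vscatfce.c): because
   VecScatterBegin() above refuses a context that is already in use, a copy of
   the context lets two scatters with the same communication pattern be in
   flight at the same time. The name is hypothetical and x2/y2 are assumed to
   have the same layouts as x1/y1.
   --------------------------------------------------------------------------- */
static PetscErrorCode VecScatterConcurrentCopy_Sketch(VecScatter sctx,Vec x1,Vec y1,Vec x2,Vec y2)
{
  PetscErrorCode ierr;
  VecScatter     dup;

  ierr = VecScatterCopy(sctx,&dup);CHKERRQ(ierr);
  ierr = VecScatterBegin(sctx,x1,y1,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterBegin(dup,x2,y2,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(sctx,x1,y1,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(dup,x2,y2,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&dup);CHKERRQ(ierr);
  return(0);
}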
246: /*@C
247: VecScatterViewFromOptions - Views a VecScatter object based on values in the options database
249: Collective on VecScatter
251: Input Parameters:
252: + A - the scatter context
253: . obj - optional object that provides the options prefix (if NULL the prefix of A is used)
254: - name - command line option
256: Level: intermediate
257: .seealso: VecScatter, VecScatterView, PetscObjectViewFromOptions(), VecScatterCreate()
258: @*/
259: PetscErrorCode VecScatterViewFromOptions(VecScatter A,PetscObject obj,const char name[])
260: {
265: PetscObjectViewFromOptions((PetscObject)A,obj,name);
266: return(0);
267: }
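/* ---------------------------------------------------------------------------
   Editorial usage sketch (not part of petsc-3.14.6 vscatfce.c): views a scatter
   only when an option is present in the options database; the option name
   -my_scatter_view is an arbitrary, caller-chosen example.
   --------------------------------------------------------------------------- */
static PetscErrorCode VecScatterOptionalView_Sketch(VecScatter ctx)
{
  PetscErrorCode ierr;

  ierr = VecScatterViewFromOptions(ctx,NULL,"-my_scatter_view");CHKERRQ(ierr);
  return(0);
}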
269: /* ------------------------------------------------------------------*/
270: /*@C
271: VecScatterView - Views a vector scatter context.
273: Collective on VecScatter
275: Input Parameters:
276: + ctx - the scatter context
277: - viewer - the viewer for displaying the context
279: Level: intermediate
281: @*/
282: PetscErrorCode VecScatterView(VecScatter ctx,PetscViewer viewer)
283: {
288: if (!viewer) {
289: PetscViewerASCIIGetStdout(PetscObjectComm((PetscObject)ctx),&viewer);
290: }
292: if (ctx->ops->view) {
293: (*ctx->ops->view)(ctx,viewer);
294: }
295: return(0);
296: }
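/* ---------------------------------------------------------------------------
   Editorial usage sketch (not part of petsc-3.14.6 vscatfce.c): prints a
   scatter context on the stdout viewer of its own communicator. The function
   name is hypothetical.
   --------------------------------------------------------------------------- */
static PetscErrorCode VecScatterPrint_Sketch(VecScatter ctx)
{
  PetscErrorCode ierr;

  ierr = VecScatterView(ctx,PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)ctx)));CHKERRQ(ierr);
  return(0);
}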
298: /*@C
299: VecScatterRemap - Remaps the "from" and "to" indices in a
300: vector scatter context. FOR EXPERTS ONLY!
302: Collective on VecScatter
304: Input Parameters:
305: + scat - vector scatter context
306: . tomap - remapping plan for "to" indices (may be NULL).
307: - frommap - remapping plan for "from" indices (may be NULL)
309: Level: developer
311: Notes:
312: In the parallel case the todata contains indices from which the data is taken
313: (and then sent to others)! The fromdata contains indices into which the received
314: data is finally put locally.
316: In the sequential case the todata contains indices into which the data is put
317: and the fromdata contains indices from which the data is taken.
318: This is backwards from the parallel case! (An illustrative remapping sketch follows this routine.)
320: @*/
321: PetscErrorCode VecScatterRemap(VecScatter scat,PetscInt tomap[],PetscInt frommap[])
322: {
323: VecScatter_MPI_General *to,*from;
324: VecScatter_Seq_General *sgto,*sgfrom;
325: VecScatter_Seq_Stride *ssto;
326: PetscInt i,ierr;
333: if (scat->ops->remap) {
334: (*scat->ops->remap)(scat,tomap,frommap);
335: } else {
336: to = (VecScatter_MPI_General*)scat->todata;
337: from = (VecScatter_MPI_General*)scat->fromdata;
338: ssto = (VecScatter_Seq_Stride*)scat->todata;
339: sgto = (VecScatter_Seq_General*)scat->todata;
340: sgfrom = (VecScatter_Seq_General*)scat->fromdata;
342: /* remap indices from where we take/read data */
343: if (tomap) {
344: if (to->format == VEC_SCATTER_MPI_TOALL) {
345: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Not for to all scatter");
346: } else if (to->format == VEC_SCATTER_MPI_GENERAL) {
347: /* handle off processor parts */
348: for (i=0; i<to->starts[to->n]; i++) to->indices[i] = tomap[to->indices[i]];
350: /* handle local part */
351: for (i=0; i<to->local.n; i++) to->local.vslots[i] = tomap[to->local.vslots[i]];
353: /* the memcpy optimizations in vecscatter were based on the index patterns it had.
354: They need to be recalculated when the indices are changed (remapped).
355: */
356: VecScatterMemcpyPlanDestroy_PtoP(to,from);
357: VecScatterMemcpyPlanCreate_PtoP(to,from);
358: } else if (sgfrom->format == VEC_SCATTER_SEQ_GENERAL) {
359: /* remap indices */
360: for (i=0; i<sgfrom->n; i++) sgfrom->vslots[i] = tomap[sgfrom->vslots[i]];
361: /* update optimizations, which happen when it is a Stride1toSG, SGtoStride1 or SGToSG vecscatter */
362: if (ssto->format == VEC_SCATTER_SEQ_STRIDE && ssto->step == 1) {
363: PetscInt tmp[2];
364: tmp[0] = 0; tmp[1] = sgfrom->n;
365: VecScatterMemcpyPlanDestroy(&sgfrom->memcpy_plan);
366: VecScatterMemcpyPlanCreate_Index(1,tmp,sgfrom->vslots,1/*bs*/,&sgfrom->memcpy_plan);
367: } else if (sgto->format == VEC_SCATTER_SEQ_GENERAL) {
368: VecScatterMemcpyPlanDestroy(&sgto->memcpy_plan);
369: VecScatterMemcpyPlanDestroy(&sgfrom->memcpy_plan);
370: VecScatterMemcpyPlanCreate_SGToSG(1/*bs*/,sgto,sgfrom);
371: }
372: } else if (sgfrom->format == VEC_SCATTER_SEQ_STRIDE) {
373: VecScatter_Seq_Stride *ssto = (VecScatter_Seq_Stride*)sgfrom;
375: /* if the remapping is the identity and the stride is trivial (first 0, step 1) then skip the remap */
376: if (ssto->step == 1 && ssto->first == 0) {
377: for (i=0; i<ssto->n; i++) {
378: if (tomap[i] != i) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Unable to remap such scatters");
379: }
380: } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Unable to remap such scatters");
381: } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Unable to remap such scatters");
382: }
383: }
384: if (frommap) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Unable to remap the FROM in scatters yet");
386: /*
387: Mark the vector lengths as unknown because we do not know the
388: lengths of the remapped vectors
389: */
390: scat->from_n = -1;
391: scat->to_n = -1;
392: return(0);
393: }
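/* ---------------------------------------------------------------------------
   Editorial usage sketch (not part of petsc-3.14.6 vscatfce.c): builds a
   permutation and hands it to VecScatterRemap(). Per the Notes above, in the
   parallel case tomap remaps the indices from which data is taken, while in
   the sequential case the roles are reversed; nlocal is assumed to be the
   local length of the vector whose indices are being remapped, and frommap
   must be NULL because remapping the "from" side is not supported.
   --------------------------------------------------------------------------- */
static PetscErrorCode VecScatterRemap_Sketch(VecScatter scat,PetscInt nlocal)
{
  PetscErrorCode ierr;
  PetscInt       i,*tomap;

  ierr = PetscMalloc1(nlocal,&tomap);CHKERRQ(ierr);
  for (i=0; i<nlocal; i++) tomap[i] = nlocal-1-i; /* old local index i now lives at nlocal-1-i */
  ierr = VecScatterRemap(scat,tomap,NULL);CHKERRQ(ierr);
  ierr = PetscFree(tomap);CHKERRQ(ierr);
  return(0);
}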
395: /* Given a parallel VecScatter context, return number of procs and vector entries involved in remote (i.e., off-process) communication
397: Input Parameters:
398: + ctx - the context (must be a parallel vecscatter)
399: - send - true to select the send info (i.e., todata), otherwise to select the recv info (i.e., fromdata)
401: Output parameters:
402: + num_procs - number of remote processors
403: - num_entries - number of vector entries to send or recv
406: .seealso: VecScatterGetRemote_Private(), VecScatterGetRemoteOrdered_Private()
408: Notes:
409: Sometimes PETSc internally needs to use the matrix-vector-multiply vecscatter context for other purposes. The client code
410: usually only uses MPI_Send/Recv. This group of subroutines provides info needed for such uses.
411: */
412: PetscErrorCode VecScatterGetRemoteCount_Private(VecScatter ctx,PetscBool send,PetscInt *num_procs,PetscInt *num_entries)
413: {
414: VecScatter_MPI_General *vs;
415: PetscBool par;
416: PetscErrorCode ierr;
419: if (ctx->ops->getremotecount) {
420: (*ctx->ops->getremotecount)(ctx,send,num_procs,num_entries);
421: } else {
422: vs = (VecScatter_MPI_General*)(send ? ctx->todata : ctx->fromdata);
423: par = (vs->format == VEC_SCATTER_MPI_GENERAL)? PETSC_TRUE : PETSC_FALSE;
424: if (num_procs) *num_procs = par ? vs->n : 0;
425: if (num_entries) *num_entries = par ? vs->starts[vs->n] : 0;
426: }
427: return(0);
428: }
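/* ---------------------------------------------------------------------------
   Editorial usage sketch (not part of petsc-3.14.6 vscatfce.c): uses the remote
   counts to size per-process MPI request arrays, in the spirit of the note
   above about client code posting its own sends/receives. The function name is
   hypothetical and no messages are actually posted here.
   --------------------------------------------------------------------------- */
static PetscErrorCode VecScatterRemoteCountUse_Sketch(VecScatter ctx)
{
  PetscErrorCode ierr;
  PetscInt       nsendprocs,nsendentries,nrecvprocs,nrecventries;
  MPI_Request    *sreqs,*rreqs;

  ierr = VecScatterGetRemoteCount_Private(ctx,PETSC_TRUE,&nsendprocs,&nsendentries);CHKERRQ(ierr);
  ierr = VecScatterGetRemoteCount_Private(ctx,PETSC_FALSE,&nrecvprocs,&nrecventries);CHKERRQ(ierr);
  /* one request per remote process; nsendentries/nrecventries would size the data buffers */
  ierr = PetscMalloc2(nsendprocs,&sreqs,nrecvprocs,&rreqs);CHKERRQ(ierr);
  /* ... MPI_Isend()/MPI_Irecv() would be posted here, one per remote process ... */
  ierr = PetscFree2(sreqs,rreqs);CHKERRQ(ierr);
  return(0);
}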
430: /* Given a parallel VecScatter context, return a plan that represents the remote communication.
431: Any output parameter can be NULL.
433: Input Parameters:
434: + ctx - the context
435: - send - true to select the send info (i.e., todata), otherwise to select the recv info (i.e., fromdata)
437: Output parameters:
438: + n - number of remote processors
439: . starts - starting point in indices for each proc. ATTENTION: starts[0] is not necessarily zero.
440: Therefore, expressions like starts[i+1]-starts[i] and indices[starts[i]+j] work as
441: expected for a CSR structure but buf[starts[i]+j] may be out of range if buf was allocated
442: with length starts[n]-starts[0]. One should use buf[starts[i]-starts[0]+j] instead.
443: . indices - indices of entries to send/recv
444: . procs - ranks of remote processors
445: - bs - block size
447: .seealso: VecScatterRestoreRemote_Private(), VecScatterGetRemoteOrdered_Private()
448: */
449: PetscErrorCode VecScatterGetRemote_Private(VecScatter ctx,PetscBool send,PetscInt *n,const PetscInt **starts,const PetscInt **indices,const PetscMPIInt **procs,PetscInt *bs)
450: {
451: VecScatter_MPI_General *vs;
452: PetscBool par;
453: PetscErrorCode ierr;
456: if (ctx->ops->getremote) {
457: (*ctx->ops->getremote)(ctx,send,n,starts,indices,procs,bs);
458: } else {
459: vs = (VecScatter_MPI_General*)(send ? ctx->todata : ctx->fromdata);
460: par = (vs->format == VEC_SCATTER_MPI_GENERAL)? PETSC_TRUE : PETSC_FALSE;
461: if (n) *n = par ? vs->n : 0;
462: if (indices) *indices = par ? vs->indices : NULL;
463: if (starts) *starts = par ? vs->starts : NULL;
464: if (procs) *procs = par ? vs->procs : NULL;
465: if (bs) *bs = par ? vs->bs : 0;
466: }
467: return(0);
468: }
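/* ---------------------------------------------------------------------------
   Editorial usage sketch (not part of petsc-3.14.6 vscatfce.c): demonstrates
   the buffer indexing described in the ATTENTION note above, i.e. shifting by
   starts[0] when starts[0] is nonzero, together with the Get/Restore pairing.
   The block-index interpretation mirrors the one used by
   VecScatterInitializeForGPU() later in this file; the zero "payload" stands
   in for real packing code.
   --------------------------------------------------------------------------- */
static PetscErrorCode VecScatterPackSendBuffer_Sketch(VecScatter ctx)
{
  PetscErrorCode    ierr;
  PetscInt          i,j,k,n,bs,nblocks;
  const PetscInt    *starts,*indices;
  const PetscMPIInt *procs;
  PetscScalar       *buf;

  ierr    = VecScatterGetRemote_Private(ctx,PETSC_TRUE/*send*/,&n,&starts,&indices,&procs,&bs);CHKERRQ(ierr);
  nblocks = n ? starts[n]-starts[0] : 0;                 /* starts[0] is not necessarily zero */
  ierr    = PetscMalloc1(nblocks*bs,&buf);CHKERRQ(ierr); /* buffer of length (starts[n]-starts[0])*bs */
  for (i=0; i<n; i++) {                                  /* loop over remote processes procs[i] */
    for (j=starts[i]; j<starts[i+1]; j++) {              /* blocks destined for procs[i] */
      for (k=0; k<bs; k++) {
        /* indices[] is addressed with j directly, but buf must be shifted by starts[0] */
        buf[(j-starts[0])*bs+k] = 0.0;                   /* real code would pack the entry at indices[j]+k */
      }
    }
  }
  ierr = PetscFree(buf);CHKERRQ(ierr);
  ierr = VecScatterRestoreRemote_Private(ctx,PETSC_TRUE,&n,&starts,&indices,&procs,&bs);CHKERRQ(ierr);
  return(0);
}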
471: /* Given a parallel VecScatter context, return a plan that represents the remote communication. Ranks of remote
472: processors returned in procs must be sorted in ascending order. Any output parameter can be NULL.
474: Input Parameters:
475: + ctx - the context
476: - send - true to select the send info (i.e., todata), otherwise to select the recv info (i.e., fromdata)
478: Output parameters:
479: + n - number of remote processors
480: . starts - starting point in indices for each proc. ATTENTION: starts[0] is not necessarily zero.
481: Therefore, expressions like starts[i+1]-starts[i] and indices[starts[i]+j] work as
482: expected for a CSR structure but buf[starts[i]+j] may be out of range if buf was allocated
483: with length starts[n]-starts[0]. One should use buf[starts[i]-starts[0]+j] instead.
484: . indices - indices of entries to send/recv
485: . procs - ranks of remote processors
486: - bs - block size
488: .seealso: VecScatterRestoreRemoteOrdered_Private(), VecScatterGetRemote_Private()
490: Notes:
491: Output parameters such as starts and indices must also be adapted to match the sorted ranks.
492: */
493: PetscErrorCode VecScatterGetRemoteOrdered_Private(VecScatter ctx,PetscBool send,PetscInt *n,const PetscInt **starts,const PetscInt **indices,const PetscMPIInt **procs,PetscInt *bs)
494: {
495: VecScatter_MPI_General *vs;
496: PetscBool par;
497: PetscErrorCode ierr;
500: if (ctx->ops->getremoteordered) {
501: (*ctx->ops->getremoteordered)(ctx,send,n,starts,indices,procs,bs);
502: } else {
503: vs = (VecScatter_MPI_General*)(send ? ctx->todata : ctx->fromdata);
504: par = (vs->format == VEC_SCATTER_MPI_GENERAL)? PETSC_TRUE : PETSC_FALSE;
505: if (n) *n = par ? vs->n : 0;
506: if (indices) *indices = par ? vs->indices : NULL;
507: if (starts) *starts = par ? vs->starts : NULL;
508: if (procs) *procs = par ? vs->procs : NULL;
509: if (bs) *bs = par ? vs->bs : 0;
510: }
511: if (PetscUnlikelyDebug(n && procs)) {
512: PetscInt i;
513: /* iterate from back to front so that the case *n = 0 is also handled */
514: for (i=*n-1; i>0; i--) { if ((*procs)[i-1] > (*procs)[i]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"procs[] are not ordered"); }
515: }
516: return(0);
517: }
519: /* Given a parallel VecScatter context, restore the plan returned by VecScatterGetRemote_Private. This gives a chance for
520: an implementation to free memory allocated in the VecScatterGetRemote_Private call.
522: Input Parameters:
523: + ctx - the context
524: - send - true to select the send info (i.e., todata), otherwise to select the recv info (i.e., fromdata)
526: Output parameters:
527: + n - number of remote processors
528: . starts - starting point in indices for each proc
529: . indices - indices of entries to send/recv
530: . procs - ranks of remote processors
531: - bs - block size
533: .seealso: VecScatterGetRemote_Private()
534: */
535: PetscErrorCode VecScatterRestoreRemote_Private(VecScatter ctx,PetscBool send,PetscInt *n,const PetscInt **starts,const PetscInt **indices,const PetscMPIInt **procs,PetscInt *bs)
536: {
537: PetscErrorCode ierr;
540: if (ctx->ops->restoreremote) {
541: (*ctx->ops->restoreremote)(ctx,send,n,starts,indices,procs,bs);
542: } else {
543: if (starts) *starts = NULL;
544: if (indices) *indices = NULL;
545: if (procs) *procs = NULL;
546: }
547: return(0);
548: }
550: /* Given a parallel VecScatter context, restore the plan returned by VecScatterGetRemoteOrdered_Private. This gives a chance for
551: an implementation to free memory allocated in the VecScatterGetRemoteOrdered_Private call.
553: Input Parameters:
554: + ctx - the context
555: - send - true to select the send info (i.e., todata), otherwise to select the recv info (i.e., fromdata)
557: Output parameters:
558: + n - number of remote processors
559: . starts - starting point in indices for each proc
560: . indices - indices of entries to send/recv
561: . procs - ranks of remote processors
562: - bs - block size
564: .seealso: VecScatterGetRemoteOrdered_Private()
565: */
566: PetscErrorCode VecScatterRestoreRemoteOrdered_Private(VecScatter ctx,PetscBool send,PetscInt *n,const PetscInt **starts,const PetscInt **indices,const PetscMPIInt **procs,PetscInt *bs)
567: {
570: if (ctx->ops->restoreremoteordered) {
571: (*ctx->ops->restoreremoteordered)(ctx,send,n,starts,indices,procs,bs);
572: } else {
573: VecScatterRestoreRemote_Private(ctx,send,n,starts,indices,procs,bs);
574: }
575: return(0);
576: }
578: #if defined(PETSC_HAVE_CUDA)
580: /*@C
581: VecScatterInitializeForGPU - Initializes a generalized scatter from one vector
582: to another for GPU based computation.
584: Input Parameters:
585: + inctx - scatter context generated by VecScatterCreate()
586: - x - the vector from which we scatter
588: Level: intermediate
590: Notes:
591: Effectively, this function creates all the necessary indexing buffers and work
592: vectors needed to move only those data points in a vector which need to
593: be communicated across ranks. This is done the first time this function is
594: called. Currently, this is only used in the context of the parallel SpMV call in
595: MatMult_MPIAIJCUSPARSE.
597: This function is executed before the call to MatMult. This enables the memory
598: transfers to be overlapped with the MatMult SpMV kernel call.
600: .seealso: VecScatterFinalizeForGPU(), VecScatterCreate(), VecScatterEnd()
601: @*/
602: PETSC_EXTERN PetscErrorCode VecScatterInitializeForGPU(VecScatter inctx,Vec x)
603: {
605: PetscInt i,nrecvs,nsends,sbs,rbs,ns,nr;
606: const PetscInt *sstarts,*rstarts,*sindices,*rindices;
607: VecScatterType type;
608: PetscBool isSF;
611: VecScatterGetType(inctx,&type);
612: PetscStrcmp(type,VECSCATTERSF,&isSF);
613: if (isSF) return(0);
615: VecScatterGetRemote_Private(inctx,PETSC_TRUE/*send*/, &nsends,&sstarts,&sindices,NULL/*procs*/,&sbs);
616: VecScatterGetRemote_Private(inctx,PETSC_FALSE/*recv*/,&nrecvs,&rstarts,&rindices,NULL/*procs*/,&rbs);
617: ns = nsends ? sstarts[nsends]-sstarts[0] : 0; /* s/rstarts[0] is not necessarily zero */
618: nr = nrecvs ? rstarts[nrecvs]-rstarts[0] : 0;
620: if (x->offloadmask != PETSC_OFFLOAD_UNALLOCATED && (nsends>0 || nrecvs>0)) {
621: if (!inctx->spptr) {
622: PetscInt k,*tindicesSends,*sindicesSends,*tindicesRecvs,*sindicesRecvs;
623: /* Here we create indices for both the senders and receivers. */
624: PetscMalloc1(ns,&tindicesSends);
625: PetscMalloc1(nr,&tindicesRecvs);
627: /* s/rindices and s/rstarts could be NULL when ns or nr is zero */
628: if (ns) {PetscArraycpy(tindicesSends,&sindices[sstarts[0]],ns);}
629: if (nr) {PetscArraycpy(tindicesRecvs,&rindices[rstarts[0]],nr);}
631: PetscSortRemoveDupsInt(&ns,tindicesSends);
632: PetscSortRemoveDupsInt(&nr,tindicesRecvs);
634: PetscMalloc1(sbs*ns,&sindicesSends);
635: PetscMalloc1(rbs*nr,&sindicesRecvs);
637: /* sender indices */
638: for (i=0; i<ns; i++) {
639: for (k=0; k<sbs; k++) sindicesSends[i*sbs+k] = tindicesSends[i]+k;
640: }
641: PetscFree(tindicesSends);
643: /* receiver indices */
644: for (i=0; i<nr; i++) {
645: for (k=0; k<rbs; k++) sindicesRecvs[i*rbs+k] = tindicesRecvs[i]+k;
646: }
647: PetscFree(tindicesRecvs);
649: /* create GPU indices, work vectors, ... */
650: VecScatterCUDAIndicesCreate_PtoP(ns*sbs,sindicesSends,nr*rbs,sindicesRecvs,(PetscCUDAIndices*)&inctx->spptr);
651: PetscFree(sindicesSends);
652: PetscFree(sindicesRecvs);
653: }
654: }
655: return(0);
656: }
658: /*@C
659: VecScatterFinalizeForGPU - Finalizes a generalized scatter from one vector to
660: another for GPU based computation.
662: Input Parameter:
663: . inctx - scatter context generated by VecScatterCreate()
665: Level: intermediate
667: Notes:
668: Effectively, this function resets the temporary buffer flags. Currently, this is
669: only used in the context of the parallel SpMV call in MatMult_MPIAIJCUSPARSE.
670: Once the MatMultAdd is finished, the GPU temporary buffers used for messaging are no longer valid.
672: .seealso: VecScatterInitializeForGPU(), VecScatterCreate(), VecScatterEnd()
673: @*/
674: PETSC_EXTERN PetscErrorCode VecScatterFinalizeForGPU(VecScatter inctx)
675: {
677: return(0);
678: }
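/* ---------------------------------------------------------------------------
   Editorial usage sketch (not part of petsc-3.14.6 vscatfce.c): a schematic of
   the ordering described in the notes above; in practice MatMult_MPIAIJCUSPARSE
   performs these steps internally, so this only illustrates the intended call
   sequence. The name and the lvec work vector are assumptions.
   --------------------------------------------------------------------------- */
static PetscErrorCode VecScatterGPUOrdering_Sketch(VecScatter ctx,Vec x,Vec lvec)
{
  PetscErrorCode ierr;

  ierr = VecScatterInitializeForGPU(ctx,x);CHKERRQ(ierr);                        /* builds GPU index buffers on first use */
  ierr = VecScatterBegin(ctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  /* the local part of the SpMV kernel would run here, overlapping the transfers */
  ierr = VecScatterEnd(ctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterFinalizeForGPU(ctx);CHKERRQ(ierr);                            /* GPU message buffers are no longer valid afterwards */
  return(0);
}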
680: #endif