#include <petsc/private/vecscatterimpl.h>
#if defined(PETSC_HAVE_CUDA)
#include <../src/vec/vec/impls/seq/seqcuda/cudavecimpl.h>
#endif
/* ------------------------------------------------------------------*/
/*@
   VecScatterGetMerged - Returns true if the scatter is completed in the VecScatterBegin()
      and the VecScatterEnd() does nothing

   Not Collective

   Input Parameter:
.  ctx - scatter context created with VecScatterCreate()

   Output Parameter:
.  flg - PETSC_TRUE if the VecScatterBegin/End() are all done during the VecScatterBegin()

   Level: developer

.seealso: VecScatterCreate(), VecScatterEnd(), VecScatterBegin()
@*/
PetscErrorCode VecScatterGetMerged(VecScatter ctx,PetscBool *flg)
{
  *flg = ctx->beginandendtogether;
  return(0);
}
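
/*
   Usage sketch (an illustration added here, not code taken from the PETSc examples): when the
   scatter is merged, all communication finishes inside VecScatterBegin(), so there is nothing
   left to overlap before VecScatterEnd(). The vectors ctx, x and y are assumed to come from a
   prior VecScatterCreate() with matching layouts.

     PetscBool merged;
     VecScatterGetMerged(ctx,&merged);
     VecScatterBegin(ctx,x,y,INSERT_VALUES,SCATTER_FORWARD);
     if (!merged) {
       ... do independent local work here while messages are in flight ...
     }
     VecScatterEnd(ctx,x,y,INSERT_VALUES,SCATTER_FORWARD);
*/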
/*@
   VecScatterBegin - Begins a generalized scatter from one vector to
   another. Complete the scattering phase with VecScatterEnd().

   Neighbor-wise Collective on VecScatter

   Input Parameters:
+  ctx - scatter context generated by VecScatterCreate()
.  x - the vector from which we scatter
.  y - the vector to which we scatter
.  addv - either ADD_VALUES, MAX_VALUES, MIN_VALUES or INSERT_VALUES; with INSERT_VALUES mode any location
          not scattered to retains its old value, i.e. the vector is NOT first zeroed.
-  mode - the scattering mode, usually SCATTER_FORWARD. The available modes are:
          SCATTER_FORWARD or SCATTER_REVERSE

   Level: intermediate

   Options Database: See VecScatterCreate()

   Notes:
   The vectors x and y need not be the same vectors used in the call
   to VecScatterCreate(), but x must have the same parallel data layout
   as the x passed to VecScatterCreate(); similarly for y.
   Most likely they have been obtained from VecDuplicate().

   You cannot change the values in the input vector between the calls to VecScatterBegin()
   and VecScatterEnd().

   If you use SCATTER_REVERSE the two arguments x and y should be reversed relative to
   the SCATTER_FORWARD call.

   y[iy[i]] = x[ix[i]], for i=0,...,ni-1

   This scatter is far more general than the conventional
   scatter, since it can be a gather or a scatter or a combination,
   depending on the indices ix and iy. If x is a parallel vector and y
   is sequential, VecScatterBegin() can serve to gather values to a
   single processor. Similarly, if y is parallel and x sequential, the
   routine can scatter from one processor to many processors.

   A usage sketch follows the function body below.

.seealso: VecScatterCreate(), VecScatterEnd()
@*/
PetscErrorCode VecScatterBegin(VecScatter ctx,Vec x,Vec y,InsertMode addv,ScatterMode mode)
{
#if defined(PETSC_USE_DEBUG)
  PetscInt to_n,from_n;
#endif
  if (ctx->inuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE," Scatter ctx already in use");

#if defined(PETSC_USE_DEBUG)
  /*
     Error checking to make sure these vectors match the vectors used
     to create the vector scatter context. -1 in the from_n and to_n indicate the
     vector lengths are unknown (for example with mapped scatters) and thus
     no error checking is performed.
  */
  if (ctx->from_n >= 0 && ctx->to_n >= 0) {
    VecGetLocalSize(x,&from_n);
    VecGetLocalSize(y,&to_n);
    if (mode & SCATTER_REVERSE) {
      if (to_n != ctx->from_n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Vector wrong size %D for scatter %D (scatter reverse and vector to != ctx from size)",to_n,ctx->from_n);
      if (from_n != ctx->to_n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Vector wrong size %D for scatter %D (scatter reverse and vector from != ctx to size)",from_n,ctx->to_n);
    } else {
      if (to_n != ctx->to_n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Vector wrong size %D for scatter %D (scatter forward and vector to != ctx to size)",to_n,ctx->to_n);
      if (from_n != ctx->from_n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Vector wrong size %D for scatter %D (scatter forward and vector from != ctx from size)",from_n,ctx->from_n);
    }
  }
#endif

  ctx->inuse = PETSC_TRUE;
  PetscLogEventBegin(VEC_ScatterBegin,ctx,x,y,0);
  (*ctx->ops->begin)(ctx,x,y,addv,mode);
  if (ctx->beginandendtogether && ctx->ops->end) {
    ctx->inuse = PETSC_FALSE;
    (*ctx->ops->end)(ctx,x,y,addv,mode);
  }
  PetscLogEventEnd(VEC_ScatterBegin,ctx,x,y,0);
  return(0);
}
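
/*
   Usage sketch (illustrative only; assumes ctx was built with VecScatterCreate(x,ix,y,iy,&ctx)
   and that x and y keep the layouts used at creation time):

     VecScatterBegin(ctx,x,y,INSERT_VALUES,SCATTER_FORWARD);
     ... computation that touches neither x nor y can be overlapped here ...
     VecScatterEnd(ctx,x,y,INSERT_VALUES,SCATTER_FORWARD);

   With INSERT_VALUES, entries of y that are not scattered to keep their old values.
*/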
/* --------------------------------------------------------------------*/
/*@
   VecScatterEnd - Ends a generalized scatter from one vector to another. Call
   after first calling VecScatterBegin().

   Neighbor-wise Collective on VecScatter

   Input Parameters:
+  ctx - scatter context generated by VecScatterCreate()
.  x - the vector from which we scatter
.  y - the vector to which we scatter
.  addv - one of ADD_VALUES, MAX_VALUES, MIN_VALUES or INSERT_VALUES
-  mode - the scattering mode, usually SCATTER_FORWARD. The available modes are:
          SCATTER_FORWARD, SCATTER_REVERSE

   Level: intermediate

   Notes:
   If you use SCATTER_REVERSE the arguments x and y should be reversed relative to the
   SCATTER_FORWARD call; see the reverse-mode sketch after the function body below.

   y[iy[i]] = x[ix[i]], for i=0,...,ni-1

.seealso: VecScatterBegin(), VecScatterCreate()
@*/
PetscErrorCode VecScatterEnd(VecScatter ctx,Vec x,Vec y,InsertMode addv,ScatterMode mode)
{
  ctx->inuse = PETSC_FALSE;
  if (!ctx->ops->end) return(0);
  if (!ctx->beginandendtogether) {
    PetscLogEventBegin(VEC_ScatterEnd,ctx,x,y,0);
    (*(ctx)->ops->end)(ctx,x,y,addv,mode);
    PetscLogEventEnd(VEC_ScatterEnd,ctx,x,y,0);
  }
  return(0);
}
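
/*
   Reverse-mode sketch (illustrative only): with SCATTER_REVERSE the two vector arguments are
   swapped relative to the forward call, so the data moves from y back into x, here accumulating
   with ADD_VALUES:

     VecScatterBegin(ctx,y,x,ADD_VALUES,SCATTER_REVERSE);
     VecScatterEnd(ctx,y,x,ADD_VALUES,SCATTER_REVERSE);
*/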
/*@
   VecScatterDestroy - Destroys a scatter context created by VecScatterCreate()

   Collective on VecScatter

   Input Parameter:
.  ctx - the scatter context

   Level: intermediate

.seealso: VecScatterCreate(), VecScatterCopy()
@*/
PetscErrorCode VecScatterDestroy(VecScatter *ctx)
{
  if (!*ctx) return(0);
  if ((*ctx)->inuse && ((PetscObject)(*ctx))->refct == 1) SETERRQ(((PetscObject)(*ctx))->comm,PETSC_ERR_ARG_WRONGSTATE,"Scatter context is in use");
  if (--((PetscObject)(*ctx))->refct > 0) {*ctx = 0; return(0);}

  /* if memory was published with SAWs then destroy it */
  PetscObjectSAWsViewOff((PetscObject)(*ctx));
  if ((*ctx)->ops->destroy) {(*(*ctx)->ops->destroy)(*ctx);}
#if defined(PETSC_HAVE_CUDA)
  VecScatterCUDAIndicesDestroy((PetscCUDAIndices*)&((*ctx)->spptr));
#endif
  PetscHeaderDestroy(ctx);
  return(0);
}

/*@
   VecScatterSetUp - Sets up the VecScatter to be able to actually scatter information between vectors

   Collective on VecScatter

   Input Parameter:
.  ctx - the scatter context

   Level: intermediate

.seealso: VecScatterCreate(), VecScatterCopy()
@*/
PetscErrorCode VecScatterSetUp(VecScatter ctx)
{
  (*ctx->ops->setup)(ctx);
  return(0);
}

/*@
   VecScatterCopy - Makes a copy of a scatter context.

   Collective on VecScatter

   Input Parameter:
.  sctx - the scatter context

   Output Parameter:
.  ctx - the context copy

   Level: advanced

.seealso: VecScatterCreate(), VecScatterDestroy()
@*/
PetscErrorCode VecScatterCopy(VecScatter sctx,VecScatter *ctx)
{
  VecScatterType type;

  if (!sctx->ops->copy) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot copy this type");
  PetscHeaderCreate(*ctx,VEC_SCATTER_CLASSID,"VecScatter","VecScatter","Vec",PetscObjectComm((PetscObject)sctx),VecScatterDestroy,VecScatterView);
  (*ctx)->to_n   = sctx->to_n;
  (*ctx)->from_n = sctx->from_n;
  (*sctx->ops->copy)(sctx,*ctx);

  VecScatterGetType(sctx,&type);
  PetscObjectChangeTypeName((PetscObject)(*ctx),type);
  return(0);
}
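
/*
   Usage sketch (illustrative only; x2 and y2 are assumed vectors with the same layouts as the
   originals): copying the context lets a second, independent scatter reuse the same communication
   pattern.

     VecScatter ctx2;
     VecScatterCopy(ctx,&ctx2);
     VecScatterBegin(ctx2,x2,y2,INSERT_VALUES,SCATTER_FORWARD);
     VecScatterEnd(ctx2,x2,y2,INSERT_VALUES,SCATTER_FORWARD);
     VecScatterDestroy(&ctx2);
*/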
/* ------------------------------------------------------------------*/
/*@C
   VecScatterView - Views a vector scatter context.

   Collective on VecScatter

   Input Parameters:
+  ctx - the scatter context
-  viewer - the viewer for displaying the context

   Level: intermediate

@*/
PetscErrorCode VecScatterView(VecScatter ctx,PetscViewer viewer)
{
  if (!viewer) {
    PetscViewerASCIIGetStdout(PetscObjectComm((PetscObject)ctx),&viewer);
  }
  if (ctx->ops->view) {
    (*ctx->ops->view)(ctx,viewer);
  }
  return(0);
}
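
/*
   Usage sketch (illustrative only): passing NULL selects the default ASCII viewer on the
   scatter's communicator, or an explicit viewer may be supplied.

     VecScatterView(ctx,NULL);
     VecScatterView(ctx,PETSC_VIEWER_STDOUT_WORLD);
*/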
/*@C
   VecScatterRemap - Remaps the "from" and "to" indices in a
   vector scatter context. FOR EXPERTS ONLY!

   Collective on VecScatter

   Input Parameters:
+  scat - vector scatter context
.  tomap - remapping plan for "to" indices (may be NULL)
-  frommap - remapping plan for "from" indices (may be NULL)

   Level: developer

   Notes:
   In the parallel case the todata contains indices from where the data is taken
   (and then sent to others)! The fromdata contains indices from where the received
   data is finally put locally.

   In the sequential case the todata contains indices from where the data is put
   and the fromdata contains indices from where the data is taken.
   This is backwards from the parallel case!

@*/
PetscErrorCode VecScatterRemap(VecScatter scat,PetscInt tomap[],PetscInt frommap[])
{
  VecScatter_MPI_General *to,*from;
  VecScatter_Seq_General *sgto,*sgfrom;
  VecScatter_Seq_Stride  *ssto;
  PetscInt               i,ierr;

  if (scat->ops->remap) {
    (*scat->ops->remap)(scat,tomap,frommap);
  } else {
    to     = (VecScatter_MPI_General*)scat->todata;
    from   = (VecScatter_MPI_General*)scat->fromdata;
    ssto   = (VecScatter_Seq_Stride*)scat->todata;
    sgto   = (VecScatter_Seq_General*)scat->todata;
    sgfrom = (VecScatter_Seq_General*)scat->fromdata;

    /* remap indices from where we take/read data */
    if (tomap) {
      if (to->format == VEC_SCATTER_MPI_TOALL) {
        SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Not for to all scatter");
      } else if (to->format == VEC_SCATTER_MPI_GENERAL) {
        /* handle off processor parts */
        for (i=0; i<to->starts[to->n]; i++) to->indices[i] = tomap[to->indices[i]];

        /* handle local part */
        for (i=0; i<to->local.n; i++) to->local.vslots[i] = tomap[to->local.vslots[i]];

        /* the memcpy optimizations in the vecscatter are based on its index patterns;
           they need to be recalculated when the indices are changed (remapped). */
        VecScatterMemcpyPlanDestroy_PtoP(to,from);
        VecScatterMemcpyPlanCreate_PtoP(to,from);
      } else if (sgfrom->format == VEC_SCATTER_SEQ_GENERAL) {
        /* remap indices */
        for (i=0; i<sgfrom->n; i++) sgfrom->vslots[i] = tomap[sgfrom->vslots[i]];
        /* update optimizations, which happen when it is a Stride1toSG, SGtoStride1 or SGToSG vecscatter */
        if (ssto->format == VEC_SCATTER_SEQ_STRIDE && ssto->step == 1) {
          PetscInt tmp[2];
          tmp[0] = 0; tmp[1] = sgfrom->n;
          VecScatterMemcpyPlanDestroy(&sgfrom->memcpy_plan);
          VecScatterMemcpyPlanCreate_Index(1,tmp,sgfrom->vslots,1/*bs*/,&sgfrom->memcpy_plan);
        } else if (sgto->format == VEC_SCATTER_SEQ_GENERAL) {
          VecScatterMemcpyPlanDestroy(&sgto->memcpy_plan);
          VecScatterMemcpyPlanDestroy(&sgfrom->memcpy_plan);
          VecScatterMemcpyPlanCreate_SGToSG(1/*bs*/,sgto,sgfrom);
        }
      } else if (sgfrom->format == VEC_SCATTER_SEQ_STRIDE) {
        VecScatter_Seq_Stride *ssto = (VecScatter_Seq_Stride*)sgfrom;

        /* if the remapping is the identity and stride is identity then skip remap */
        if (ssto->step == 1 && ssto->first == 0) {
          for (i=0; i<ssto->n; i++) {
            if (tomap[i] != i) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Unable to remap such scatters");
          }
        } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Unable to remap such scatters");
      } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Unable to remap such scatters");
    }
  }
  if (frommap) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Unable to remap the FROM in scatters yet");

  /*
     Mark the vector lengths as unknown because we do not know the
     lengths of the remapped vectors
  */
  scat->from_n = -1;
  scat->to_n   = -1;
  return(0);
}

/* Given a parallel VecScatter context, return the number of procs and vector entries involved in remote (i.e., off-process) communication

   Input Parameters:
+  ctx - the context (must be a parallel vecscatter)
-  send - true to select the send info (i.e., todata), otherwise to select the recv info (i.e., fromdata)

   Output parameters:
+  num_procs - number of remote processors
-  num_entries - number of vector entries to send or recv

.seealso: VecScatterGetRemote_Private(), VecScatterGetRemoteOrdered_Private()

   Notes:
   Sometimes PETSc internally needs to use the matrix-vector-multiply vecscatter context for other purposes. The client code
   usually only uses MPI_Send/Recv. This group of subroutines provides the info needed for such uses; a usage sketch follows
   VecScatterGetRemote_Private() below.
 */
PetscErrorCode VecScatterGetRemoteCount_Private(VecScatter ctx,PetscBool send,PetscInt *num_procs,PetscInt *num_entries)
{
  VecScatter_MPI_General *vs;
  PetscBool              par;
  PetscErrorCode         ierr;

  if (ctx->ops->getremotecount) {
    (*ctx->ops->getremotecount)(ctx,send,num_procs,num_entries);
  } else {
    vs  = (VecScatter_MPI_General*)(send ? ctx->todata : ctx->fromdata);
    par = (vs->format == VEC_SCATTER_MPI_GENERAL)? PETSC_TRUE : PETSC_FALSE;
    if (num_procs)   *num_procs   = par ? vs->n             : 0;
    if (num_entries) *num_entries = par ? vs->starts[vs->n] : 0;
  }
  return(0);
}

/* Given a parallel VecScatter context, return a plan that represents the remote communication.
   Any output parameter can be NULL.

   Input Parameters:
+  ctx - the context
-  send - true to select the send info (i.e., todata), otherwise to select the recv info (i.e., fromdata)

   Output parameters:
+  n - number of remote processors
.  starts - starting point in indices for each proc. ATTENTION: starts[0] is not necessarily zero.
            Therefore, expressions like starts[i+1]-starts[i] and indices[starts[i]+j] work as
            expected for a CSR structure but buf[starts[i]+j] may be out of range if buf was allocated
            with length starts[n]-starts[0]. One should use buf[starts[i]-starts[0]+j] instead.
.  indices - indices of entries to send/recv
.  procs - ranks of remote processors
-  bs - block size

.seealso: VecScatterRestoreRemote_Private(), VecScatterGetRemoteOrdered_Private()
 */
PetscErrorCode VecScatterGetRemote_Private(VecScatter ctx,PetscBool send,PetscInt *n,const PetscInt **starts,const PetscInt **indices,const PetscMPIInt **procs,PetscInt *bs)
{
  VecScatter_MPI_General *vs;
  PetscBool              par;
  PetscErrorCode         ierr;

  if (ctx->ops->getremote) {
    (*ctx->ops->getremote)(ctx,send,n,starts,indices,procs,bs);
  } else {
    vs  = (VecScatter_MPI_General*)(send ? ctx->todata : ctx->fromdata);
    par = (vs->format == VEC_SCATTER_MPI_GENERAL)? PETSC_TRUE : PETSC_FALSE;
    if (n)       *n       = par ? vs->n       : 0;
    if (indices) *indices = par ? vs->indices : NULL;
    if (starts)  *starts  = par ? vs->starts  : NULL;
    if (procs)   *procs   = par ? vs->procs   : NULL;
    if (bs)      *bs      = par ? vs->bs      : 0;
  }
  return(0);
}
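
/*
   Usage sketch for the remote-access helpers above (illustrative only; sendbuf, the packed values
   and the MPI calls are hypothetical). The key point is the ATTENTION note above: starts[0] need
   not be zero, so a buffer allocated with the reported entry count must be addressed with that
   offset subtracted, e.g. sendbuf[starts[i]-starts[0]+j].

     PetscInt          n,bs,i,j,num_procs,num_entries;
     const PetscInt    *starts,*indices;
     const PetscMPIInt *procs;

     VecScatterGetRemoteCount_Private(ctx,PETSC_TRUE,&num_procs,&num_entries);
     ... allocate sendbuf with num_entries slots and num_procs requests ...
     VecScatterGetRemote_Private(ctx,PETSC_TRUE,&n,&starts,&indices,&procs,&bs);
     for (i=0; i<n; i++) {                          (one message per remote rank procs[i])
       for (j=starts[i]; j<starts[i+1]; j++) {
         sendbuf[j-starts[0]] = ... value for vector entry indices[j] ...;
       }
       ... MPI_Isend(&sendbuf[starts[i]-starts[0]],starts[i+1]-starts[i],...,procs[i],...) ...
     }
     VecScatterRestoreRemote_Private(ctx,PETSC_TRUE,&n,&starts,&indices,&procs,&bs);
*/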

/* Given a parallel VecScatter context, return a plan that represents the remote communication. Ranks of remote
   processors returned in procs must be sorted in ascending order. Any output parameter can be NULL.

   Input Parameters:
+  ctx - the context
-  send - true to select the send info (i.e., todata), otherwise to select the recv info (i.e., fromdata)

   Output parameters:
+  n - number of remote processors
.  starts - starting point in indices for each proc. ATTENTION: starts[0] is not necessarily zero.
            Therefore, expressions like starts[i+1]-starts[i] and indices[starts[i]+j] work as
            expected for a CSR structure but buf[starts[i]+j] may be out of range if buf was allocated
            with length starts[n]-starts[0]. One should use buf[starts[i]-starts[0]+j] instead.
.  indices - indices of entries to send/recv
.  procs - ranks of remote processors
-  bs - block size

.seealso: VecScatterRestoreRemoteOrdered_Private(), VecScatterGetRemote_Private()

   Notes:
   Output parameters like starts, indices must also be adapted according to the sorted ranks.
 */
PetscErrorCode VecScatterGetRemoteOrdered_Private(VecScatter ctx,PetscBool send,PetscInt *n,const PetscInt **starts,const PetscInt **indices,const PetscMPIInt **procs,PetscInt *bs)
{
  VecScatter_MPI_General *vs;
  PetscBool              par;
  PetscErrorCode         ierr;

  if (ctx->ops->getremoteordered) {
    (*ctx->ops->getremoteordered)(ctx,send,n,starts,indices,procs,bs);
  } else {
    vs  = (VecScatter_MPI_General*)(send ? ctx->todata : ctx->fromdata);
    par = (vs->format == VEC_SCATTER_MPI_GENERAL)? PETSC_TRUE : PETSC_FALSE;
    if (n)       *n       = par ? vs->n       : 0;
    if (indices) *indices = par ? vs->indices : NULL;
    if (starts)  *starts  = par ? vs->starts  : NULL;
    if (procs)   *procs   = par ? vs->procs   : NULL;
    if (bs)      *bs      = par ? vs->bs      : 0;
  }
#if defined(PETSC_USE_DEBUG)
  if (n && procs) {
    PetscInt i;
    /* from back to front to also handle cases *n=0 */
    for (i=*n-1; i>0; i--) { if ((*procs)[i-1] > (*procs)[i]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"procs[] are not ordered"); }
  }
#endif
  return(0);
}

/* Given a parallel VecScatter context, restore the plan returned by VecScatterGetRemote_Private. This gives a chance for
   an implementation to free memory allocated in the VecScatterGetRemote_Private call.

   Input Parameters:
+  ctx - the context
-  send - true to select the send info (i.e., todata), otherwise to select the recv info (i.e., fromdata)

   Output parameters:
+  n - number of remote processors
.  starts - starting point in indices for each proc
.  indices - indices of entries to send/recv
.  procs - ranks of remote processors
-  bs - block size

.seealso: VecScatterGetRemote_Private()
 */
PetscErrorCode VecScatterRestoreRemote_Private(VecScatter ctx,PetscBool send,PetscInt *n,const PetscInt **starts,const PetscInt **indices,const PetscMPIInt **procs,PetscInt *bs)
{
  PetscErrorCode ierr;

  if (ctx->ops->restoreremote) {
    (*ctx->ops->restoreremote)(ctx,send,n,starts,indices,procs,bs);
  } else {
    if (starts)  *starts  = NULL;
    if (indices) *indices = NULL;
    if (procs)   *procs   = NULL;
  }
  return(0);
}

/* Given a parallel VecScatter context, restore the plan returned by VecScatterGetRemoteOrdered_Private. This gives a chance for
   an implementation to free memory allocated in the VecScatterGetRemoteOrdered_Private call.

   Input Parameters:
+  ctx - the context
-  send - true to select the send info (i.e., todata), otherwise to select the recv info (i.e., fromdata)

   Output parameters:
+  n - number of remote processors
.  starts - starting point in indices for each proc
.  indices - indices of entries to send/recv
.  procs - ranks of remote processors
-  bs - block size

.seealso: VecScatterGetRemoteOrdered_Private()
 */
PetscErrorCode VecScatterRestoreRemoteOrdered_Private(VecScatter ctx,PetscBool send,PetscInt *n,const PetscInt **starts,const PetscInt **indices,const PetscMPIInt **procs,PetscInt *bs)
{
  if (ctx->ops->restoreremoteordered) {
    (*ctx->ops->restoreremoteordered)(ctx,send,n,starts,indices,procs,bs);
  } else {
    VecScatterRestoreRemote_Private(ctx,send,n,starts,indices,procs,bs);
  }
  return(0);
}

#if defined(PETSC_HAVE_CUDA)

/*@C
   VecScatterInitializeForGPU - Initializes a generalized scatter from one vector
   to another for GPU based computation.

   Input Parameters:
+  inctx - scatter context generated by VecScatterCreate()
-  x - the vector from which we scatter

   Level: intermediate

   Notes:
   Effectively, this function creates all the necessary indexing buffers and work
   vectors needed to move only those data points in a vector which need to
   be communicated across ranks. This is done the first time this function is
   called. Currently, this is only used in the context of the parallel SpMV call in
   MatMult_MPIAIJCUSPARSE.

   This function is executed before the call to MatMult. This enables the memory
   transfers to be overlapped with the MatMult SpMV kernel call.

.seealso: VecScatterFinalizeForGPU(), VecScatterCreate(), VecScatterEnd()
@*/
PETSC_EXTERN PetscErrorCode VecScatterInitializeForGPU(VecScatter inctx,Vec x)
{
  PetscInt       i,nrecvs,nsends,sbs,rbs,ns,nr;
  const PetscInt *sstarts,*rstarts,*sindices,*rindices;

  if (use_gpu_aware_mpi) return(0);
  VecScatterGetRemote_Private(inctx,PETSC_TRUE/*send*/, &nsends,&sstarts,&sindices,NULL/*procs*/,&sbs);
  VecScatterGetRemote_Private(inctx,PETSC_FALSE/*recv*/,&nrecvs,&rstarts,&rindices,NULL/*procs*/,&rbs);
  ns = nsends ? sstarts[nsends]-sstarts[0] : 0; /* s/rstarts[0] is not necessarily zero */
  nr = nrecvs ? rstarts[nrecvs]-rstarts[0] : 0;

  if (x->offloadmask != PETSC_OFFLOAD_UNALLOCATED && (nsends>0 || nrecvs>0)) {
    if (!inctx->spptr) {
      PetscInt k,*tindicesSends,*sindicesSends,*tindicesRecvs,*sindicesRecvs;
      /* Here we create indices for both the senders and receivers. */
      PetscMalloc1(ns,&tindicesSends);
      PetscMalloc1(nr,&tindicesRecvs);

      /* s/rindices and s/rstarts could be NULL when ns or nr is zero */
      if (ns) {PetscArraycpy(tindicesSends,&sindices[sstarts[0]],ns);}
      if (nr) {PetscArraycpy(tindicesRecvs,&rindices[rstarts[0]],nr);}

      PetscSortRemoveDupsInt(&ns,tindicesSends);
      PetscSortRemoveDupsInt(&nr,tindicesRecvs);

      PetscMalloc1(sbs*ns,&sindicesSends);
      PetscMalloc1(rbs*nr,&sindicesRecvs);

      /* sender indices */
      for (i=0; i<ns; i++) {
        for (k=0; k<sbs; k++) sindicesSends[i*sbs+k] = tindicesSends[i]+k;
      }
      PetscFree(tindicesSends);

      /* receiver indices */
      for (i=0; i<nr; i++) {
        for (k=0; k<rbs; k++) sindicesRecvs[i*rbs+k] = tindicesRecvs[i]+k;
      }
      PetscFree(tindicesRecvs);

      /* create GPU indices, work vectors, ... */
      VecScatterCUDAIndicesCreate_PtoP(ns*sbs,sindicesSends,nr*rbs,sindicesRecvs,(PetscCUDAIndices*)&inctx->spptr);
      PetscFree(sindicesSends);
      PetscFree(sindicesRecvs);
    }
  }
  return(0);
}

/*@C
   VecScatterFinalizeForGPU - Finalizes a generalized scatter from one vector to
   another for GPU based computation.

   Input Parameter:
.  inctx - scatter context generated by VecScatterCreate()

   Level: intermediate

   Notes:
   Effectively, this function resets the temporary buffer flags. Currently, this
   is only used in the context of the parallel SpMV call in MatMult_MPIAIJCUSPARSE.
   Once the MatMultAdd is finished, the GPU temporary buffers used for messaging are no longer valid.

.seealso: VecScatterInitializeForGPU(), VecScatterCreate(), VecScatterEnd()
@*/
PETSC_EXTERN PetscErrorCode VecScatterFinalizeForGPU(VecScatter inctx)
{
  return(0);
}
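
/*
   Usage sketch (illustrative only; lvec is a hypothetical sequential vector holding the needed
   off-process entries, as in MatMult_MPIAIJCUSPARSE): the initialize/finalize pair brackets the
   scatter so the host-device index buffers exist before messaging starts and are marked stale
   afterwards.

     VecScatterInitializeForGPU(ctx,x);
     VecScatterBegin(ctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);
     ... launch the purely local SpMV kernel here to overlap with the message traffic ...
     VecScatterEnd(ctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);
     VecScatterFinalizeForGPU(ctx);
*/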

#endif