1: #include <petsc/private/vecscatterimpl.h> 2: #if defined(PETSC_HAVE_CUDA)
3: #include <../src/vec/vec/impls/seq/seqcuda/cudavecimpl.h> 4: #endif
5: /* ------------------------------------------------------------------*/
6: /*@
7: VecScatterGetMerged - Returns true if the scatter is completed in the VecScatterBegin()
8: and the VecScatterEnd() does nothing
10: Not Collective
12: Input Parameter:
13: . ctx - scatter context created with VecScatterCreate()
15: Output Parameter:
16: . flg - PETSC_TRUE if the VecScatterBegin/End() are all done during the VecScatterBegin()
18: Level: developer
20: .seealso: VecScatterCreate(), VecScatterEnd(), VecScatterBegin()
21: @*/
22: PetscErrorCodeVecScatterGetMerged(VecScatter ctx,PetscBool *flg) 23: {
26: *flg = ctx->beginandendtogether;
27: return(0);
28: }
30: /*@
31: VecScatterBegin - Begins a generalized scatter from one vector to
32: another. Complete the scattering phase with VecScatterEnd().
Neighbor-wise Collective on VecScatter and Vec

Input Parameters:
37: + ctx - scatter context generated by VecScatterCreate()
38: . x - the vector from which we scatter
39: . y - the vector to which we scatter
40: . addv - either ADD_VALUES or INSERT_VALUES, with INSERT_VALUES mode any location
41: not scattered to retains its old value; i.e. the vector is NOT first zeroed.
42: - mode - the scattering mode, usually SCATTER_FORWARD. The available modes are:
SCATTER_FORWARD or SCATTER_REVERSE

Level: intermediate
48: Options Database: See VecScatterCreate()
50: Notes:
51: The vectors x and y need not be the same vectors used in the call
52: to VecScatterCreate(), but x must have the same parallel data layout
53: as that passed in as the x to VecScatterCreate(), similarly for the y.
54: Most likely they have been obtained from VecDuplicate().
56: You cannot change the values in the input vector between the calls to VecScatterBegin()
57: and VecScatterEnd().
59: If you use SCATTER_REVERSE the two arguments x and y should be reversed, from
60: the SCATTER_FORWARD.
62: y[iy[i]] = x[ix[i]], for i=0,...,ni-1
64: This scatter is far more general than the conventional
65: scatter, since it can be a gather or a scatter or a combination,
66: depending on the indices ix and iy. If x is a parallel vector and y
67: is sequential, VecScatterBegin() can serve to gather values to a
68: single processor. Similarly, if y is parallel and x sequential, the
69: routine can scatter from one processor to many processors.
71: Concepts: scatter^between vectors
72: Concepts: gather^between vectors
74: .seealso: VecScatterCreate(), VecScatterEnd()
75: @*/
76: PetscErrorCodeVecScatterBegin(VecScatter ctx,Vec x,Vec y,InsertMode addv,ScatterMode mode) 77: {
79: #if defined(PETSC_USE_DEBUG)
80: PetscInt to_n,from_n;
81: #endif
86: if (ctx->inuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE," Scatter ctx already in use");
88: #if defined(PETSC_USE_DEBUG)
89: /*
90: Error checking to make sure these vectors match the vectors used
91: to create the vector scatter context. -1 in the from_n and to_n indicate the
92: vector lengths are unknown (for example with mapped scatters) and thus
93: no error checking is performed.
94: */
95: if (ctx->from_n >= 0 && ctx->to_n >= 0) {
96: VecGetLocalSize(x,&from_n);
97: VecGetLocalSize(y,&to_n);
98: if (mode & SCATTER_REVERSE) {
99: if (to_n != ctx->from_n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Vector wrong size %D for scatter %D (scatter reverse and vector to != ctx from size)",to_n,ctx->from_n);
100: if (from_n != ctx->to_n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Vector wrong size %D for scatter %D (scatter reverse and vector from != ctx to size)",from_n,ctx->to_n);
101: } else {
102: if (to_n != ctx->to_n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Vector wrong size %D for scatter %D (scatter forward and vector to != ctx to size)",to_n,ctx->to_n);
103: if (from_n != ctx->from_n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Vector wrong size %D for scatter %D (scatter forward and vector from != ctx from size)",from_n,ctx->from_n);
104: }
105: }
106: #endif
108: ctx->inuse = PETSC_TRUE;
109: PetscLogEventBegin(VEC_ScatterBegin,ctx,x,y,0);
110: (*ctx->ops->begin)(ctx,x,y,addv,mode);
111: if (ctx->beginandendtogether && ctx->ops->end) {
112: ctx->inuse = PETSC_FALSE;
113: (*ctx->ops->end)(ctx,x,y,addv,mode);
114: }
115: PetscLogEventEnd(VEC_ScatterBegin,ctx,x,y,0);
116: return(0);
117: }
119: /* --------------------------------------------------------------------*/
120: /*@
121: VecScatterEnd - Ends a generalized scatter from one vector to another. Call
122: after first calling VecScatterBegin().
Neighbor-wise Collective on VecScatter and Vec

Input Parameters:
127: + ctx - scatter context generated by VecScatterCreate()
128: . x - the vector from which we scatter
129: . y - the vector to which we scatter
130: . addv - either ADD_VALUES or INSERT_VALUES.
131: - mode - the scattering mode, usually SCATTER_FORWARD. The available modes are:
SCATTER_FORWARD, SCATTER_REVERSE

Level: intermediate
136: Notes:
137: If you use SCATTER_REVERSE the arguments x and y should be reversed, from the SCATTER_FORWARD.
139: y[iy[i]] = x[ix[i]], for i=0,...,ni-1
141: .seealso: VecScatterBegin(), VecScatterCreate()
142: @*/
143: PetscErrorCodeVecScatterEnd(VecScatter ctx,Vec x,Vec y,InsertMode addv,ScatterMode mode)144: {
151: ctx->inuse = PETSC_FALSE;
152: if (!ctx->ops->end) return(0);
153: if (!ctx->beginandendtogether) {
154: PetscLogEventBegin(VEC_ScatterEnd,ctx,x,y,0);
155: (*(ctx)->ops->end)(ctx,x,y,addv,mode);
156: PetscLogEventEnd(VEC_ScatterEnd,ctx,x,y,0);
157: }
158: return(0);
159: }
161: /*@
162: VecScatterDestroy - Destroys a scatter context created by VecScatterCreate()
Collective on VecScatter

Input Parameter:
167: . ctx - the scatter context
169: Level: intermediate
171: .seealso: VecScatterCreate(), VecScatterCopy()
172: @*/
173: PetscErrorCodeVecScatterDestroy(VecScatter *ctx)174: {
178: if (!*ctx) return(0);
180: if ((*ctx)->inuse && ((PetscObject)(*ctx))->refct == 1) SETERRQ(((PetscObject)(*ctx))->comm,PETSC_ERR_ARG_WRONGSTATE,"Scatter context is in use");
181: if (--((PetscObject)(*ctx))->refct > 0) {*ctx = 0; return(0);}
183: /* if memory was published with SAWs then destroy it */
184: PetscObjectSAWsViewOff((PetscObject)(*ctx));
185: if ((*ctx)->ops->destroy) {(*(*ctx)->ops->destroy)(*ctx);}
186: #if defined(PETSC_HAVE_CUDA)
187: VecScatterCUDAIndicesDestroy((PetscCUDAIndices*)&((*ctx)->spptr));
188: #endif
189: PetscHeaderDestroy(ctx);
190: return(0);
191: }
193: /*@
194: VecScatterSetUp - Sets up the VecScatter to be able to actually scatter information between vectors
Collective on VecScatter

Input Parameter:
199: . ctx - the scatter context
201: Level: intermediate
203: .seealso: VecScatterCreate(), VecScatterCopy()
204: @*/
205: PetscErrorCodeVecScatterSetUp(VecScatter ctx)206: {
211: (*ctx->ops->setup)(ctx);
212: return(0);
213: }
215: /*@
216: VecScatterCopy - Makes a copy of a scatter context.
Collective on VecScatter

Input Parameter:
221: . sctx - the scatter context
223: Output Parameter:
224: . ctx - the context copy
226: Level: advanced
228: .seealso: VecScatterCreate(), VecScatterDestroy()
229: @*/
230: PetscErrorCodeVecScatterCopy(VecScatter sctx,VecScatter *ctx)231: {
233: VecScatterType type;
238: if (!sctx->ops->copy) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot copy this type");
239: PetscHeaderCreate(*ctx,VEC_SCATTER_CLASSID,"VecScatter","VecScatter","Vec",PetscObjectComm((PetscObject)sctx),VecScatterDestroy,VecScatterView);
240: (*ctx)->to_n = sctx->to_n;
241: (*ctx)->from_n = sctx->from_n;
242: (*sctx->ops->copy)(sctx,*ctx);
244: VecScatterGetType(sctx,&type);
245: PetscObjectChangeTypeName((PetscObject)(*ctx),type);
246: return(0);
247: }
249: /* ------------------------------------------------------------------*/
250: /*@C
251: VecScatterView - Views a vector scatter context.
Collective on VecScatter

Input Parameters:
256: + ctx - the scatter context
257: - viewer - the viewer for displaying the context
259: Level: intermediate
261: @*/
262: PetscErrorCodeVecScatterView(VecScatter ctx,PetscViewer viewer)263: {
268: if (!viewer) {
269: PetscViewerASCIIGetStdout(PetscObjectComm((PetscObject)ctx),&viewer);
270: }
272: if (ctx->ops->view) {
273: (*ctx->ops->view)(ctx,viewer);
274: }
275: return(0);
276: }
278: /*@C
279: VecScatterRemap - Remaps the "from" and "to" indices in a
280: vector scatter context. FOR EXPERTS ONLY!
Collective on VecScatter

Input Parameters:
285: + scat - vector scatter context
286: . tomap - remapping plan for "to" indices (may be NULL).
287: - frommap - remapping plan for "from" indices (may be NULL)
289: Level: developer
291: Notes:
292: In the parallel case the todata contains indices from where the data is taken
293: (and then sent to others)! The fromdata contains indices from where the received
294: data is finally put locally.
296: In the sequential case the todata contains indices from where the data is put
297: and the fromdata contains indices from where the data is taken from.
This is backwards from the parallel case!
300: @*/
301: PetscErrorCodeVecScatterRemap(VecScatter scat,PetscInt tomap[],PetscInt frommap[])302: {
303: VecScatter_MPI_General *to,*from;
304: VecScatter_Seq_General *sgto,*sgfrom;
305: VecScatter_Seq_Stride *ssto;
306: PetscInt i,ierr;
313: if (scat->ops->remap) {
314: (*scat->ops->remap)(scat,tomap,frommap);
315: } else {
316: to = (VecScatter_MPI_General*)scat->todata;
317: from = (VecScatter_MPI_General*)scat->fromdata;
318: ssto = (VecScatter_Seq_Stride*)scat->todata;
319: sgto = (VecScatter_Seq_General*)scat->todata;
320: sgfrom = (VecScatter_Seq_General*)scat->fromdata;
322: /* remap indices from where we take/read data */
323: if (tomap) {
324: if (to->format == VEC_SCATTER_MPI_TOALL) {
325: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Not for to all scatter");
326: } else if (to->format == VEC_SCATTER_MPI_GENERAL) {
327: /* handle off processor parts */
328: for (i=0; i<to->starts[to->n]; i++) to->indices[i] = tomap[to->indices[i]];
330: /* handle local part */
331: for (i=0; i<to->local.n; i++) to->local.vslots[i] = tomap[to->local.vslots[i]];
333: /* the memcpy optimizations in vecscatter was based on index patterns it has.
334: They need to be recalculated when indices are changed (remapped).
335: */
336: VecScatterMemcpyPlanDestroy_PtoP(to,from);
337: VecScatterMemcpyPlanCreate_PtoP(to,from);
338: } else if (sgfrom->format == VEC_SCATTER_SEQ_GENERAL) {
339: /* remap indices*/
340: for (i=0; i<sgfrom->n; i++) sgfrom->vslots[i] = tomap[sgfrom->vslots[i]];
341: /* update optimizations, which happen when it is a Stride1toSG, SGtoStride1 or SGToSG vecscatter */
342: if (ssto->format == VEC_SCATTER_SEQ_STRIDE && ssto->step == 1) {
343: PetscInt tmp[2];
344: tmp[0] = 0; tmp[1] = sgfrom->n;
345: VecScatterMemcpyPlanDestroy(&sgfrom->memcpy_plan);
346: VecScatterMemcpyPlanCreate_Index(1,tmp,sgfrom->vslots,1/*bs*/,&sgfrom->memcpy_plan);
347: } else if (sgto->format == VEC_SCATTER_SEQ_GENERAL) {
348: VecScatterMemcpyPlanDestroy(&sgto->memcpy_plan);;
349: VecScatterMemcpyPlanDestroy(&sgfrom->memcpy_plan);
350: VecScatterMemcpyPlanCreate_SGToSG(1/*bs*/,sgto,sgfrom);
351: }
352: } else if (sgfrom->format == VEC_SCATTER_SEQ_STRIDE) {
353: VecScatter_Seq_Stride *ssto = (VecScatter_Seq_Stride*)sgfrom;
355: /* if the remapping is the identity and stride is identity then skip remap */
356: if (ssto->step == 1 && ssto->first == 0) {
357: for (i=0; i<ssto->n; i++) {
358: if (tomap[i] != i) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Unable to remap such scatters");
359: }
360: } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Unable to remap such scatters");
361: } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Unable to remap such scatters");
362: }
363: }
364: if (frommap) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Unable to remap the FROM in scatters yet");
366: /*
367: Mark then vector lengths as unknown because we do not know the
368: lengths of the remapped vectors
369: */
370: scat->from_n = -1;
371: scat->to_n = -1;
372: return(0);
373: }
375: /* Given a parallel VecScatter context, return number of procs and vector entries involved in remote (i.e., off-process) communication
377: Input Parameters:
378: + ctx - the context (must be a parallel vecscatter)
379: - send - true to select the send info (i.e., todata), otherwise to select the recv info (i.e., fromdata)
381: Output parameters:
382: + num_procs - number of remote processors
383: - num_entries - number of vector entries to send or recv
386: .seealso: VecScatterGetRemote_Private(), VecScatterGetRemoteOrdered_Private()
388: Notes:
389: Sometimes PETSc internally needs to use the matrix-vector-multiply vecscatter context for other purposes. The client code
390: usually only uses MPI_Send/Recv. This group of subroutines provides info needed for such uses.
391: */
392: PetscErrorCode VecScatterGetRemoteCount_Private(VecScatter ctx,PetscBool send,PetscInt *num_procs,PetscInt *num_entries)393: {
394: VecScatter_MPI_General *vs;
395: PetscBool par;
396: PetscErrorCode ierr;
399: if (ctx->ops->getremotecount) {
400: (*ctx->ops->getremotecount)(ctx,send,num_procs,num_entries);
401: } else {
402: vs = (VecScatter_MPI_General*)(send ? ctx->todata : ctx->fromdata);
403: par = (vs->format == VEC_SCATTER_MPI_GENERAL)? PETSC_TRUE : PETSC_FALSE;
404: if (num_procs) *num_procs = par ? vs->n : 0;
405: if (num_entries) *num_entries = par ? vs->starts[vs->n] : 0;
406: }
407: return(0);
408: }
410: /* Given a parallel VecScatter context, return a plan that represents the remote communication.
411: Any output parameter can be NULL.
413: Input Parameters:
414: + ctx - the context
415: - send - true to select the send info (i.e., todata), otherwise to select the recv info (i.e., fromdata)
417: Output parameters:
418: + n - number of remote processors
419: . starts - starting point in indices for each proc. ATTENTION: starts[0] is not necessarily zero.
420: Therefore, expressions like starts[i+1]-starts[i] and indices[starts[i]+j] work as
421: expected for a CSR structure but buf[starts[i]+j] may be out of range if buf was allocated
422: with length starts[n]-starts[0]. One should use buf[starts[i]-starts[0]+j] instead.
423: . indices - indices of entries to send/recv
424: . procs - ranks of remote processors
425: - bs - block size
427: .seealso: VecScatterRestoreRemote_Private(), VecScatterGetRemoteOrdered_Private()
428: */
429: PetscErrorCode VecScatterGetRemote_Private(VecScatter ctx,PetscBool send,PetscInt *n,const PetscInt **starts,const PetscInt **indices,const PetscMPIInt **procs,PetscInt *bs)430: {
431: VecScatter_MPI_General *vs;
432: PetscBool par;
433: PetscErrorCode ierr;
436: if (ctx->ops->getremote) {
437: (*ctx->ops->getremote)(ctx,send,n,starts,indices,procs,bs);
438: } else {
439: vs = (VecScatter_MPI_General*)(send ? ctx->todata : ctx->fromdata);
440: par = (vs->format == VEC_SCATTER_MPI_GENERAL)? PETSC_TRUE : PETSC_FALSE;
441: if (n) *n = par ? vs->n : 0;
442: if (indices) *indices = par ? vs->indices : NULL;
443: if (starts) *starts = par ? vs->starts : NULL;
444: if (procs) *procs = par ? vs->procs : NULL;
445: if (bs) *bs = par ? vs->bs : 0;
446: }
447: return(0);
448: }
451: /* Given a parallel VecScatter context, return a plan that represents the remote communication. Ranks of remote
452: processors returned in procs must be sorted in ascending order. Any output parameter can be NULL.
454: Input Parameters:
455: + ctx - the context
456: - send - true to select the send info (i.e., todata), otherwise to select the recv info (i.e., fromdata)
458: Output parameters:
459: + n - number of remote processors
460: . starts - starting point in indices for each proc. ATTENTION: starts[0] is not necessarily zero.
461: Therefore, expressions like starts[i+1]-starts[i] and indices[starts[i]+j] work as
462: expected for a CSR structure but buf[starts[i]+j] may be out of range if buf was allocated
463: with length starts[n]-starts[0]. One should use buf[starts[i]-starts[0]+j] instead.
464: . indices - indices of entries to send/recv
465: . procs - ranks of remote processors
466: - bs - block size
468: .seealso: VecScatterRestoreRemoteOrdered_Private(), VecScatterGetRemote_Private()
470: Notes:
471: Output parameters like starts, indices must also be adapted according to the sorted ranks.
472: */
473: PetscErrorCode VecScatterGetRemoteOrdered_Private(VecScatter ctx,PetscBool send,PetscInt *n,const PetscInt **starts,const PetscInt **indices,const PetscMPIInt **procs,PetscInt *bs)474: {
475: VecScatter_MPI_General *vs;
476: PetscBool par;
477: PetscErrorCode ierr;
480: if (ctx->ops->getremoteordered) {
481: (*ctx->ops->getremoteordered)(ctx,send,n,starts,indices,procs,bs);
482: } else {
483: vs = (VecScatter_MPI_General*)(send ? ctx->todata : ctx->fromdata);
484: par = (vs->format == VEC_SCATTER_MPI_GENERAL)? PETSC_TRUE : PETSC_FALSE;
485: if (n) *n = par ? vs->n : 0;
486: if (indices) *indices = par ? vs->indices : NULL;
487: if (starts) *starts = par ? vs->starts : NULL;
488: if (procs) *procs = par ? vs->procs : NULL;
489: if (bs) *bs = par ? vs->bs : 0;
490: }
491: #if defined(PETSC_USE_DEBUG)
492: if (n && procs) {
493: PetscInt i;
494: /* from back to front to also handle cases *n=0 */
495: for (i=*n-1; i>0; i--) { if ((*procs)[i-1] > (*procs)[i]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"procs[] are not ordered"); }
496: }
497: #endif
498: return(0);
499: }
501: /* Given a parallel VecScatter context, restore the plan returned by VecScatterGetRemote_Private. This gives a chance for
502: an implementation to free memory allocated in the VecScatterGetRemote_Private call.
504: Input Parameters:
505: + ctx - the context
506: - send - true to select the send info (i.e., todata), otherwise to select the recv info (i.e., fromdata)
508: Output parameters:
509: + n - number of remote processors
510: . starts - starting point in indices for each proc
511: . indices - indices of entries to send/recv
512: . procs - ranks of remote processors
513: - bs - block size
515: .seealso: VecScatterGetRemote_Private()
516: */
517: PetscErrorCode VecScatterRestoreRemote_Private(VecScatter ctx,PetscBool send,PetscInt *n,const PetscInt **starts,const PetscInt **indices,const PetscMPIInt **procs,PetscInt *bs)518: {
519: PetscErrorCode ierr;
522: if (ctx->ops->restoreremote) {
523: (*ctx->ops->restoreremote)(ctx,send,n,starts,indices,procs,bs);
524: } else {
525: if (starts) *starts = NULL;
526: if (indices) *indices = NULL;
527: if (procs) *procs = NULL;
528: }
529: return(0);
530: }
532: /* Given a parallel VecScatter context, restore the plan returned by VecScatterGetRemoteOrdered_Private. This gives a chance for
533: an implementation to free memory allocated in the VecScatterGetRemoteOrdered_Private call.
535: Input Parameters:
536: + ctx - the context
537: - send - true to select the send info (i.e., todata), otherwise to select the recv info (i.e., fromdata)
539: Output parameters:
540: + n - number of remote processors
541: . starts - starting point in indices for each proc
542: . indices - indices of entries to send/recv
543: . procs - ranks of remote processors
544: - bs - block size
546: .seealso: VecScatterGetRemoteOrdered_Private()
547: */
548: PetscErrorCode VecScatterRestoreRemoteOrdered_Private(VecScatter ctx,PetscBool send,PetscInt *n,const PetscInt **starts,const PetscInt **indices,const PetscMPIInt **procs,PetscInt *bs)549: {
552: if (ctx->ops->restoreremoteordered) {
553: (*ctx->ops->restoreremoteordered)(ctx,send,n,starts,indices,procs,bs);
554: } else {
555: VecScatterRestoreRemote_Private(ctx,send,n,starts,indices,procs,bs);
556: }
557: return(0);
558: }
560: #if defined(PETSC_HAVE_CUDA)
562: /*@C
563: VecScatterInitializeForGPU - Initializes a generalized scatter from one vector
564: to another for GPU based computation.
566: Input Parameters:
567: + inctx - scatter context generated by VecScatterCreate()
568: - x - the vector from which we scatter
570: Level: intermediate
572: Notes:
Effectively, this function creates all the necessary indexing buffers and work
vectors needed to move only those data points in a vector which need to
be communicated across ranks. This is done the first time this function is
called. Currently, this is only used in the context of the parallel SpMV call in
MatMult_MPIAIJCUSPARSE.
579: This function is executed before the call to MatMult. This enables the memory
580: transfers to be overlapped with the MatMult SpMV kernel call.
582: .seealso: VecScatterFinalizeForGPU(), VecScatterCreate(), VecScatterEnd()
583: @*/
584: PETSC_EXTERN PetscErrorCodeVecScatterInitializeForGPU(VecScatter inctx,Vec x)585: {
587: PetscInt i,nrecvs,nsends,sbs,rbs,ns,nr;
588: const PetscInt *sstarts,*rstarts,*sindices,*rindices;
591: VecScatterGetRemote_Private(inctx,PETSC_TRUE/*send*/, &nsends,&sstarts,&sindices,NULL/*procs*/,&sbs);
592: VecScatterGetRemote_Private(inctx,PETSC_FALSE/*recv*/,&nrecvs,&rstarts,&rindices,NULL/*procs*/,&rbs);
593: ns = nsends ? sstarts[nsends]-sstarts[0] : 0; /* s/rstarts[0] is not necessarily zero */
594: nr = nrecvs ? rstarts[nrecvs]-rstarts[0] : 0;
596: if (x->valid_GPU_array != PETSC_OFFLOAD_UNALLOCATED && (nsends>0 || nrecvs>0)) {
597: if (!inctx->spptr) {
598: PetscInt k,*tindicesSends,*sindicesSends,*tindicesRecvs,*sindicesRecvs;
599: /* Here we create indices for both the senders and receivers. */
600: PetscMalloc1(ns,&tindicesSends);
601: PetscMalloc1(nr,&tindicesRecvs);
603: /* s/rindices and s/rstarts could be NULL when ns or nr is zero */
604: if (ns) {PetscMemcpy(tindicesSends,&sindices[sstarts[0]],ns*sizeof(PetscInt));}
605: if (nr) {PetscMemcpy(tindicesRecvs,&rindices[rstarts[0]],nr*sizeof(PetscInt));}
607: PetscSortRemoveDupsInt(&ns,tindicesSends);
608: PetscSortRemoveDupsInt(&nr,tindicesRecvs);
610: PetscMalloc1(sbs*ns,&sindicesSends);
611: PetscMalloc1(rbs*nr,&sindicesRecvs);
613: /* sender indices */
614: for (i=0; i<ns; i++) {
615: for (k=0; k<sbs; k++) sindicesSends[i*sbs+k] = tindicesSends[i]+k;
616: }
617: PetscFree(tindicesSends);
619: /* receiver indices */
620: for (i=0; i<nr; i++) {
621: for (k=0; k<rbs; k++) sindicesRecvs[i*rbs+k] = tindicesRecvs[i]+k;
622: }
623: PetscFree(tindicesRecvs);
625: /* create GPU indices, work vectors, ... */
626: VecScatterCUDAIndicesCreate_PtoP(ns*sbs,sindicesSends,nr*rbs,sindicesRecvs,(PetscCUDAIndices*)&inctx->spptr);
627: PetscFree(sindicesSends);
628: PetscFree(sindicesRecvs);
629: }
630: }
631: return(0);
632: }
634: /*@C
635: VecScatterFinalizeForGPU - Finalizes a generalized scatter from one vector to
636: another for GPU based computation.
638: Input Parameter:
. inctx - scatter context generated by VecScatterCreate()
641: Level: intermediate
643: Notes:
Effectively, this function resets the temporary buffer flags. Currently, this
is only used in the context of the parallel SpMV call in MatMult_MPIAIJCUSPARSE.
Once the MatMultAdd is finished, the GPU temporary
647: buffers used for messaging are no longer valid.
649: .seealso: VecScatterInitializeForGPU(), VecScatterCreate(), VecScatterEnd()
650: @*/
651: PETSC_EXTERN PetscErrorCodeVecScatterFinalizeForGPU(VecScatter inctx)652: {
654: return(0);
655: }
657: #endif