Actual source code: veccuda.c
petsc-3.7.7 2017-09-25
1: /*
2: Implementation of the sequential cuda vectors.
4: This file contains the code that can be compiled with a C
5: compiler. The companion file veccuda2.cu contains the code that
6: must be compiled with nvcc or a C++ compiler.
7: */
9: #define PETSC_SKIP_SPINLOCK
11: #include <petscconf.h>
12: #include <petsc/private/vecimpl.h> /*I <petscvec.h> I*/
13: #include <../src/vec/vec/impls/dvecimpl.h>
14: #include <../src/vec/vec/impls/seq/seqcuda/cudavecimpl.h>
18: /*
19: Allocates space for the vector array on the Host if it does not exist.
20: Does NOT change the PetscCUDAFlag for the vector
21: Does NOT zero the CUDA array
22: */
23: PetscErrorCode VecCUDAAllocateCheckHost(Vec v)
24: {
26: PetscScalar *array;
27: Vec_Seq *s = (Vec_Seq*)v->data;
28: PetscInt n = v->map->n;
31: if (!s) {
32: PetscNewLog((PetscObject)v,&s);
33: v->data = s;
34: }
35: if (!s->array) {
36: PetscMalloc1(n,&array);
37: PetscLogObjectMemory((PetscObject)v,n*sizeof(PetscScalar));
38: s->array = array;
39: s->array_allocated = array;
40: if (v->valid_GPU_array == PETSC_CUDA_UNALLOCATED) {
41: v->valid_GPU_array = PETSC_CUDA_CPU;
42: }
43: }
44: return(0);
45: }
49: PetscErrorCode VecCopy_SeqCUDA_Private(Vec xin,Vec yin)
50: {
51: PetscScalar *ya;
52: const PetscScalar *xa;
53: PetscErrorCode ierr;
56: VecCUDAAllocateCheckHost(xin);
57: VecCUDAAllocateCheckHost(yin);
58: if (xin != yin) {
59: VecGetArrayRead(xin,&xa);
60: VecGetArray(yin,&ya);
61: PetscMemcpy(ya,xa,xin->map->n*sizeof(PetscScalar));
62: VecRestoreArrayRead(xin,&xa);
63: VecRestoreArray(yin,&ya);
64: }
65: return(0);
66: }
70: PetscErrorCode VecSetRandom_SeqCUDA_Private(Vec xin,PetscRandom r)
71: {
73: PetscInt n = xin->map->n,i;
74: PetscScalar *xx;
77: VecGetArray(xin,&xx);
78: for (i=0; i<n; i++) { PetscRandomGetValue(r,&xx[i]); }
79: VecRestoreArray(xin,&xx);
80: return(0);
81: }
85: PetscErrorCode VecDestroy_SeqCUDA_Private(Vec v)
86: {
87: Vec_Seq *vs = (Vec_Seq*)v->data;
91: PetscObjectSAWsViewOff(v);
92: #if defined(PETSC_USE_LOG)
93: PetscLogObjectState((PetscObject)v,"Length=%D",v->map->n);
94: #endif
95: if (vs) {
96: if (vs->array_allocated) { PetscFree(vs->array_allocated); }
97: PetscFree(vs);
98: }
99: return(0);
100: }
104: PetscErrorCode VecResetArray_SeqCUDA_Private(Vec vin)
105: {
106: Vec_Seq *v = (Vec_Seq*)vin->data;
109: v->array = v->unplacedarray;
110: v->unplacedarray = 0;
111: return(0);
112: }
116: PetscErrorCode VecCUDAAllocateCheck_Public(Vec v)
117: {
121: VecCUDAAllocateCheck(v);
122: return(0);
123: }
127: PetscErrorCode VecCUDACopyToGPU_Public(Vec v)
128: {
132: VecCUDACopyToGPU(v);
133: return(0);
134: }
138: /*
139: VecCUDACopyToGPUSome_Public - Copies certain entries down to the GPU from the CPU of a vector
141: Input Parameters:
142: . v - the vector
143: . indices - the requested indices, this should be created with CUDAIndicesCreate()
145: */
146: PetscErrorCode VecCUDACopyToGPUSome_Public(Vec v,PetscCUDAIndices ci)
147: {
151: VecCUDACopyToGPUSome(v,ci);
152: return(0);
153: }
157: /*
158: VecCUDACopyFromGPUSome_Public - Copies certain entries up to the CPU from the GPU of a vector
160: Input Parameters:
161: + v - the vector
162: - indices - the requested indices, this should be created with CUDAIndicesCreate()
163: */
164: PetscErrorCode VecCUDACopyFromGPUSome_Public(Vec v,PetscCUDAIndices ci)
165: {
169: VecCUDACopyFromGPUSome(v,ci);
170: return(0);
171: }
175: PetscErrorCode VecSetRandom_SeqCUDA(Vec xin,PetscRandom r)
176: {
180: VecSetRandom_SeqCUDA_Private(xin,r);
181: xin->valid_GPU_array = PETSC_CUDA_CPU;
182: return(0);
183: }
187: PetscErrorCode VecResetArray_SeqCUDA(Vec vin)
188: {
192: VecCUDACopyFromGPU(vin);
193: VecResetArray_SeqCUDA_Private(vin);
194: vin->valid_GPU_array = PETSC_CUDA_CPU;
195: return(0);
196: }
200: PetscErrorCode VecPlaceArray_SeqCUDA(Vec vin,const PetscScalar *a)
201: {
205: VecCUDACopyFromGPU(vin);
206: VecPlaceArray_Seq(vin,a);
207: vin->valid_GPU_array = PETSC_CUDA_CPU;
208: return(0);
209: }
213: PetscErrorCode VecReplaceArray_SeqCUDA(Vec vin,const PetscScalar *a)
214: {
218: VecCUDACopyFromGPU(vin);
219: VecReplaceArray_Seq(vin,a);
220: vin->valid_GPU_array = PETSC_CUDA_CPU;
221: return(0);
222: }
226: /*@
227: VecCreateSeqCUDA - Creates a standard, sequential array-style vector.
229: Collective on MPI_Comm
231: Input Parameter:
232: . comm - the communicator, should be PETSC_COMM_SELF
233: . n - the vector length
235: Output Parameter:
236: . V - the vector
238: Notes:
239: Use VecDuplicate() or VecDuplicateVecs() to form additional vectors of the
240: same type as an existing vector.
242: Level: intermediate
244: Concepts: vectors^creating sequential
246: .seealso: VecCreateMPI(), VecCreate(), VecDuplicate(), VecDuplicateVecs(), VecCreateGhost()
247: @*/
248: PetscErrorCode VecCreateSeqCUDA(MPI_Comm comm,PetscInt n,Vec *v)
249: {
253: VecCreate(comm,v);
254: VecSetSizes(*v,n,n);
255: VecSetType(*v,VECSEQCUDA);
256: return(0);
257: }
261: PetscErrorCode VecDuplicate_SeqCUDA(Vec win,Vec *V)
262: {
266: VecCreateSeqCUDA(PetscObjectComm((PetscObject)win),win->map->n,V);
267: PetscLayoutReference(win->map,&(*V)->map);
268: PetscObjectListDuplicate(((PetscObject)win)->olist,&((PetscObject)(*V))->olist);
269: PetscFunctionListDuplicate(((PetscObject)win)->qlist,&((PetscObject)(*V))->qlist);
270: (*V)->stash.ignorenegidx = win->stash.ignorenegidx;
271: return(0);
272: }
276: PETSC_EXTERN PetscErrorCode VecCreate_SeqCUDA(Vec V)
277: {
279: PetscMPIInt size;
282: MPI_Comm_size(PetscObjectComm((PetscObject)V),&size);
283: if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot create VECSEQCUDA on more than one process");
284: VecCreate_Seq_Private(V,0);
285: PetscObjectChangeTypeName((PetscObject)V,VECSEQCUDA);
287: V->ops->dot = VecDot_SeqCUDA;
288: V->ops->norm = VecNorm_SeqCUDA;
289: V->ops->tdot = VecTDot_SeqCUDA;
290: V->ops->scale = VecScale_SeqCUDA;
291: V->ops->copy = VecCopy_SeqCUDA;
292: V->ops->set = VecSet_SeqCUDA;
293: V->ops->swap = VecSwap_SeqCUDA;
294: V->ops->axpy = VecAXPY_SeqCUDA;
295: V->ops->axpby = VecAXPBY_SeqCUDA;
296: V->ops->axpbypcz = VecAXPBYPCZ_SeqCUDA;
297: V->ops->pointwisemult = VecPointwiseMult_SeqCUDA;
298: V->ops->pointwisedivide = VecPointwiseDivide_SeqCUDA;
299: V->ops->setrandom = VecSetRandom_SeqCUDA;
300: V->ops->dot_local = VecDot_SeqCUDA;
301: V->ops->tdot_local = VecTDot_SeqCUDA;
302: V->ops->norm_local = VecNorm_SeqCUDA;
303: V->ops->mdot_local = VecMDot_SeqCUDA;
304: V->ops->maxpy = VecMAXPY_SeqCUDA;
305: V->ops->mdot = VecMDot_SeqCUDA;
306: V->ops->aypx = VecAYPX_SeqCUDA;
307: V->ops->waxpy = VecWAXPY_SeqCUDA;
308: V->ops->dotnorm2 = VecDotNorm2_SeqCUDA;
309: V->ops->placearray = VecPlaceArray_SeqCUDA;
310: V->ops->replacearray = VecReplaceArray_SeqCUDA;
311: V->ops->resetarray = VecResetArray_SeqCUDA;
312: V->ops->destroy = VecDestroy_SeqCUDA;
313: V->ops->duplicate = VecDuplicate_SeqCUDA;
314: V->ops->conjugate = VecConjugate_SeqCUDA;
315: V->ops->getlocalvector = VecGetLocalVector_SeqCUDA;
316: V->ops->restorelocalvector = VecRestoreLocalVector_SeqCUDA;
317: V->ops->getlocalvectorread = VecGetLocalVector_SeqCUDA;
318: V->ops->restorelocalvectorread = VecRestoreLocalVector_SeqCUDA;
320: VecCUDAAllocateCheck(V);
321: V->valid_GPU_array = PETSC_CUDA_GPU;
322: VecSet(V,0.0);
323: return(0);
324: }