petsc-3.7.7 2017-09-25
  1: /*
  2:  Implementation of the sequential cuda vectors.

  4:  This file contains the code that can be compiled with a C
  5:  compiler.  The companion file veccuda2.cu contains the code that
  6:  must be compiled with nvcc or a C++ compiler.
  7:  */

  9: #define PETSC_SKIP_SPINLOCK

 11: #include <petscconf.h>
 12: #include <petsc/private/vecimpl.h>          /*I <petscvec.h> I*/
 13: #include <../src/vec/vec/impls/dvecimpl.h>
 14: #include <../src/vec/vec/impls/seq/seqcuda/cudavecimpl.h>

 18: /*
 19:     Allocates space for the vector array on the Host if it does not exist.
 20:     Does NOT change the PetscCUDAFlag for the vector
 21:     Does NOT zero the CUDA array
 22:  */
 23: PetscErrorCode VecCUDAAllocateCheckHost(Vec v)
 24: {
 26:   PetscScalar    *array;
 27:   Vec_Seq        *s = (Vec_Seq*)v->data;
 28:   PetscInt       n = v->map->n;

 31:   if (!s) {
 32:     PetscNewLog((PetscObject)v,&s);
 33:     v->data = s;
 34:   }
 35:   if (!s->array) {
 36:     PetscMalloc1(n,&array);
 37:     PetscLogObjectMemory((PetscObject)v,n*sizeof(PetscScalar));
 38:     s->array           = array;
 39:     s->array_allocated = array;
 40:     if (v->valid_GPU_array == PETSC_CUDA_UNALLOCATED) {
 41:       v->valid_GPU_array = PETSC_CUDA_CPU;
 42:     }
 43:   }
 44:   return(0);
 45: }

 49: PetscErrorCode VecCopy_SeqCUDA_Private(Vec xin,Vec yin)
 50: {
 51:   PetscScalar       *ya;
 52:   const PetscScalar *xa;
 53:   PetscErrorCode    ierr;

 56:   VecCUDAAllocateCheckHost(xin);
 57:   VecCUDAAllocateCheckHost(yin);
 58:   if (xin != yin) {
 59:     VecGetArrayRead(xin,&xa);
 60:     VecGetArray(yin,&ya);
 61:     PetscMemcpy(ya,xa,xin->map->n*sizeof(PetscScalar));
 62:     VecRestoreArrayRead(xin,&xa);
 63:     VecRestoreArray(yin,&ya);
 64:   }
 65:   return(0);
 66: }

 70: PetscErrorCode VecSetRandom_SeqCUDA_Private(Vec xin,PetscRandom r)
 71: {
 73:   PetscInt       n = xin->map->n,i;
 74:   PetscScalar    *xx;

 77:   VecGetArray(xin,&xx);
 78:   for (i=0; i<n; i++) { PetscRandomGetValue(r,&xx[i]); }
 79:   VecRestoreArray(xin,&xx);
 80:   return(0);
 81: }

 85: PetscErrorCode VecDestroy_SeqCUDA_Private(Vec v)
 86: {
 87:   Vec_Seq        *vs = (Vec_Seq*)v->data;

 91:   PetscObjectSAWsViewOff(v);
 92: #if defined(PETSC_USE_LOG)
 93:   PetscLogObjectState((PetscObject)v,"Length=%D",v->map->n);
 94: #endif
 95:   if (vs) {
 96:     if (vs->array_allocated) { PetscFree(vs->array_allocated); }
 97:     PetscFree(vs);
 98:   }
 99:   return(0);
100: }

104: PetscErrorCode VecResetArray_SeqCUDA_Private(Vec vin)
105: {
106:   Vec_Seq *v = (Vec_Seq*)vin->data;

109:   v->array         = v->unplacedarray;
110:   v->unplacedarray = 0;
111:   return(0);
112: }

116: PetscErrorCode VecCUDAAllocateCheck_Public(Vec v)
117: {

121:   VecCUDAAllocateCheck(v);
122:   return(0);
123: }

127: PetscErrorCode VecCUDACopyToGPU_Public(Vec v)
128: {

132:   VecCUDACopyToGPU(v);
133:   return(0);
134: }

138: /*
139:     VecCUDACopyToGPUSome_Public - Copies certain entries down to the GPU from the CPU of a vector

141:    Input Parameters:
142: .    v - the vector
143: .    indices - the requested indices, this should be created with CUDAIndicesCreate()

145: */
146: PetscErrorCode VecCUDACopyToGPUSome_Public(Vec v,PetscCUDAIndices ci)
147: {

151:   VecCUDACopyToGPUSome(v,ci);
152:   return(0);
153: }

157: /*
158:   VecCUDACopyFromGPUSome_Public - Copies certain entries up to the CPU from the GPU of a vector

160:   Input Parameters:
161:  +    v - the vector
162:  -    indices - the requested indices, this should be created with CUDAIndicesCreate()
163: */
164: PetscErrorCode VecCUDACopyFromGPUSome_Public(Vec v,PetscCUDAIndices ci)
165: {

169:   VecCUDACopyFromGPUSome(v,ci);
170:   return(0);
171: }

175: PetscErrorCode VecSetRandom_SeqCUDA(Vec xin,PetscRandom r)
176: {

180:   VecSetRandom_SeqCUDA_Private(xin,r);
181:   xin->valid_GPU_array = PETSC_CUDA_CPU;
182:   return(0);
183: }

187: PetscErrorCode VecResetArray_SeqCUDA(Vec vin)
188: {

192:   VecCUDACopyFromGPU(vin);
193:   VecResetArray_SeqCUDA_Private(vin);
194:   vin->valid_GPU_array = PETSC_CUDA_CPU;
195:   return(0);
196: }

200: PetscErrorCode VecPlaceArray_SeqCUDA(Vec vin,const PetscScalar *a)
201: {

205:   VecCUDACopyFromGPU(vin);
206:   VecPlaceArray_Seq(vin,a);
207:   vin->valid_GPU_array = PETSC_CUDA_CPU;
208:   return(0);
209: }

213: PetscErrorCode VecReplaceArray_SeqCUDA(Vec vin,const PetscScalar *a)
214: {

218:   VecCUDACopyFromGPU(vin);
219:   VecReplaceArray_Seq(vin,a);
220:   vin->valid_GPU_array = PETSC_CUDA_CPU;
221:   return(0);
222: }

226: /*@
227:  VecCreateSeqCUDA - Creates a standard, sequential array-style vector.

229:  Collective on MPI_Comm

231:  Input Parameter:
232:  .  comm - the communicator, should be PETSC_COMM_SELF
233:  .  n - the vector length

235:  Output Parameter:
236:  .  V - the vector

238:  Notes:
239:  Use VecDuplicate() or VecDuplicateVecs() to form additional vectors of the
240:  same type as an existing vector.

242:  Level: intermediate

244:  Concepts: vectors^creating sequential

246:  .seealso: VecCreateMPI(), VecCreate(), VecDuplicate(), VecDuplicateVecs(), VecCreateGhost()
247:  @*/
248: PetscErrorCode VecCreateSeqCUDA(MPI_Comm comm,PetscInt n,Vec *v)
249: {

253:   VecCreate(comm,v);
254:   VecSetSizes(*v,n,n);
255:   VecSetType(*v,VECSEQCUDA);
256:   return(0);
257: }

261: PetscErrorCode VecDuplicate_SeqCUDA(Vec win,Vec *V)
262: {

266:   VecCreateSeqCUDA(PetscObjectComm((PetscObject)win),win->map->n,V);
267:   PetscLayoutReference(win->map,&(*V)->map);
268:   PetscObjectListDuplicate(((PetscObject)win)->olist,&((PetscObject)(*V))->olist);
269:   PetscFunctionListDuplicate(((PetscObject)win)->qlist,&((PetscObject)(*V))->qlist);
270:   (*V)->stash.ignorenegidx = win->stash.ignorenegidx;
271:   return(0);
272: }

276: PETSC_EXTERN PetscErrorCode VecCreate_SeqCUDA(Vec V)
277: {
279:   PetscMPIInt    size;

282:   MPI_Comm_size(PetscObjectComm((PetscObject)V),&size);
283:   if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot create VECSEQCUDA on more than one process");
284:   VecCreate_Seq_Private(V,0);
285:   PetscObjectChangeTypeName((PetscObject)V,VECSEQCUDA);

287:   V->ops->dot                    = VecDot_SeqCUDA;
288:   V->ops->norm                   = VecNorm_SeqCUDA;
289:   V->ops->tdot                   = VecTDot_SeqCUDA;
290:   V->ops->scale                  = VecScale_SeqCUDA;
291:   V->ops->copy                   = VecCopy_SeqCUDA;
292:   V->ops->set                    = VecSet_SeqCUDA;
293:   V->ops->swap                   = VecSwap_SeqCUDA;
294:   V->ops->axpy                   = VecAXPY_SeqCUDA;
295:   V->ops->axpby                  = VecAXPBY_SeqCUDA;
296:   V->ops->axpbypcz               = VecAXPBYPCZ_SeqCUDA;
297:   V->ops->pointwisemult          = VecPointwiseMult_SeqCUDA;
298:   V->ops->pointwisedivide        = VecPointwiseDivide_SeqCUDA;
299:   V->ops->setrandom              = VecSetRandom_SeqCUDA;
300:   V->ops->dot_local              = VecDot_SeqCUDA;
301:   V->ops->tdot_local             = VecTDot_SeqCUDA;
302:   V->ops->norm_local             = VecNorm_SeqCUDA;
303:   V->ops->mdot_local             = VecMDot_SeqCUDA;
304:   V->ops->maxpy                  = VecMAXPY_SeqCUDA;
305:   V->ops->mdot                   = VecMDot_SeqCUDA;
306:   V->ops->aypx                   = VecAYPX_SeqCUDA;
307:   V->ops->waxpy                  = VecWAXPY_SeqCUDA;
308:   V->ops->dotnorm2               = VecDotNorm2_SeqCUDA;
309:   V->ops->placearray             = VecPlaceArray_SeqCUDA;
310:   V->ops->replacearray           = VecReplaceArray_SeqCUDA;
311:   V->ops->resetarray             = VecResetArray_SeqCUDA;
312:   V->ops->destroy                = VecDestroy_SeqCUDA;
313:   V->ops->duplicate              = VecDuplicate_SeqCUDA;
314:   V->ops->conjugate              = VecConjugate_SeqCUDA;
315:   V->ops->getlocalvector         = VecGetLocalVector_SeqCUDA;
316:   V->ops->restorelocalvector     = VecRestoreLocalVector_SeqCUDA;
317:   V->ops->getlocalvectorread     = VecGetLocalVector_SeqCUDA;
318:   V->ops->restorelocalvectorread = VecRestoreLocalVector_SeqCUDA;

320:   VecCUDAAllocateCheck(V);
321:   V->valid_GPU_array = PETSC_CUDA_GPU;
322:   VecSet(V,0.0);
323:   return(0);
324: }