Actual source code: vecviennacl.cxx
  1: /*
  2:    Implements the sequential ViennaCL vectors.
  3: */

  5: #include <petscconf.h>
  6: #include <petsc/private/vecimpl.h>
  7: #include <../src/vec/vec/impls/dvecimpl.h>
  8: #include <../src/vec/vec/impls/seq/seqviennacl/viennaclvecimpl.h>

 10: #include "viennacl/linalg/inner_prod.hpp"
 11: #include "viennacl/linalg/norm_1.hpp"
 12: #include "viennacl/linalg/norm_2.hpp"
 13: #include "viennacl/linalg/norm_inf.hpp"

 15: #ifdef VIENNACL_WITH_OPENCL
 16: #include "viennacl/ocl/backend.hpp"
 17: #endif

 19: PETSC_EXTERN PetscErrorCode VecViennaCLGetArray(Vec v, ViennaCLVector **a)
 20: {
 22:   *a   = 0;
 23:   VecViennaCLCopyToGPU(v);
 24:   *a   = ((Vec_ViennaCL*)v->spptr)->GPUarray;
 25:   ViennaCLWaitForGPU();
 26:   return 0;
 27: }

 29: PETSC_EXTERN PetscErrorCode VecViennaCLRestoreArray(Vec v, ViennaCLVector **a)
 30: {
 32:   v->offloadmask = PETSC_OFFLOAD_GPU;

 34:   PetscObjectStateIncrease((PetscObject)v);
 35:   return 0;
 36: }

 38: PETSC_EXTERN PetscErrorCode VecViennaCLGetArrayRead(Vec v, const ViennaCLVector **a)
 39: {
 41:   *a   = 0;
 42:   VecViennaCLCopyToGPU(v);
 43:   *a   = ((Vec_ViennaCL*)v->spptr)->GPUarray;
 44:   ViennaCLWaitForGPU();
 45:   return 0;
 46: }

 48: PETSC_EXTERN PetscErrorCode VecViennaCLRestoreArrayRead(Vec v, const ViennaCLVector **a)
 49: {
 51:   return 0;
 52: }

 54: PETSC_EXTERN PetscErrorCode VecViennaCLGetArrayWrite(Vec v, ViennaCLVector **a)
 55: {
 57:   *a   = 0;
 58:   VecViennaCLAllocateCheck(v);
 59:   *a   = ((Vec_ViennaCL*)v->spptr)->GPUarray;
 60:   ViennaCLWaitForGPU();
 61:   return 0;
 62: }

 64: PETSC_EXTERN PetscErrorCode VecViennaCLRestoreArrayWrite(Vec v, ViennaCLVector **a)
 65: {
 67:   v->offloadmask = PETSC_OFFLOAD_GPU;

 69:   PetscObjectStateIncrease((PetscObject)v);
 70:   return 0;
 71: }

 73: PETSC_EXTERN PetscErrorCode PetscViennaCLInit()
 74: {
 75:   char                 string[20];
 76:   PetscBool            flg,flg_cuda,flg_opencl,flg_openmp;

 78:   /* ViennaCL backend selection: CUDA, OpenCL, or OpenMP */
 79:   PetscOptionsGetString(NULL,NULL,"-viennacl_backend",string,sizeof(string),&flg);
 80:   if (flg) {
 81:     try {
 82:       PetscStrcasecmp(string,"cuda",&flg_cuda);
 83:       PetscStrcasecmp(string,"opencl",&flg_opencl);
 84:       PetscStrcasecmp(string,"openmp",&flg_openmp);

 86:       /* A default (sequential) CPU backend is always available - even if OpenMP is not enabled. */
 87:       if (flg_openmp) viennacl::backend::default_memory_type(viennacl::MAIN_MEMORY);
 88: #if defined(PETSC_HAVE_CUDA)
 89:       else if (flg_cuda) viennacl::backend::default_memory_type(viennacl::CUDA_MEMORY);
 90: #endif
 91: #if defined(PETSC_HAVE_OPENCL)
 92:       else if (flg_opencl) viennacl::backend::default_memory_type(viennacl::OPENCL_MEMORY);
 93: #endif
 94:       else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: Backend not recognized or available: %s.\n Pass -viennacl_view to see available backends for ViennaCL.", string);
 95:     } catch (std::exception const & ex) {
 96:       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
 97:     }
 98:   }

100:   if (PetscDefined(HAVE_CUDA)) {
101:     /* For CUDA event timers */
102:     PetscDeviceInitialize(PETSC_DEVICE_CUDA);
103:   }

105: #if defined(PETSC_HAVE_OPENCL)
106:   /* ViennaCL OpenCL device type configuration */
107:   PetscOptionsGetString(NULL,NULL,"-viennacl_opencl_device_type",string,sizeof(string),&flg);
108:   if (flg) {
109:     try {
110:       PetscStrcasecmp(string,"cpu",&flg);
111:       if (flg) viennacl::ocl::set_context_device_type(0, CL_DEVICE_TYPE_CPU);

113:       PetscStrcasecmp(string,"gpu",&flg);
114:       if (flg) viennacl::ocl::set_context_device_type(0, CL_DEVICE_TYPE_GPU);

116:       PetscStrcasecmp(string,"accelerator",&flg);
117:       if (flg) viennacl::ocl::set_context_device_type(0, CL_DEVICE_TYPE_ACCELERATOR);
118:     } catch (std::exception const & ex) {
119:       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
120:     }
121:   }
122: #endif

124:   /* Print available backends */
125:   PetscOptionsHasName(NULL,NULL,"-viennacl_view",&flg);
126:   if (flg) {
127:     PetscPrintf(PETSC_COMM_WORLD, "ViennaCL backends available: ");
128: #if defined(PETSC_HAVE_CUDA)
129:     PetscPrintf(PETSC_COMM_WORLD, "CUDA, ");
130: #endif
131: #if defined(PETSC_HAVE_OPENCL)
132:     PetscPrintf(PETSC_COMM_WORLD, "OpenCL, ");
133: #endif
134: #if defined(PETSC_HAVE_OPENMP)
135:     PetscPrintf(PETSC_COMM_WORLD, "OpenMP ");
136: #else
137:     PetscPrintf(PETSC_COMM_WORLD, "OpenMP (1 thread) ");
138: #endif
139:     PetscPrintf(PETSC_COMM_WORLD, "\n");

141:     /* Print selected backends */
142:     PetscPrintf(PETSC_COMM_WORLD, "ViennaCL backend  selected: ");
143: #if defined(PETSC_HAVE_CUDA)
144:     if (viennacl::backend::default_memory_type() == viennacl::CUDA_MEMORY) {
145:       PetscPrintf(PETSC_COMM_WORLD, "CUDA ");
146:     }
147: #endif
148: #if defined(PETSC_HAVE_OPENCL)
149:     if (viennacl::backend::default_memory_type() == viennacl::OPENCL_MEMORY) {
150:       PetscPrintf(PETSC_COMM_WORLD, "OpenCL ");
151:     }
152: #endif
153: #if defined(PETSC_HAVE_OPENMP)
154:     if (viennacl::backend::default_memory_type() == viennacl::MAIN_MEMORY) {
155:       PetscPrintf(PETSC_COMM_WORLD, "OpenMP ");
156:     }
157: #else
158:     if (viennacl::backend::default_memory_type() == viennacl::MAIN_MEMORY) {
159:       PetscPrintf(PETSC_COMM_WORLD, "OpenMP (sequential - consider reconfiguration: --with-openmp=1) ");
160:     }
161: #endif
162:     PetscPrintf(PETSC_COMM_WORLD, "\n");
163:   }
164:   return 0;
165: }

167: /*
168:     Allocates space for the vector array on the Host if it does not exist.
169:     Does NOT change the PetscViennaCLFlag for the vector
170:     Does NOT zero the ViennaCL array
171:  */
172: PETSC_EXTERN PetscErrorCode VecViennaCLAllocateCheckHost(Vec v)
173: {
174:   PetscScalar    *array;
175:   Vec_Seq        *s;
176:   PetscInt       n = v->map->n;

178:   s    = (Vec_Seq*)v->data;
179:   VecViennaCLAllocateCheck(v);
180:   if (s->array == 0) {
181:     PetscMalloc1(n,&array);
182:     PetscLogObjectMemory((PetscObject)v,n*sizeof(PetscScalar));
183:     s->array           = array;
184:     s->array_allocated = array;
185:   }
186:   return 0;
187: }

189: /*
190:     Allocates space for the vector array on the GPU if it does not exist.
191:     Does NOT change the PetscViennaCLFlag for the vector
192:     Does NOT zero the ViennaCL array

194:  */
195: PetscErrorCode VecViennaCLAllocateCheck(Vec v)
196: {
197:   if (!v->spptr) {
198:     try {
199:       v->spptr                            = new Vec_ViennaCL;
200:       ((Vec_ViennaCL*)v->spptr)->GPUarray_allocated = new ViennaCLVector((PetscBLASInt)v->map->n);
201:       ((Vec_ViennaCL*)v->spptr)->GPUarray = ((Vec_ViennaCL*)v->spptr)->GPUarray_allocated;

203:     } catch(std::exception const & ex) {
204:       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
205:     }
206:   }
207:   return 0;
208: }

210: /* Copies a vector from the CPU to the GPU unless we already have an up-to-date copy on the GPU */
211: PetscErrorCode VecViennaCLCopyToGPU(Vec v)
212: {
214:   VecViennaCLAllocateCheck(v);
215:   if (v->map->n > 0) {
216:     if (v->offloadmask == PETSC_OFFLOAD_CPU) {
217:       PetscLogEventBegin(VEC_ViennaCLCopyToGPU,v,0,0,0);
218:       try {
219:         ViennaCLVector *vec = ((Vec_ViennaCL*)v->spptr)->GPUarray;
220:         viennacl::fast_copy(*(PetscScalar**)v->data, *(PetscScalar**)v->data + v->map->n, vec->begin());
221:         ViennaCLWaitForGPU();
222:       } catch(std::exception const & ex) {
223:         SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
224:       }
225:       PetscLogCpuToGpu((v->map->n)*sizeof(PetscScalar));
226:       PetscLogEventEnd(VEC_ViennaCLCopyToGPU,v,0,0,0);
227:       v->offloadmask = PETSC_OFFLOAD_BOTH;
228:     }
229:   }
230:   return 0;
231: }

233: /*
234:      VecViennaCLCopyFromGPU - Copies a vector from the GPU to the CPU unless we already have an up-to-date copy on the CPU
235: */
236: PetscErrorCode VecViennaCLCopyFromGPU(Vec v)
237: {
239:   VecViennaCLAllocateCheckHost(v);
240:   if (v->offloadmask == PETSC_OFFLOAD_GPU) {
241:     PetscLogEventBegin(VEC_ViennaCLCopyFromGPU,v,0,0,0);
242:     try {
243:       ViennaCLVector *vec = ((Vec_ViennaCL*)v->spptr)->GPUarray;
244:       viennacl::fast_copy(vec->begin(),vec->end(),*(PetscScalar**)v->data);
245:       ViennaCLWaitForGPU();
246:     } catch(std::exception const & ex) {
247:       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
248:     }
249:     PetscLogGpuToCpu((v->map->n)*sizeof(PetscScalar));
250:     PetscLogEventEnd(VEC_ViennaCLCopyFromGPU,v,0,0,0);
251:     v->offloadmask = PETSC_OFFLOAD_BOTH;
252:   }
253:   return 0;
254: }

256: /* Copy on CPU */
257: static PetscErrorCode VecCopy_SeqViennaCL_Private(Vec xin,Vec yin)
258: {
259:   PetscScalar       *ya;
260:   const PetscScalar *xa;

262:   VecViennaCLAllocateCheckHost(xin);
263:   VecViennaCLAllocateCheckHost(yin);
264:   if (xin != yin) {
265:     VecGetArrayRead(xin,&xa);
266:     VecGetArray(yin,&ya);
267:     PetscArraycpy(ya,xa,xin->map->n);
268:     VecRestoreArrayRead(xin,&xa);
269:     VecRestoreArray(yin,&ya);
270:   }
271:   return 0;
272: }

274: static PetscErrorCode VecSetRandom_SeqViennaCL_Private(Vec xin,PetscRandom r)
275: {
276:   PetscInt       n = xin->map->n,i;
277:   PetscScalar    *xx;

279:   VecGetArray(xin,&xx);
280:   for (i=0; i<n; i++) PetscRandomGetValue(r,&xx[i]);
281:   VecRestoreArray(xin,&xx);
282:   return 0;
283: }

285: static PetscErrorCode VecDestroy_SeqViennaCL_Private(Vec v)
286: {
287:   Vec_Seq        *vs = (Vec_Seq*)v->data;

289:   PetscObjectSAWsViewOff(v);
290: #if defined(PETSC_USE_LOG)
291:   PetscLogObjectState((PetscObject)v,"Length=%" PetscInt_FMT,v->map->n);
292: #endif
293:   if (vs->array_allocated) PetscFree(vs->array_allocated);
294:   PetscFree(vs);
295:   return 0;
296: }

298: static PetscErrorCode VecResetArray_SeqViennaCL_Private(Vec vin)
299: {
300:   Vec_Seq *v = (Vec_Seq*)vin->data;

302:   v->array         = v->unplacedarray;
303:   v->unplacedarray = 0;
304:   return 0;
305: }

307: /*MC
308:    VECSEQVIENNACL - VECSEQVIENNACL = "seqviennacl" - The basic sequential vector, modified to use ViennaCL

310:    Options Database Keys:
311: . -vec_type seqviennacl - sets the vector type to VECSEQVIENNACL during a call to VecSetFromOptions()

313:   Level: beginner

315: .seealso: VecCreate(), VecSetType(), VecSetFromOptions(), VecCreateSeqWithArray(), VECMPI, VecType, VecCreateMPI(), VecCreateSeq()
316: M*/

318: PetscErrorCode VecAYPX_SeqViennaCL(Vec yin, PetscScalar alpha, Vec xin)
319: {
320:   const ViennaCLVector  *xgpu;
321:   ViennaCLVector        *ygpu;

323:   VecViennaCLGetArrayRead(xin,&xgpu);
324:   VecViennaCLGetArray(yin,&ygpu);
325:   PetscLogGpuTimeBegin();
326:   try {
327:     if (alpha != 0.0 && xin->map->n > 0) {
328:       *ygpu = *xgpu + alpha * *ygpu;
329:       PetscLogGpuFlops(2.0*yin->map->n);
330:     } else {
331:       *ygpu = *xgpu;
332:     }
333:     ViennaCLWaitForGPU();
334:   } catch(std::exception const & ex) {
335:     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
336:   }
337:   PetscLogGpuTimeEnd();
338:   VecViennaCLRestoreArrayRead(xin,&xgpu);
339:   VecViennaCLRestoreArray(yin,&ygpu);
340:   return 0;
341: }

343: PetscErrorCode VecAXPY_SeqViennaCL(Vec yin,PetscScalar alpha,Vec xin)
344: {
345:   const ViennaCLVector  *xgpu;
346:   ViennaCLVector        *ygpu;

348:   if (alpha != 0.0 && xin->map->n > 0) {
349:     VecViennaCLGetArrayRead(xin,&xgpu);
350:     VecViennaCLGetArray(yin,&ygpu);
351:     PetscLogGpuTimeBegin();
352:     try {
353:       *ygpu += alpha * *xgpu;
354:       ViennaCLWaitForGPU();
355:     } catch(std::exception const & ex) {
356:       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
357:     }
358:     PetscLogGpuTimeEnd();
359:     VecViennaCLRestoreArrayRead(xin,&xgpu);
360:     VecViennaCLRestoreArray(yin,&ygpu);
361:     PetscLogGpuFlops(2.0*yin->map->n);
362:   }
363:   return 0;
364: }

366: PetscErrorCode VecPointwiseDivide_SeqViennaCL(Vec win, Vec xin, Vec yin)
367: {
368:   const ViennaCLVector  *xgpu,*ygpu;
369:   ViennaCLVector        *wgpu;

371:   if (xin->map->n > 0) {
372:     VecViennaCLGetArrayRead(xin,&xgpu);
373:     VecViennaCLGetArrayRead(yin,&ygpu);
374:     VecViennaCLGetArrayWrite(win,&wgpu);
375:     PetscLogGpuTimeBegin();
376:     try {
377:       *wgpu = viennacl::linalg::element_div(*xgpu, *ygpu);
378:       ViennaCLWaitForGPU();
379:     } catch(std::exception const & ex) {
380:       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
381:     }
382:     PetscLogGpuTimeEnd();
383:     PetscLogGpuFlops(win->map->n);
384:     VecViennaCLRestoreArrayRead(xin,&xgpu);
385:     VecViennaCLRestoreArrayRead(yin,&ygpu);
386:     VecViennaCLRestoreArrayWrite(win,&wgpu);
387:   }
388:   return 0;
389: }

391: PetscErrorCode VecWAXPY_SeqViennaCL(Vec win,PetscScalar alpha,Vec xin, Vec yin)
392: {
393:   const ViennaCLVector  *xgpu,*ygpu;
394:   ViennaCLVector        *wgpu;

396:   if (alpha == 0.0 && xin->map->n > 0) {
397:     VecCopy_SeqViennaCL(yin,win);
398:   } else {
399:     VecViennaCLGetArrayRead(xin,&xgpu);
400:     VecViennaCLGetArrayRead(yin,&ygpu);
401:     VecViennaCLGetArrayWrite(win,&wgpu);
402:     PetscLogGpuTimeBegin();
403:     if (alpha == 1.0) {
404:       try {
405:         *wgpu = *ygpu + *xgpu;
406:       } catch(std::exception const & ex) {
407:         SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
408:       }
409:       PetscLogGpuFlops(win->map->n);
410:     } else if (alpha == -1.0) {
411:       try {
412:         *wgpu = *ygpu - *xgpu;
413:       } catch(std::exception const & ex) {
414:         SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
415:       }
416:       PetscLogGpuFlops(win->map->n);
417:     } else {
418:       try {
419:         *wgpu = *ygpu + alpha * *xgpu;
420:       } catch(std::exception const & ex) {
421:         SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
422:       }
423:       PetscLogGpuFlops(2*win->map->n);
424:     }
425:     ViennaCLWaitForGPU();
426:     PetscLogGpuTimeEnd();
427:     VecViennaCLRestoreArrayRead(xin,&xgpu);
428:     VecViennaCLRestoreArrayRead(yin,&ygpu);
429:     VecViennaCLRestoreArrayWrite(win,&wgpu);
430:   }
431:   return 0;
432: }

434: /*
435:  * Operation x = x + sum_i alpha_i * y_i for vectors x, y_i and scalars alpha_i
436:  *
437:  * ViennaCL supports a fast evaluation of x += alpha * y and x += alpha * y + beta * z,
438:  * hence there is an iterated application of these until the final result is obtained
439:  */
440: PetscErrorCode VecMAXPY_SeqViennaCL(Vec xin, PetscInt nv,const PetscScalar *alpha,Vec *y)
441: {
442:   PetscInt       j;

444:   for (j = 0; j < nv; ++j) {
445:     if (j+1 < nv) {
446:       VecAXPBYPCZ_SeqViennaCL(xin,alpha[j],alpha[j+1],1.0,y[j],y[j+1]);
447:       ++j;
448:     } else {
449:       VecAXPY_SeqViennaCL(xin,alpha[j],y[j]);
450:     }
451:   }
452:   ViennaCLWaitForGPU();
453:   return 0;
454: }

456: PetscErrorCode VecDot_SeqViennaCL(Vec xin,Vec yin,PetscScalar *z)
457: {
458:   const ViennaCLVector  *xgpu,*ygpu;

460:   if (xin->map->n > 0) {
461:     VecViennaCLGetArrayRead(xin,&xgpu);
462:     VecViennaCLGetArrayRead(yin,&ygpu);
463:     PetscLogGpuTimeBegin();
464:     try {
465:       *z = viennacl::linalg::inner_prod(*xgpu,*ygpu);
466:     } catch(std::exception const & ex) {
467:       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
468:     }
469:     ViennaCLWaitForGPU();
470:     PetscLogGpuTimeEnd();
471:     if (xin->map->n >0) {
472:       PetscLogGpuFlops(2.0*xin->map->n-1);
473:     }
474:     VecViennaCLRestoreArrayRead(xin,&xgpu);
475:     VecViennaCLRestoreArrayRead(yin,&ygpu);
476:   } else *z = 0.0;
477:   return 0;
478: }

480: /*
481:  * Operation z[j] = dot(x, y[j])
482:  *
483:  * We use an iterated application of dot() for each j. For small ranges of j this is still faster than an allocation of extra memory in order to use gemv().
484:  */
485: PetscErrorCode VecMDot_SeqViennaCL(Vec xin,PetscInt nv,const Vec yin[],PetscScalar *z)
486: {
487:   PetscInt             n = xin->map->n,i;
488:   const ViennaCLVector *xgpu,*ygpu;
489:   Vec                  *yyin = (Vec*)yin;
490:   std::vector<viennacl::vector_base<PetscScalar> const *> ygpu_array(nv);

492:   if (xin->map->n > 0) {
493:     VecViennaCLGetArrayRead(xin,&xgpu);
494:     for (i=0; i<nv; i++) {
495:       VecViennaCLGetArrayRead(yyin[i],&ygpu);
496:       ygpu_array[i] = ygpu;
497:     }
498:     PetscLogGpuTimeBegin();
499:     viennacl::vector_tuple<PetscScalar> y_tuple(ygpu_array);
500:     ViennaCLVector result = viennacl::linalg::inner_prod(*xgpu, y_tuple);
501:     viennacl::copy(result.begin(), result.end(), z);
502:     for (i=0; i<nv; i++) {
503:       VecViennaCLRestoreArrayRead(yyin[i],&ygpu);
504:     }
505:     ViennaCLWaitForGPU();
506:     PetscLogGpuTimeEnd();
507:     VecViennaCLRestoreArrayRead(xin,&xgpu);
508:     PetscLogGpuFlops(PetscMax(nv*(2.0*n-1),0.0));
509:   } else {
510:     for (i=0; i<nv; i++) z[i] = 0.0;
511:   }
512:   return 0;
513: }

515: PetscErrorCode VecMTDot_SeqViennaCL(Vec xin,PetscInt nv,const Vec yin[],PetscScalar *z)
516: {
517:   /* Since complex case is not supported at the moment, this is the same as VecMDot_SeqViennaCL */
518:   VecMDot_SeqViennaCL(xin,nv,yin,z);
519:   ViennaCLWaitForGPU();
520:   return 0;
521: }

523: PetscErrorCode VecSet_SeqViennaCL(Vec xin,PetscScalar alpha)
524: {
525:   ViennaCLVector *xgpu;

527:   if (xin->map->n > 0) {
528:     VecViennaCLGetArrayWrite(xin,&xgpu);
529:     PetscLogGpuTimeBegin();
530:     try {
531:       *xgpu = viennacl::scalar_vector<PetscScalar>(xgpu->size(), alpha);
532:       ViennaCLWaitForGPU();
533:     } catch(std::exception const & ex) {
534:       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
535:     }
536:     PetscLogGpuTimeEnd();
537:     VecViennaCLRestoreArrayWrite(xin,&xgpu);
538:   }
539:   return 0;
540: }

542: PetscErrorCode VecScale_SeqViennaCL(Vec xin, PetscScalar alpha)
543: {
544:   ViennaCLVector *xgpu;

546:   if (alpha == 0.0 && xin->map->n > 0) {
547:     VecSet_SeqViennaCL(xin,alpha);
548:     PetscLogGpuFlops(xin->map->n);
549:   } else if (alpha != 1.0 && xin->map->n > 0) {
550:     VecViennaCLGetArray(xin,&xgpu);
551:     PetscLogGpuTimeBegin();
552:     try {
553:       *xgpu *= alpha;
554:       ViennaCLWaitForGPU();
555:     } catch(std::exception const & ex) {
556:       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
557:     }
558:     PetscLogGpuTimeEnd();
559:     VecViennaCLRestoreArray(xin,&xgpu);
560:     PetscLogGpuFlops(xin->map->n);
561:   }
562:   return 0;
563: }

565: PetscErrorCode VecTDot_SeqViennaCL(Vec xin,Vec yin,PetscScalar *z)
566: {
567:   /* Since complex case is not supported at the moment, this is the same as VecDot_SeqViennaCL */
568:   VecDot_SeqViennaCL(xin, yin, z);
569:   ViennaCLWaitForGPU();
570:   return 0;
571: }

573: PetscErrorCode VecCopy_SeqViennaCL(Vec xin,Vec yin)
574: {
575:   const ViennaCLVector *xgpu;
576:   ViennaCLVector       *ygpu;

578:   if (xin != yin && xin->map->n > 0) {
579:     if (xin->offloadmask == PETSC_OFFLOAD_GPU) {
580:       VecViennaCLGetArrayRead(xin,&xgpu);
581:       VecViennaCLGetArrayWrite(yin,&ygpu);
582:       PetscLogGpuTimeBegin();
583:       try {
584:         *ygpu = *xgpu;
585:         ViennaCLWaitForGPU();
586:       } catch(std::exception const & ex) {
587:         SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
588:       }
589:       PetscLogGpuTimeEnd();
590:       VecViennaCLRestoreArrayRead(xin,&xgpu);
591:       VecViennaCLRestoreArrayWrite(yin,&ygpu);

593:     } else if (xin->offloadmask == PETSC_OFFLOAD_CPU) {
594:       /* copy in CPU if we are on the CPU*/
595:       VecCopy_SeqViennaCL_Private(xin,yin);
596:       ViennaCLWaitForGPU();
597:     } else if (xin->offloadmask == PETSC_OFFLOAD_BOTH) {
598:       /* if xin is valid in both places, see where yin is and copy there (because it's probably where we'll want to next use it) */
599:       if (yin->offloadmask == PETSC_OFFLOAD_CPU) {
600:         /* copy in CPU */
601:         VecCopy_SeqViennaCL_Private(xin,yin);
602:         ViennaCLWaitForGPU();
603:       } else if (yin->offloadmask == PETSC_OFFLOAD_GPU) {
604:         /* copy in GPU */
605:         VecViennaCLGetArrayRead(xin,&xgpu);
606:         VecViennaCLGetArrayWrite(yin,&ygpu);
607:         PetscLogGpuTimeBegin();
608:         try {
609:           *ygpu = *xgpu;
610:           ViennaCLWaitForGPU();
611:         } catch(std::exception const & ex) {
612:           SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
613:         }
614:         PetscLogGpuTimeEnd();
615:         VecViennaCLRestoreArrayRead(xin,&xgpu);
616:         VecViennaCLRestoreArrayWrite(yin,&ygpu);
617:       } else if (yin->offloadmask == PETSC_OFFLOAD_BOTH) {
618:         /* xin and yin are both valid in both places (or yin was unallocated before the earlier call to allocatecheck
619:            default to copy in GPU (this is an arbitrary choice) */
620:         VecViennaCLGetArrayRead(xin,&xgpu);
621:         VecViennaCLGetArrayWrite(yin,&ygpu);
622:         PetscLogGpuTimeBegin();
623:         try {
624:           *ygpu = *xgpu;
625:           ViennaCLWaitForGPU();
626:         } catch(std::exception const & ex) {
627:           SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
628:         }
629:         PetscLogGpuTimeEnd();
630:         VecViennaCLRestoreArrayRead(xin,&xgpu);
631:         VecViennaCLRestoreArrayWrite(yin,&ygpu);
632:       } else {
633:         VecCopy_SeqViennaCL_Private(xin,yin);
634:         ViennaCLWaitForGPU();
635:       }
636:     }
637:   }
638:   return 0;
639: }

641: PetscErrorCode VecSwap_SeqViennaCL(Vec xin,Vec yin)
642: {
643:   ViennaCLVector *xgpu,*ygpu;

645:   if (xin != yin && xin->map->n > 0) {
646:     VecViennaCLGetArray(xin,&xgpu);
647:     VecViennaCLGetArray(yin,&ygpu);
648:     PetscLogGpuTimeBegin();
649:     try {
650:       viennacl::swap(*xgpu, *ygpu);
651:       ViennaCLWaitForGPU();
652:     } catch(std::exception const & ex) {
653:       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
654:     }
655:     PetscLogGpuTimeEnd();
656:     VecViennaCLRestoreArray(xin,&xgpu);
657:     VecViennaCLRestoreArray(yin,&ygpu);
658:   }
659:   return 0;
660: }

662: // y = alpha * x + beta * y
663: PetscErrorCode VecAXPBY_SeqViennaCL(Vec yin,PetscScalar alpha,PetscScalar beta,Vec xin)
664: {
665:   PetscScalar          a = alpha,b = beta;
666:   const ViennaCLVector *xgpu;
667:   ViennaCLVector       *ygpu;

669:   if (a == 0.0 && xin->map->n > 0) {
670:     VecScale_SeqViennaCL(yin,beta);
671:   } else if (b == 1.0 && xin->map->n > 0) {
672:     VecAXPY_SeqViennaCL(yin,alpha,xin);
673:   } else if (a == 1.0 && xin->map->n > 0) {
674:     VecAYPX_SeqViennaCL(yin,beta,xin);
675:   } else if (b == 0.0 && xin->map->n > 0) {
676:     VecViennaCLGetArrayRead(xin,&xgpu);
677:     VecViennaCLGetArray(yin,&ygpu);
678:     PetscLogGpuTimeBegin();
679:     try {
680:       *ygpu = *xgpu * alpha;
681:       ViennaCLWaitForGPU();
682:     } catch(std::exception const & ex) {
683:       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
684:     }
685:     PetscLogGpuTimeEnd();
686:     PetscLogGpuFlops(xin->map->n);
687:     VecViennaCLRestoreArrayRead(xin,&xgpu);
688:     VecViennaCLRestoreArray(yin,&ygpu);
689:   } else if (xin->map->n > 0) {
690:     VecViennaCLGetArrayRead(xin,&xgpu);
691:     VecViennaCLGetArray(yin,&ygpu);
692:     PetscLogGpuTimeBegin();
693:     try {
694:       *ygpu = *xgpu * alpha + *ygpu * beta;
695:       ViennaCLWaitForGPU();
696:     } catch(std::exception const & ex) {
697:       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
698:     }
699:     PetscLogGpuTimeEnd();
700:     VecViennaCLRestoreArrayRead(xin,&xgpu);
701:     VecViennaCLRestoreArray(yin,&ygpu);
702:     PetscLogGpuFlops(3.0*xin->map->n);
703:   }
704:   return 0;
705: }

707: /* operation  z = alpha * x + beta *y + gamma *z*/
708: PetscErrorCode VecAXPBYPCZ_SeqViennaCL(Vec zin,PetscScalar alpha,PetscScalar beta,PetscScalar gamma,Vec xin,Vec yin)
709: {
710:   PetscInt             n = zin->map->n;
711:   const ViennaCLVector *xgpu,*ygpu;
712:   ViennaCLVector       *zgpu;

714:   VecViennaCLGetArrayRead(xin,&xgpu);
715:   VecViennaCLGetArrayRead(yin,&ygpu);
716:   VecViennaCLGetArray(zin,&zgpu);
717:   if (alpha == 0.0 && xin->map->n > 0) {
718:     PetscLogGpuTimeBegin();
719:     try {
720:       if (beta == 0.0) {
721:         *zgpu = gamma * *zgpu;
722:         ViennaCLWaitForGPU();
723:         PetscLogGpuFlops(1.0*n);
724:       } else if (gamma == 0.0) {
725:         *zgpu = beta * *ygpu;
726:         ViennaCLWaitForGPU();
727:         PetscLogGpuFlops(1.0*n);
728:       } else {
729:         *zgpu = beta * *ygpu + gamma * *zgpu;
730:         ViennaCLWaitForGPU();
731:         PetscLogGpuFlops(3.0*n);
732:       }
733:     } catch(std::exception const & ex) {
734:       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
735:     }
736:     PetscLogGpuTimeEnd();
737:     PetscLogGpuFlops(3.0*n);
738:   } else if (beta == 0.0 && xin->map->n > 0) {
739:     PetscLogGpuTimeBegin();
740:     try {
741:       if (gamma == 0.0) {
742:         *zgpu = alpha * *xgpu;
743:         ViennaCLWaitForGPU();
744:         PetscLogGpuFlops(1.0*n);
745:       } else {
746:         *zgpu = alpha * *xgpu + gamma * *zgpu;
747:         ViennaCLWaitForGPU();
748:         PetscLogGpuFlops(3.0*n);
749:       }
750:     } catch(std::exception const & ex) {
751:       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
752:     }
753:     PetscLogGpuTimeEnd();
754:   } else if (gamma == 0.0 && xin->map->n > 0) {
755:     PetscLogGpuTimeBegin();
756:     try {
757:       *zgpu = alpha * *xgpu + beta * *ygpu;
758:       ViennaCLWaitForGPU();
759:     } catch(std::exception const & ex) {
760:       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
761:     }
762:     PetscLogGpuTimeEnd();
763:     PetscLogGpuFlops(3.0*n);
764:   } else if (xin->map->n > 0) {
765:     PetscLogGpuTimeBegin();
766:     try {
767:       /* Split operation into two steps. This is not completely ideal, but avoids temporaries (which are far worse) */
768:       if (gamma != 1.0)
769:         *zgpu *= gamma;
770:       *zgpu += alpha * *xgpu + beta * *ygpu;
771:       ViennaCLWaitForGPU();
772:     } catch(std::exception const & ex) {
773:       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
774:     }
775:     PetscLogGpuTimeEnd();
776:     VecViennaCLRestoreArray(zin,&zgpu);
777:     VecViennaCLRestoreArrayRead(xin,&xgpu);
778:     VecViennaCLRestoreArrayRead(yin,&ygpu);
779:     PetscLogGpuFlops(5.0*n);
780:   }
781:   return 0;
782: }

784: PetscErrorCode VecPointwiseMult_SeqViennaCL(Vec win,Vec xin,Vec yin)
785: {
786:   PetscInt             n = win->map->n;
787:   const ViennaCLVector *xgpu,*ygpu;
788:   ViennaCLVector       *wgpu;

790:   if (xin->map->n > 0) {
791:     VecViennaCLGetArrayRead(xin,&xgpu);
792:     VecViennaCLGetArrayRead(yin,&ygpu);
793:     VecViennaCLGetArray(win,&wgpu);
794:     PetscLogGpuTimeBegin();
795:     try {
796:       *wgpu = viennacl::linalg::element_prod(*xgpu, *ygpu);
797:       ViennaCLWaitForGPU();
798:     } catch(std::exception const & ex) {
799:       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
800:     }
801:     PetscLogGpuTimeEnd();
802:     VecViennaCLRestoreArrayRead(xin,&xgpu);
803:     VecViennaCLRestoreArrayRead(yin,&ygpu);
804:     VecViennaCLRestoreArray(win,&wgpu);
805:     PetscLogGpuFlops(n);
806:   }
807:   return 0;
808: }

810: PetscErrorCode VecNorm_SeqViennaCL(Vec xin,NormType type,PetscReal *z)
811: {
812:   PetscInt             n = xin->map->n;
813:   PetscBLASInt         bn;
814:   const ViennaCLVector *xgpu;

816:   if (xin->map->n > 0) {
817:     PetscBLASIntCast(n,&bn);
818:     VecViennaCLGetArrayRead(xin,&xgpu);
819:     if (type == NORM_2 || type == NORM_FROBENIUS) {
820:       PetscLogGpuTimeBegin();
821:       try {
822:         *z = viennacl::linalg::norm_2(*xgpu);
823:         ViennaCLWaitForGPU();
824:       } catch(std::exception const & ex) {
825:         SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
826:       }
827:       PetscLogGpuTimeEnd();
828:       PetscLogGpuFlops(PetscMax(2.0*n-1,0.0));
829:     } else if (type == NORM_INFINITY) {
830:       PetscLogGpuTimeBegin();
831:       try {
832:         *z = viennacl::linalg::norm_inf(*xgpu);
833:         ViennaCLWaitForGPU();
834:       } catch(std::exception const & ex) {
835:         SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
836:       }
837:       PetscLogGpuTimeEnd();
838:     } else if (type == NORM_1) {
839:       PetscLogGpuTimeBegin();
840:       try {
841:         *z = viennacl::linalg::norm_1(*xgpu);
842:         ViennaCLWaitForGPU();
843:       } catch(std::exception const & ex) {
844:         SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
845:       }
846:       PetscLogGpuTimeEnd();
847:       PetscLogGpuFlops(PetscMax(n-1.0,0.0));
848:     } else if (type == NORM_1_AND_2) {
849:       PetscLogGpuTimeBegin();
850:       try {
851:         *z     = viennacl::linalg::norm_1(*xgpu);
852:         *(z+1) = viennacl::linalg::norm_2(*xgpu);
853:         ViennaCLWaitForGPU();
854:       } catch(std::exception const & ex) {
855:         SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
856:       }
857:       PetscLogGpuTimeEnd();
858:       PetscLogGpuFlops(PetscMax(2.0*n-1,0.0));
859:       PetscLogGpuFlops(PetscMax(n-1.0,0.0));
860:     }
861:     VecViennaCLRestoreArrayRead(xin,&xgpu);
862:   } else if (type == NORM_1_AND_2) {
863:     *z      = 0.0;
864:     *(z+1)  = 0.0;
865:   } else *z = 0.0;
866:   return 0;
867: }

869: PetscErrorCode VecSetRandom_SeqViennaCL(Vec xin,PetscRandom r)
870: {
871:   VecSetRandom_SeqViennaCL_Private(xin,r);
872:   xin->offloadmask = PETSC_OFFLOAD_CPU;
873:   return 0;
874: }

876: PetscErrorCode VecResetArray_SeqViennaCL(Vec vin)
877: {
879:   VecViennaCLCopyFromGPU(vin);
880:   VecResetArray_SeqViennaCL_Private(vin);
881:   vin->offloadmask = PETSC_OFFLOAD_CPU;
882:   return 0;
883: }

885: PetscErrorCode VecPlaceArray_SeqViennaCL(Vec vin,const PetscScalar *a)
886: {
888:   VecViennaCLCopyFromGPU(vin);
889:   VecPlaceArray_Seq(vin,a);
890:   vin->offloadmask = PETSC_OFFLOAD_CPU;
891:   return 0;
892: }

894: PetscErrorCode VecReplaceArray_SeqViennaCL(Vec vin,const PetscScalar *a)
895: {
897:   VecViennaCLCopyFromGPU(vin);
898:   VecReplaceArray_Seq(vin,a);
899:   vin->offloadmask = PETSC_OFFLOAD_CPU;
900:   return 0;
901: }

903: /*@C
904:    VecCreateSeqViennaCL - Creates a standard, sequential array-style vector.

906:    Collective

908:    Input Parameters:
909: +  comm - the communicator, should be PETSC_COMM_SELF
910: -  n - the vector length

912:    Output Parameter:
913: .  V - the vector

915:    Notes:
916:    Use VecDuplicate() or VecDuplicateVecs() to form additional vectors of the
917:    same type as an existing vector.

919:    Level: intermediate

921: .seealso: VecCreateMPI(), VecCreate(), VecDuplicate(), VecDuplicateVecs(), VecCreateGhost()
922: @*/
923: PetscErrorCode VecCreateSeqViennaCL(MPI_Comm comm,PetscInt n,Vec *v)
924: {
925:   VecCreate(comm,v);
926:   VecSetSizes(*v,n,n);
927:   VecSetType(*v,VECSEQVIENNACL);
928:   return 0;
929: }

931: /*@C
932:    VecCreateSeqViennaCLWithArray - Creates a viennacl sequential array-style vector,
933:    where the user provides the array space to store the vector values.

935:    Collective

937:    Input Parameters:
938: +  comm - the communicator, should be PETSC_COMM_SELF
939: .  bs - the block size
940: .  n - the vector length
941: -  array - viennacl array where the vector elements are to be stored.

943:    Output Parameter:
944: .  V - the vector

946:    Notes:
947:    Use VecDuplicate() or VecDuplicateVecs() to form additional vectors of the
948:    same type as an existing vector.

950:    If the user-provided array is NULL, then VecViennaCLPlaceArray() can be used
951:    at a later stage to SET the array for storing the vector values.

953:    PETSc does NOT free the array when the vector is destroyed via VecDestroy().
954:    The user should not free the array until the vector is destroyed.

956:    Level: intermediate

958: .seealso: VecCreateMPIViennaCLWithArray(), VecCreate(), VecDuplicate(), VecDuplicateVecs(),
959:           VecCreateGhost(), VecCreateSeq(), VecCUDAPlaceArray(), VecCreateSeqWithArray(),
960:           VecCreateMPIWithArray()
961: @*/
962: PETSC_EXTERN PetscErrorCode  VecCreateSeqViennaCLWithArray(MPI_Comm comm,PetscInt bs,PetscInt n,const ViennaCLVector* array,Vec *V)
963: {
964:   PetscMPIInt    size;

966:   VecCreate(comm,V);
967:   VecSetSizes(*V,n,n);
968:   VecSetBlockSize(*V,bs);
969:   MPI_Comm_size(comm,&size);
971:   VecCreate_SeqViennaCL_Private(*V,array);
972:   return 0;
973: }

975: /*@C
976:    VecCreateSeqViennaCLWithArrays - Creates a ViennaCL sequential vector, where
977:    the user provides the array space to store the vector values.

979:    Collective

981:    Input Parameters:
982: +  comm - the communicator, should be PETSC_COMM_SELF
983: .  bs - the block size
984: .  n - the vector length
985: -  cpuarray - CPU memory where the vector elements are to be stored.
986: -  viennaclvec - ViennaCL vector where the Vec entries are to be stored on the device.

988:    Output Parameter:
989: .  V - the vector

991:    Notes:
992:    If both cpuarray and viennaclvec are provided, the caller must ensure that
993:    the provided arrays have identical values.

995:    PETSc does NOT free the provided arrays when the vector is destroyed via
996:    VecDestroy(). The user should not free the array until the vector is
997:    destroyed.

999:    Level: intermediate

1001: .seealso: VecCreateMPIViennaCLWithArrays(), VecCreate(), VecCreateSeqWithArray(),
1002:           VecViennaCLPlaceArray(), VecPlaceArray(), VecCreateSeqCUDAWithArrays(),
1003:           VecViennaCLAllocateCheckHost()
1004: @*/
1005: PetscErrorCode  VecCreateSeqViennaCLWithArrays(MPI_Comm comm,PetscInt bs,PetscInt n,const PetscScalar cpuarray[],const ViennaCLVector* viennaclvec,Vec *V)
1006: {
1007:   PetscMPIInt    size;


1010:   MPI_Comm_size(comm,&size);

1013:   // set V's viennaclvec to be viennaclvec, do not allocate memory on host yet.
1014:   VecCreateSeqViennaCLWithArray(comm,bs,n,viennaclvec,V);

1016:   if (cpuarray && viennaclvec) {
1017:     Vec_Seq *s = (Vec_Seq*)((*V)->data);
1018:     s->array = (PetscScalar*)cpuarray;
1019:     (*V)->offloadmask = PETSC_OFFLOAD_BOTH;
1020:   } else if (cpuarray) {
1021:     Vec_Seq *s = (Vec_Seq*)((*V)->data);
1022:     s->array = (PetscScalar*)cpuarray;
1023:     (*V)->offloadmask = PETSC_OFFLOAD_CPU;
1024:   } else if (viennaclvec) {
1025:     (*V)->offloadmask = PETSC_OFFLOAD_GPU;
1026:   } else {
1027:     (*V)->offloadmask = PETSC_OFFLOAD_UNALLOCATED;
1028:   }

1030:   return 0;
1031: }

1033: /*@C
1034:    VecViennaCLPlaceArray - Replace the viennacl vector in a Vec with
1035:    the one provided by the user. This is useful to avoid a copy.

1037:    Not Collective

1039:    Input Parameters:
1040: +  vec - the vector
1041: -  array - the ViennaCL vector

1043:    Notes:
1044:    You can return to the original viennacl vector with a call to
1045:    VecViennaCLResetArray() It is not possible to use VecViennaCLPlaceArray()
1046:    and VecPlaceArray() at the same time on the same vector.

1048:    Level: intermediate

1050: .seealso: VecPlaceArray(), VecSetValues(), VecViennaCLResetArray(),
1051:           VecCUDAPlaceArray(),

1053: @*/
1054: PETSC_EXTERN PetscErrorCode VecViennaCLPlaceArray(Vec vin,const ViennaCLVector* a)
1055: {
1057:   VecViennaCLCopyToGPU(vin);
1059:   ((Vec_Seq*)vin->data)->unplacedarray  = (PetscScalar *) ((Vec_ViennaCL*)vin->spptr)->GPUarray; /* save previous GPU array so reset can bring it back */
1060:   ((Vec_ViennaCL*)vin->spptr)->GPUarray = (ViennaCLVector*)a;
1061:   vin->offloadmask = PETSC_OFFLOAD_GPU;
1062:   PetscObjectStateIncrease((PetscObject)vin);
1063:   return 0;
1064: }

1066: /*@C
1067:    VecViennaCLResetArray - Resets a vector to use its default memory. Call this
1068:    after the use of VecViennaCLPlaceArray().

1070:    Not Collective

1072:    Input Parameters:
1073: .  vec - the vector

1075:    Level: developer

1077: .seealso: VecViennaCLPlaceArray(), VecResetArray(), VecCUDAResetArray(), VecPlaceArray()
1078: @*/
1079: PETSC_EXTERN PetscErrorCode VecViennaCLResetArray(Vec vin)
1080: {
1082:   VecViennaCLCopyToGPU(vin);
1083:   ((Vec_ViennaCL*)vin->spptr)->GPUarray = (ViennaCLVector *) ((Vec_Seq*)vin->data)->unplacedarray;
1084:   ((Vec_Seq*)vin->data)->unplacedarray = 0;
1085:   vin->offloadmask = PETSC_OFFLOAD_GPU;
1086:   PetscObjectStateIncrease((PetscObject)vin);
1087:   return 0;
1088: }

1090: /*  VecDotNorm2 - computes the inner product of two vectors and the 2-norm squared of the second vector
1091:  *
1092:  *  Simply reuses VecDot() and VecNorm(). Performance improvement through custom kernel (kernel generator) possible.
1093:  */
1094: PetscErrorCode VecDotNorm2_SeqViennaCL(Vec s, Vec t, PetscScalar *dp, PetscScalar *nm)
1095: {
1096:   VecDot_SeqViennaCL(s,t,dp);
1097:   VecNorm_SeqViennaCL(t,NORM_2,nm);
1098:   *nm *= *nm; //squared norm required
1099:   return 0;
1100: }

1102: PetscErrorCode VecDuplicate_SeqViennaCL(Vec win,Vec *V)
1103: {
1104:   VecCreateSeqViennaCL(PetscObjectComm((PetscObject)win),win->map->n,V);
1105:   PetscLayoutReference(win->map,&(*V)->map);
1106:   PetscObjectListDuplicate(((PetscObject)win)->olist,&((PetscObject)(*V))->olist);
1107:   PetscFunctionListDuplicate(((PetscObject)win)->qlist,&((PetscObject)(*V))->qlist);
1108:   (*V)->stash.ignorenegidx = win->stash.ignorenegidx;
1109:   return 0;
1110: }

1112: PetscErrorCode VecDestroy_SeqViennaCL(Vec v)
1113: {
1114:   try {
1115:     if (v->spptr) {
1116:       delete ((Vec_ViennaCL*)v->spptr)->GPUarray_allocated;
1117:       delete (Vec_ViennaCL*) v->spptr;
1118:     }
1119:   } catch(char *ex) {
1120:     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex);
1121:   }
1122:   VecDestroy_SeqViennaCL_Private(v);
1123:   return 0;
1124: }

1126: PetscErrorCode VecGetArray_SeqViennaCL(Vec v,PetscScalar **a)
1127: {
1128:   if (v->offloadmask == PETSC_OFFLOAD_GPU) {
1129:     VecViennaCLCopyFromGPU(v);
1130:   } else {
1131:     VecViennaCLAllocateCheckHost(v);
1132:   }
1133:   *a = *((PetscScalar**)v->data);
1134:   return 0;
1135: }

1137: PetscErrorCode VecRestoreArray_SeqViennaCL(Vec v,PetscScalar **a)
1138: {
1139:   v->offloadmask = PETSC_OFFLOAD_CPU;
1140:   return 0;
1141: }

1143: PetscErrorCode VecGetArrayWrite_SeqViennaCL(Vec v,PetscScalar **a)
1144: {
1145:   VecViennaCLAllocateCheckHost(v);
1146:   *a   = *((PetscScalar**)v->data);
1147:   return 0;
1148: }

1150: static PetscErrorCode VecBindToCPU_SeqAIJViennaCL(Vec V,PetscBool flg)
1151: {
1152:   V->boundtocpu = flg;
1153:   if (flg) {
1154:     VecViennaCLCopyFromGPU(V);
1155:     V->offloadmask = PETSC_OFFLOAD_CPU; /* since the CPU code will likely change values in the vector */
1156:     V->ops->dot             = VecDot_Seq;
1157:     V->ops->norm            = VecNorm_Seq;
1158:     V->ops->tdot            = VecTDot_Seq;
1159:     V->ops->scale           = VecScale_Seq;
1160:     V->ops->copy            = VecCopy_Seq;
1161:     V->ops->set             = VecSet_Seq;
1162:     V->ops->swap            = VecSwap_Seq;
1163:     V->ops->axpy            = VecAXPY_Seq;
1164:     V->ops->axpby           = VecAXPBY_Seq;
1165:     V->ops->axpbypcz        = VecAXPBYPCZ_Seq;
1166:     V->ops->pointwisemult   = VecPointwiseMult_Seq;
1167:     V->ops->pointwisedivide = VecPointwiseDivide_Seq;
1168:     V->ops->setrandom       = VecSetRandom_Seq;
1169:     V->ops->dot_local       = VecDot_Seq;
1170:     V->ops->tdot_local      = VecTDot_Seq;
1171:     V->ops->norm_local      = VecNorm_Seq;
1172:     V->ops->mdot_local      = VecMDot_Seq;
1173:     V->ops->mtdot_local     = VecMTDot_Seq;
1174:     V->ops->maxpy           = VecMAXPY_Seq;
1175:     V->ops->mdot            = VecMDot_Seq;
1176:     V->ops->mtdot           = VecMTDot_Seq;
1177:     V->ops->aypx            = VecAYPX_Seq;
1178:     V->ops->waxpy           = VecWAXPY_Seq;
1179:     V->ops->dotnorm2        = NULL;
1180:     V->ops->placearray      = VecPlaceArray_Seq;
1181:     V->ops->replacearray    = VecReplaceArray_Seq;
1182:     V->ops->resetarray      = VecResetArray_Seq;
1183:     V->ops->duplicate       = VecDuplicate_Seq;
1184:   } else {
1185:     V->ops->dot             = VecDot_SeqViennaCL;
1186:     V->ops->norm            = VecNorm_SeqViennaCL;
1187:     V->ops->tdot            = VecTDot_SeqViennaCL;
1188:     V->ops->scale           = VecScale_SeqViennaCL;
1189:     V->ops->copy            = VecCopy_SeqViennaCL;
1190:     V->ops->set             = VecSet_SeqViennaCL;
1191:     V->ops->swap            = VecSwap_SeqViennaCL;
1192:     V->ops->axpy            = VecAXPY_SeqViennaCL;
1193:     V->ops->axpby           = VecAXPBY_SeqViennaCL;
1194:     V->ops->axpbypcz        = VecAXPBYPCZ_SeqViennaCL;
1195:     V->ops->pointwisemult   = VecPointwiseMult_SeqViennaCL;
1196:     V->ops->pointwisedivide = VecPointwiseDivide_SeqViennaCL;
1197:     V->ops->setrandom       = VecSetRandom_SeqViennaCL;
1198:     V->ops->dot_local       = VecDot_SeqViennaCL;
1199:     V->ops->tdot_local      = VecTDot_SeqViennaCL;
1200:     V->ops->norm_local      = VecNorm_SeqViennaCL;
1201:     V->ops->mdot_local      = VecMDot_SeqViennaCL;
1202:     V->ops->mtdot_local     = VecMTDot_SeqViennaCL;
1203:     V->ops->maxpy           = VecMAXPY_SeqViennaCL;
1204:     V->ops->mdot            = VecMDot_SeqViennaCL;
1205:     V->ops->mtdot           = VecMTDot_SeqViennaCL;
1206:     V->ops->aypx            = VecAYPX_SeqViennaCL;
1207:     V->ops->waxpy           = VecWAXPY_SeqViennaCL;
1208:     V->ops->dotnorm2        = VecDotNorm2_SeqViennaCL;
1209:     V->ops->placearray      = VecPlaceArray_SeqViennaCL;
1210:     V->ops->replacearray    = VecReplaceArray_SeqViennaCL;
1211:     V->ops->resetarray      = VecResetArray_SeqViennaCL;
1212:     V->ops->destroy         = VecDestroy_SeqViennaCL;
1213:     V->ops->duplicate       = VecDuplicate_SeqViennaCL;
1214:     V->ops->getarraywrite   = VecGetArrayWrite_SeqViennaCL;
1215:     V->ops->getarray        = VecGetArray_SeqViennaCL;
1216:     V->ops->restorearray    = VecRestoreArray_SeqViennaCL;
1217:   }
1218:   return 0;
1219: }

1221: PETSC_EXTERN PetscErrorCode VecCreate_SeqViennaCL(Vec V)
1222: {
1223:   PetscMPIInt    size;

1225:   MPI_Comm_size(PetscObjectComm((PetscObject)V),&size);
1227:   VecCreate_Seq_Private(V,0);
1228:   PetscObjectChangeTypeName((PetscObject)V,VECSEQVIENNACL);

1230:   VecBindToCPU_SeqAIJViennaCL(V,PETSC_FALSE);
1231:   V->ops->bindtocpu = VecBindToCPU_SeqAIJViennaCL;

1233:   VecViennaCLAllocateCheck(V);
1234:   VecSet_SeqViennaCL(V,0.0);
1235:   return 0;
1236: }

1238: /*@C
1239:   VecViennaCLGetCLContext - Get the OpenCL context in which the Vec resides.

1241:   Caller should cast (*ctx) to (const cl_context). Caller is responsible for
1242:   invoking clReleaseContext().

1244:   Input Parameters:
1245: .  v    - the vector

1247:   Output Parameters:
1248: .  ctx - pointer to the underlying CL context

1250:   Level: intermediate

1252: .seealso: VecViennaCLGetCLQueue(), VecViennaCLGetCLMemRead()
1253: @*/
1254: PETSC_EXTERN PetscErrorCode VecViennaCLGetCLContext(Vec v, PETSC_UINTPTR_T* ctx)
1255: {
1256: #if !defined(PETSC_HAVE_OPENCL)
1257:   SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"PETSc must be configured with --with-opencl to get the associated cl_context.");
1258: #else

1261:   const ViennaCLVector *v_vcl;
1262:   VecViennaCLGetArrayRead(v, &v_vcl);
1263:   try{
1264:     viennacl::ocl::context vcl_ctx = v_vcl->handle().opencl_handle().context();
1265:     const cl_context ocl_ctx = vcl_ctx.handle().get();
1266:     clRetainContext(ocl_ctx);
1267:     *ctx = (PETSC_UINTPTR_T)(ocl_ctx);
1268:   } catch (std::exception const & ex) {
1269:     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
1270:   }

1272:   return 0;
1273: #endif
1274: }

1276: /*@C
1277:   VecViennaCLGetCLQueue - Get the OpenCL command queue to which all
1278:   operations of the Vec are enqueued.

1280:   Caller should cast (*queue) to (const cl_command_queue). Caller is
1281:   responsible for invoking clReleaseCommandQueue().

1283:   Input Parameters:
1284: .  v    - the vector

1286:   Output Parameters:
1287: .  ctx - pointer to the CL command queue

1289:   Level: intermediate

1291: .seealso: VecViennaCLGetCLContext(), VecViennaCLGetCLMemRead()
1292: @*/
1293: PETSC_EXTERN PetscErrorCode VecViennaCLGetCLQueue(Vec v, PETSC_UINTPTR_T* queue)
1294: {
1295: #if !defined(PETSC_HAVE_OPENCL)
1296:   SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"PETSc must be configured with --with-opencl to get the associated cl_command_queue.");
1297: #else
1299:   const ViennaCLVector *v_vcl;
1300:   VecViennaCLGetArrayRead(v, &v_vcl);
1301:   try{
1302:     viennacl::ocl::context vcl_ctx = v_vcl->handle().opencl_handle().context();
1303:     const viennacl::ocl::command_queue& vcl_queue = vcl_ctx.current_queue();
1304:     const cl_command_queue ocl_queue = vcl_queue.handle().get();
1305:     clRetainCommandQueue(ocl_queue);
1306:     *queue = (PETSC_UINTPTR_T)(ocl_queue);
1307:   } catch (std::exception const & ex) {
1308:     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
1309:   }

1311:   return 0;
1312: #endif
1313: }

1315: /*@C
1316:   VecViennaCLGetCLMemRead - Provides access to the the CL buffer inside a Vec.

1318:   Caller should cast (*mem) to (const cl_mem). Caller is responsible for
1319:   invoking clReleaseMemObject().

1321:   Input Parameters:
1322: .  v    - the vector

1324:   Output Parameters:
1325: .  mem - pointer to the device buffer

1327:   Level: intermediate

1329: .seealso: VecViennaCLGetCLContext(), VecViennaCLGetCLMemWrite()
1330: @*/
1331: PETSC_EXTERN PetscErrorCode VecViennaCLGetCLMemRead(Vec v, PETSC_UINTPTR_T* mem)
1332: {
1333: #if !defined(PETSC_HAVE_OPENCL)
1334:   SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"PETSc must be configured with --with-opencl to get a Vec's cl_mem");
1335: #else
1337:   const ViennaCLVector *v_vcl;
1338:   VecViennaCLGetArrayRead(v, &v_vcl);
1339:   try{
1340:     const cl_mem ocl_mem = v_vcl->handle().opencl_handle().get();
1341:     clRetainMemObject(ocl_mem);
1342:     *mem = (PETSC_UINTPTR_T)(ocl_mem);
1343:   } catch (std::exception const & ex) {
1344:     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
1345:   }
1346:   return 0;
1347: #endif
1348: }

1350: /*@C
1351:   VecViennaCLGetCLMemWrite - Provides access to the the CL buffer inside a Vec.

1353:   Caller should cast (*mem) to (const cl_mem). Caller is responsible for
1354:   invoking clReleaseMemObject().

1356:   The device pointer has to be released by calling
1357:   VecViennaCLRestoreCLMemWrite().  Upon restoring the vector data the data on
1358:   the host will be marked as out of date.  A subsequent access of the host data
1359:   will thus incur a data transfer from the device to the host.

1361:   Input Parameters:
1362: .  v    - the vector

1364:   Output Parameters:
1365: .  mem - pointer to the device buffer

1367:   Level: intermediate

1369: .seealso: VecViennaCLGetCLContext(), VecViennaCLRestoreCLMemWrite()
1370: @*/
1371: PETSC_EXTERN PetscErrorCode VecViennaCLGetCLMemWrite(Vec v, PETSC_UINTPTR_T* mem)
1372: {
1373: #if !defined(PETSC_HAVE_OPENCL)
1374:   SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"PETSc must be configured with --with-opencl to get a Vec's cl_mem");
1375: #else
1377:   ViennaCLVector *v_vcl;
1378:   VecViennaCLGetArrayWrite(v, &v_vcl);
1379:   try{
1380:     const cl_mem ocl_mem = v_vcl->handle().opencl_handle().get();
1381:     clRetainMemObject(ocl_mem);
1382:     *mem = (PETSC_UINTPTR_T)(ocl_mem);
1383:   } catch (std::exception const & ex) {
1384:     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
1385:   }

1387:   return 0;
1388: #endif
1389: }

1391: /*@C
1392:   VecViennaCLRestoreCLMemWrite - Restores a CL buffer pointer previously
1393:   acquired with VecViennaCLGetCLMemWrite().

1395:    This marks the host data as out of date.  Subsequent access to the
1396:    vector data on the host side with for instance VecGetArray() incurs a
1397:    data transfer.

1399:   Input Parameters:
1400: .  v    - the vector

1402:   Level: intermediate

1404: .seealso: VecViennaCLGetCLContext(), VecViennaCLGetCLMemWrite()
1405: @*/
1406: PETSC_EXTERN PetscErrorCode VecViennaCLRestoreCLMemWrite(Vec v)
1407: {
1408: #if !defined(PETSC_HAVE_OPENCL)
1409:   SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"PETSc must be configured with --with-opencl to restore a Vec's cl_mem");
1410: #else
1412:   VecViennaCLRestoreArrayWrite(v, PETSC_NULL);

1414:   return 0;
1415: #endif
1416: }

1418: /*@C
1419:   VecViennaCLGetCLMem - Provides access to the the CL buffer inside a Vec.

1421:   Caller should cast (*mem) to (const cl_mem). Caller is responsible for
1422:   invoking clReleaseMemObject().

1424:   The device pointer has to be released by calling VecViennaCLRestoreCLMem().
1425:   Upon restoring the vector data the data on the host will be marked as out of
1426:   date.  A subsequent access of the host data will thus incur a data transfer
1427:   from the device to the host.

1429:   Input Parameters:
1430: .  v    - the vector

1432:   Output Parameters:
1433: .  mem - pointer to the device buffer

1435:   Level: intermediate

1437: .seealso: VecViennaCLGetCLContext(), VecViennaCLRestoreCLMem()
1438: @*/
1439: PETSC_EXTERN PetscErrorCode VecViennaCLGetCLMem(Vec v, PETSC_UINTPTR_T* mem)
1440: {
1441: #if !defined(PETSC_HAVE_OPENCL)
1442:   SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"PETSc must be configured with --with-opencl to get a Vec's cl_mem");
1443: #else
1445:   ViennaCLVector *v_vcl;
1446:   VecViennaCLGetArray(v, &v_vcl);
1447:   try{
1448:     const cl_mem ocl_mem = v_vcl->handle().opencl_handle().get();
1449:     clRetainMemObject(ocl_mem);
1450:     *mem = (PETSC_UINTPTR_T)(ocl_mem);
1451:   } catch (std::exception const & ex) {
1452:     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
1453:   }

1455:   return 0;
1456: #endif
1457: }

1459: /*@C
1460:   VecViennaCLRestoreCLMem - Restores a CL buffer pointer previously
1461:   acquired with VecViennaCLGetCLMem().

1463:    This marks the host data as out of date. Subsequent access to the vector
1464:    data on the host side with for instance VecGetArray() incurs a data
1465:    transfer.

1467:   Input Parameters:
1468: .  v    - the vector

1470:   Level: intermediate

1472: .seealso: VecViennaCLGetCLContext(), VecViennaCLGetCLMem()
1473: @*/
1474: PETSC_EXTERN PetscErrorCode VecViennaCLRestoreCLMem(Vec v)
1475: {
1476: #if !defined(PETSC_HAVE_OPENCL)
1477:   SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"PETSc must be configured with --with-opencl to restore a Vec's cl_mem");
1478: #else
1480:   VecViennaCLRestoreArray(v, PETSC_NULL);

1482:   return 0;
1483: #endif
1484: }

1486: PetscErrorCode VecCreate_SeqViennaCL_Private(Vec V,const ViennaCLVector *array)
1487: {
1488:   Vec_ViennaCL   *vecviennacl;
1489:   PetscMPIInt    size;

1491:   MPI_Comm_size(PetscObjectComm((PetscObject)V),&size);
1493:   VecCreate_Seq_Private(V,0);
1494:   PetscObjectChangeTypeName((PetscObject)V,VECSEQVIENNACL);
1495:   VecBindToCPU_SeqAIJViennaCL(V,PETSC_FALSE);
1496:   V->ops->bindtocpu = VecBindToCPU_SeqAIJViennaCL;

1498:   if (array) {
1499:     if (!V->spptr) V->spptr = new Vec_ViennaCL;
1500:     vecviennacl = (Vec_ViennaCL*)V->spptr;
1501:     vecviennacl->GPUarray_allocated = 0;
1502:     vecviennacl->GPUarray           = (ViennaCLVector*)array;
1503:     V->offloadmask = PETSC_OFFLOAD_UNALLOCATED;
1504:   }

1506:   return 0;
1507: }