Actual source code: vecviennacl.cxx
  1: /*
  2:    Implements the sequential ViennaCL vectors.
  3: */

  5: #include <petscconf.h>
  6: #include <petsc/private/vecimpl.h>
  7: #include <../src/vec/vec/impls/dvecimpl.h>
  8: #include <../src/vec/vec/impls/seq/seqviennacl/viennaclvecimpl.h>

 10: #include <vector>

 12: #include "viennacl/linalg/inner_prod.hpp"
 13: #include "viennacl/linalg/norm_1.hpp"
 14: #include "viennacl/linalg/norm_2.hpp"
 15: #include "viennacl/linalg/norm_inf.hpp"

 17: #ifdef VIENNACL_WITH_OPENCL
 18: #include "viennacl/ocl/backend.hpp"
 19: #endif

 21: PETSC_EXTERN PetscErrorCode VecViennaCLGetArray(Vec v, ViennaCLVector **a)
 22: {

 27:   *a   = 0;
 28:   VecViennaCLCopyToGPU(v);
 29:   *a   = ((Vec_ViennaCL*)v->spptr)->GPUarray;
 30:   ViennaCLWaitForGPU();
 31:   return(0);
 32: }

 34: PETSC_EXTERN PetscErrorCode VecViennaCLRestoreArray(Vec v, ViennaCLVector **a)
 35: {

 40:   v->offloadmask = PETSC_OFFLOAD_GPU;

 42:   PetscObjectStateIncrease((PetscObject)v);
 43:   return(0);
 44: }

 46: PETSC_EXTERN PetscErrorCode VecViennaCLGetArrayRead(Vec v, const ViennaCLVector **a)
 47: {

 52:   *a   = 0;
 53:   VecViennaCLCopyToGPU(v);
 54:   *a   = ((Vec_ViennaCL*)v->spptr)->GPUarray;
 55:   ViennaCLWaitForGPU();
 56:   return(0);
 57: }

 59: PETSC_EXTERN PetscErrorCode VecViennaCLRestoreArrayRead(Vec v, const ViennaCLVector **a)
 60: {
 63:   return(0);
 64: }

 66: PETSC_EXTERN PetscErrorCode VecViennaCLGetArrayWrite(Vec v, ViennaCLVector **a)
 67: {

 72:   *a   = 0;
 73:   VecViennaCLAllocateCheck(v);
 74:   *a   = ((Vec_ViennaCL*)v->spptr)->GPUarray;
 75:   ViennaCLWaitForGPU();
 76:   return(0);
 77: }

 79: PETSC_EXTERN PetscErrorCode VecViennaCLRestoreArrayWrite(Vec v, ViennaCLVector **a)
 80: {

 85:   v->offloadmask = PETSC_OFFLOAD_GPU;

 87:   PetscObjectStateIncrease((PetscObject)v);
 88:   return(0);
 89: }

 91: PETSC_EXTERN PetscErrorCode PetscViennaCLInit()
 92: {
 93:   PetscErrorCode       ierr;
 94:   char                 string[20];
 95:   PetscBool            flg,flg_cuda,flg_opencl,flg_openmp;

 98:   /* ViennaCL backend selection: CUDA, OpenCL, or OpenMP */
 99:   PetscOptionsGetString(NULL,NULL,"-viennacl_backend",string,sizeof(string),&flg);
100:   if (flg) {
101:     try {
102:       PetscStrcasecmp(string,"cuda",&flg_cuda);
103:       PetscStrcasecmp(string,"opencl",&flg_opencl);
104:       PetscStrcasecmp(string,"openmp",&flg_openmp);

106:       /* A default (sequential) CPU backend is always available - even if OpenMP is not enabled. */
107:       if (flg_openmp) viennacl::backend::default_memory_type(viennacl::MAIN_MEMORY);
108: #if defined(PETSC_HAVE_CUDA)
109:       else if (flg_cuda) viennacl::backend::default_memory_type(viennacl::CUDA_MEMORY);
110: #endif
111: #if defined(PETSC_HAVE_OPENCL)
112:       else if (flg_opencl) viennacl::backend::default_memory_type(viennacl::OPENCL_MEMORY);
113: #endif
114:       else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: Backend not recognized or available: %s.\n Pass -viennacl_view to see available backends for ViennaCL.\n", string);
115:     } catch (std::exception const & ex) {
116:       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
117:     }
118:   }

120: #if defined(PETSC_HAVE_CUDA)
121:   PetscCUDAInitializeCheck(); /* For CUDA event timers */
122: #endif

124: #if defined(PETSC_HAVE_OPENCL)
125:   /* ViennaCL OpenCL device type configuration */
126:   PetscOptionsGetString(NULL,NULL,"-viennacl_opencl_device_type",string,sizeof(string),&flg);
127:   if (flg) {
128:     try {
129:       PetscStrcasecmp(string,"cpu",&flg);
130:       if (flg) viennacl::ocl::set_context_device_type(0, CL_DEVICE_TYPE_CPU);

132:       PetscStrcasecmp(string,"gpu",&flg);
133:       if (flg) viennacl::ocl::set_context_device_type(0, CL_DEVICE_TYPE_GPU);

135:       PetscStrcasecmp(string,"accelerator",&flg);
136:       if (flg) viennacl::ocl::set_context_device_type(0, CL_DEVICE_TYPE_ACCELERATOR);
137:     } catch (std::exception const & ex) {
138:       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
139:     }
140:   }
141: #endif

143:   /* Print available backends */
144:   PetscOptionsHasName(NULL,NULL,"-viennacl_view",&flg);
145:   if (flg) {
146:     PetscPrintf(PETSC_COMM_WORLD, "ViennaCL backends available: ");
147: #if defined(PETSC_HAVE_CUDA)
148:     PetscPrintf(PETSC_COMM_WORLD, "CUDA, ");
149: #endif
150: #if defined(PETSC_HAVE_OPENCL)
151:     PetscPrintf(PETSC_COMM_WORLD, "OpenCL, ");
152: #endif
153: #if defined(PETSC_HAVE_OPENMP)
154:     PetscPrintf(PETSC_COMM_WORLD, "OpenMP ");
155: #else
156:     PetscPrintf(PETSC_COMM_WORLD, "OpenMP (1 thread) ");
157: #endif
158:     PetscPrintf(PETSC_COMM_WORLD, "\n");

160:     /* Print selected backends */
161:     PetscPrintf(PETSC_COMM_WORLD, "ViennaCL backend  selected: ");
162: #if defined(PETSC_HAVE_CUDA)
163:     if (viennacl::backend::default_memory_type() == viennacl::CUDA_MEMORY) {
164:       PetscPrintf(PETSC_COMM_WORLD, "CUDA ");
165:     }
166: #endif
167: #if defined(PETSC_HAVE_OPENCL)
168:     if (viennacl::backend::default_memory_type() == viennacl::OPENCL_MEMORY) {
169:       PetscPrintf(PETSC_COMM_WORLD, "OpenCL ");
170:     }
171: #endif
172: #if defined(PETSC_HAVE_OPENMP)
173:     if (viennacl::backend::default_memory_type() == viennacl::MAIN_MEMORY) {
174:       PetscPrintf(PETSC_COMM_WORLD, "OpenMP ");
175:     }
176: #else
177:     if (viennacl::backend::default_memory_type() == viennacl::MAIN_MEMORY) {
178:       PetscPrintf(PETSC_COMM_WORLD, "OpenMP (sequential - consider reconfiguration: --with-openmp=1) ");
179:     }
180: #endif
181:     PetscPrintf(PETSC_COMM_WORLD, "\n");
182:   }
183:   return(0);
184: }

186: /*
187:     Allocates space for the vector array on the Host if it does not exist.
188:     Does NOT change the PetscViennaCLFlag for the vector
189:     Does NOT zero the ViennaCL array
190:  */
191: PETSC_EXTERN PetscErrorCode VecViennaCLAllocateCheckHost(Vec v)
192: {
194:   PetscScalar    *array;
195:   Vec_Seq        *s;
196:   PetscInt       n = v->map->n;

199:   s    = (Vec_Seq*)v->data;
200:   VecViennaCLAllocateCheck(v);
201:   if (s->array == 0) {
202:     PetscMalloc1(n,&array);
203:     PetscLogObjectMemory((PetscObject)v,n*sizeof(PetscScalar));
204:     s->array           = array;
205:     s->array_allocated = array;
206:   }
207:   return(0);
208: }

210: /*
211:     Allocates space for the vector array on the GPU if it does not exist.
212:     Does NOT change the PetscViennaCLFlag for the vector
213:     Does NOT zero the ViennaCL array

215:  */
216: PetscErrorCode VecViennaCLAllocateCheck(Vec v)
217: {
219:   if (!v->spptr) {
220:     try {
221:       v->spptr                            = new Vec_ViennaCL;
222:       ((Vec_ViennaCL*)v->spptr)->GPUarray_allocated = new ViennaCLVector((PetscBLASInt)v->map->n);
223:       ((Vec_ViennaCL*)v->spptr)->GPUarray = ((Vec_ViennaCL*)v->spptr)->GPUarray_allocated;

225:     } catch(std::exception const & ex) {
226:       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
227:     }
228:   }
229:   return(0);
230: }

232: /* Copies a vector from the CPU to the GPU unless we already have an up-to-date copy on the GPU */
233: PetscErrorCode VecViennaCLCopyToGPU(Vec v)
234: {

239:   VecViennaCLAllocateCheck(v);
240:   if (v->map->n > 0) {
241:     if (v->offloadmask == PETSC_OFFLOAD_CPU) {
242:       PetscLogEventBegin(VEC_ViennaCLCopyToGPU,v,0,0,0);
243:       try {
244:         ViennaCLVector *vec = ((Vec_ViennaCL*)v->spptr)->GPUarray;
245:         viennacl::fast_copy(*(PetscScalar**)v->data, *(PetscScalar**)v->data + v->map->n, vec->begin());
246:         ViennaCLWaitForGPU();
247:       } catch(std::exception const & ex) {
248:         SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
249:       }
250:       PetscLogCpuToGpu((v->map->n)*sizeof(PetscScalar));
251:       PetscLogEventEnd(VEC_ViennaCLCopyToGPU,v,0,0,0);
252:       v->offloadmask = PETSC_OFFLOAD_BOTH;
253:     }
254:   }
255:   return(0);
256: }

258: /*
259:      VecViennaCLCopyFromGPU - Copies a vector from the GPU to the CPU unless we already have an up-to-date copy on the CPU
260: */
261: PetscErrorCode VecViennaCLCopyFromGPU(Vec v)
262: {

267:   VecViennaCLAllocateCheckHost(v);
268:   if (v->offloadmask == PETSC_OFFLOAD_GPU) {
269:     PetscLogEventBegin(VEC_ViennaCLCopyFromGPU,v,0,0,0);
270:     try {
271:       ViennaCLVector *vec = ((Vec_ViennaCL*)v->spptr)->GPUarray;
272:       viennacl::fast_copy(vec->begin(),vec->end(),*(PetscScalar**)v->data);
273:       ViennaCLWaitForGPU();
274:     } catch(std::exception const & ex) {
275:       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
276:     }
277:     PetscLogGpuToCpu((v->map->n)*sizeof(PetscScalar));
278:     PetscLogEventEnd(VEC_ViennaCLCopyFromGPU,v,0,0,0);
279:     v->offloadmask = PETSC_OFFLOAD_BOTH;
280:   }
281:   return(0);
282: }

284: /* Copy on CPU */
285: static PetscErrorCode VecCopy_SeqViennaCL_Private(Vec xin,Vec yin)
286: {
287:   PetscScalar       *ya;
288:   const PetscScalar *xa;
289:   PetscErrorCode    ierr;

292:   VecViennaCLAllocateCheckHost(xin);
293:   VecViennaCLAllocateCheckHost(yin);
294:   if (xin != yin) {
295:     VecGetArrayRead(xin,&xa);
296:     VecGetArray(yin,&ya);
297:     PetscArraycpy(ya,xa,xin->map->n);
298:     VecRestoreArrayRead(xin,&xa);
299:     VecRestoreArray(yin,&ya);
300:   }
301:   return(0);
302: }

304: static PetscErrorCode VecSetRandom_SeqViennaCL_Private(Vec xin,PetscRandom r)
305: {
307:   PetscInt       n = xin->map->n,i;
308:   PetscScalar    *xx;

311:   VecGetArray(xin,&xx);
312:   for (i=0; i<n; i++) {PetscRandomGetValue(r,&xx[i]);}
313:   VecRestoreArray(xin,&xx);
314:   return(0);
315: }

317: static PetscErrorCode VecDestroy_SeqViennaCL_Private(Vec v)
318: {
319:   Vec_Seq        *vs = (Vec_Seq*)v->data;

323:   PetscObjectSAWsViewOff(v);
324: #if defined(PETSC_USE_LOG)
325:   PetscLogObjectState((PetscObject)v,"Length=%D",v->map->n);
326: #endif
327:   if (vs->array_allocated) { PetscFree(vs->array_allocated); }
328:   PetscFree(vs);
329:   return(0);
330: }

332: static PetscErrorCode VecResetArray_SeqViennaCL_Private(Vec vin)
333: {
334:   Vec_Seq *v = (Vec_Seq*)vin->data;

337:   v->array         = v->unplacedarray;
338:   v->unplacedarray = 0;
339:   return(0);
340: }

342: /*MC
343:    VECSEQVIENNACL - VECSEQVIENNACL = "seqviennacl" - The basic sequential vector, modified to use ViennaCL

345:    Options Database Keys:
346: . -vec_type seqviennacl - sets the vector type to VECSEQVIENNACL during a call to VecSetFromOptions()

348:   Level: beginner

350: .seealso: VecCreate(), VecSetType(), VecSetFromOptions(), VecCreateSeqWithArray(), VECMPI, VecType, VecCreateMPI(), VecCreateSeq()
351: M*/

353: PetscErrorCode VecAYPX_SeqViennaCL(Vec yin, PetscScalar alpha, Vec xin)
354: {
355:   const ViennaCLVector  *xgpu;
356:   ViennaCLVector        *ygpu;
357:   PetscErrorCode        ierr;

360:   VecViennaCLGetArrayRead(xin,&xgpu);
361:   VecViennaCLGetArray(yin,&ygpu);
362:   PetscLogGpuTimeBegin();
363:   try {
364:     if (alpha != 0.0 && xin->map->n > 0) {
365:       *ygpu = *xgpu + alpha * *ygpu;
366:       PetscLogGpuFlops(2.0*yin->map->n);
367:     } else {
368:       *ygpu = *xgpu;
369:     }
370:     ViennaCLWaitForGPU();
371:   } catch(std::exception const & ex) {
372:     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
373:   }
374:   PetscLogGpuTimeEnd();
375:   VecViennaCLRestoreArrayRead(xin,&xgpu);
376:   VecViennaCLRestoreArray(yin,&ygpu);
377:   return(0);
378: }

380: PetscErrorCode VecAXPY_SeqViennaCL(Vec yin,PetscScalar alpha,Vec xin)
381: {
382:   const ViennaCLVector  *xgpu;
383:   ViennaCLVector        *ygpu;
384:   PetscErrorCode        ierr;

387:   if (alpha != 0.0 && xin->map->n > 0) {
388:     VecViennaCLGetArrayRead(xin,&xgpu);
389:     VecViennaCLGetArray(yin,&ygpu);
390:     PetscLogGpuTimeBegin();
391:     try {
392:       *ygpu += alpha * *xgpu;
393:       ViennaCLWaitForGPU();
394:     } catch(std::exception const & ex) {
395:       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
396:     }
397:     PetscLogGpuTimeEnd();
398:     VecViennaCLRestoreArrayRead(xin,&xgpu);
399:     VecViennaCLRestoreArray(yin,&ygpu);
400:     PetscLogGpuFlops(2.0*yin->map->n);
401:   }
402:   return(0);
403: }

405: PetscErrorCode VecPointwiseDivide_SeqViennaCL(Vec win, Vec xin, Vec yin)
406: {
407:   const ViennaCLVector  *xgpu,*ygpu;
408:   ViennaCLVector        *wgpu;
409:   PetscErrorCode        ierr;

412:   if (xin->map->n > 0) {
413:     VecViennaCLGetArrayRead(xin,&xgpu);
414:     VecViennaCLGetArrayRead(yin,&ygpu);
415:     VecViennaCLGetArrayWrite(win,&wgpu);
416:     PetscLogGpuTimeBegin();
417:     try {
418:       *wgpu = viennacl::linalg::element_div(*xgpu, *ygpu);
419:       ViennaCLWaitForGPU();
420:     } catch(std::exception const & ex) {
421:       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
422:     }
423:     PetscLogGpuTimeEnd();
424:     PetscLogGpuFlops(win->map->n);
425:     VecViennaCLRestoreArrayRead(xin,&xgpu);
426:     VecViennaCLRestoreArrayRead(yin,&ygpu);
427:     VecViennaCLRestoreArrayWrite(win,&wgpu);
428:   }
429:   return(0);
430: }

432: PetscErrorCode VecWAXPY_SeqViennaCL(Vec win,PetscScalar alpha,Vec xin, Vec yin)
433: {
434:   const ViennaCLVector  *xgpu,*ygpu;
435:   ViennaCLVector        *wgpu;
436:   PetscErrorCode        ierr;

439:   if (alpha == 0.0 && xin->map->n > 0) {
440:     VecCopy_SeqViennaCL(yin,win);
441:   } else {
442:     VecViennaCLGetArrayRead(xin,&xgpu);
443:     VecViennaCLGetArrayRead(yin,&ygpu);
444:     VecViennaCLGetArrayWrite(win,&wgpu);
445:     PetscLogGpuTimeBegin();
446:     if (alpha == 1.0) {
447:       try {
448:         *wgpu = *ygpu + *xgpu;
449:       } catch(std::exception const & ex) {
450:         SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
451:       }
452:       PetscLogGpuFlops(win->map->n);
453:     } else if (alpha == -1.0) {
454:       try {
455:         *wgpu = *ygpu - *xgpu;
456:       } catch(std::exception const & ex) {
457:         SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
458:       }
459:       PetscLogGpuFlops(win->map->n);
460:     } else {
461:       try {
462:         *wgpu = *ygpu + alpha * *xgpu;
463:       } catch(std::exception const & ex) {
464:         SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
465:       }
466:       PetscLogGpuFlops(2*win->map->n);
467:     }
468:     ViennaCLWaitForGPU();
469:     PetscLogGpuTimeEnd();
470:     VecViennaCLRestoreArrayRead(xin,&xgpu);
471:     VecViennaCLRestoreArrayRead(yin,&ygpu);
472:     VecViennaCLRestoreArrayWrite(win,&wgpu);
473:   }
474:   return(0);
475: }

477: /*
478:  * Operation x = x + sum_i alpha_i * y_i for vectors x, y_i and scalars alpha_i
479:  *
480:  * ViennaCL supports a fast evaluation of x += alpha * y and x += alpha * y + beta * z,
481:  * hence there is an iterated application of these until the final result is obtained
482:  */
483: PetscErrorCode VecMAXPY_SeqViennaCL(Vec xin, PetscInt nv,const PetscScalar *alpha,Vec *y)
484: {
486:   PetscInt       j;

489:   for (j = 0; j < nv; ++j) {
490:     if (j+1 < nv) {
491:       VecAXPBYPCZ_SeqViennaCL(xin,alpha[j],alpha[j+1],1.0,y[j],y[j+1]);
492:       ++j;
493:     } else {
494:       VecAXPY_SeqViennaCL(xin,alpha[j],y[j]);
495:     }
496:   }
497:   ViennaCLWaitForGPU();
498:   return(0);
499: }

501: PetscErrorCode VecDot_SeqViennaCL(Vec xin,Vec yin,PetscScalar *z)
502: {
503:   const ViennaCLVector  *xgpu,*ygpu;
504:   PetscErrorCode        ierr;

507:   if (xin->map->n > 0) {
508:     VecViennaCLGetArrayRead(xin,&xgpu);
509:     VecViennaCLGetArrayRead(yin,&ygpu);
510:     PetscLogGpuTimeBegin();
511:     try {
512:       *z = viennacl::linalg::inner_prod(*xgpu,*ygpu);
513:     } catch(std::exception const & ex) {
514:       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
515:     }
516:     ViennaCLWaitForGPU();
517:     PetscLogGpuTimeEnd();
518:     if (xin->map->n >0) {
519:       PetscLogGpuFlops(2.0*xin->map->n-1);
520:     }
521:     VecViennaCLRestoreArrayRead(xin,&xgpu);
522:     VecViennaCLRestoreArrayRead(yin,&ygpu);
523:   } else *z = 0.0;
524:   return(0);
525: }

527: /*
528:  * Operation z[j] = dot(x, y[j])
529:  *
530:  * We use an iterated application of dot() for each j. For small ranges of j this is still faster than an allocation of extra memory in order to use gemv().
531:  */
532: PetscErrorCode VecMDot_SeqViennaCL(Vec xin,PetscInt nv,const Vec yin[],PetscScalar *z)
533: {
534:   PetscErrorCode       ierr;
535:   PetscInt             n = xin->map->n,i;
536:   const ViennaCLVector *xgpu,*ygpu;
537:   Vec                  *yyin = (Vec*)yin;
538:   std::vector<viennacl::vector_base<PetscScalar> const *> ygpu_array(nv);

541:   if (xin->map->n > 0) {
542:     VecViennaCLGetArrayRead(xin,&xgpu);
543:     for (i=0; i<nv; i++) {
544:       VecViennaCLGetArrayRead(yyin[i],&ygpu);
545:       ygpu_array[i] = ygpu;
546:     }
547:     PetscLogGpuTimeBegin();
548:     viennacl::vector_tuple<PetscScalar> y_tuple(ygpu_array);
549:     ViennaCLVector result = viennacl::linalg::inner_prod(*xgpu, y_tuple);
550:     viennacl::copy(result.begin(), result.end(), z);
551:     for (i=0; i<nv; i++) {
552:       VecViennaCLRestoreArrayRead(yyin[i],&ygpu);
553:     }
554:     ViennaCLWaitForGPU();
555:     PetscLogGpuTimeEnd();
556:     VecViennaCLRestoreArrayRead(xin,&xgpu);
557:     PetscLogGpuFlops(PetscMax(nv*(2.0*n-1),0.0));
558:   } else {
559:     for (i=0; i<nv; i++) z[i] = 0.0;
560:   }
561:   return(0);
562: }

564: PetscErrorCode VecMTDot_SeqViennaCL(Vec xin,PetscInt nv,const Vec yin[],PetscScalar *z)
565: {

569:   /* Since complex case is not supported at the moment, this is the same as VecMDot_SeqViennaCL */
570:   VecMDot_SeqViennaCL(xin,nv,yin,z);
571:   ViennaCLWaitForGPU();
572:   return(0);
573: }

575: PetscErrorCode VecSet_SeqViennaCL(Vec xin,PetscScalar alpha)
576: {
577:   ViennaCLVector *xgpu;

581:   if (xin->map->n > 0) {
582:     VecViennaCLGetArrayWrite(xin,&xgpu);
583:     PetscLogGpuTimeBegin();
584:     try {
585:       *xgpu = viennacl::scalar_vector<PetscScalar>(xgpu->size(), alpha);
586:       ViennaCLWaitForGPU();
587:     } catch(std::exception const & ex) {
588:       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
589:     }
590:     PetscLogGpuTimeEnd();
591:     VecViennaCLRestoreArrayWrite(xin,&xgpu);
592:   }
593:   return(0);
594: }

596: PetscErrorCode VecScale_SeqViennaCL(Vec xin, PetscScalar alpha)
597: {
598:   ViennaCLVector *xgpu;

602:   if (alpha == 0.0 && xin->map->n > 0) {
603:     VecSet_SeqViennaCL(xin,alpha);
604:     PetscLogGpuFlops(xin->map->n);
605:   } else if (alpha != 1.0 && xin->map->n > 0) {
606:     VecViennaCLGetArray(xin,&xgpu);
607:     PetscLogGpuTimeBegin();
608:     try {
609:       *xgpu *= alpha;
610:       ViennaCLWaitForGPU();
611:     } catch(std::exception const & ex) {
612:       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
613:     }
614:     PetscLogGpuTimeEnd();
615:     VecViennaCLRestoreArray(xin,&xgpu);
616:     PetscLogGpuFlops(xin->map->n);
617:   }
618:   return(0);
619: }

621: PetscErrorCode VecTDot_SeqViennaCL(Vec xin,Vec yin,PetscScalar *z)
622: {

626:   /* Since complex case is not supported at the moment, this is the same as VecDot_SeqViennaCL */
627:   VecDot_SeqViennaCL(xin, yin, z);
628:   ViennaCLWaitForGPU();
629:   return(0);
630: }

632: PetscErrorCode VecCopy_SeqViennaCL(Vec xin,Vec yin)
633: {
634:   const ViennaCLVector *xgpu;
635:   ViennaCLVector       *ygpu;
636:   PetscErrorCode       ierr;

639:   if (xin != yin && xin->map->n > 0) {
640:     if (xin->offloadmask == PETSC_OFFLOAD_GPU) {
641:       VecViennaCLGetArrayRead(xin,&xgpu);
642:       VecViennaCLGetArrayWrite(yin,&ygpu);
643:       PetscLogGpuTimeBegin();
644:       try {
645:         *ygpu = *xgpu;
646:         ViennaCLWaitForGPU();
647:       } catch(std::exception const & ex) {
648:         SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
649:       }
650:       PetscLogGpuTimeEnd();
651:       VecViennaCLRestoreArrayRead(xin,&xgpu);
652:       VecViennaCLRestoreArrayWrite(yin,&ygpu);

654:     } else if (xin->offloadmask == PETSC_OFFLOAD_CPU) {
655:       /* copy in CPU if we are on the CPU*/
656:       VecCopy_SeqViennaCL_Private(xin,yin);
657:       ViennaCLWaitForGPU();
658:     } else if (xin->offloadmask == PETSC_OFFLOAD_BOTH) {
659:       /* if xin is valid in both places, see where yin is and copy there (because it's probably where we'll want to next use it) */
660:       if (yin->offloadmask == PETSC_OFFLOAD_CPU) {
661:         /* copy in CPU */
662:         VecCopy_SeqViennaCL_Private(xin,yin);
663:         ViennaCLWaitForGPU();
664:       } else if (yin->offloadmask == PETSC_OFFLOAD_GPU) {
665:         /* copy in GPU */
666:         VecViennaCLGetArrayRead(xin,&xgpu);
667:         VecViennaCLGetArrayWrite(yin,&ygpu);
668:         PetscLogGpuTimeBegin();
669:         try {
670:           *ygpu = *xgpu;
671:           ViennaCLWaitForGPU();
672:         } catch(std::exception const & ex) {
673:           SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
674:         }
675:         PetscLogGpuTimeEnd();
676:         VecViennaCLRestoreArrayRead(xin,&xgpu);
677:         VecViennaCLRestoreArrayWrite(yin,&ygpu);
678:       } else if (yin->offloadmask == PETSC_OFFLOAD_BOTH) {
679:         /* xin and yin are both valid in both places (or yin was unallocated before the earlier call to allocatecheck
680:            default to copy in GPU (this is an arbitrary choice) */
681:         VecViennaCLGetArrayRead(xin,&xgpu);
682:         VecViennaCLGetArrayWrite(yin,&ygpu);
683:         PetscLogGpuTimeBegin();
684:         try {
685:           *ygpu = *xgpu;
686:           ViennaCLWaitForGPU();
687:         } catch(std::exception const & ex) {
688:           SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
689:         }
690:         PetscLogGpuTimeEnd();
691:         VecViennaCLRestoreArrayRead(xin,&xgpu);
692:         VecViennaCLRestoreArrayWrite(yin,&ygpu);
693:       } else {
694:         VecCopy_SeqViennaCL_Private(xin,yin);
695:         ViennaCLWaitForGPU();
696:       }
697:     }
698:   }
699:   return(0);
700: }

702: PetscErrorCode VecSwap_SeqViennaCL(Vec xin,Vec yin)
703: {
705:   ViennaCLVector *xgpu,*ygpu;

708:   if (xin != yin && xin->map->n > 0) {
709:     VecViennaCLGetArray(xin,&xgpu);
710:     VecViennaCLGetArray(yin,&ygpu);
711:     PetscLogGpuTimeBegin();
712:     try {
713:       viennacl::swap(*xgpu, *ygpu);
714:       ViennaCLWaitForGPU();
715:     } catch(std::exception const & ex) {
716:       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
717:     }
718:     PetscLogGpuTimeEnd();
719:     VecViennaCLRestoreArray(xin,&xgpu);
720:     VecViennaCLRestoreArray(yin,&ygpu);
721:   }
722:   return(0);
723: }

725: // y = alpha * x + beta * y
726: PetscErrorCode VecAXPBY_SeqViennaCL(Vec yin,PetscScalar alpha,PetscScalar beta,Vec xin)
727: {
728:   PetscErrorCode       ierr;
729:   PetscScalar          a = alpha,b = beta;
730:   const ViennaCLVector *xgpu;
731:   ViennaCLVector       *ygpu;

734:   if (a == 0.0 && xin->map->n > 0) {
735:     VecScale_SeqViennaCL(yin,beta);
736:   } else if (b == 1.0 && xin->map->n > 0) {
737:     VecAXPY_SeqViennaCL(yin,alpha,xin);
738:   } else if (a == 1.0 && xin->map->n > 0) {
739:     VecAYPX_SeqViennaCL(yin,beta,xin);
740:   } else if (b == 0.0 && xin->map->n > 0) {
741:     VecViennaCLGetArrayRead(xin,&xgpu);
742:     VecViennaCLGetArray(yin,&ygpu);
743:     PetscLogGpuTimeBegin();
744:     try {
745:       *ygpu = *xgpu * alpha;
746:       ViennaCLWaitForGPU();
747:     } catch(std::exception const & ex) {
748:       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
749:     }
750:     PetscLogGpuTimeEnd();
751:     PetscLogGpuFlops(xin->map->n);
752:     VecViennaCLRestoreArrayRead(xin,&xgpu);
753:     VecViennaCLRestoreArray(yin,&ygpu);
754:   } else if (xin->map->n > 0) {
755:     VecViennaCLGetArrayRead(xin,&xgpu);
756:     VecViennaCLGetArray(yin,&ygpu);
757:     PetscLogGpuTimeBegin();
758:     try {
759:       *ygpu = *xgpu * alpha + *ygpu * beta;
760:       ViennaCLWaitForGPU();
761:     } catch(std::exception const & ex) {
762:       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
763:     }
764:     PetscLogGpuTimeEnd();
765:     VecViennaCLRestoreArrayRead(xin,&xgpu);
766:     VecViennaCLRestoreArray(yin,&ygpu);
767:     PetscLogGpuFlops(3.0*xin->map->n);
768:   }
769:   return(0);
770: }

772: /* operation  z = alpha * x + beta *y + gamma *z*/
773: PetscErrorCode VecAXPBYPCZ_SeqViennaCL(Vec zin,PetscScalar alpha,PetscScalar beta,PetscScalar gamma,Vec xin,Vec yin)
774: {
775:   PetscErrorCode       ierr;
776:   PetscInt             n = zin->map->n;
777:   const ViennaCLVector *xgpu,*ygpu;
778:   ViennaCLVector       *zgpu;

781:   VecViennaCLGetArrayRead(xin,&xgpu);
782:   VecViennaCLGetArrayRead(yin,&ygpu);
783:   VecViennaCLGetArray(zin,&zgpu);
784:   if (alpha == 0.0 && xin->map->n > 0) {
785:     PetscLogGpuTimeBegin();
786:     try {
787:       if (beta == 0.0) {
788:         *zgpu = gamma * *zgpu;
789:         ViennaCLWaitForGPU();
790:         PetscLogGpuFlops(1.0*n);
791:       } else if (gamma == 0.0) {
792:         *zgpu = beta * *ygpu;
793:         ViennaCLWaitForGPU();
794:         PetscLogGpuFlops(1.0*n);
795:       } else {
796:         *zgpu = beta * *ygpu + gamma * *zgpu;
797:         ViennaCLWaitForGPU();
798:         PetscLogGpuFlops(3.0*n);
799:       }
800:     } catch(std::exception const & ex) {
801:       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
802:     }
803:     PetscLogGpuTimeEnd();
804:     PetscLogGpuFlops(3.0*n);
805:   } else if (beta == 0.0 && xin->map->n > 0) {
806:     PetscLogGpuTimeBegin();
807:     try {
808:       if (gamma == 0.0) {
809:         *zgpu = alpha * *xgpu;
810:         ViennaCLWaitForGPU();
811:         PetscLogGpuFlops(1.0*n);
812:       } else {
813:         *zgpu = alpha * *xgpu + gamma * *zgpu;
814:         ViennaCLWaitForGPU();
815:         PetscLogGpuFlops(3.0*n);
816:       }
817:     } catch(std::exception const & ex) {
818:       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
819:     }
820:     PetscLogGpuTimeEnd();
821:   } else if (gamma == 0.0 && xin->map->n > 0) {
822:     PetscLogGpuTimeBegin();
823:     try {
824:       *zgpu = alpha * *xgpu + beta * *ygpu;
825:       ViennaCLWaitForGPU();
826:     } catch(std::exception const & ex) {
827:       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
828:     }
829:     PetscLogGpuTimeEnd();
830:     PetscLogGpuFlops(3.0*n);
831:   } else if (xin->map->n > 0) {
832:     PetscLogGpuTimeBegin();
833:     try {
834:       /* Split operation into two steps. This is not completely ideal, but avoids temporaries (which are far worse) */
835:       if (gamma != 1.0)
836:         *zgpu *= gamma;
837:       *zgpu += alpha * *xgpu + beta * *ygpu;
838:       ViennaCLWaitForGPU();
839:     } catch(std::exception const & ex) {
840:       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
841:     }
842:     PetscLogGpuTimeEnd();
843:     VecViennaCLRestoreArray(zin,&zgpu);
844:     VecViennaCLRestoreArrayRead(xin,&xgpu);
845:     VecViennaCLRestoreArrayRead(yin,&ygpu);
846:     PetscLogGpuFlops(5.0*n);
847:   }
848:   return(0);
849: }

851: PetscErrorCode VecPointwiseMult_SeqViennaCL(Vec win,Vec xin,Vec yin)
852: {
853:   PetscErrorCode       ierr;
854:   PetscInt             n = win->map->n;
855:   const ViennaCLVector *xgpu,*ygpu;
856:   ViennaCLVector       *wgpu;

859:   if (xin->map->n > 0) {
860:     VecViennaCLGetArrayRead(xin,&xgpu);
861:     VecViennaCLGetArrayRead(yin,&ygpu);
862:     VecViennaCLGetArray(win,&wgpu);
863:     PetscLogGpuTimeBegin();
864:     try {
865:       *wgpu = viennacl::linalg::element_prod(*xgpu, *ygpu);
866:       ViennaCLWaitForGPU();
867:     } catch(std::exception const & ex) {
868:       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
869:     }
870:     PetscLogGpuTimeEnd();
871:     VecViennaCLRestoreArrayRead(xin,&xgpu);
872:     VecViennaCLRestoreArrayRead(yin,&ygpu);
873:     VecViennaCLRestoreArray(win,&wgpu);
874:     PetscLogGpuFlops(n);
875:   }
876:   return(0);
877: }

879: PetscErrorCode VecNorm_SeqViennaCL(Vec xin,NormType type,PetscReal *z)
880: {
881:   PetscErrorCode       ierr;
882:   PetscInt             n = xin->map->n;
883:   PetscBLASInt         bn;
884:   const ViennaCLVector *xgpu;

887:   if (xin->map->n > 0) {
888:     PetscBLASIntCast(n,&bn);
889:     VecViennaCLGetArrayRead(xin,&xgpu);
890:     if (type == NORM_2 || type == NORM_FROBENIUS) {
891:       PetscLogGpuTimeBegin();
892:       try {
893:         *z = viennacl::linalg::norm_2(*xgpu);
894:         ViennaCLWaitForGPU();
895:       } catch(std::exception const & ex) {
896:         SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
897:       }
898:       PetscLogGpuTimeEnd();
899:       PetscLogGpuFlops(PetscMax(2.0*n-1,0.0));
900:     } else if (type == NORM_INFINITY) {
901:       PetscLogGpuTimeBegin();
902:       try {
903:         *z = viennacl::linalg::norm_inf(*xgpu);
904:         ViennaCLWaitForGPU();
905:       } catch(std::exception const & ex) {
906:         SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
907:       }
908:       PetscLogGpuTimeEnd();
909:     } else if (type == NORM_1) {
910:       PetscLogGpuTimeBegin();
911:       try {
912:         *z = viennacl::linalg::norm_1(*xgpu);
913:         ViennaCLWaitForGPU();
914:       } catch(std::exception const & ex) {
915:         SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
916:       }
917:       PetscLogGpuTimeEnd();
918:       PetscLogGpuFlops(PetscMax(n-1.0,0.0));
919:     } else if (type == NORM_1_AND_2) {
920:       PetscLogGpuTimeBegin();
921:       try {
922:         *z     = viennacl::linalg::norm_1(*xgpu);
923:         *(z+1) = viennacl::linalg::norm_2(*xgpu);
924:         ViennaCLWaitForGPU();
925:       } catch(std::exception const & ex) {
926:         SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
927:       }
928:       PetscLogGpuTimeEnd();
929:       PetscLogGpuFlops(PetscMax(2.0*n-1,0.0));
930:       PetscLogGpuFlops(PetscMax(n-1.0,0.0));
931:     }
932:     VecViennaCLRestoreArrayRead(xin,&xgpu);
933:   } else if (type == NORM_1_AND_2) {
934:     *z      = 0.0;
935:     *(z+1)  = 0.0;
936:   } else *z = 0.0;
937:   return(0);
938: }

940: PetscErrorCode VecSetRandom_SeqViennaCL(Vec xin,PetscRandom r)
941: {

945:   VecSetRandom_SeqViennaCL_Private(xin,r);
946:   xin->offloadmask = PETSC_OFFLOAD_CPU;
947:   return(0);
948: }

950: PetscErrorCode VecResetArray_SeqViennaCL(Vec vin)
951: {

956:   VecViennaCLCopyFromGPU(vin);
957:   VecResetArray_SeqViennaCL_Private(vin);
958:   vin->offloadmask = PETSC_OFFLOAD_CPU;
959:   return(0);
960: }

962: PetscErrorCode VecPlaceArray_SeqViennaCL(Vec vin,const PetscScalar *a)
963: {

968:   VecViennaCLCopyFromGPU(vin);
969:   VecPlaceArray_Seq(vin,a);
970:   vin->offloadmask = PETSC_OFFLOAD_CPU;
971:   return(0);
972: }

974: PetscErrorCode VecReplaceArray_SeqViennaCL(Vec vin,const PetscScalar *a)
975: {

980:   VecViennaCLCopyFromGPU(vin);
981:   VecReplaceArray_Seq(vin,a);
982:   vin->offloadmask = PETSC_OFFLOAD_CPU;
983:   return(0);
984: }

986: /*@C
987:    VecCreateSeqViennaCL - Creates a standard, sequential array-style vector.

989:    Collective

991:    Input Parameters:
992: +  comm - the communicator, should be PETSC_COMM_SELF
993: -  n - the vector length

995:    Output Parameter:
996: .  V - the vector

998:    Notes:
999:    Use VecDuplicate() or VecDuplicateVecs() to form additional vectors of the
1000:    same type as an existing vector.

1002:    Level: intermediate

1004: .seealso: VecCreateMPI(), VecCreate(), VecDuplicate(), VecDuplicateVecs(), VecCreateGhost()
1005: @*/
1006: PetscErrorCode VecCreateSeqViennaCL(MPI_Comm comm,PetscInt n,Vec *v)
1007: {

1011:   VecCreate(comm,v);
1012:   VecSetSizes(*v,n,n);
1013:   VecSetType(*v,VECSEQVIENNACL);
1014:   return(0);
1015: }

1017: /*@C
1018:    VecCreateSeqViennaCLWithArray - Creates a viennacl sequential array-style vector,
1019:    where the user provides the array space to store the vector values.

1021:    Collective

1023:    Input Parameters:
1024: +  comm - the communicator, should be PETSC_COMM_SELF
1025: .  bs - the block size
1026: .  n - the vector length
1027: -  array - viennacl array where the vector elements are to be stored.

1029:    Output Parameter:
1030: .  V - the vector

1032:    Notes:
1033:    Use VecDuplicate() or VecDuplicateVecs() to form additional vectors of the
1034:    same type as an existing vector.

1036:    If the user-provided array is NULL, then VecViennaCLPlaceArray() can be used
1037:    at a later stage to SET the array for storing the vector values.

1039:    PETSc does NOT free the array when the vector is destroyed via VecDestroy().
1040:    The user should not free the array until the vector is destroyed.

1042:    Level: intermediate

1044: .seealso: VecCreateMPIViennaCLWithArray(), VecCreate(), VecDuplicate(), VecDuplicateVecs(),
1045:           VecCreateGhost(), VecCreateSeq(), VecCUDAPlaceArray(), VecCreateSeqWithArray(),
1046:           VecCreateMPIWithArray()
1047: @*/
1048: PETSC_EXTERN PetscErrorCode  VecCreateSeqViennaCLWithArray(MPI_Comm comm,PetscInt bs,PetscInt n,const ViennaCLVector* array,Vec *V)
1049: {
1051:   PetscMPIInt    size;

1054:   VecCreate(comm,V);
1055:   VecSetSizes(*V,n,n);
1056:   VecSetBlockSize(*V,bs);
1057:   MPI_Comm_size(comm,&size);
1058:   if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot create VECSEQ on more than one process");
1059:   VecCreate_SeqViennaCL_Private(*V,array);
1060:   return(0);
1061: }

1063: /*@C
1064:    VecCreateSeqViennaCLWithArrays - Creates a ViennaCL sequential vector, where
1065:    the user provides the array space to store the vector values.

1067:    Collective

1069:    Input Parameters:
1070: +  comm - the communicator, should be PETSC_COMM_SELF
1071: .  bs - the block size
1072: .  n - the vector length
1073: -  cpuarray - CPU memory where the vector elements are to be stored.
1074: -  viennaclvec - ViennaCL vector where the Vec entries are to be stored on the device.

1076:    Output Parameter:
1077: .  V - the vector

1079:    Notes:
1080:    If both cpuarray and viennaclvec are provided, the caller must ensure that
1081:    the provided arrays have identical values.

1083:    PETSc does NOT free the provided arrays when the vector is destroyed via
1084:    VecDestroy(). The user should not free the array until the vector is
1085:    destroyed.

1087:    Level: intermediate

1089: .seealso: VecCreateMPIViennaCLWithArrays(), VecCreate(), VecCreateSeqWithArray(),
1090:           VecViennaCLPlaceArray(), VecPlaceArray(), VecCreateSeqCUDAWithArrays(),
1091:           VecViennaCLAllocateCheckHost()
1092: @*/
1093: PetscErrorCode  VecCreateSeqViennaCLWithArrays(MPI_Comm comm,PetscInt bs,PetscInt n,const PetscScalar cpuarray[],const ViennaCLVector* viennaclvec,Vec *V)
1094: {
1096:   PetscMPIInt    size;


1100:   MPI_Comm_size(comm,&size);
1101:   if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot create VECSEQ on more than one process");

1103:   // set V's viennaclvec to be viennaclvec, do not allocate memory on host yet.
1104:   VecCreateSeqViennaCLWithArray(comm,bs,n,viennaclvec,V);

1106:   if (cpuarray && viennaclvec) {
1107:     Vec_Seq *s = (Vec_Seq*)((*V)->data);
1108:     s->array = (PetscScalar*)cpuarray;
1109:     (*V)->offloadmask = PETSC_OFFLOAD_BOTH;
1110:   } else if (cpuarray) {
1111:     Vec_Seq *s = (Vec_Seq*)((*V)->data);
1112:     s->array = (PetscScalar*)cpuarray;
1113:     (*V)->offloadmask = PETSC_OFFLOAD_CPU;
1114:   } else if (viennaclvec) {
1115:     (*V)->offloadmask = PETSC_OFFLOAD_GPU;
1116:   } else {
1117:     (*V)->offloadmask = PETSC_OFFLOAD_UNALLOCATED;
1118:   }

1120:   return(0);
1121: }

1123: /*@C
1124:    VecViennaCLPlaceArray - Replace the viennacl vector in a Vec with
1125:    the one provided by the user. This is useful to avoid a copy.

1127:    Not Collective

1129:    Input Parameters:
1130: +  vec - the vector
1131: -  array - the ViennaCL vector

1133:    Notes:
1134:    You can return to the original viennacl vector with a call to
1135:    VecViennaCLResetArray() It is not possible to use VecViennaCLPlaceArray()
1136:    and VecPlaceArray() at the same time on the same vector.

1138:    Level: intermediate

1140: .seealso: VecPlaceArray(), VecSetValues(), VecViennaCLResetArray(),
1141:           VecCUDAPlaceArray(),

1143: @*/
1144: PETSC_EXTERN PetscErrorCode VecViennaCLPlaceArray(Vec vin,const ViennaCLVector* a)
1145: {

1150:   VecViennaCLCopyToGPU(vin);
1151:   if (((Vec_Seq*)vin->data)->unplacedarray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"VecViennaCLPlaceArray()/VecPlaceArray() was already called on this vector, without a call to VecViennaCLResetArray()/VecResetArray()");
1152:   ((Vec_Seq*)vin->data)->unplacedarray  = (PetscScalar *) ((Vec_ViennaCL*)vin->spptr)->GPUarray; /* save previous GPU array so reset can bring it back */
1153:   ((Vec_ViennaCL*)vin->spptr)->GPUarray = (ViennaCLVector*)a;
1154:   vin->offloadmask = PETSC_OFFLOAD_GPU;
1155:   PetscObjectStateIncrease((PetscObject)vin);
1156:   return(0);
1157: }

1159: /*@C
1160:    VecViennaCLResetArray - Resets a vector to use its default memory. Call this
1161:    after the use of VecViennaCLPlaceArray().

1163:    Not Collective

1165:    Input Parameters:
1166: .  vec - the vector

1168:    Level: developer

1170: .seealso: VecViennaCLPlaceArray(), VecResetArray(), VecCUDAResetArray(), VecPlaceArray()
1171: @*/
1172: PETSC_EXTERN PetscErrorCode VecViennaCLResetArray(Vec vin)
1173: {

1178:   VecViennaCLCopyToGPU(vin);
1179:   ((Vec_ViennaCL*)vin->spptr)->GPUarray = (ViennaCLVector *) ((Vec_Seq*)vin->data)->unplacedarray;
1180:   ((Vec_Seq*)vin->data)->unplacedarray = 0;
1181:   vin->offloadmask = PETSC_OFFLOAD_GPU;
1182:   PetscObjectStateIncrease((PetscObject)vin);
1183:   return(0);
1184: }

1186: /*  VecDotNorm2 - computes the inner product of two vectors and the 2-norm squared of the second vector
1187:  *
1188:  *  Simply reuses VecDot() and VecNorm(). Performance improvement through custom kernel (kernel generator) possible.
1189:  */
1190: PetscErrorCode VecDotNorm2_SeqViennaCL(Vec s, Vec t, PetscScalar *dp, PetscScalar *nm)
1191: {
1192:   PetscErrorCode                         ierr;

1195:   VecDot_SeqViennaCL(s,t,dp);
1196:   VecNorm_SeqViennaCL(t,NORM_2,nm);
1197:   *nm *= *nm; //squared norm required
1198:   return(0);
1199: }

1201: PetscErrorCode VecDuplicate_SeqViennaCL(Vec win,Vec *V)
1202: {

1206:   VecCreateSeqViennaCL(PetscObjectComm((PetscObject)win),win->map->n,V);
1207:   PetscLayoutReference(win->map,&(*V)->map);
1208:   PetscObjectListDuplicate(((PetscObject)win)->olist,&((PetscObject)(*V))->olist);
1209:   PetscFunctionListDuplicate(((PetscObject)win)->qlist,&((PetscObject)(*V))->qlist);
1210:   (*V)->stash.ignorenegidx = win->stash.ignorenegidx;
1211:   return(0);
1212: }

1214: PetscErrorCode VecDestroy_SeqViennaCL(Vec v)
1215: {

1219:   try {
1220:     if (v->spptr) {
1221:       delete ((Vec_ViennaCL*)v->spptr)->GPUarray_allocated;
1222:       delete (Vec_ViennaCL*) v->spptr;
1223:     }
1224:   } catch(char *ex) {
1225:     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex);
1226:   }
1227:   VecDestroy_SeqViennaCL_Private(v);
1228:   return(0);
1229: }

1231: PetscErrorCode VecGetArray_SeqViennaCL(Vec v,PetscScalar **a)
1232: {

1236:   if (v->offloadmask == PETSC_OFFLOAD_GPU) {
1237:     VecViennaCLCopyFromGPU(v);
1238:   } else {
1239:     VecViennaCLAllocateCheckHost(v);
1240:   }
1241:   *a = *((PetscScalar**)v->data);
1242:   return(0);
1243: }

1245: PetscErrorCode VecRestoreArray_SeqViennaCL(Vec v,PetscScalar **a)
1246: {
1248:   v->offloadmask = PETSC_OFFLOAD_CPU;
1249:   return(0);
1250: }

1252: PetscErrorCode VecGetArrayWrite_SeqViennaCL(Vec v,PetscScalar **a)
1253: {

1257:   VecViennaCLAllocateCheckHost(v);
1258:   *a   = *((PetscScalar**)v->data);
1259:   return(0);
1260: }

1262: static PetscErrorCode VecBindToCPU_SeqAIJViennaCL(Vec V,PetscBool flg)
1263: {

1267:   V->boundtocpu = flg;
1268:   if (flg) {
1269:     VecViennaCLCopyFromGPU(V);
1270:     V->offloadmask = PETSC_OFFLOAD_CPU; /* since the CPU code will likely change values in the vector */
1271:     V->ops->dot             = VecDot_Seq;
1272:     V->ops->norm            = VecNorm_Seq;
1273:     V->ops->tdot            = VecTDot_Seq;
1274:     V->ops->scale           = VecScale_Seq;
1275:     V->ops->copy            = VecCopy_Seq;
1276:     V->ops->set             = VecSet_Seq;
1277:     V->ops->swap            = VecSwap_Seq;
1278:     V->ops->axpy            = VecAXPY_Seq;
1279:     V->ops->axpby           = VecAXPBY_Seq;
1280:     V->ops->axpbypcz        = VecAXPBYPCZ_Seq;
1281:     V->ops->pointwisemult   = VecPointwiseMult_Seq;
1282:     V->ops->pointwisedivide = VecPointwiseDivide_Seq;
1283:     V->ops->setrandom       = VecSetRandom_Seq;
1284:     V->ops->dot_local       = VecDot_Seq;
1285:     V->ops->tdot_local      = VecTDot_Seq;
1286:     V->ops->norm_local      = VecNorm_Seq;
1287:     V->ops->mdot_local      = VecMDot_Seq;
1288:     V->ops->mtdot_local     = VecMTDot_Seq;
1289:     V->ops->maxpy           = VecMAXPY_Seq;
1290:     V->ops->mdot            = VecMDot_Seq;
1291:     V->ops->mtdot           = VecMTDot_Seq;
1292:     V->ops->aypx            = VecAYPX_Seq;
1293:     V->ops->waxpy           = VecWAXPY_Seq;
1294:     V->ops->dotnorm2        = NULL;
1295:     V->ops->placearray      = VecPlaceArray_Seq;
1296:     V->ops->replacearray    = VecReplaceArray_Seq;
1297:     V->ops->resetarray      = VecResetArray_Seq;
1298:     V->ops->duplicate       = VecDuplicate_Seq;
1299:   } else {
1300:     V->ops->dot             = VecDot_SeqViennaCL;
1301:     V->ops->norm            = VecNorm_SeqViennaCL;
1302:     V->ops->tdot            = VecTDot_SeqViennaCL;
1303:     V->ops->scale           = VecScale_SeqViennaCL;
1304:     V->ops->copy            = VecCopy_SeqViennaCL;
1305:     V->ops->set             = VecSet_SeqViennaCL;
1306:     V->ops->swap            = VecSwap_SeqViennaCL;
1307:     V->ops->axpy            = VecAXPY_SeqViennaCL;
1308:     V->ops->axpby           = VecAXPBY_SeqViennaCL;
1309:     V->ops->axpbypcz        = VecAXPBYPCZ_SeqViennaCL;
1310:     V->ops->pointwisemult   = VecPointwiseMult_SeqViennaCL;
1311:     V->ops->pointwisedivide = VecPointwiseDivide_SeqViennaCL;
1312:     V->ops->setrandom       = VecSetRandom_SeqViennaCL;
1313:     V->ops->dot_local       = VecDot_SeqViennaCL;
1314:     V->ops->tdot_local      = VecTDot_SeqViennaCL;
1315:     V->ops->norm_local      = VecNorm_SeqViennaCL;
1316:     V->ops->mdot_local      = VecMDot_SeqViennaCL;
1317:     V->ops->mtdot_local     = VecMTDot_SeqViennaCL;
1318:     V->ops->maxpy           = VecMAXPY_SeqViennaCL;
1319:     V->ops->mdot            = VecMDot_SeqViennaCL;
1320:     V->ops->mtdot           = VecMTDot_SeqViennaCL;
1321:     V->ops->aypx            = VecAYPX_SeqViennaCL;
1322:     V->ops->waxpy           = VecWAXPY_SeqViennaCL;
1323:     V->ops->dotnorm2        = VecDotNorm2_SeqViennaCL;
1324:     V->ops->placearray      = VecPlaceArray_SeqViennaCL;
1325:     V->ops->replacearray    = VecReplaceArray_SeqViennaCL;
1326:     V->ops->resetarray      = VecResetArray_SeqViennaCL;
1327:     V->ops->destroy         = VecDestroy_SeqViennaCL;
1328:     V->ops->duplicate       = VecDuplicate_SeqViennaCL;
1329:     V->ops->getarraywrite   = VecGetArrayWrite_SeqViennaCL;
1330:     V->ops->getarray        = VecGetArray_SeqViennaCL;
1331:     V->ops->restorearray    = VecRestoreArray_SeqViennaCL;
1332:   }
1333:   return(0);
1334: }

1336: PETSC_EXTERN PetscErrorCode VecCreate_SeqViennaCL(Vec V)
1337: {
1339:   PetscMPIInt    size;

1342:   MPI_Comm_size(PetscObjectComm((PetscObject)V),&size);
1343:   if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot create VECSEQVIENNACL on more than one process");
1344:   VecCreate_Seq_Private(V,0);
1345:   PetscObjectChangeTypeName((PetscObject)V,VECSEQVIENNACL);

1347:   VecBindToCPU_SeqAIJViennaCL(V,PETSC_FALSE);
1348:   V->ops->bindtocpu = VecBindToCPU_SeqAIJViennaCL;

1350:   VecViennaCLAllocateCheck(V);
1351:   VecViennaCLAllocateCheckHost(V);
1352:   VecSet(V,0.0);
1353:   VecSet_Seq(V,0.0);
1354:   V->offloadmask = PETSC_OFFLOAD_BOTH;
1355:   return(0);
1356: }

1358: /*@C
1359:   VecViennaCLGetCLContext - Get the OpenCL context in which the Vec resides.

1361:   Caller should cast (*ctx) to (const cl_context). Caller is responsible for
1362:   invoking clReleaseContext().

1364:   Input Parameters:
1365: .  v    - the vector

1367:   Output Parameters:
1368: .  ctx - pointer to the underlying CL context

1370:   Level: intermediate

1372: .seealso: VecViennaCLGetCLQueue(), VecViennaCLGetCLMemRead()
1373: @*/
1374: PETSC_EXTERN PetscErrorCode VecViennaCLGetCLContext(Vec v, PETSC_UINTPTR_T* ctx)
1375: {
1376: #if !defined(PETSC_HAVE_OPENCL)
1377:   SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"PETSc must be configured with --with-opencl to get the associated cl_context.");
1378: #else


1384:   const ViennaCLVector *v_vcl;
1385:   VecViennaCLGetArrayRead(v, &v_vcl);
1386:   try{
1387:     viennacl::ocl::context vcl_ctx = v_vcl->handle().opencl_handle().context();
1388:     const cl_context ocl_ctx = vcl_ctx.handle().get();
1389:     clRetainContext(ocl_ctx);
1390:     *ctx = (PETSC_UINTPTR_T)(ocl_ctx);
1391:   } catch (std::exception const & ex) {
1392:     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
1393:   }

1395:   return(0);
1396: #endif
1397: }

1399: /*@C
1400:   VecViennaCLGetCLQueue - Get the OpenCL command queue to which all
1401:   operations of the Vec are enqueued.

1403:   Caller should cast (*queue) to (const cl_command_queue). Caller is
1404:   responsible for invoking clReleaseCommandQueue().

1406:   Input Parameters:
1407: .  v    - the vector

1409:   Output Parameters:
1410: .  ctx - pointer to the CL command queue

1412:   Level: intermediate

1414: .seealso: VecViennaCLGetCLContext(), VecViennaCLGetCLMemRead()
1415: @*/
1416: PETSC_EXTERN PetscErrorCode VecViennaCLGetCLQueue(Vec v, PETSC_UINTPTR_T* queue)
1417: {
1418: #if !defined(PETSC_HAVE_OPENCL)
1419:   SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"PETSc must be configured with --with-opencl to get the associated cl_command_queue.");
1420: #else

1425:   const ViennaCLVector *v_vcl;
1426:   VecViennaCLGetArrayRead(v, &v_vcl);
1427:   try{
1428:     viennacl::ocl::context vcl_ctx = v_vcl->handle().opencl_handle().context();
1429:     const viennacl::ocl::command_queue& vcl_queue = vcl_ctx.current_queue();
1430:     const cl_command_queue ocl_queue = vcl_queue.handle().get();
1431:     clRetainCommandQueue(ocl_queue);
1432:     *queue = (PETSC_UINTPTR_T)(ocl_queue);
1433:   } catch (std::exception const & ex) {
1434:     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
1435:   }

1437:   return(0);
1438: #endif
1439: }

1441: /*@C
1442:   VecViennaCLGetCLMemRead - Provides access to the the CL buffer inside a Vec.

1444:   Caller should cast (*mem) to (const cl_mem). Caller is responsible for
1445:   invoking clReleaseMemObject().

1447:   Input Parameters:
1448: .  v    - the vector

1450:   Output Parameters:
1451: .  mem - pointer to the device buffer

1453:   Level: intermediate

1455: .seealso: VecViennaCLGetCLContext(), VecViennaCLGetCLMemWrite()
1456: @*/
1457: PETSC_EXTERN PetscErrorCode VecViennaCLGetCLMemRead(Vec v, PETSC_UINTPTR_T* mem)
1458: {
1459: #if !defined(PETSC_HAVE_OPENCL)
1460:   SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"PETSc must be configured with --with-opencl to get a Vec's cl_mem");
1461: #else

1466:   const ViennaCLVector *v_vcl;
1467:   VecViennaCLGetArrayRead(v, &v_vcl);
1468:   try{
1469:     const cl_mem ocl_mem = v_vcl->handle().opencl_handle().get();
1470:     clRetainMemObject(ocl_mem);
1471:     *mem = (PETSC_UINTPTR_T)(ocl_mem);
1472:   } catch (std::exception const & ex) {
1473:     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
1474:   }
1475:   return(0);
1476: #endif
1477: }

1479: /*@C
1480:   VecViennaCLGetCLMemWrite - Provides access to the the CL buffer inside a Vec.

1482:   Caller should cast (*mem) to (const cl_mem). Caller is responsible for
1483:   invoking clReleaseMemObject().

1485:   The device pointer has to be released by calling
1486:   VecViennaCLRestoreCLMemWrite().  Upon restoring the vector data the data on
1487:   the host will be marked as out of date.  A subsequent access of the host data
1488:   will thus incur a data transfer from the device to the host.

1490:   Input Parameters:
1491: .  v    - the vector

1493:   Output Parameters:
1494: .  mem - pointer to the device buffer

1496:   Level: intermediate

1498: .seealso: VecViennaCLGetCLContext(), VecViennaCLRestoreCLMemWrite()
1499: @*/
1500: PETSC_EXTERN PetscErrorCode VecViennaCLGetCLMemWrite(Vec v, PETSC_UINTPTR_T* mem)
1501: {
1502: #if !defined(PETSC_HAVE_OPENCL)
1503:   SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"PETSc must be configured with --with-opencl to get a Vec's cl_mem");
1504: #else

1509:   ViennaCLVector *v_vcl;
1510:   VecViennaCLGetArrayWrite(v, &v_vcl);
1511:   try{
1512:     const cl_mem ocl_mem = v_vcl->handle().opencl_handle().get();
1513:     clRetainMemObject(ocl_mem);
1514:     *mem = (PETSC_UINTPTR_T)(ocl_mem);
1515:   } catch (std::exception const & ex) {
1516:     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
1517:   }

1519:   return(0);
1520: #endif
1521: }

1523: /*@C
1524:   VecViennaCLRestoreCLMemWrite - Restores a CL buffer pointer previously
1525:   acquired with VecViennaCLGetCLMemWrite().

1527:    This marks the host data as out of date.  Subsequent access to the
1528:    vector data on the host side with for instance VecGetArray() incurs a
1529:    data transfer.

1531:   Input Parameters:
1532: .  v    - the vector

1534:   Level: intermediate

1536: .seealso: VecViennaCLGetCLContext(), VecViennaCLGetCLMemWrite()
1537: @*/
1538: PETSC_EXTERN PetscErrorCode VecViennaCLRestoreCLMemWrite(Vec v)
1539: {
1540: #if !defined(PETSC_HAVE_OPENCL)
1541:   SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"PETSc must be configured with --with-opencl to restore a Vec's cl_mem");
1542: #else

1547:   VecViennaCLRestoreArrayWrite(v, PETSC_NULL);

1549:   return(0);
1550: #endif
1551: }

1553: /*@C
1554:   VecViennaCLGetCLMem - Provides access to the the CL buffer inside a Vec.

1556:   Caller should cast (*mem) to (const cl_mem). Caller is responsible for
1557:   invoking clReleaseMemObject().

1559:   The device pointer has to be released by calling VecViennaCLRestoreCLMem().
1560:   Upon restoring the vector data the data on the host will be marked as out of
1561:   date.  A subsequent access of the host data will thus incur a data transfer
1562:   from the device to the host.

1564:   Input Parameters:
1565: .  v    - the vector

1567:   Output Parameters:
1568: .  mem - pointer to the device buffer

1570:   Level: intermediate

1572: .seealso: VecViennaCLGetCLContext(), VecViennaCLRestoreCLMem()
1573: @*/
1574: PETSC_EXTERN PetscErrorCode VecViennaCLGetCLMem(Vec v, PETSC_UINTPTR_T* mem)
1575: {
1576: #if !defined(PETSC_HAVE_OPENCL)
1577:   SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"PETSc must be configured with --with-opencl to get a Vec's cl_mem");
1578: #else

1583:   ViennaCLVector *v_vcl;
1584:   VecViennaCLGetArray(v, &v_vcl);
1585:   try{
1586:     const cl_mem ocl_mem = v_vcl->handle().opencl_handle().get();
1587:     clRetainMemObject(ocl_mem);
1588:     *mem = (PETSC_UINTPTR_T)(ocl_mem);
1589:   } catch (std::exception const & ex) {
1590:     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
1591:   }

1593:   return(0);
1594: #endif
1595: }

1597: /*@C
1598:   VecViennaCLRestoreCLMem - Restores a CL buffer pointer previously
1599:   acquired with VecViennaCLGetCLMem().

1601:    This marks the host data as out of date. Subsequent access to the vector
1602:    data on the host side with for instance VecGetArray() incurs a data
1603:    transfer.

1605:   Input Parameters:
1606: .  v    - the vector

1608:   Level: intermediate

1610: .seealso: VecViennaCLGetCLContext(), VecViennaCLGetCLMem()
1611: @*/
1612: PETSC_EXTERN PetscErrorCode VecViennaCLRestoreCLMem(Vec v)
1613: {
1614: #if !defined(PETSC_HAVE_OPENCL)
1615:   SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"PETSc must be configured with --with-opencl to restore a Vec's cl_mem");
1616: #else

1621:   VecViennaCLRestoreArray(v, PETSC_NULL);

1623:   return(0);
1624: #endif
1625: }

1627: PetscErrorCode VecCreate_SeqViennaCL_Private(Vec V,const ViennaCLVector *array)
1628: {
1630:   Vec_ViennaCL   *vecviennacl;
1631:   PetscMPIInt    size;

1634:   MPI_Comm_size(PetscObjectComm((PetscObject)V),&size);
1635:   if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot create VECSEQVIENNACL on more than one process");
1636:   VecCreate_Seq_Private(V,0);
1637:   PetscObjectChangeTypeName((PetscObject)V,VECSEQVIENNACL);
1638:   VecBindToCPU_SeqAIJViennaCL(V,PETSC_FALSE);
1639:   V->ops->bindtocpu = VecBindToCPU_SeqAIJViennaCL;

1641:   if (array) {
1642:     if (!V->spptr)
1643:       V->spptr = new Vec_ViennaCL;
1644:     vecviennacl = (Vec_ViennaCL*)V->spptr;
1645:     vecviennacl->GPUarray_allocated = 0;
1646:     vecviennacl->GPUarray           = (ViennaCLVector*)array;
1647:     V->offloadmask = PETSC_OFFLOAD_UNALLOCATED;
1648:   }

1650:   return(0);
1651: }