Actual source code: vecviennacl.cxx
1: /*
2: Implements the sequential ViennaCL vectors.
3: */
5: #include <petscconf.h>
6: #include <petsc/private/vecimpl.h>
7: #include <../src/vec/vec/impls/dvecimpl.h>
8: #include <../src/vec/vec/impls/seq/seqviennacl/viennaclvecimpl.h>
10: #include <vector>
12: #include "viennacl/linalg/inner_prod.hpp"
13: #include "viennacl/linalg/norm_1.hpp"
14: #include "viennacl/linalg/norm_2.hpp"
15: #include "viennacl/linalg/norm_inf.hpp"
17: #ifdef VIENNACL_WITH_OPENCL
18: #include "viennacl/ocl/backend.hpp"
19: #endif
21: PETSC_EXTERN PetscErrorCode VecViennaCLGetArray(Vec v, ViennaCLVector **a)
22: {
27: *a = 0;
28: VecViennaCLCopyToGPU(v);
29: *a = ((Vec_ViennaCL*)v->spptr)->GPUarray;
30: ViennaCLWaitForGPU();
31: return(0);
32: }
34: PETSC_EXTERN PetscErrorCode VecViennaCLRestoreArray(Vec v, ViennaCLVector **a)
35: {
40: v->offloadmask = PETSC_OFFLOAD_GPU;
42: PetscObjectStateIncrease((PetscObject)v);
43: return(0);
44: }
46: PETSC_EXTERN PetscErrorCode VecViennaCLGetArrayRead(Vec v, const ViennaCLVector **a)
47: {
52: *a = 0;
53: VecViennaCLCopyToGPU(v);
54: *a = ((Vec_ViennaCL*)v->spptr)->GPUarray;
55: ViennaCLWaitForGPU();
56: return(0);
57: }
59: PETSC_EXTERN PetscErrorCode VecViennaCLRestoreArrayRead(Vec v, const ViennaCLVector **a)
60: {
63: return(0);
64: }
66: PETSC_EXTERN PetscErrorCode VecViennaCLGetArrayWrite(Vec v, ViennaCLVector **a)
67: {
72: *a = 0;
73: VecViennaCLAllocateCheck(v);
74: *a = ((Vec_ViennaCL*)v->spptr)->GPUarray;
75: ViennaCLWaitForGPU();
76: return(0);
77: }
79: PETSC_EXTERN PetscErrorCode VecViennaCLRestoreArrayWrite(Vec v, ViennaCLVector **a)
80: {
85: v->offloadmask = PETSC_OFFLOAD_GPU;
87: PetscObjectStateIncrease((PetscObject)v);
88: return(0);
89: }
91: PETSC_EXTERN PetscErrorCode PetscViennaCLInit()
92: {
93: PetscErrorCode ierr;
94: char string[20];
95: PetscBool flg,flg_cuda,flg_opencl,flg_openmp;
98: /* ViennaCL backend selection: CUDA, OpenCL, or OpenMP */
99: PetscOptionsGetString(NULL,NULL,"-viennacl_backend",string,sizeof(string),&flg);
100: if (flg) {
101: try {
102: PetscStrcasecmp(string,"cuda",&flg_cuda);
103: PetscStrcasecmp(string,"opencl",&flg_opencl);
104: PetscStrcasecmp(string,"openmp",&flg_openmp);
106: /* A default (sequential) CPU backend is always available - even if OpenMP is not enabled. */
107: if (flg_openmp) viennacl::backend::default_memory_type(viennacl::MAIN_MEMORY);
108: #if defined(PETSC_HAVE_CUDA)
109: else if (flg_cuda) viennacl::backend::default_memory_type(viennacl::CUDA_MEMORY);
110: #endif
111: #if defined(PETSC_HAVE_OPENCL)
112: else if (flg_opencl) viennacl::backend::default_memory_type(viennacl::OPENCL_MEMORY);
113: #endif
114: else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: Backend not recognized or available: %s.\n Pass -viennacl_view to see available backends for ViennaCL.\n", string);
115: } catch (std::exception const & ex) {
116: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
117: }
118: }
120: #if defined(PETSC_HAVE_CUDA)
121: PetscCUDAInitializeCheck(); /* For CUDA event timers */
122: #endif
124: #if defined(PETSC_HAVE_OPENCL)
125: /* ViennaCL OpenCL device type configuration */
126: PetscOptionsGetString(NULL,NULL,"-viennacl_opencl_device_type",string,sizeof(string),&flg);
127: if (flg) {
128: try {
129: PetscStrcasecmp(string,"cpu",&flg);
130: if (flg) viennacl::ocl::set_context_device_type(0, CL_DEVICE_TYPE_CPU);
132: PetscStrcasecmp(string,"gpu",&flg);
133: if (flg) viennacl::ocl::set_context_device_type(0, CL_DEVICE_TYPE_GPU);
135: PetscStrcasecmp(string,"accelerator",&flg);
136: if (flg) viennacl::ocl::set_context_device_type(0, CL_DEVICE_TYPE_ACCELERATOR);
137: } catch (std::exception const & ex) {
138: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
139: }
140: }
141: #endif
143: /* Print available backends */
144: PetscOptionsHasName(NULL,NULL,"-viennacl_view",&flg);
145: if (flg) {
146: PetscPrintf(PETSC_COMM_WORLD, "ViennaCL backends available: ");
147: #if defined(PETSC_HAVE_CUDA)
148: PetscPrintf(PETSC_COMM_WORLD, "CUDA, ");
149: #endif
150: #if defined(PETSC_HAVE_OPENCL)
151: PetscPrintf(PETSC_COMM_WORLD, "OpenCL, ");
152: #endif
153: #if defined(PETSC_HAVE_OPENMP)
154: PetscPrintf(PETSC_COMM_WORLD, "OpenMP ");
155: #else
156: PetscPrintf(PETSC_COMM_WORLD, "OpenMP (1 thread) ");
157: #endif
158: PetscPrintf(PETSC_COMM_WORLD, "\n");
160: /* Print selected backends */
161: PetscPrintf(PETSC_COMM_WORLD, "ViennaCL backend selected: ");
162: #if defined(PETSC_HAVE_CUDA)
163: if (viennacl::backend::default_memory_type() == viennacl::CUDA_MEMORY) {
164: PetscPrintf(PETSC_COMM_WORLD, "CUDA ");
165: }
166: #endif
167: #if defined(PETSC_HAVE_OPENCL)
168: if (viennacl::backend::default_memory_type() == viennacl::OPENCL_MEMORY) {
169: PetscPrintf(PETSC_COMM_WORLD, "OpenCL ");
170: }
171: #endif
172: #if defined(PETSC_HAVE_OPENMP)
173: if (viennacl::backend::default_memory_type() == viennacl::MAIN_MEMORY) {
174: PetscPrintf(PETSC_COMM_WORLD, "OpenMP ");
175: }
176: #else
177: if (viennacl::backend::default_memory_type() == viennacl::MAIN_MEMORY) {
178: PetscPrintf(PETSC_COMM_WORLD, "OpenMP (sequential - consider reconfiguration: --with-openmp=1) ");
179: }
180: #endif
181: PetscPrintf(PETSC_COMM_WORLD, "\n");
182: }
183: return(0);
184: }
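/* Usage sketch (illustrative, not part of the source): backend and device selection is
   driven entirely by the runtime options parsed above, e.g. for some executable ./app:

       ./app -viennacl_backend opencl -viennacl_opencl_device_type gpu -viennacl_view

   Only the option names come from PetscViennaCLInit(); the executable name is made up. */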
186: /*
187: Allocates space for the vector array on the Host if it does not exist.
188: Does NOT change the vector's offload mask (v->offloadmask)
189: Does NOT zero the ViennaCL array
190: */
191: PETSC_EXTERN PetscErrorCode VecViennaCLAllocateCheckHost(Vec v)
192: {
194: PetscScalar *array;
195: Vec_Seq *s;
196: PetscInt n = v->map->n;
199: s = (Vec_Seq*)v->data;
200: VecViennaCLAllocateCheck(v);
201: if (s->array == 0) {
202: PetscMalloc1(n,&array);
203: PetscLogObjectMemory((PetscObject)v,n*sizeof(PetscScalar));
204: s->array = array;
205: s->array_allocated = array;
206: }
207: return(0);
208: }
210: /*
211: Allocates space for the vector array on the GPU if it does not exist.
212: Does NOT change the vector's offload mask (v->offloadmask)
213: Does NOT zero the ViennaCL array
215: */
216: PetscErrorCode VecViennaCLAllocateCheck(Vec v)
217: {
219: if (!v->spptr) {
220: try {
221: v->spptr = new Vec_ViennaCL;
222: ((Vec_ViennaCL*)v->spptr)->GPUarray_allocated = new ViennaCLVector((PetscBLASInt)v->map->n);
223: ((Vec_ViennaCL*)v->spptr)->GPUarray = ((Vec_ViennaCL*)v->spptr)->GPUarray_allocated;
225: } catch(std::exception const & ex) {
226: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
227: }
228: }
229: return(0);
230: }
232: /* Copies a vector from the CPU to the GPU unless we already have an up-to-date copy on the GPU */
233: PetscErrorCode VecViennaCLCopyToGPU(Vec v)
234: {
239: VecViennaCLAllocateCheck(v);
240: if (v->map->n > 0) {
241: if (v->offloadmask == PETSC_OFFLOAD_CPU) {
242: PetscLogEventBegin(VEC_ViennaCLCopyToGPU,v,0,0,0);
243: try {
244: ViennaCLVector *vec = ((Vec_ViennaCL*)v->spptr)->GPUarray;
245: viennacl::fast_copy(*(PetscScalar**)v->data, *(PetscScalar**)v->data + v->map->n, vec->begin());
246: ViennaCLWaitForGPU();
247: } catch(std::exception const & ex) {
248: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
249: }
250: PetscLogCpuToGpu((v->map->n)*sizeof(PetscScalar));
251: PetscLogEventEnd(VEC_ViennaCLCopyToGPU,v,0,0,0);
252: v->offloadmask = PETSC_OFFLOAD_BOTH;
253: }
254: }
255: return(0);
256: }
258: /*
259: VecViennaCLCopyFromGPU - Copies a vector from the GPU to the CPU unless we already have an up-to-date copy on the CPU
260: */
261: PetscErrorCode VecViennaCLCopyFromGPU(Vec v)
262: {
267: VecViennaCLAllocateCheckHost(v);
268: if (v->offloadmask == PETSC_OFFLOAD_GPU) {
269: PetscLogEventBegin(VEC_ViennaCLCopyFromGPU,v,0,0,0);
270: try {
271: ViennaCLVector *vec = ((Vec_ViennaCL*)v->spptr)->GPUarray;
272: viennacl::fast_copy(vec->begin(),vec->end(),*(PetscScalar**)v->data);
273: ViennaCLWaitForGPU();
274: } catch(std::exception const & ex) {
275: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
276: }
277: PetscLogGpuToCpu((v->map->n)*sizeof(PetscScalar));
278: PetscLogEventEnd(VEC_ViennaCLCopyFromGPU,v,0,0,0);
279: v->offloadmask = PETSC_OFFLOAD_BOTH;
280: }
281: return(0);
282: }
284: /* Copy on CPU */
285: static PetscErrorCode VecCopy_SeqViennaCL_Private(Vec xin,Vec yin)
286: {
287: PetscScalar *ya;
288: const PetscScalar *xa;
289: PetscErrorCode ierr;
292: VecViennaCLAllocateCheckHost(xin);
293: VecViennaCLAllocateCheckHost(yin);
294: if (xin != yin) {
295: VecGetArrayRead(xin,&xa);
296: VecGetArray(yin,&ya);
297: PetscArraycpy(ya,xa,xin->map->n);
298: VecRestoreArrayRead(xin,&xa);
299: VecRestoreArray(yin,&ya);
300: }
301: return(0);
302: }
304: static PetscErrorCode VecSetRandom_SeqViennaCL_Private(Vec xin,PetscRandom r)
305: {
307: PetscInt n = xin->map->n,i;
308: PetscScalar *xx;
311: VecGetArray(xin,&xx);
312: for (i=0; i<n; i++) {PetscRandomGetValue(r,&xx[i]);}
313: VecRestoreArray(xin,&xx);
314: return(0);
315: }
317: static PetscErrorCode VecDestroy_SeqViennaCL_Private(Vec v)
318: {
319: Vec_Seq *vs = (Vec_Seq*)v->data;
323: PetscObjectSAWsViewOff(v);
324: #if defined(PETSC_USE_LOG)
325: PetscLogObjectState((PetscObject)v,"Length=%D",v->map->n);
326: #endif
327: if (vs->array_allocated) { PetscFree(vs->array_allocated); }
328: PetscFree(vs);
329: return(0);
330: }
332: static PetscErrorCode VecResetArray_SeqViennaCL_Private(Vec vin)
333: {
334: Vec_Seq *v = (Vec_Seq*)vin->data;
337: v->array = v->unplacedarray;
338: v->unplacedarray = 0;
339: return(0);
340: }
342: /*MC
343: VECSEQVIENNACL - VECSEQVIENNACL = "seqviennacl" - The basic sequential vector, modified to use ViennaCL
345: Options Database Keys:
346: . -vec_type seqviennacl - sets the vector type to VECSEQVIENNACL during a call to VecSetFromOptions()
348: Level: beginner
350: .seealso: VecCreate(), VecSetType(), VecSetFromOptions(), VecCreateSeqWithArray(), VECMPI, VecType, VecCreateMPI(), VecCreateSeq()
351: M*/
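/* Usage sketch (assumes a ViennaCL-enabled PETSc build; the length 100 is arbitrary):

       Vec            x;
       PetscErrorCode ierr;

       ierr = VecCreate(PETSC_COMM_SELF,&x);CHKERRQ(ierr);
       ierr = VecSetSizes(x,100,100);CHKERRQ(ierr);
       ierr = VecSetType(x,VECSEQVIENNACL);CHKERRQ(ierr);  // or VecSetFromOptions() plus -vec_type seqviennacl
       ierr = VecSet(x,1.0);CHKERRQ(ierr);
       ierr = VecDestroy(&x);CHKERRQ(ierr);
*/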
353: PetscErrorCode VecAYPX_SeqViennaCL(Vec yin, PetscScalar alpha, Vec xin)
354: {
355: const ViennaCLVector *xgpu;
356: ViennaCLVector *ygpu;
357: PetscErrorCode ierr;
360: VecViennaCLGetArrayRead(xin,&xgpu);
361: VecViennaCLGetArray(yin,&ygpu);
362: PetscLogGpuTimeBegin();
363: try {
364: if (alpha != 0.0 && xin->map->n > 0) {
365: *ygpu = *xgpu + alpha * *ygpu;
366: PetscLogGpuFlops(2.0*yin->map->n);
367: } else {
368: *ygpu = *xgpu;
369: }
370: ViennaCLWaitForGPU();
371: } catch(std::exception const & ex) {
372: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
373: }
374: PetscLogGpuTimeEnd();
375: VecViennaCLRestoreArrayRead(xin,&xgpu);
376: VecViennaCLRestoreArray(yin,&ygpu);
377: return(0);
378: }
380: PetscErrorCode VecAXPY_SeqViennaCL(Vec yin,PetscScalar alpha,Vec xin)
381: {
382: const ViennaCLVector *xgpu;
383: ViennaCLVector *ygpu;
384: PetscErrorCode ierr;
387: if (alpha != 0.0 && xin->map->n > 0) {
388: VecViennaCLGetArrayRead(xin,&xgpu);
389: VecViennaCLGetArray(yin,&ygpu);
390: PetscLogGpuTimeBegin();
391: try {
392: *ygpu += alpha * *xgpu;
393: ViennaCLWaitForGPU();
394: } catch(std::exception const & ex) {
395: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
396: }
397: PetscLogGpuTimeEnd();
398: VecViennaCLRestoreArrayRead(xin,&xgpu);
399: VecViennaCLRestoreArray(yin,&ygpu);
400: PetscLogGpuFlops(2.0*yin->map->n);
401: }
402: return(0);
403: }
405: PetscErrorCode VecPointwiseDivide_SeqViennaCL(Vec win, Vec xin, Vec yin)
406: {
407: const ViennaCLVector *xgpu,*ygpu;
408: ViennaCLVector *wgpu;
409: PetscErrorCode ierr;
412: if (xin->map->n > 0) {
413: VecViennaCLGetArrayRead(xin,&xgpu);
414: VecViennaCLGetArrayRead(yin,&ygpu);
415: VecViennaCLGetArrayWrite(win,&wgpu);
416: PetscLogGpuTimeBegin();
417: try {
418: *wgpu = viennacl::linalg::element_div(*xgpu, *ygpu);
419: ViennaCLWaitForGPU();
420: } catch(std::exception const & ex) {
421: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
422: }
423: PetscLogGpuTimeEnd();
424: PetscLogGpuFlops(win->map->n);
425: VecViennaCLRestoreArrayRead(xin,&xgpu);
426: VecViennaCLRestoreArrayRead(yin,&ygpu);
427: VecViennaCLRestoreArrayWrite(win,&wgpu);
428: }
429: return(0);
430: }
432: PetscErrorCode VecWAXPY_SeqViennaCL(Vec win,PetscScalar alpha,Vec xin, Vec yin)
433: {
434: const ViennaCLVector *xgpu,*ygpu;
435: ViennaCLVector *wgpu;
436: PetscErrorCode ierr;
439: if (alpha == 0.0 && xin->map->n > 0) {
440: VecCopy_SeqViennaCL(yin,win);
441: } else {
442: VecViennaCLGetArrayRead(xin,&xgpu);
443: VecViennaCLGetArrayRead(yin,&ygpu);
444: VecViennaCLGetArrayWrite(win,&wgpu);
445: PetscLogGpuTimeBegin();
446: if (alpha == 1.0) {
447: try {
448: *wgpu = *ygpu + *xgpu;
449: } catch(std::exception const & ex) {
450: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
451: }
452: PetscLogGpuFlops(win->map->n);
453: } else if (alpha == -1.0) {
454: try {
455: *wgpu = *ygpu - *xgpu;
456: } catch(std::exception const & ex) {
457: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
458: }
459: PetscLogGpuFlops(win->map->n);
460: } else {
461: try {
462: *wgpu = *ygpu + alpha * *xgpu;
463: } catch(std::exception const & ex) {
464: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
465: }
466: PetscLogGpuFlops(2*win->map->n);
467: }
468: ViennaCLWaitForGPU();
469: PetscLogGpuTimeEnd();
470: VecViennaCLRestoreArrayRead(xin,&xgpu);
471: VecViennaCLRestoreArrayRead(yin,&ygpu);
472: VecViennaCLRestoreArrayWrite(win,&wgpu);
473: }
474: return(0);
475: }
477: /*
478: * Operation x = x + sum_i alpha_i * y_i for vectors x, y_i and scalars alpha_i
479: *
480: * ViennaCL supports a fast evaluation of x += alpha * y and x += alpha * y + beta * z,
481: * hence these two operations are applied iteratively until the final result is obtained.
482: */
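/* Illustration (sketch only): for nv = 3 with scalars a0,a1,a2 and vectors y0,y1,y2 the
 * loop below performs
 *     x <- a0*y0 + a1*y1 + 1.0*x     (one VecAXPBYPCZ_SeqViennaCL call)
 *     x <- x + a2*y2                 (one VecAXPY_SeqViennaCL call)
 * i.e. two fused updates instead of three separate AXPY operations. */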
483: PetscErrorCode VecMAXPY_SeqViennaCL(Vec xin, PetscInt nv,const PetscScalar *alpha,Vec *y)
484: {
486: PetscInt j;
489: for (j = 0; j < nv; ++j) {
490: if (j+1 < nv) {
491: VecAXPBYPCZ_SeqViennaCL(xin,alpha[j],alpha[j+1],1.0,y[j],y[j+1]);
492: ++j;
493: } else {
494: VecAXPY_SeqViennaCL(xin,alpha[j],y[j]);
495: }
496: }
497: ViennaCLWaitForGPU();
498: return(0);
499: }
501: PetscErrorCode VecDot_SeqViennaCL(Vec xin,Vec yin,PetscScalar *z)
502: {
503: const ViennaCLVector *xgpu,*ygpu;
504: PetscErrorCode ierr;
507: if (xin->map->n > 0) {
508: VecViennaCLGetArrayRead(xin,&xgpu);
509: VecViennaCLGetArrayRead(yin,&ygpu);
510: PetscLogGpuTimeBegin();
511: try {
512: *z = viennacl::linalg::inner_prod(*xgpu,*ygpu);
513: } catch(std::exception const & ex) {
514: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
515: }
516: ViennaCLWaitForGPU();
517: PetscLogGpuTimeEnd();
518: if (xin->map->n >0) {
519: PetscLogGpuFlops(2.0*xin->map->n-1);
520: }
521: VecViennaCLRestoreArrayRead(xin,&xgpu);
522: VecViennaCLRestoreArrayRead(yin,&ygpu);
523: } else *z = 0.0;
524: return(0);
525: }
527: /*
528: * Operation z[j] = dot(x, y[j])
529: *
530: * All inner products are computed in one pass over x using viennacl::vector_tuple and viennacl::linalg::inner_prod(), which avoids both repeated reads of x and the extra memory a gemv()-based approach would need.
531: */
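/* Illustration (sketch only): for nv = 2 the routine below fills
 *     z[0] = (x, y[0]),  z[1] = (x, y[1])
 * with all inner products gathered through the viennacl::vector_tuple built in the code. */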
532: PetscErrorCode VecMDot_SeqViennaCL(Vec xin,PetscInt nv,const Vec yin[],PetscScalar *z)
533: {
534: PetscErrorCode ierr;
535: PetscInt n = xin->map->n,i;
536: const ViennaCLVector *xgpu,*ygpu;
537: Vec *yyin = (Vec*)yin;
538: std::vector<viennacl::vector_base<PetscScalar> const *> ygpu_array(nv);
541: if (xin->map->n > 0) {
542: VecViennaCLGetArrayRead(xin,&xgpu);
543: for (i=0; i<nv; i++) {
544: VecViennaCLGetArrayRead(yyin[i],&ygpu);
545: ygpu_array[i] = ygpu;
546: }
547: PetscLogGpuTimeBegin();
548: viennacl::vector_tuple<PetscScalar> y_tuple(ygpu_array);
549: ViennaCLVector result = viennacl::linalg::inner_prod(*xgpu, y_tuple);
550: viennacl::copy(result.begin(), result.end(), z);
551: for (i=0; i<nv; i++) {
552: VecViennaCLRestoreArrayRead(yyin[i],&ygpu);
553: }
554: ViennaCLWaitForGPU();
555: PetscLogGpuTimeEnd();
556: VecViennaCLRestoreArrayRead(xin,&xgpu);
557: PetscLogGpuFlops(PetscMax(nv*(2.0*n-1),0.0));
558: } else {
559: for (i=0; i<nv; i++) z[i] = 0.0;
560: }
561: return(0);
562: }
564: PetscErrorCode VecMTDot_SeqViennaCL(Vec xin,PetscInt nv,const Vec yin[],PetscScalar *z)
565: {
569: /* Since the complex case is not supported at the moment, this is the same as VecMDot_SeqViennaCL */
570: VecMDot_SeqViennaCL(xin,nv,yin,z);
571: ViennaCLWaitForGPU();
572: return(0);
573: }
575: PetscErrorCode VecSet_SeqViennaCL(Vec xin,PetscScalar alpha)
576: {
577: ViennaCLVector *xgpu;
581: if (xin->map->n > 0) {
582: VecViennaCLGetArrayWrite(xin,&xgpu);
583: PetscLogGpuTimeBegin();
584: try {
585: *xgpu = viennacl::scalar_vector<PetscScalar>(xgpu->size(), alpha);
586: ViennaCLWaitForGPU();
587: } catch(std::exception const & ex) {
588: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
589: }
590: PetscLogGpuTimeEnd();
591: VecViennaCLRestoreArrayWrite(xin,&xgpu);
592: }
593: return(0);
594: }
596: PetscErrorCode VecScale_SeqViennaCL(Vec xin, PetscScalar alpha)
597: {
598: ViennaCLVector *xgpu;
602: if (alpha == 0.0 && xin->map->n > 0) {
603: VecSet_SeqViennaCL(xin,alpha);
604: PetscLogGpuFlops(xin->map->n);
605: } else if (alpha != 1.0 && xin->map->n > 0) {
606: VecViennaCLGetArray(xin,&xgpu);
607: PetscLogGpuTimeBegin();
608: try {
609: *xgpu *= alpha;
610: ViennaCLWaitForGPU();
611: } catch(std::exception const & ex) {
612: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
613: }
614: PetscLogGpuTimeEnd();
615: VecViennaCLRestoreArray(xin,&xgpu);
616: PetscLogGpuFlops(xin->map->n);
617: }
618: return(0);
619: }
621: PetscErrorCode VecTDot_SeqViennaCL(Vec xin,Vec yin,PetscScalar *z)
622: {
626: /* Since the complex case is not supported at the moment, this is the same as VecDot_SeqViennaCL */
627: VecDot_SeqViennaCL(xin, yin, z);
628: ViennaCLWaitForGPU();
629: return(0);
630: }
632: PetscErrorCode VecCopy_SeqViennaCL(Vec xin,Vec yin)
633: {
634: const ViennaCLVector *xgpu;
635: ViennaCLVector *ygpu;
636: PetscErrorCode ierr;
639: if (xin != yin && xin->map->n > 0) {
640: if (xin->offloadmask == PETSC_OFFLOAD_GPU) {
641: VecViennaCLGetArrayRead(xin,&xgpu);
642: VecViennaCLGetArrayWrite(yin,&ygpu);
643: PetscLogGpuTimeBegin();
644: try {
645: *ygpu = *xgpu;
646: ViennaCLWaitForGPU();
647: } catch(std::exception const & ex) {
648: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
649: }
650: PetscLogGpuTimeEnd();
651: VecViennaCLRestoreArrayRead(xin,&xgpu);
652: VecViennaCLRestoreArrayWrite(yin,&ygpu);
654: } else if (xin->offloadmask == PETSC_OFFLOAD_CPU) {
655: /* copy on the CPU if the data is on the CPU */
656: VecCopy_SeqViennaCL_Private(xin,yin);
657: ViennaCLWaitForGPU();
658: } else if (xin->offloadmask == PETSC_OFFLOAD_BOTH) {
659: /* if xin is valid in both places, see where yin is and copy there (because it's probably where we'll want to next use it) */
660: if (yin->offloadmask == PETSC_OFFLOAD_CPU) {
661: /* copy in CPU */
662: VecCopy_SeqViennaCL_Private(xin,yin);
663: ViennaCLWaitForGPU();
664: } else if (yin->offloadmask == PETSC_OFFLOAD_GPU) {
665: /* copy in GPU */
666: VecViennaCLGetArrayRead(xin,&xgpu);
667: VecViennaCLGetArrayWrite(yin,&ygpu);
668: PetscLogGpuTimeBegin();
669: try {
670: *ygpu = *xgpu;
671: ViennaCLWaitForGPU();
672: } catch(std::exception const & ex) {
673: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
674: }
675: PetscLogGpuTimeEnd();
676: VecViennaCLRestoreArrayRead(xin,&xgpu);
677: VecViennaCLRestoreArrayWrite(yin,&ygpu);
678: } else if (yin->offloadmask == PETSC_OFFLOAD_BOTH) {
679: /* xin and yin are both valid in both places (or yin was unallocated before the earlier call to allocatecheck);
680: default to copying on the GPU (this is an arbitrary choice) */
681: VecViennaCLGetArrayRead(xin,&xgpu);
682: VecViennaCLGetArrayWrite(yin,&ygpu);
683: PetscLogGpuTimeBegin();
684: try {
685: *ygpu = *xgpu;
686: ViennaCLWaitForGPU();
687: } catch(std::exception const & ex) {
688: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
689: }
690: PetscLogGpuTimeEnd();
691: VecViennaCLRestoreArrayRead(xin,&xgpu);
692: VecViennaCLRestoreArrayWrite(yin,&ygpu);
693: } else {
694: VecCopy_SeqViennaCL_Private(xin,yin);
695: ViennaCLWaitForGPU();
696: }
697: }
698: }
699: return(0);
700: }
702: PetscErrorCode VecSwap_SeqViennaCL(Vec xin,Vec yin)
703: {
705: ViennaCLVector *xgpu,*ygpu;
708: if (xin != yin && xin->map->n > 0) {
709: VecViennaCLGetArray(xin,&xgpu);
710: VecViennaCLGetArray(yin,&ygpu);
711: PetscLogGpuTimeBegin();
712: try {
713: viennacl::swap(*xgpu, *ygpu);
714: ViennaCLWaitForGPU();
715: } catch(std::exception const & ex) {
716: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
717: }
718: PetscLogGpuTimeEnd();
719: VecViennaCLRestoreArray(xin,&xgpu);
720: VecViennaCLRestoreArray(yin,&ygpu);
721: }
722: return(0);
723: }
725: // y = alpha * x + beta * y
726: PetscErrorCode VecAXPBY_SeqViennaCL(Vec yin,PetscScalar alpha,PetscScalar beta,Vec xin)
727: {
728: PetscErrorCode ierr;
729: PetscScalar a = alpha,b = beta;
730: const ViennaCLVector *xgpu;
731: ViennaCLVector *ygpu;
734: if (a == 0.0 && xin->map->n > 0) {
735: VecScale_SeqViennaCL(yin,beta);
736: } else if (b == 1.0 && xin->map->n > 0) {
737: VecAXPY_SeqViennaCL(yin,alpha,xin);
738: } else if (a == 1.0 && xin->map->n > 0) {
739: VecAYPX_SeqViennaCL(yin,beta,xin);
740: } else if (b == 0.0 && xin->map->n > 0) {
741: VecViennaCLGetArrayRead(xin,&xgpu);
742: VecViennaCLGetArray(yin,&ygpu);
743: PetscLogGpuTimeBegin();
744: try {
745: *ygpu = *xgpu * alpha;
746: ViennaCLWaitForGPU();
747: } catch(std::exception const & ex) {
748: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
749: }
750: PetscLogGpuTimeEnd();
751: PetscLogGpuFlops(xin->map->n);
752: VecViennaCLRestoreArrayRead(xin,&xgpu);
753: VecViennaCLRestoreArray(yin,&ygpu);
754: } else if (xin->map->n > 0) {
755: VecViennaCLGetArrayRead(xin,&xgpu);
756: VecViennaCLGetArray(yin,&ygpu);
757: PetscLogGpuTimeBegin();
758: try {
759: *ygpu = *xgpu * alpha + *ygpu * beta;
760: ViennaCLWaitForGPU();
761: } catch(std::exception const & ex) {
762: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
763: }
764: PetscLogGpuTimeEnd();
765: VecViennaCLRestoreArrayRead(xin,&xgpu);
766: VecViennaCLRestoreArray(yin,&ygpu);
767: PetscLogGpuFlops(3.0*xin->map->n);
768: }
769: return(0);
770: }
772: /* operation z = alpha * x + beta * y + gamma * z */
773: PetscErrorCode VecAXPBYPCZ_SeqViennaCL(Vec zin,PetscScalar alpha,PetscScalar beta,PetscScalar gamma,Vec xin,Vec yin)
774: {
775: PetscErrorCode ierr;
776: PetscInt n = zin->map->n;
777: const ViennaCLVector *xgpu,*ygpu;
778: ViennaCLVector *zgpu;
781: VecViennaCLGetArrayRead(xin,&xgpu);
782: VecViennaCLGetArrayRead(yin,&ygpu);
783: VecViennaCLGetArray(zin,&zgpu);
784: if (alpha == 0.0 && xin->map->n > 0) {
785: PetscLogGpuTimeBegin();
786: try {
787: if (beta == 0.0) {
788: *zgpu = gamma * *zgpu;
789: ViennaCLWaitForGPU();
790: PetscLogGpuFlops(1.0*n);
791: } else if (gamma == 0.0) {
792: *zgpu = beta * *ygpu;
793: ViennaCLWaitForGPU();
794: PetscLogGpuFlops(1.0*n);
795: } else {
796: *zgpu = beta * *ygpu + gamma * *zgpu;
797: ViennaCLWaitForGPU();
798: PetscLogGpuFlops(3.0*n);
799: }
800: } catch(std::exception const & ex) {
801: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
802: }
803: PetscLogGpuTimeEnd();
804: PetscLogGpuFlops(3.0*n);
805: } else if (beta == 0.0 && xin->map->n > 0) {
806: PetscLogGpuTimeBegin();
807: try {
808: if (gamma == 0.0) {
809: *zgpu = alpha * *xgpu;
810: ViennaCLWaitForGPU();
811: PetscLogGpuFlops(1.0*n);
812: } else {
813: *zgpu = alpha * *xgpu + gamma * *zgpu;
814: ViennaCLWaitForGPU();
815: PetscLogGpuFlops(3.0*n);
816: }
817: } catch(std::exception const & ex) {
818: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
819: }
820: PetscLogGpuTimeEnd();
821: } else if (gamma == 0.0 && xin->map->n > 0) {
822: PetscLogGpuTimeBegin();
823: try {
824: *zgpu = alpha * *xgpu + beta * *ygpu;
825: ViennaCLWaitForGPU();
826: } catch(std::exception const & ex) {
827: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
828: }
829: PetscLogGpuTimeEnd();
830: PetscLogGpuFlops(3.0*n);
831: } else if (xin->map->n > 0) {
832: PetscLogGpuTimeBegin();
833: try {
834: /* Split operation into two steps. This is not completely ideal, but avoids temporaries (which are far worse) */
835: if (gamma != 1.0)
836: *zgpu *= gamma;
837: *zgpu += alpha * *xgpu + beta * *ygpu;
838: ViennaCLWaitForGPU();
839: } catch(std::exception const & ex) {
840: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
841: }
842: PetscLogGpuTimeEnd();
843: PetscLogGpuFlops(5.0*n);
844: }
845: VecViennaCLRestoreArrayRead(xin,&xgpu); /* restore the array handles before returning */
846: VecViennaCLRestoreArrayRead(yin,&ygpu);
847: VecViennaCLRestoreArray(zin,&zgpu);
848: return(0);
849: }
851: PetscErrorCode VecPointwiseMult_SeqViennaCL(Vec win,Vec xin,Vec yin)
852: {
853: PetscErrorCode ierr;
854: PetscInt n = win->map->n;
855: const ViennaCLVector *xgpu,*ygpu;
856: ViennaCLVector *wgpu;
859: if (xin->map->n > 0) {
860: VecViennaCLGetArrayRead(xin,&xgpu);
861: VecViennaCLGetArrayRead(yin,&ygpu);
862: VecViennaCLGetArray(win,&wgpu);
863: PetscLogGpuTimeBegin();
864: try {
865: *wgpu = viennacl::linalg::element_prod(*xgpu, *ygpu);
866: ViennaCLWaitForGPU();
867: } catch(std::exception const & ex) {
868: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
869: }
870: PetscLogGpuTimeEnd();
871: VecViennaCLRestoreArrayRead(xin,&xgpu);
872: VecViennaCLRestoreArrayRead(yin,&ygpu);
873: VecViennaCLRestoreArray(win,&wgpu);
874: PetscLogGpuFlops(n);
875: }
876: return(0);
877: }
879: PetscErrorCode VecNorm_SeqViennaCL(Vec xin,NormType type,PetscReal *z)
880: {
881: PetscErrorCode ierr;
882: PetscInt n = xin->map->n;
883: PetscBLASInt bn;
884: const ViennaCLVector *xgpu;
887: if (xin->map->n > 0) {
888: PetscBLASIntCast(n,&bn);
889: VecViennaCLGetArrayRead(xin,&xgpu);
890: if (type == NORM_2 || type == NORM_FROBENIUS) {
891: PetscLogGpuTimeBegin();
892: try {
893: *z = viennacl::linalg::norm_2(*xgpu);
894: ViennaCLWaitForGPU();
895: } catch(std::exception const & ex) {
896: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
897: }
898: PetscLogGpuTimeEnd();
899: PetscLogGpuFlops(PetscMax(2.0*n-1,0.0));
900: } else if (type == NORM_INFINITY) {
901: PetscLogGpuTimeBegin();
902: try {
903: *z = viennacl::linalg::norm_inf(*xgpu);
904: ViennaCLWaitForGPU();
905: } catch(std::exception const & ex) {
906: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
907: }
908: PetscLogGpuTimeEnd();
909: } else if (type == NORM_1) {
910: PetscLogGpuTimeBegin();
911: try {
912: *z = viennacl::linalg::norm_1(*xgpu);
913: ViennaCLWaitForGPU();
914: } catch(std::exception const & ex) {
915: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
916: }
917: PetscLogGpuTimeEnd();
918: PetscLogGpuFlops(PetscMax(n-1.0,0.0));
919: } else if (type == NORM_1_AND_2) {
920: PetscLogGpuTimeBegin();
921: try {
922: *z = viennacl::linalg::norm_1(*xgpu);
923: *(z+1) = viennacl::linalg::norm_2(*xgpu);
924: ViennaCLWaitForGPU();
925: } catch(std::exception const & ex) {
926: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
927: }
928: PetscLogGpuTimeEnd();
929: PetscLogGpuFlops(PetscMax(2.0*n-1,0.0));
930: PetscLogGpuFlops(PetscMax(n-1.0,0.0));
931: }
932: VecViennaCLRestoreArrayRead(xin,&xgpu);
933: } else if (type == NORM_1_AND_2) {
934: *z = 0.0;
935: *(z+1) = 0.0;
936: } else *z = 0.0;
937: return(0);
938: }
940: PetscErrorCode VecSetRandom_SeqViennaCL(Vec xin,PetscRandom r)
941: {
945: VecSetRandom_SeqViennaCL_Private(xin,r);
946: xin->offloadmask = PETSC_OFFLOAD_CPU;
947: return(0);
948: }
950: PetscErrorCode VecResetArray_SeqViennaCL(Vec vin)
951: {
956: VecViennaCLCopyFromGPU(vin);
957: VecResetArray_SeqViennaCL_Private(vin);
958: vin->offloadmask = PETSC_OFFLOAD_CPU;
959: return(0);
960: }
962: PetscErrorCode VecPlaceArray_SeqViennaCL(Vec vin,const PetscScalar *a)
963: {
968: VecViennaCLCopyFromGPU(vin);
969: VecPlaceArray_Seq(vin,a);
970: vin->offloadmask = PETSC_OFFLOAD_CPU;
971: return(0);
972: }
974: PetscErrorCode VecReplaceArray_SeqViennaCL(Vec vin,const PetscScalar *a)
975: {
980: VecViennaCLCopyFromGPU(vin);
981: VecReplaceArray_Seq(vin,a);
982: vin->offloadmask = PETSC_OFFLOAD_CPU;
983: return(0);
984: }
986: /*@C
987: VecCreateSeqViennaCL - Creates a standard, sequential array-style vector.
989: Collective
991: Input Parameters:
992: + comm - the communicator, should be PETSC_COMM_SELF
993: - n - the vector length
995: Output Parameter:
996: . V - the vector
998: Notes:
999: Use VecDuplicate() or VecDuplicateVecs() to form additional vectors of the
1000: same type as an existing vector.
1002: Level: intermediate
1004: .seealso: VecCreateMPI(), VecCreate(), VecDuplicate(), VecDuplicateVecs(), VecCreateGhost()
1005: @*/
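/* Usage sketch (assumes a ViennaCL-enabled PETSc build; the length 100 is arbitrary):

       Vec            x;
       PetscReal      nrm;
       PetscErrorCode ierr;

       ierr = VecCreateSeqViennaCL(PETSC_COMM_SELF,100,&x);CHKERRQ(ierr);
       ierr = VecSet(x,1.0);CHKERRQ(ierr);
       ierr = VecNorm(x,NORM_2,&nrm);CHKERRQ(ierr);   // executed through the ViennaCL backend
       ierr = VecDestroy(&x);CHKERRQ(ierr);
*/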
1006: PetscErrorCode VecCreateSeqViennaCL(MPI_Comm comm,PetscInt n,Vec *v)
1007: {
1011: VecCreate(comm,v);
1012: VecSetSizes(*v,n,n);
1013: VecSetType(*v,VECSEQVIENNACL);
1014: return(0);
1015: }
1017: /*@C
1018: VecCreateSeqViennaCLWithArray - Creates a viennacl sequential array-style vector,
1019: where the user provides the array space to store the vector values.
1021: Collective
1023: Input Parameters:
1024: + comm - the communicator, should be PETSC_COMM_SELF
1025: . bs - the block size
1026: . n - the vector length
1027: - array - viennacl array where the vector elements are to be stored.
1029: Output Parameter:
1030: . V - the vector
1032: Notes:
1033: Use VecDuplicate() or VecDuplicateVecs() to form additional vectors of the
1034: same type as an existing vector.
1036: If the user-provided array is NULL, then VecViennaCLPlaceArray() can be used
1037: at a later stage to SET the array for storing the vector values.
1039: PETSc does NOT free the array when the vector is destroyed via VecDestroy().
1040: The user should not free the array until the vector is destroyed.
1042: Level: intermediate
1044: .seealso: VecCreateMPIViennaCLWithArray(), VecCreate(), VecDuplicate(), VecDuplicateVecs(),
1045: VecCreateGhost(), VecCreateSeq(), VecCUDAPlaceArray(), VecCreateSeqWithArray(),
1046: VecCreateMPIWithArray()
1047: @*/
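/* Usage sketch (illustrative; device_vec is a user-managed name, not part of PETSc):
   wrapping an existing ViennaCL vector without copying it:

       viennacl::vector<PetscScalar> device_vec(100);
       Vec                           x;
       PetscErrorCode                ierr;

       ierr = VecCreateSeqViennaCLWithArray(PETSC_COMM_SELF,1,100,&device_vec,&x);CHKERRQ(ierr);
       ierr = VecDestroy(&x);CHKERRQ(ierr);   // device_vec is NOT freed here
*/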
1048: PETSC_EXTERN PetscErrorCode VecCreateSeqViennaCLWithArray(MPI_Comm comm,PetscInt bs,PetscInt n,const ViennaCLVector* array,Vec *V)
1049: {
1051: PetscMPIInt size;
1054: VecCreate(comm,V);
1055: VecSetSizes(*V,n,n);
1056: VecSetBlockSize(*V,bs);
1057: MPI_Comm_size(comm,&size);
1058: if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot create VECSEQ on more than one process");
1059: VecCreate_SeqViennaCL_Private(*V,array);
1060: return(0);
1061: }
1063: /*@C
1064: VecCreateSeqViennaCLWithArrays - Creates a ViennaCL sequential vector, where
1065: the user provides the array space to store the vector values.
1067: Collective
1069: Input Parameters:
1070: + comm - the communicator, should be PETSC_COMM_SELF
1071: . bs - the block size
1072: . n - the vector length
1073: . cpuarray - CPU memory where the vector elements are to be stored.
1074: - viennaclvec - ViennaCL vector where the Vec entries are to be stored on the device.
1076: Output Parameter:
1077: . V - the vector
1079: Notes:
1080: If both cpuarray and viennaclvec are provided, the caller must ensure that
1081: the provided arrays have identical values.
1083: PETSc does NOT free the provided arrays when the vector is destroyed via
1084: VecDestroy(). The user should not free the array until the vector is
1085: destroyed.
1087: Level: intermediate
1089: .seealso: VecCreateMPIViennaCLWithArrays(), VecCreate(), VecCreateSeqWithArray(),
1090: VecViennaCLPlaceArray(), VecPlaceArray(), VecCreateSeqCUDAWithArrays(),
1091: VecViennaCLAllocateCheckHost()
1092: @*/
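/* Usage sketch (illustrative; the caller must keep the two copies consistent, see above):

       PetscScalar                   host[4] = {1.0,2.0,3.0,4.0};
       viennacl::vector<PetscScalar> device_vec(4);   // assumed to already hold the same four values
       Vec                           x;
       PetscErrorCode                ierr;

       ierr = VecCreateSeqViennaCLWithArrays(PETSC_COMM_SELF,1,4,host,&device_vec,&x);CHKERRQ(ierr);
       ierr = VecDestroy(&x);CHKERRQ(ierr);           // neither host nor device_vec is freed
*/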
1093: PetscErrorCode VecCreateSeqViennaCLWithArrays(MPI_Comm comm,PetscInt bs,PetscInt n,const PetscScalar cpuarray[],const ViennaCLVector* viennaclvec,Vec *V)
1094: {
1096: PetscMPIInt size;
1100: MPI_Comm_size(comm,&size);
1101: if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot create VECSEQ on more than one process");
1103: // Set V's device vector to the provided viennaclvec; do not allocate host memory yet.
1104: VecCreateSeqViennaCLWithArray(comm,bs,n,viennaclvec,V);
1106: if (cpuarray && viennaclvec) {
1107: Vec_Seq *s = (Vec_Seq*)((*V)->data);
1108: s->array = (PetscScalar*)cpuarray;
1109: (*V)->offloadmask = PETSC_OFFLOAD_BOTH;
1110: } else if (cpuarray) {
1111: Vec_Seq *s = (Vec_Seq*)((*V)->data);
1112: s->array = (PetscScalar*)cpuarray;
1113: (*V)->offloadmask = PETSC_OFFLOAD_CPU;
1114: } else if (viennaclvec) {
1115: (*V)->offloadmask = PETSC_OFFLOAD_GPU;
1116: } else {
1117: (*V)->offloadmask = PETSC_OFFLOAD_UNALLOCATED;
1118: }
1120: return(0);
1121: }
1123: /*@C
1124: VecViennaCLPlaceArray - Replaces the ViennaCL vector in a Vec with
1125: the one provided by the user. This is useful to avoid a copy.
1127: Not Collective
1129: Input Parameters:
1130: + vec - the vector
1131: - array - the ViennaCL vector
1133: Notes:
1134: You can return to the original ViennaCL vector with a call to
1135: VecViennaCLResetArray(). It is not possible to use VecViennaCLPlaceArray()
1136: and VecPlaceArray() at the same time on the same vector.
1138: Level: intermediate
1140: .seealso: VecPlaceArray(), VecSetValues(), VecViennaCLResetArray(),
1141: VecCUDAPlaceArray()
1143: @*/
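/* Usage sketch (illustrative; tmp is a user-managed vector of the same length as x):

       viennacl::vector<PetscScalar> tmp(100);
       ierr = VecViennaCLPlaceArray(x,&tmp);CHKERRQ(ierr);   // x now operates on tmp
       ...
       ierr = VecViennaCLResetArray(x);CHKERRQ(ierr);        // back to the original GPU array
*/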
1144: PETSC_EXTERN PetscErrorCode VecViennaCLPlaceArray(Vec vin,const ViennaCLVector* a)
1145: {
1150: VecViennaCLCopyToGPU(vin);
1151: if (((Vec_Seq*)vin->data)->unplacedarray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"VecViennaCLPlaceArray()/VecPlaceArray() was already called on this vector, without a call to VecViennaCLResetArray()/VecResetArray()");
1152: ((Vec_Seq*)vin->data)->unplacedarray = (PetscScalar *) ((Vec_ViennaCL*)vin->spptr)->GPUarray; /* save previous GPU array so reset can bring it back */
1153: ((Vec_ViennaCL*)vin->spptr)->GPUarray = (ViennaCLVector*)a;
1154: vin->offloadmask = PETSC_OFFLOAD_GPU;
1155: PetscObjectStateIncrease((PetscObject)vin);
1156: return(0);
1157: }
1159: /*@C
1160: VecViennaCLResetArray - Resets a vector to use its default memory. Call this
1161: after the use of VecViennaCLPlaceArray().
1163: Not Collective
1165: Input Parameters:
1166: . vec - the vector
1168: Level: developer
1170: .seealso: VecViennaCLPlaceArray(), VecResetArray(), VecCUDAResetArray(), VecPlaceArray()
1171: @*/
1172: PETSC_EXTERN PetscErrorCode VecViennaCLResetArray(Vec vin)
1173: {
1178: VecViennaCLCopyToGPU(vin);
1179: ((Vec_ViennaCL*)vin->spptr)->GPUarray = (ViennaCLVector *) ((Vec_Seq*)vin->data)->unplacedarray;
1180: ((Vec_Seq*)vin->data)->unplacedarray = 0;
1181: vin->offloadmask = PETSC_OFFLOAD_GPU;
1182: PetscObjectStateIncrease((PetscObject)vin);
1183: return(0);
1184: }
1186: /* VecDotNorm2 - computes the inner product of two vectors and the 2-norm squared of the second vector
1187: *
1188: * Simply reuses VecDot() and VecNorm(). Performance improvement through custom kernel (kernel generator) possible.
1189: */
1190: PetscErrorCode VecDotNorm2_SeqViennaCL(Vec s, Vec t, PetscScalar *dp, PetscScalar *nm)
1191: {
1192: PetscErrorCode ierr;
1195: VecDot_SeqViennaCL(s,t,dp);
1196: VecNorm_SeqViennaCL(t,NORM_2,nm);
1197: *nm *= *nm; //squared norm required
1198: return(0);
1199: }
1201: PetscErrorCode VecDuplicate_SeqViennaCL(Vec win,Vec *V)
1202: {
1206: VecCreateSeqViennaCL(PetscObjectComm((PetscObject)win),win->map->n,V);
1207: PetscLayoutReference(win->map,&(*V)->map);
1208: PetscObjectListDuplicate(((PetscObject)win)->olist,&((PetscObject)(*V))->olist);
1209: PetscFunctionListDuplicate(((PetscObject)win)->qlist,&((PetscObject)(*V))->qlist);
1210: (*V)->stash.ignorenegidx = win->stash.ignorenegidx;
1211: return(0);
1212: }
1214: PetscErrorCode VecDestroy_SeqViennaCL(Vec v)
1215: {
1219: try {
1220: if (v->spptr) {
1221: delete ((Vec_ViennaCL*)v->spptr)->GPUarray_allocated;
1222: delete (Vec_ViennaCL*) v->spptr;
1223: }
1224: } catch(std::exception const & ex) {
1225: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
1226: }
1227: VecDestroy_SeqViennaCL_Private(v);
1228: return(0);
1229: }
1231: PetscErrorCode VecGetArray_SeqViennaCL(Vec v,PetscScalar **a)
1232: {
1236: if (v->offloadmask == PETSC_OFFLOAD_GPU) {
1237: VecViennaCLCopyFromGPU(v);
1238: } else {
1239: VecViennaCLAllocateCheckHost(v);
1240: }
1241: *a = *((PetscScalar**)v->data);
1242: return(0);
1243: }
1245: PetscErrorCode VecRestoreArray_SeqViennaCL(Vec v,PetscScalar **a)
1246: {
1248: v->offloadmask = PETSC_OFFLOAD_CPU;
1249: return(0);
1250: }
1252: PetscErrorCode VecGetArrayWrite_SeqViennaCL(Vec v,PetscScalar **a)
1253: {
1257: VecViennaCLAllocateCheckHost(v);
1258: *a = *((PetscScalar**)v->data);
1259: return(0);
1260: }
1262: static PetscErrorCode VecBindToCPU_SeqAIJViennaCL(Vec V,PetscBool flg)
1263: {
1267: V->boundtocpu = flg;
1268: if (flg) {
1269: VecViennaCLCopyFromGPU(V);
1270: V->offloadmask = PETSC_OFFLOAD_CPU; /* since the CPU code will likely change values in the vector */
1271: V->ops->dot = VecDot_Seq;
1272: V->ops->norm = VecNorm_Seq;
1273: V->ops->tdot = VecTDot_Seq;
1274: V->ops->scale = VecScale_Seq;
1275: V->ops->copy = VecCopy_Seq;
1276: V->ops->set = VecSet_Seq;
1277: V->ops->swap = VecSwap_Seq;
1278: V->ops->axpy = VecAXPY_Seq;
1279: V->ops->axpby = VecAXPBY_Seq;
1280: V->ops->axpbypcz = VecAXPBYPCZ_Seq;
1281: V->ops->pointwisemult = VecPointwiseMult_Seq;
1282: V->ops->pointwisedivide = VecPointwiseDivide_Seq;
1283: V->ops->setrandom = VecSetRandom_Seq;
1284: V->ops->dot_local = VecDot_Seq;
1285: V->ops->tdot_local = VecTDot_Seq;
1286: V->ops->norm_local = VecNorm_Seq;
1287: V->ops->mdot_local = VecMDot_Seq;
1288: V->ops->mtdot_local = VecMTDot_Seq;
1289: V->ops->maxpy = VecMAXPY_Seq;
1290: V->ops->mdot = VecMDot_Seq;
1291: V->ops->mtdot = VecMTDot_Seq;
1292: V->ops->aypx = VecAYPX_Seq;
1293: V->ops->waxpy = VecWAXPY_Seq;
1294: V->ops->dotnorm2 = NULL;
1295: V->ops->placearray = VecPlaceArray_Seq;
1296: V->ops->replacearray = VecReplaceArray_Seq;
1297: V->ops->resetarray = VecResetArray_Seq;
1298: V->ops->duplicate = VecDuplicate_Seq;
1299: } else {
1300: V->ops->dot = VecDot_SeqViennaCL;
1301: V->ops->norm = VecNorm_SeqViennaCL;
1302: V->ops->tdot = VecTDot_SeqViennaCL;
1303: V->ops->scale = VecScale_SeqViennaCL;
1304: V->ops->copy = VecCopy_SeqViennaCL;
1305: V->ops->set = VecSet_SeqViennaCL;
1306: V->ops->swap = VecSwap_SeqViennaCL;
1307: V->ops->axpy = VecAXPY_SeqViennaCL;
1308: V->ops->axpby = VecAXPBY_SeqViennaCL;
1309: V->ops->axpbypcz = VecAXPBYPCZ_SeqViennaCL;
1310: V->ops->pointwisemult = VecPointwiseMult_SeqViennaCL;
1311: V->ops->pointwisedivide = VecPointwiseDivide_SeqViennaCL;
1312: V->ops->setrandom = VecSetRandom_SeqViennaCL;
1313: V->ops->dot_local = VecDot_SeqViennaCL;
1314: V->ops->tdot_local = VecTDot_SeqViennaCL;
1315: V->ops->norm_local = VecNorm_SeqViennaCL;
1316: V->ops->mdot_local = VecMDot_SeqViennaCL;
1317: V->ops->mtdot_local = VecMTDot_SeqViennaCL;
1318: V->ops->maxpy = VecMAXPY_SeqViennaCL;
1319: V->ops->mdot = VecMDot_SeqViennaCL;
1320: V->ops->mtdot = VecMTDot_SeqViennaCL;
1321: V->ops->aypx = VecAYPX_SeqViennaCL;
1322: V->ops->waxpy = VecWAXPY_SeqViennaCL;
1323: V->ops->dotnorm2 = VecDotNorm2_SeqViennaCL;
1324: V->ops->placearray = VecPlaceArray_SeqViennaCL;
1325: V->ops->replacearray = VecReplaceArray_SeqViennaCL;
1326: V->ops->resetarray = VecResetArray_SeqViennaCL;
1327: V->ops->destroy = VecDestroy_SeqViennaCL;
1328: V->ops->duplicate = VecDuplicate_SeqViennaCL;
1329: V->ops->getarraywrite = VecGetArrayWrite_SeqViennaCL;
1330: V->ops->getarray = VecGetArray_SeqViennaCL;
1331: V->ops->restorearray = VecRestoreArray_SeqViennaCL;
1332: }
1333: return(0);
1334: }
1336: PETSC_EXTERN PetscErrorCode VecCreate_SeqViennaCL(Vec V)
1337: {
1339: PetscMPIInt size;
1342: MPI_Comm_size(PetscObjectComm((PetscObject)V),&size);
1343: if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot create VECSEQVIENNACL on more than one process");
1344: VecCreate_Seq_Private(V,0);
1345: PetscObjectChangeTypeName((PetscObject)V,VECSEQVIENNACL);
1347: VecBindToCPU_SeqAIJViennaCL(V,PETSC_FALSE);
1348: V->ops->bindtocpu = VecBindToCPU_SeqAIJViennaCL;
1350: VecViennaCLAllocateCheck(V);
1351: VecViennaCLAllocateCheckHost(V);
1352: VecSet(V,0.0);
1353: VecSet_Seq(V,0.0);
1354: V->offloadmask = PETSC_OFFLOAD_BOTH;
1355: return(0);
1356: }
1358: /*@C
1359: VecViennaCLGetCLContext - Get the OpenCL context in which the Vec resides.
1361: Caller should cast (*ctx) to (const cl_context). Caller is responsible for
1362: invoking clReleaseContext().
1364: Input Parameters:
1365: . v - the vector
1367: Output Parameters:
1368: . ctx - pointer to the underlying CL context
1370: Level: intermediate
1372: .seealso: VecViennaCLGetCLQueue(), VecViennaCLGetCLMemRead()
1373: @*/
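/* Usage sketch (OpenCL backend only; illustrative):

       PETSC_UINTPTR_T ctxhandle;
       ierr = VecViennaCLGetCLContext(x,&ctxhandle);CHKERRQ(ierr);
       cl_context ctx = (cl_context)ctxhandle;
       ...                                    // use ctx with plain OpenCL calls
       clReleaseContext(ctx);                 // release the reference retained above
*/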
1374: PETSC_EXTERN PetscErrorCode VecViennaCLGetCLContext(Vec v, PETSC_UINTPTR_T* ctx)
1375: {
1376: #if !defined(PETSC_HAVE_OPENCL)
1377: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"PETSc must be configured with --with-opencl to get the associated cl_context.");
1378: #else
1384: const ViennaCLVector *v_vcl;
1385: VecViennaCLGetArrayRead(v, &v_vcl);
1386: try{
1387: viennacl::ocl::context vcl_ctx = v_vcl->handle().opencl_handle().context();
1388: const cl_context ocl_ctx = vcl_ctx.handle().get();
1389: clRetainContext(ocl_ctx);
1390: *ctx = (PETSC_UINTPTR_T)(ocl_ctx);
1391: } catch (std::exception const & ex) {
1392: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
1393: }
1395: return(0);
1396: #endif
1397: }
1399: /*@C
1400: VecViennaCLGetCLQueue - Get the OpenCL command queue to which all
1401: operations of the Vec are enqueued.
1403: Caller should cast (*queue) to (const cl_command_queue). Caller is
1404: responsible for invoking clReleaseCommandQueue().
1406: Input Parameters:
1407: . v - the vector
1409: Output Parameters:
1410: . queue - pointer to the CL command queue
1412: Level: intermediate
1414: .seealso: VecViennaCLGetCLContext(), VecViennaCLGetCLMemRead()
1415: @*/
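/* Usage sketch (OpenCL backend only; illustrative), analogous to the context example above:

       PETSC_UINTPTR_T qhandle;
       ierr = VecViennaCLGetCLQueue(x,&qhandle);CHKERRQ(ierr);
       cl_command_queue queue = (cl_command_queue)qhandle;
       ...
       clReleaseCommandQueue(queue);          // release the reference retained above
*/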
1416: PETSC_EXTERN PetscErrorCode VecViennaCLGetCLQueue(Vec v, PETSC_UINTPTR_T* queue)
1417: {
1418: #if !defined(PETSC_HAVE_OPENCL)
1419: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"PETSc must be configured with --with-opencl to get the associated cl_command_queue.");
1420: #else
1425: const ViennaCLVector *v_vcl;
1426: VecViennaCLGetArrayRead(v, &v_vcl);
1427: try{
1428: viennacl::ocl::context vcl_ctx = v_vcl->handle().opencl_handle().context();
1429: const viennacl::ocl::command_queue& vcl_queue = vcl_ctx.current_queue();
1430: const cl_command_queue ocl_queue = vcl_queue.handle().get();
1431: clRetainCommandQueue(ocl_queue);
1432: *queue = (PETSC_UINTPTR_T)(ocl_queue);
1433: } catch (std::exception const & ex) {
1434: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
1435: }
1437: return(0);
1438: #endif
1439: }
1441: /*@C
1442: VecViennaCLGetCLMemRead - Provides access to the CL buffer inside a Vec.
1444: Caller should cast (*mem) to (const cl_mem). Caller is responsible for
1445: invoking clReleaseMemObject().
1447: Input Parameters:
1448: . v - the vector
1450: Output Parameters:
1451: . mem - pointer to the device buffer
1453: Level: intermediate
1455: .seealso: VecViennaCLGetCLContext(), VecViennaCLGetCLMemWrite()
1456: @*/
1457: PETSC_EXTERN PetscErrorCode VecViennaCLGetCLMemRead(Vec v, PETSC_UINTPTR_T* mem)
1458: {
1459: #if !defined(PETSC_HAVE_OPENCL)
1460: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"PETSc must be configured with --with-opencl to get a Vec's cl_mem");
1461: #else
1466: const ViennaCLVector *v_vcl;
1467: VecViennaCLGetArrayRead(v, &v_vcl);
1468: try{
1469: const cl_mem ocl_mem = v_vcl->handle().opencl_handle().get();
1470: clRetainMemObject(ocl_mem);
1471: *mem = (PETSC_UINTPTR_T)(ocl_mem);
1472: } catch (std::exception const & ex) {
1473: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
1474: }
1475: return(0);
1476: #endif
1477: }
1479: /*@C
1480: VecViennaCLGetCLMemWrite - Provides access to the CL buffer inside a Vec.
1482: Caller should cast (*mem) to (const cl_mem). Caller is responsible for
1483: invoking clReleaseMemObject().
1485: The device pointer has to be released by calling
1486: VecViennaCLRestoreCLMemWrite(). Upon restoring the vector data the data on
1487: the host will be marked as out of date. A subsequent access of the host data
1488: will thus incur a data transfer from the device to the host.
1490: Input Parameters:
1491: . v - the vector
1493: Output Parameters:
1494: . mem - pointer to the device buffer
1496: Level: intermediate
1498: .seealso: VecViennaCLGetCLContext(), VecViennaCLRestoreCLMemWrite()
1499: @*/
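/* Usage sketch (OpenCL backend only; illustrative): writing into the buffer with user
   OpenCL kernels and then marking the host copy as stale:

       PETSC_UINTPTR_T memhandle;
       ierr = VecViennaCLGetCLMemWrite(x,&memhandle);CHKERRQ(ierr);
       cl_mem buf = (cl_mem)memhandle;
       ...                                    // enqueue kernels that write into buf
       clReleaseMemObject(buf);               // release the reference retained above
       ierr = VecViennaCLRestoreCLMemWrite(x);CHKERRQ(ierr);
*/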
1500: PETSC_EXTERN PetscErrorCode VecViennaCLGetCLMemWrite(Vec v, PETSC_UINTPTR_T* mem)
1501: {
1502: #if !defined(PETSC_HAVE_OPENCL)
1503: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"PETSc must be configured with --with-opencl to get a Vec's cl_mem");
1504: #else
1509: ViennaCLVector *v_vcl;
1510: VecViennaCLGetArrayWrite(v, &v_vcl);
1511: try{
1512: const cl_mem ocl_mem = v_vcl->handle().opencl_handle().get();
1513: clRetainMemObject(ocl_mem);
1514: *mem = (PETSC_UINTPTR_T)(ocl_mem);
1515: } catch (std::exception const & ex) {
1516: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
1517: }
1519: return(0);
1520: #endif
1521: }
1523: /*@C
1524: VecViennaCLRestoreCLMemWrite - Restores a CL buffer pointer previously
1525: acquired with VecViennaCLGetCLMemWrite().
1527: This marks the host data as out of date. Subsequent access to the
1528: vector data on the host side with for instance VecGetArray() incurs a
1529: data transfer.
1531: Input Parameters:
1532: . v - the vector
1534: Level: intermediate
1536: .seealso: VecViennaCLGetCLContext(), VecViennaCLGetCLMemWrite()
1537: @*/
1538: PETSC_EXTERN PetscErrorCode VecViennaCLRestoreCLMemWrite(Vec v)
1539: {
1540: #if !defined(PETSC_HAVE_OPENCL)
1541: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"PETSc must be configured with --with-opencl to restore a Vec's cl_mem");
1542: #else
1547: VecViennaCLRestoreArrayWrite(v, PETSC_NULL);
1549: return(0);
1550: #endif
1551: }
1553: /*@C
1554: VecViennaCLGetCLMem - Provides access to the CL buffer inside a Vec.
1556: Caller should cast (*mem) to (const cl_mem). Caller is responsible for
1557: invoking clReleaseMemObject().
1559: The device pointer has to be released by calling VecViennaCLRestoreCLMem().
1560: Upon restoring the vector data the data on the host will be marked as out of
1561: date. A subsequent access of the host data will thus incur a data transfer
1562: from the device to the host.
1564: Input Parameters:
1565: . v - the vector
1567: Output Parameters:
1568: . mem - pointer to the device buffer
1570: Level: intermediate
1572: .seealso: VecViennaCLGetCLContext(), VecViennaCLRestoreCLMem()
1573: @*/
1574: PETSC_EXTERN PetscErrorCode VecViennaCLGetCLMem(Vec v, PETSC_UINTPTR_T* mem)
1575: {
1576: #if !defined(PETSC_HAVE_OPENCL)
1577: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"PETSc must be configured with --with-opencl to get a Vec's cl_mem");
1578: #else
1583: ViennaCLVector *v_vcl;
1584: VecViennaCLGetArray(v, &v_vcl);
1585: try{
1586: const cl_mem ocl_mem = v_vcl->handle().opencl_handle().get();
1587: clRetainMemObject(ocl_mem);
1588: *mem = (PETSC_UINTPTR_T)(ocl_mem);
1589: } catch (std::exception const & ex) {
1590: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
1591: }
1593: return(0);
1594: #endif
1595: }
1597: /*@C
1598: VecViennaCLRestoreCLMem - Restores a CL buffer pointer previously
1599: acquired with VecViennaCLGetCLMem().
1601: This marks the host data as out of date. Subsequent access to the vector
1602: data on the host side with for instance VecGetArray() incurs a data
1603: transfer.
1605: Input Parameters:
1606: . v - the vector
1608: Level: intermediate
1610: .seealso: VecViennaCLGetCLContext(), VecViennaCLGetCLMem()
1611: @*/
1612: PETSC_EXTERN PetscErrorCode VecViennaCLRestoreCLMem(Vec v)
1613: {
1614: #if !defined(PETSC_HAVE_OPENCL)
1615: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"PETSc must be configured with --with-opencl to restore a Vec's cl_mem");
1616: #else
1621: VecViennaCLRestoreArray(v, PETSC_NULL);
1623: return(0);
1624: #endif
1625: }
1627: PetscErrorCode VecCreate_SeqViennaCL_Private(Vec V,const ViennaCLVector *array)
1628: {
1630: Vec_ViennaCL *vecviennacl;
1631: PetscMPIInt size;
1634: MPI_Comm_size(PetscObjectComm((PetscObject)V),&size);
1635: if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot create VECSEQVIENNACL on more than one process");
1636: VecCreate_Seq_Private(V,0);
1637: PetscObjectChangeTypeName((PetscObject)V,VECSEQVIENNACL);
1638: VecBindToCPU_SeqAIJViennaCL(V,PETSC_FALSE);
1639: V->ops->bindtocpu = VecBindToCPU_SeqAIJViennaCL;
1641: if (array) {
1642: if (!V->spptr)
1643: V->spptr = new Vec_ViennaCL;
1644: vecviennacl = (Vec_ViennaCL*)V->spptr;
1645: vecviennacl->GPUarray_allocated = 0;
1646: vecviennacl->GPUarray = (ViennaCLVector*)array;
1647: V->offloadmask = PETSC_OFFLOAD_UNALLOCATED;
1648: }
1650: return(0);
1651: }