Actual source code: vecviennacl.cxx
1: /*
2: Implements the sequential ViennaCL vectors.
3: */
5: #include <petscconf.h>
6: #include <petsc/private/vecimpl.h>
7: #include <../src/vec/vec/impls/dvecimpl.h>
8: #include <../src/vec/vec/impls/seq/seqviennacl/viennaclvecimpl.h>
10: #include "viennacl/linalg/inner_prod.hpp"
11: #include "viennacl/linalg/norm_1.hpp"
12: #include "viennacl/linalg/norm_2.hpp"
13: #include "viennacl/linalg/norm_inf.hpp"
15: #ifdef VIENNACL_WITH_OPENCL
16: #include "viennacl/ocl/backend.hpp"
17: #endif
19: PETSC_EXTERN PetscErrorCode VecViennaCLGetArray(Vec v, ViennaCLVector **a)
20: {
22: *a = 0;
23: VecViennaCLCopyToGPU(v);
24: *a = ((Vec_ViennaCL*)v->spptr)->GPUarray;
25: ViennaCLWaitForGPU();
26: return 0;
27: }
29: PETSC_EXTERN PetscErrorCode VecViennaCLRestoreArray(Vec v, ViennaCLVector **a)
30: {
32: v->offloadmask = PETSC_OFFLOAD_GPU;
34: PetscObjectStateIncrease((PetscObject)v);
35: return 0;
36: }
38: PETSC_EXTERN PetscErrorCode VecViennaCLGetArrayRead(Vec v, const ViennaCLVector **a)
39: {
41: *a = 0;
42: VecViennaCLCopyToGPU(v);
43: *a = ((Vec_ViennaCL*)v->spptr)->GPUarray;
44: ViennaCLWaitForGPU();
45: return 0;
46: }
48: PETSC_EXTERN PetscErrorCode VecViennaCLRestoreArrayRead(Vec v, const ViennaCLVector **a)
49: {
51: return 0;
52: }
54: PETSC_EXTERN PetscErrorCode VecViennaCLGetArrayWrite(Vec v, ViennaCLVector **a)
55: {
57: *a = 0;
58: VecViennaCLAllocateCheck(v);
59: *a = ((Vec_ViennaCL*)v->spptr)->GPUarray;
60: ViennaCLWaitForGPU();
61: return 0;
62: }
64: PETSC_EXTERN PetscErrorCode VecViennaCLRestoreArrayWrite(Vec v, ViennaCLVector **a)
65: {
67: v->offloadmask = PETSC_OFFLOAD_GPU;
69: PetscObjectStateIncrease((PetscObject)v);
70: return 0;
71: }
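/*
   Illustrative sketch (not part of this file): the accessors above are used in get/restore
   pairs, with the read/write variants controlling which copy (host or device) is marked
   valid afterwards. A hypothetical caller operating directly on the device vector:

     ViennaCLVector *varray;
     VecViennaCLGetArray(x,&varray);        // syncs host -> device if needed
     *varray *= 2.0;                        // any ViennaCL vector operation
     VecViennaCLRestoreArray(x,&varray);    // marks the device copy as the valid one
*/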
73: PETSC_EXTERN PetscErrorCode PetscViennaCLInit()
74: {
75: char string[20];
76: PetscBool flg,flg_cuda,flg_opencl,flg_openmp;
78: /* ViennaCL backend selection: CUDA, OpenCL, or OpenMP */
79: PetscOptionsGetString(NULL,NULL,"-viennacl_backend",string,sizeof(string),&flg);
80: if (flg) {
81: try {
82: PetscStrcasecmp(string,"cuda",&flg_cuda);
83: PetscStrcasecmp(string,"opencl",&flg_opencl);
84: PetscStrcasecmp(string,"openmp",&flg_openmp);
86: /* A default (sequential) CPU backend is always available - even if OpenMP is not enabled. */
87: if (flg_openmp) viennacl::backend::default_memory_type(viennacl::MAIN_MEMORY);
88: #if defined(PETSC_HAVE_CUDA)
89: else if (flg_cuda) viennacl::backend::default_memory_type(viennacl::CUDA_MEMORY);
90: #endif
91: #if defined(PETSC_HAVE_OPENCL)
92: else if (flg_opencl) viennacl::backend::default_memory_type(viennacl::OPENCL_MEMORY);
93: #endif
94: else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: Backend not recognized or available: %s.\n Pass -viennacl_view to see available backends for ViennaCL.", string);
95: } catch (std::exception const & ex) {
96: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
97: }
98: }
100: if (PetscDefined(HAVE_CUDA)) {
101: /* For CUDA event timers */
102: PetscDeviceInitialize(PETSC_DEVICE_CUDA);
103: }
105: #if defined(PETSC_HAVE_OPENCL)
106: /* ViennaCL OpenCL device type configuration */
107: PetscOptionsGetString(NULL,NULL,"-viennacl_opencl_device_type",string,sizeof(string),&flg);
108: if (flg) {
109: try {
110: PetscStrcasecmp(string,"cpu",&flg);
111: if (flg) viennacl::ocl::set_context_device_type(0, CL_DEVICE_TYPE_CPU);
113: PetscStrcasecmp(string,"gpu",&flg);
114: if (flg) viennacl::ocl::set_context_device_type(0, CL_DEVICE_TYPE_GPU);
116: PetscStrcasecmp(string,"accelerator",&flg);
117: if (flg) viennacl::ocl::set_context_device_type(0, CL_DEVICE_TYPE_ACCELERATOR);
118: } catch (std::exception const & ex) {
119: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
120: }
121: }
122: #endif
124: /* Print available backends */
125: PetscOptionsHasName(NULL,NULL,"-viennacl_view",&flg);
126: if (flg) {
127: PetscPrintf(PETSC_COMM_WORLD, "ViennaCL backends available: ");
128: #if defined(PETSC_HAVE_CUDA)
129: PetscPrintf(PETSC_COMM_WORLD, "CUDA, ");
130: #endif
131: #if defined(PETSC_HAVE_OPENCL)
132: PetscPrintf(PETSC_COMM_WORLD, "OpenCL, ");
133: #endif
134: #if defined(PETSC_HAVE_OPENMP)
135: PetscPrintf(PETSC_COMM_WORLD, "OpenMP ");
136: #else
137: PetscPrintf(PETSC_COMM_WORLD, "OpenMP (1 thread) ");
138: #endif
139: PetscPrintf(PETSC_COMM_WORLD, "\n");
141: /* Print selected backends */
142: PetscPrintf(PETSC_COMM_WORLD, "ViennaCL backend selected: ");
143: #if defined(PETSC_HAVE_CUDA)
144: if (viennacl::backend::default_memory_type() == viennacl::CUDA_MEMORY) {
145: PetscPrintf(PETSC_COMM_WORLD, "CUDA ");
146: }
147: #endif
148: #if defined(PETSC_HAVE_OPENCL)
149: if (viennacl::backend::default_memory_type() == viennacl::OPENCL_MEMORY) {
150: PetscPrintf(PETSC_COMM_WORLD, "OpenCL ");
151: }
152: #endif
153: #if defined(PETSC_HAVE_OPENMP)
154: if (viennacl::backend::default_memory_type() == viennacl::MAIN_MEMORY) {
155: PetscPrintf(PETSC_COMM_WORLD, "OpenMP ");
156: }
157: #else
158: if (viennacl::backend::default_memory_type() == viennacl::MAIN_MEMORY) {
159: PetscPrintf(PETSC_COMM_WORLD, "OpenMP (sequential - consider reconfiguration: --with-openmp=1) ");
160: }
161: #endif
162: PetscPrintf(PETSC_COMM_WORLD, "\n");
163: }
164: return 0;
165: }
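/*
   Example (hypothetical command line): the options parsed above let a user select the
   backend and OpenCL device type at runtime, e.g.

     ./app -vec_type seqviennacl -viennacl_backend opencl \
           -viennacl_opencl_device_type gpu -viennacl_view
*/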
167: /*
168: Allocates space for the vector array on the host if it does not already exist.
169: Does NOT change the vector's offload mask.
170: Does NOT zero the host array.
171: */
172: PETSC_EXTERN PetscErrorCode VecViennaCLAllocateCheckHost(Vec v)
173: {
174: PetscScalar *array;
175: Vec_Seq *s;
176: PetscInt n = v->map->n;
178: s = (Vec_Seq*)v->data;
179: VecViennaCLAllocateCheck(v);
180: if (s->array == 0) {
181: PetscMalloc1(n,&array);
182: PetscLogObjectMemory((PetscObject)v,n*sizeof(PetscScalar));
183: s->array = array;
184: s->array_allocated = array;
185: }
186: return 0;
187: }
189: /*
190: Allocates space for the vector array on the GPU if it does not already exist.
191: Does NOT change the vector's offload mask.
192: Does NOT zero the ViennaCL array.
194: */
195: PetscErrorCode VecViennaCLAllocateCheck(Vec v)
196: {
197: if (!v->spptr) {
198: try {
199: v->spptr = new Vec_ViennaCL;
200: ((Vec_ViennaCL*)v->spptr)->GPUarray_allocated = new ViennaCLVector((PetscBLASInt)v->map->n);
201: ((Vec_ViennaCL*)v->spptr)->GPUarray = ((Vec_ViennaCL*)v->spptr)->GPUarray_allocated;
203: } catch(std::exception const & ex) {
204: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
205: }
206: }
207: return 0;
208: }
210: /* Copies a vector from the CPU to the GPU unless we already have an up-to-date copy on the GPU */
211: PetscErrorCode VecViennaCLCopyToGPU(Vec v)
212: {
214: VecViennaCLAllocateCheck(v);
215: if (v->map->n > 0) {
216: if (v->offloadmask == PETSC_OFFLOAD_CPU) {
217: PetscLogEventBegin(VEC_ViennaCLCopyToGPU,v,0,0,0);
218: try {
219: ViennaCLVector *vec = ((Vec_ViennaCL*)v->spptr)->GPUarray;
220: viennacl::fast_copy(*(PetscScalar**)v->data, *(PetscScalar**)v->data + v->map->n, vec->begin());
221: ViennaCLWaitForGPU();
222: } catch(std::exception const & ex) {
223: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
224: }
225: PetscLogCpuToGpu((v->map->n)*sizeof(PetscScalar));
226: PetscLogEventEnd(VEC_ViennaCLCopyToGPU,v,0,0,0);
227: v->offloadmask = PETSC_OFFLOAD_BOTH;
228: }
229: }
230: return 0;
231: }
233: /*
234: VecViennaCLCopyFromGPU - Copies a vector from the GPU to the CPU unless we already have an up-to-date copy on the CPU
235: */
236: PetscErrorCode VecViennaCLCopyFromGPU(Vec v)
237: {
239: VecViennaCLAllocateCheckHost(v);
240: if (v->offloadmask == PETSC_OFFLOAD_GPU) {
241: PetscLogEventBegin(VEC_ViennaCLCopyFromGPU,v,0,0,0);
242: try {
243: ViennaCLVector *vec = ((Vec_ViennaCL*)v->spptr)->GPUarray;
244: viennacl::fast_copy(vec->begin(),vec->end(),*(PetscScalar**)v->data);
245: ViennaCLWaitForGPU();
246: } catch(std::exception const & ex) {
247: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
248: }
249: PetscLogGpuToCpu((v->map->n)*sizeof(PetscScalar));
250: PetscLogEventEnd(VEC_ViennaCLCopyFromGPU,v,0,0,0);
251: v->offloadmask = PETSC_OFFLOAD_BOTH;
252: }
253: return 0;
254: }
256: /* Copy on CPU */
257: static PetscErrorCode VecCopy_SeqViennaCL_Private(Vec xin,Vec yin)
258: {
259: PetscScalar *ya;
260: const PetscScalar *xa;
262: VecViennaCLAllocateCheckHost(xin);
263: VecViennaCLAllocateCheckHost(yin);
264: if (xin != yin) {
265: VecGetArrayRead(xin,&xa);
266: VecGetArray(yin,&ya);
267: PetscArraycpy(ya,xa,xin->map->n);
268: VecRestoreArrayRead(xin,&xa);
269: VecRestoreArray(yin,&ya);
270: }
271: return 0;
272: }
274: static PetscErrorCode VecSetRandom_SeqViennaCL_Private(Vec xin,PetscRandom r)
275: {
276: PetscInt n = xin->map->n,i;
277: PetscScalar *xx;
279: VecGetArray(xin,&xx);
280: for (i=0; i<n; i++) PetscRandomGetValue(r,&xx[i]);
281: VecRestoreArray(xin,&xx);
282: return 0;
283: }
285: static PetscErrorCode VecDestroy_SeqViennaCL_Private(Vec v)
286: {
287: Vec_Seq *vs = (Vec_Seq*)v->data;
289: PetscObjectSAWsViewOff(v);
290: #if defined(PETSC_USE_LOG)
291: PetscLogObjectState((PetscObject)v,"Length=%" PetscInt_FMT,v->map->n);
292: #endif
293: if (vs->array_allocated) PetscFree(vs->array_allocated);
294: PetscFree(vs);
295: return 0;
296: }
298: static PetscErrorCode VecResetArray_SeqViennaCL_Private(Vec vin)
299: {
300: Vec_Seq *v = (Vec_Seq*)vin->data;
302: v->array = v->unplacedarray;
303: v->unplacedarray = 0;
304: return 0;
305: }
307: /*MC
308: VECSEQVIENNACL - VECSEQVIENNACL = "seqviennacl" - The basic sequential vector, modified to use ViennaCL
310: Options Database Keys:
311: . -vec_type seqviennacl - sets the vector type to VECSEQVIENNACL during a call to VecSetFromOptions()
313: Level: beginner
315: .seealso: VecCreate(), VecSetType(), VecSetFromOptions(), VecCreateSeqWithArray(), VECMPI, VecType, VecCreateMPI(), VecCreateSeq()
316: M*/
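/*
   Minimal usage sketch for the type documented above (caller-side code, not part of this
   file; the vector name and size are placeholders):

     Vec x;
     VecCreate(PETSC_COMM_SELF,&x);
     VecSetSizes(x,PETSC_DECIDE,100);
     VecSetFromOptions(x);            // picks up -vec_type seqviennacl
     VecSet(x,1.0);
     VecDestroy(&x);
*/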
318: PetscErrorCode VecAYPX_SeqViennaCL(Vec yin, PetscScalar alpha, Vec xin)
319: {
320: const ViennaCLVector *xgpu;
321: ViennaCLVector *ygpu;
323: VecViennaCLGetArrayRead(xin,&xgpu);
324: VecViennaCLGetArray(yin,&ygpu);
325: PetscLogGpuTimeBegin();
326: try {
327: if (alpha != 0.0 && xin->map->n > 0) {
328: *ygpu = *xgpu + alpha * *ygpu;
329: PetscLogGpuFlops(2.0*yin->map->n);
330: } else {
331: *ygpu = *xgpu;
332: }
333: ViennaCLWaitForGPU();
334: } catch(std::exception const & ex) {
335: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
336: }
337: PetscLogGpuTimeEnd();
338: VecViennaCLRestoreArrayRead(xin,&xgpu);
339: VecViennaCLRestoreArray(yin,&ygpu);
340: return 0;
341: }
343: PetscErrorCode VecAXPY_SeqViennaCL(Vec yin,PetscScalar alpha,Vec xin)
344: {
345: const ViennaCLVector *xgpu;
346: ViennaCLVector *ygpu;
348: if (alpha != 0.0 && xin->map->n > 0) {
349: VecViennaCLGetArrayRead(xin,&xgpu);
350: VecViennaCLGetArray(yin,&ygpu);
351: PetscLogGpuTimeBegin();
352: try {
353: *ygpu += alpha * *xgpu;
354: ViennaCLWaitForGPU();
355: } catch(std::exception const & ex) {
356: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
357: }
358: PetscLogGpuTimeEnd();
359: VecViennaCLRestoreArrayRead(xin,&xgpu);
360: VecViennaCLRestoreArray(yin,&ygpu);
361: PetscLogGpuFlops(2.0*yin->map->n);
362: }
363: return 0;
364: }
366: PetscErrorCode VecPointwiseDivide_SeqViennaCL(Vec win, Vec xin, Vec yin)
367: {
368: const ViennaCLVector *xgpu,*ygpu;
369: ViennaCLVector *wgpu;
371: if (xin->map->n > 0) {
372: VecViennaCLGetArrayRead(xin,&xgpu);
373: VecViennaCLGetArrayRead(yin,&ygpu);
374: VecViennaCLGetArrayWrite(win,&wgpu);
375: PetscLogGpuTimeBegin();
376: try {
377: *wgpu = viennacl::linalg::element_div(*xgpu, *ygpu);
378: ViennaCLWaitForGPU();
379: } catch(std::exception const & ex) {
380: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
381: }
382: PetscLogGpuTimeEnd();
383: PetscLogGpuFlops(win->map->n);
384: VecViennaCLRestoreArrayRead(xin,&xgpu);
385: VecViennaCLRestoreArrayRead(yin,&ygpu);
386: VecViennaCLRestoreArrayWrite(win,&wgpu);
387: }
388: return 0;
389: }
391: PetscErrorCode VecWAXPY_SeqViennaCL(Vec win,PetscScalar alpha,Vec xin, Vec yin)
392: {
393: const ViennaCLVector *xgpu,*ygpu;
394: ViennaCLVector *wgpu;
396: if (alpha == 0.0 && xin->map->n > 0) {
397: VecCopy_SeqViennaCL(yin,win);
398: } else {
399: VecViennaCLGetArrayRead(xin,&xgpu);
400: VecViennaCLGetArrayRead(yin,&ygpu);
401: VecViennaCLGetArrayWrite(win,&wgpu);
402: PetscLogGpuTimeBegin();
403: if (alpha == 1.0) {
404: try {
405: *wgpu = *ygpu + *xgpu;
406: } catch(std::exception const & ex) {
407: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
408: }
409: PetscLogGpuFlops(win->map->n);
410: } else if (alpha == -1.0) {
411: try {
412: *wgpu = *ygpu - *xgpu;
413: } catch(std::exception const & ex) {
414: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
415: }
416: PetscLogGpuFlops(win->map->n);
417: } else {
418: try {
419: *wgpu = *ygpu + alpha * *xgpu;
420: } catch(std::exception const & ex) {
421: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
422: }
423: PetscLogGpuFlops(2*win->map->n);
424: }
425: ViennaCLWaitForGPU();
426: PetscLogGpuTimeEnd();
427: VecViennaCLRestoreArrayRead(xin,&xgpu);
428: VecViennaCLRestoreArrayRead(yin,&ygpu);
429: VecViennaCLRestoreArrayWrite(win,&wgpu);
430: }
431: return 0;
432: }
434: /*
435: * Operation x = x + sum_i alpha_i * y_i for vectors x, y_i and scalars alpha_i
436: *
437: * ViennaCL supports a fast evaluation of x += alpha * y and x += alpha * y + beta * z,
438: * hence these two operations are applied repeatedly until the full sum is accumulated
439: */
440: PetscErrorCode VecMAXPY_SeqViennaCL(Vec xin, PetscInt nv,const PetscScalar *alpha,Vec *y)
441: {
442: PetscInt j;
444: for (j = 0; j < nv; ++j) {
445: if (j+1 < nv) {
446: VecAXPBYPCZ_SeqViennaCL(xin,alpha[j],alpha[j+1],1.0,y[j],y[j+1]);
447: ++j;
448: } else {
449: VecAXPY_SeqViennaCL(xin,alpha[j],y[j]);
450: }
451: }
452: ViennaCLWaitForGPU();
453: return 0;
454: }
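/*
   Illustrative trace (not part of this file): for nv = 3 the loop above issues
     x += alpha[0]*y[0] + alpha[1]*y[1];   // one fused VecAXPBYPCZ_SeqViennaCL call
     x += alpha[2]*y[2];                   // remaining term via VecAXPY_SeqViennaCL
   so roughly ceil(nv/2) sweeps over x are needed instead of nv.
*/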
456: PetscErrorCode VecDot_SeqViennaCL(Vec xin,Vec yin,PetscScalar *z)
457: {
458: const ViennaCLVector *xgpu,*ygpu;
460: if (xin->map->n > 0) {
461: VecViennaCLGetArrayRead(xin,&xgpu);
462: VecViennaCLGetArrayRead(yin,&ygpu);
463: PetscLogGpuTimeBegin();
464: try {
465: *z = viennacl::linalg::inner_prod(*xgpu,*ygpu);
466: } catch(std::exception const & ex) {
467: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
468: }
469: ViennaCLWaitForGPU();
470: PetscLogGpuTimeEnd();
471: PetscLogGpuFlops(2.0*xin->map->n-1);
474: VecViennaCLRestoreArrayRead(xin,&xgpu);
475: VecViennaCLRestoreArrayRead(yin,&ygpu);
476: } else *z = 0.0;
477: return 0;
478: }
480: /*
481: * Operation z[j] = dot(x, y[j])
482: *
483:  * The y vectors are packed into a viennacl::vector_tuple, so all inner products are computed by a single ViennaCL call rather than one dot() per y[j].
484: */
485: PetscErrorCode VecMDot_SeqViennaCL(Vec xin,PetscInt nv,const Vec yin[],PetscScalar *z)
486: {
487: PetscInt n = xin->map->n,i;
488: const ViennaCLVector *xgpu,*ygpu;
489: Vec *yyin = (Vec*)yin;
490: std::vector<viennacl::vector_base<PetscScalar> const *> ygpu_array(nv);
492: if (xin->map->n > 0) {
493: VecViennaCLGetArrayRead(xin,&xgpu);
494: for (i=0; i<nv; i++) {
495: VecViennaCLGetArrayRead(yyin[i],&ygpu);
496: ygpu_array[i] = ygpu;
497: }
498: PetscLogGpuTimeBegin();
499: viennacl::vector_tuple<PetscScalar> y_tuple(ygpu_array);
500: ViennaCLVector result = viennacl::linalg::inner_prod(*xgpu, y_tuple);
501: viennacl::copy(result.begin(), result.end(), z);
502: for (i=0; i<nv; i++) {
503: VecViennaCLRestoreArrayRead(yyin[i],&ygpu);
504: }
505: ViennaCLWaitForGPU();
506: PetscLogGpuTimeEnd();
507: VecViennaCLRestoreArrayRead(xin,&xgpu);
508: PetscLogGpuFlops(PetscMax(nv*(2.0*n-1),0.0));
509: } else {
510: for (i=0; i<nv; i++) z[i] = 0.0;
511: }
512: return 0;
513: }
515: PetscErrorCode VecMTDot_SeqViennaCL(Vec xin,PetscInt nv,const Vec yin[],PetscScalar *z)
516: {
517: /* Since complex case is not supported at the moment, this is the same as VecMDot_SeqViennaCL */
518: VecMDot_SeqViennaCL(xin,nv,yin,z);
519: ViennaCLWaitForGPU();
520: return 0;
521: }
523: PetscErrorCode VecSet_SeqViennaCL(Vec xin,PetscScalar alpha)
524: {
525: ViennaCLVector *xgpu;
527: if (xin->map->n > 0) {
528: VecViennaCLGetArrayWrite(xin,&xgpu);
529: PetscLogGpuTimeBegin();
530: try {
531: *xgpu = viennacl::scalar_vector<PetscScalar>(xgpu->size(), alpha);
532: ViennaCLWaitForGPU();
533: } catch(std::exception const & ex) {
534: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
535: }
536: PetscLogGpuTimeEnd();
537: VecViennaCLRestoreArrayWrite(xin,&xgpu);
538: }
539: return 0;
540: }
542: PetscErrorCode VecScale_SeqViennaCL(Vec xin, PetscScalar alpha)
543: {
544: ViennaCLVector *xgpu;
546: if (alpha == 0.0 && xin->map->n > 0) {
547: VecSet_SeqViennaCL(xin,alpha);
548: PetscLogGpuFlops(xin->map->n);
549: } else if (alpha != 1.0 && xin->map->n > 0) {
550: VecViennaCLGetArray(xin,&xgpu);
551: PetscLogGpuTimeBegin();
552: try {
553: *xgpu *= alpha;
554: ViennaCLWaitForGPU();
555: } catch(std::exception const & ex) {
556: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
557: }
558: PetscLogGpuTimeEnd();
559: VecViennaCLRestoreArray(xin,&xgpu);
560: PetscLogGpuFlops(xin->map->n);
561: }
562: return 0;
563: }
565: PetscErrorCode VecTDot_SeqViennaCL(Vec xin,Vec yin,PetscScalar *z)
566: {
567: /* Since complex case is not supported at the moment, this is the same as VecDot_SeqViennaCL */
568: VecDot_SeqViennaCL(xin, yin, z);
569: ViennaCLWaitForGPU();
570: return 0;
571: }
573: PetscErrorCode VecCopy_SeqViennaCL(Vec xin,Vec yin)
574: {
575: const ViennaCLVector *xgpu;
576: ViennaCLVector *ygpu;
578: if (xin != yin && xin->map->n > 0) {
579: if (xin->offloadmask == PETSC_OFFLOAD_GPU) {
580: VecViennaCLGetArrayRead(xin,&xgpu);
581: VecViennaCLGetArrayWrite(yin,&ygpu);
582: PetscLogGpuTimeBegin();
583: try {
584: *ygpu = *xgpu;
585: ViennaCLWaitForGPU();
586: } catch(std::exception const & ex) {
587: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
588: }
589: PetscLogGpuTimeEnd();
590: VecViennaCLRestoreArrayRead(xin,&xgpu);
591: VecViennaCLRestoreArrayWrite(yin,&ygpu);
593: } else if (xin->offloadmask == PETSC_OFFLOAD_CPU) {
594: /* copy on the CPU if the data is on the CPU */
595: VecCopy_SeqViennaCL_Private(xin,yin);
596: ViennaCLWaitForGPU();
597: } else if (xin->offloadmask == PETSC_OFFLOAD_BOTH) {
598: /* if xin is valid in both places, see where yin is and copy there (because it's probably where we'll want to next use it) */
599: if (yin->offloadmask == PETSC_OFFLOAD_CPU) {
600: /* copy in CPU */
601: VecCopy_SeqViennaCL_Private(xin,yin);
602: ViennaCLWaitForGPU();
603: } else if (yin->offloadmask == PETSC_OFFLOAD_GPU) {
604: /* copy in GPU */
605: VecViennaCLGetArrayRead(xin,&xgpu);
606: VecViennaCLGetArrayWrite(yin,&ygpu);
607: PetscLogGpuTimeBegin();
608: try {
609: *ygpu = *xgpu;
610: ViennaCLWaitForGPU();
611: } catch(std::exception const & ex) {
612: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
613: }
614: PetscLogGpuTimeEnd();
615: VecViennaCLRestoreArrayRead(xin,&xgpu);
616: VecViennaCLRestoreArrayWrite(yin,&ygpu);
617: } else if (yin->offloadmask == PETSC_OFFLOAD_BOTH) {
618: /* xin and yin are both valid in both places (or yin was unallocated before the earlier call to allocatecheck);
619: default to copying on the GPU (this is an arbitrary choice) */
620: VecViennaCLGetArrayRead(xin,&xgpu);
621: VecViennaCLGetArrayWrite(yin,&ygpu);
622: PetscLogGpuTimeBegin();
623: try {
624: *ygpu = *xgpu;
625: ViennaCLWaitForGPU();
626: } catch(std::exception const & ex) {
627: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
628: }
629: PetscLogGpuTimeEnd();
630: VecViennaCLRestoreArrayRead(xin,&xgpu);
631: VecViennaCLRestoreArrayWrite(yin,&ygpu);
632: } else {
633: VecCopy_SeqViennaCL_Private(xin,yin);
634: ViennaCLWaitForGPU();
635: }
636: }
637: }
638: return 0;
639: }
641: PetscErrorCode VecSwap_SeqViennaCL(Vec xin,Vec yin)
642: {
643: ViennaCLVector *xgpu,*ygpu;
645: if (xin != yin && xin->map->n > 0) {
646: VecViennaCLGetArray(xin,&xgpu);
647: VecViennaCLGetArray(yin,&ygpu);
648: PetscLogGpuTimeBegin();
649: try {
650: viennacl::swap(*xgpu, *ygpu);
651: ViennaCLWaitForGPU();
652: } catch(std::exception const & ex) {
653: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
654: }
655: PetscLogGpuTimeEnd();
656: VecViennaCLRestoreArray(xin,&xgpu);
657: VecViennaCLRestoreArray(yin,&ygpu);
658: }
659: return 0;
660: }
662: // y = alpha * x + beta * y
663: PetscErrorCode VecAXPBY_SeqViennaCL(Vec yin,PetscScalar alpha,PetscScalar beta,Vec xin)
664: {
665: PetscScalar a = alpha,b = beta;
666: const ViennaCLVector *xgpu;
667: ViennaCLVector *ygpu;
669: if (a == 0.0 && xin->map->n > 0) {
670: VecScale_SeqViennaCL(yin,beta);
671: } else if (b == 1.0 && xin->map->n > 0) {
672: VecAXPY_SeqViennaCL(yin,alpha,xin);
673: } else if (a == 1.0 && xin->map->n > 0) {
674: VecAYPX_SeqViennaCL(yin,beta,xin);
675: } else if (b == 0.0 && xin->map->n > 0) {
676: VecViennaCLGetArrayRead(xin,&xgpu);
677: VecViennaCLGetArray(yin,&ygpu);
678: PetscLogGpuTimeBegin();
679: try {
680: *ygpu = *xgpu * alpha;
681: ViennaCLWaitForGPU();
682: } catch(std::exception const & ex) {
683: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
684: }
685: PetscLogGpuTimeEnd();
686: PetscLogGpuFlops(xin->map->n);
687: VecViennaCLRestoreArrayRead(xin,&xgpu);
688: VecViennaCLRestoreArray(yin,&ygpu);
689: } else if (xin->map->n > 0) {
690: VecViennaCLGetArrayRead(xin,&xgpu);
691: VecViennaCLGetArray(yin,&ygpu);
692: PetscLogGpuTimeBegin();
693: try {
694: *ygpu = *xgpu * alpha + *ygpu * beta;
695: ViennaCLWaitForGPU();
696: } catch(std::exception const & ex) {
697: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
698: }
699: PetscLogGpuTimeEnd();
700: VecViennaCLRestoreArrayRead(xin,&xgpu);
701: VecViennaCLRestoreArray(yin,&ygpu);
702: PetscLogGpuFlops(3.0*xin->map->n);
703: }
704: return 0;
705: }
707: /* operation z = alpha * x + beta * y + gamma * z */
708: PetscErrorCode VecAXPBYPCZ_SeqViennaCL(Vec zin,PetscScalar alpha,PetscScalar beta,PetscScalar gamma,Vec xin,Vec yin)
709: {
710: PetscInt n = zin->map->n;
711: const ViennaCLVector *xgpu,*ygpu;
712: ViennaCLVector *zgpu;
714: VecViennaCLGetArrayRead(xin,&xgpu);
715: VecViennaCLGetArrayRead(yin,&ygpu);
716: VecViennaCLGetArray(zin,&zgpu);
717: if (alpha == 0.0 && xin->map->n > 0) {
718: PetscLogGpuTimeBegin();
719: try {
720: if (beta == 0.0) {
721: *zgpu = gamma * *zgpu;
722: ViennaCLWaitForGPU();
723: PetscLogGpuFlops(1.0*n);
724: } else if (gamma == 0.0) {
725: *zgpu = beta * *ygpu;
726: ViennaCLWaitForGPU();
727: PetscLogGpuFlops(1.0*n);
728: } else {
729: *zgpu = beta * *ygpu + gamma * *zgpu;
730: ViennaCLWaitForGPU();
731: PetscLogGpuFlops(3.0*n);
732: }
733: } catch(std::exception const & ex) {
734: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
735: }
736: PetscLogGpuTimeEnd();
738: } else if (beta == 0.0 && xin->map->n > 0) {
739: PetscLogGpuTimeBegin();
740: try {
741: if (gamma == 0.0) {
742: *zgpu = alpha * *xgpu;
743: ViennaCLWaitForGPU();
744: PetscLogGpuFlops(1.0*n);
745: } else {
746: *zgpu = alpha * *xgpu + gamma * *zgpu;
747: ViennaCLWaitForGPU();
748: PetscLogGpuFlops(3.0*n);
749: }
750: } catch(std::exception const & ex) {
751: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
752: }
753: PetscLogGpuTimeEnd();
754: } else if (gamma == 0.0 && xin->map->n > 0) {
755: PetscLogGpuTimeBegin();
756: try {
757: *zgpu = alpha * *xgpu + beta * *ygpu;
758: ViennaCLWaitForGPU();
759: } catch(std::exception const & ex) {
760: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
761: }
762: PetscLogGpuTimeEnd();
763: PetscLogGpuFlops(3.0*n);
764: } else if (xin->map->n > 0) {
765: PetscLogGpuTimeBegin();
766: try {
767: /* Split operation into two steps. This is not completely ideal, but avoids temporaries (which are far worse) */
768: if (gamma != 1.0)
769: *zgpu *= gamma;
770: *zgpu += alpha * *xgpu + beta * *ygpu;
771: ViennaCLWaitForGPU();
772: } catch(std::exception const & ex) {
773: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
774: }
775: PetscLogGpuTimeEnd();
776: PetscLogGpuFlops(5.0*n);
777: }
778: VecViennaCLRestoreArray(zin,&zgpu);
779: VecViennaCLRestoreArrayRead(xin,&xgpu);
780: VecViennaCLRestoreArrayRead(yin,&ygpu);
781: return 0;
782: }
784: PetscErrorCode VecPointwiseMult_SeqViennaCL(Vec win,Vec xin,Vec yin)
785: {
786: PetscInt n = win->map->n;
787: const ViennaCLVector *xgpu,*ygpu;
788: ViennaCLVector *wgpu;
790: if (xin->map->n > 0) {
791: VecViennaCLGetArrayRead(xin,&xgpu);
792: VecViennaCLGetArrayRead(yin,&ygpu);
793: VecViennaCLGetArray(win,&wgpu);
794: PetscLogGpuTimeBegin();
795: try {
796: *wgpu = viennacl::linalg::element_prod(*xgpu, *ygpu);
797: ViennaCLWaitForGPU();
798: } catch(std::exception const & ex) {
799: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
800: }
801: PetscLogGpuTimeEnd();
802: VecViennaCLRestoreArrayRead(xin,&xgpu);
803: VecViennaCLRestoreArrayRead(yin,&ygpu);
804: VecViennaCLRestoreArray(win,&wgpu);
805: PetscLogGpuFlops(n);
806: }
807: return 0;
808: }
810: PetscErrorCode VecNorm_SeqViennaCL(Vec xin,NormType type,PetscReal *z)
811: {
812: PetscInt n = xin->map->n;
813: PetscBLASInt bn;
814: const ViennaCLVector *xgpu;
816: if (xin->map->n > 0) {
817: PetscBLASIntCast(n,&bn);
818: VecViennaCLGetArrayRead(xin,&xgpu);
819: if (type == NORM_2 || type == NORM_FROBENIUS) {
820: PetscLogGpuTimeBegin();
821: try {
822: *z = viennacl::linalg::norm_2(*xgpu);
823: ViennaCLWaitForGPU();
824: } catch(std::exception const & ex) {
825: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
826: }
827: PetscLogGpuTimeEnd();
828: PetscLogGpuFlops(PetscMax(2.0*n-1,0.0));
829: } else if (type == NORM_INFINITY) {
830: PetscLogGpuTimeBegin();
831: try {
832: *z = viennacl::linalg::norm_inf(*xgpu);
833: ViennaCLWaitForGPU();
834: } catch(std::exception const & ex) {
835: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
836: }
837: PetscLogGpuTimeEnd();
838: } else if (type == NORM_1) {
839: PetscLogGpuTimeBegin();
840: try {
841: *z = viennacl::linalg::norm_1(*xgpu);
842: ViennaCLWaitForGPU();
843: } catch(std::exception const & ex) {
844: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
845: }
846: PetscLogGpuTimeEnd();
847: PetscLogGpuFlops(PetscMax(n-1.0,0.0));
848: } else if (type == NORM_1_AND_2) {
849: PetscLogGpuTimeBegin();
850: try {
851: *z = viennacl::linalg::norm_1(*xgpu);
852: *(z+1) = viennacl::linalg::norm_2(*xgpu);
853: ViennaCLWaitForGPU();
854: } catch(std::exception const & ex) {
855: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
856: }
857: PetscLogGpuTimeEnd();
858: PetscLogGpuFlops(PetscMax(2.0*n-1,0.0));
859: PetscLogGpuFlops(PetscMax(n-1.0,0.0));
860: }
861: VecViennaCLRestoreArrayRead(xin,&xgpu);
862: } else if (type == NORM_1_AND_2) {
863: *z = 0.0;
864: *(z+1) = 0.0;
865: } else *z = 0.0;
866: return 0;
867: }
869: PetscErrorCode VecSetRandom_SeqViennaCL(Vec xin,PetscRandom r)
870: {
871: VecSetRandom_SeqViennaCL_Private(xin,r);
872: xin->offloadmask = PETSC_OFFLOAD_CPU;
873: return 0;
874: }
876: PetscErrorCode VecResetArray_SeqViennaCL(Vec vin)
877: {
879: VecViennaCLCopyFromGPU(vin);
880: VecResetArray_SeqViennaCL_Private(vin);
881: vin->offloadmask = PETSC_OFFLOAD_CPU;
882: return 0;
883: }
885: PetscErrorCode VecPlaceArray_SeqViennaCL(Vec vin,const PetscScalar *a)
886: {
888: VecViennaCLCopyFromGPU(vin);
889: VecPlaceArray_Seq(vin,a);
890: vin->offloadmask = PETSC_OFFLOAD_CPU;
891: return 0;
892: }
894: PetscErrorCode VecReplaceArray_SeqViennaCL(Vec vin,const PetscScalar *a)
895: {
897: VecViennaCLCopyFromGPU(vin);
898: VecReplaceArray_Seq(vin,a);
899: vin->offloadmask = PETSC_OFFLOAD_CPU;
900: return 0;
901: }
903: /*@C
904: VecCreateSeqViennaCL - Creates a standard, sequential array-style vector.
906: Collective
908: Input Parameters:
909: + comm - the communicator, should be PETSC_COMM_SELF
910: - n - the vector length
912: Output Parameter:
913: . V - the vector
915: Notes:
916: Use VecDuplicate() or VecDuplicateVecs() to form additional vectors of the
917: same type as an existing vector.
919: Level: intermediate
921: .seealso: VecCreateMPI(), VecCreate(), VecDuplicate(), VecDuplicateVecs(), VecCreateGhost()
922: @*/
923: PetscErrorCode VecCreateSeqViennaCL(MPI_Comm comm,PetscInt n,Vec *v)
924: {
925: VecCreate(comm,v);
926: VecSetSizes(*v,n,n);
927: VecSetType(*v,VECSEQVIENNACL);
928: return 0;
929: }
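/*
   Usage sketch (hypothetical caller): create a device vector directly and derive further
   vectors of the same type from it, as suggested in the Notes above:

     Vec x,y;
     VecCreateSeqViennaCL(PETSC_COMM_SELF,100,&x);
     VecDuplicate(x,&y);              // y inherits the VECSEQVIENNACL type
     VecAXPY(y,2.0,x);                // executes through the ViennaCL backend
     VecDestroy(&x);
     VecDestroy(&y);
*/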
931: /*@C
932: VecCreateSeqViennaCLWithArray - Creates a ViennaCL sequential array-style vector,
933: where the user provides the array space to store the vector values.
935: Collective
937: Input Parameters:
938: + comm - the communicator, should be PETSC_COMM_SELF
939: . bs - the block size
940: . n - the vector length
941: - array - viennacl array where the vector elements are to be stored.
943: Output Parameter:
944: . V - the vector
946: Notes:
947: Use VecDuplicate() or VecDuplicateVecs() to form additional vectors of the
948: same type as an existing vector.
950: If the user-provided array is NULL, then VecViennaCLPlaceArray() can be used
951: at a later stage to SET the array for storing the vector values.
953: PETSc does NOT free the array when the vector is destroyed via VecDestroy().
954: The user should not free the array until the vector is destroyed.
956: Level: intermediate
958: .seealso: VecCreateMPIViennaCLWithArray(), VecCreate(), VecDuplicate(), VecDuplicateVecs(),
959: VecCreateGhost(), VecCreateSeq(), VecCUDAPlaceArray(), VecCreateSeqWithArray(),
960: VecCreateMPIWithArray()
961: @*/
962: PETSC_EXTERN PetscErrorCode VecCreateSeqViennaCLWithArray(MPI_Comm comm,PetscInt bs,PetscInt n,const ViennaCLVector* array,Vec *V)
963: {
964: PetscMPIInt size;
966: VecCreate(comm,V);
967: VecSetSizes(*V,n,n);
968: VecSetBlockSize(*V,bs);
969: MPI_Comm_size(comm,&size);
971: VecCreate_SeqViennaCL_Private(*V,array);
972: return 0;
973: }
975: /*@C
976: VecCreateSeqViennaCLWithArrays - Creates a ViennaCL sequential vector, where
977: the user provides the array space to store the vector values.
979: Collective
981: Input Parameters:
982: + comm - the communicator, should be PETSC_COMM_SELF
983: . bs - the block size
984: . n - the vector length
985: . cpuarray - CPU memory where the vector elements are to be stored.
986: - viennaclvec - ViennaCL vector where the Vec entries are to be stored on the device.
988: Output Parameter:
989: . V - the vector
991: Notes:
992: If both cpuarray and viennaclvec are provided, the caller must ensure that
993: the provided arrays have identical values.
995: PETSc does NOT free the provided arrays when the vector is destroyed via
996: VecDestroy(). The user should not free the array until the vector is
997: destroyed.
999: Level: intermediate
1001: .seealso: VecCreateMPIViennaCLWithArrays(), VecCreate(), VecCreateSeqWithArray(),
1002: VecViennaCLPlaceArray(), VecPlaceArray(), VecCreateSeqCUDAWithArrays(),
1003: VecViennaCLAllocateCheckHost()
1004: @*/
1005: PetscErrorCode VecCreateSeqViennaCLWithArrays(MPI_Comm comm,PetscInt bs,PetscInt n,const PetscScalar cpuarray[],const ViennaCLVector* viennaclvec,Vec *V)
1006: {
1007: PetscMPIInt size;
1010: MPI_Comm_size(comm,&size);
1013: // Set V's device vector to viennaclvec; do not allocate host memory yet.
1014: VecCreateSeqViennaCLWithArray(comm,bs,n,viennaclvec,V);
1016: if (cpuarray && viennaclvec) {
1017: Vec_Seq *s = (Vec_Seq*)((*V)->data);
1018: s->array = (PetscScalar*)cpuarray;
1019: (*V)->offloadmask = PETSC_OFFLOAD_BOTH;
1020: } else if (cpuarray) {
1021: Vec_Seq *s = (Vec_Seq*)((*V)->data);
1022: s->array = (PetscScalar*)cpuarray;
1023: (*V)->offloadmask = PETSC_OFFLOAD_CPU;
1024: } else if (viennaclvec) {
1025: (*V)->offloadmask = PETSC_OFFLOAD_GPU;
1026: } else {
1027: (*V)->offloadmask = PETSC_OFFLOAD_UNALLOCATED;
1028: }
1030: return 0;
1031: }
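/*
   Usage sketch (hypothetical caller; the host buffer, device vector, and length are
   placeholders): the offload mask set above records which of the two user-provided
   arrays currently holds valid data.

     PetscScalar    hostvals[100];
     ViennaCLVector dev(100);
     Vec            x;
     // hostvals and dev are assumed to hold identical values (see Notes above)
     VecCreateSeqViennaCLWithArrays(PETSC_COMM_SELF,1,100,hostvals,&dev,&x);
     ...
     VecDestroy(&x);   // neither hostvals nor dev is freed by PETSc
*/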
1033: /*@C
1034: VecViennaCLPlaceArray - Replace the viennacl vector in a Vec with
1035: the one provided by the user. This is useful to avoid a copy.
1037: Not Collective
1039: Input Parameters:
1040: + vec - the vector
1041: - array - the ViennaCL vector
1043: Notes:
1044: You can return to the original ViennaCL vector with a call to
1045: VecViennaCLResetArray(). It is not possible to use VecViennaCLPlaceArray()
1046: and VecPlaceArray() at the same time on the same vector.
1048: Level: intermediate
1050: .seealso: VecPlaceArray(), VecSetValues(), VecViennaCLResetArray(),
1051: VecCUDAPlaceArray()
1053: @*/
1054: PETSC_EXTERN PetscErrorCode VecViennaCLPlaceArray(Vec vin,const ViennaCLVector* a)
1055: {
1057: VecViennaCLCopyToGPU(vin);
1059: ((Vec_Seq*)vin->data)->unplacedarray = (PetscScalar *) ((Vec_ViennaCL*)vin->spptr)->GPUarray; /* save previous GPU array so reset can bring it back */
1060: ((Vec_ViennaCL*)vin->spptr)->GPUarray = (ViennaCLVector*)a;
1061: vin->offloadmask = PETSC_OFFLOAD_GPU;
1062: PetscObjectStateIncrease((PetscObject)vin);
1063: return 0;
1064: }
1066: /*@C
1067: VecViennaCLResetArray - Resets a vector to use its default memory. Call this
1068: after the use of VecViennaCLPlaceArray().
1070: Not Collective
1072: Input Parameters:
1073: . vec - the vector
1075: Level: developer
1077: .seealso: VecViennaCLPlaceArray(), VecResetArray(), VecCUDAResetArray(), VecPlaceArray()
1078: @*/
1079: PETSC_EXTERN PetscErrorCode VecViennaCLResetArray(Vec vin)
1080: {
1082: VecViennaCLCopyToGPU(vin);
1083: ((Vec_ViennaCL*)vin->spptr)->GPUarray = (ViennaCLVector *) ((Vec_Seq*)vin->data)->unplacedarray;
1084: ((Vec_Seq*)vin->data)->unplacedarray = 0;
1085: vin->offloadmask = PETSC_OFFLOAD_GPU;
1086: PetscObjectStateIncrease((PetscObject)vin);
1087: return 0;
1088: }
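/*
   Usage sketch (hypothetical caller): PlaceArray/ResetArray above are always used as a
   pair, temporarily substituting a user-managed device vector for the Vec's own storage:

     ViennaCLVector scratch(n);             // user-managed device buffer, n is assumed
     VecViennaCLPlaceArray(x,&scratch);     // x now computes into scratch
     ...                                    // operations on x
     VecViennaCLResetArray(x);              // x uses its original device storage again
*/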
1090: /* VecDotNorm2 - computes the inner product of two vectors and the 2-norm squared of the second vector
1091: *
1092: * Simply reuses VecDot() and VecNorm(). Performance improvement through custom kernel (kernel generator) possible.
1093: */
1094: PetscErrorCode VecDotNorm2_SeqViennaCL(Vec s, Vec t, PetscScalar *dp, PetscScalar *nm)
1095: {
1096: VecDot_SeqViennaCL(s,t,dp);
1097: VecNorm_SeqViennaCL(t,NORM_2,nm);
1098: *nm *= *nm; //squared norm required
1099: return 0;
1100: }
1102: PetscErrorCode VecDuplicate_SeqViennaCL(Vec win,Vec *V)
1103: {
1104: VecCreateSeqViennaCL(PetscObjectComm((PetscObject)win),win->map->n,V);
1105: PetscLayoutReference(win->map,&(*V)->map);
1106: PetscObjectListDuplicate(((PetscObject)win)->olist,&((PetscObject)(*V))->olist);
1107: PetscFunctionListDuplicate(((PetscObject)win)->qlist,&((PetscObject)(*V))->qlist);
1108: (*V)->stash.ignorenegidx = win->stash.ignorenegidx;
1109: return 0;
1110: }
1112: PetscErrorCode VecDestroy_SeqViennaCL(Vec v)
1113: {
1114: try {
1115: if (v->spptr) {
1116: delete ((Vec_ViennaCL*)v->spptr)->GPUarray_allocated;
1117: delete (Vec_ViennaCL*) v->spptr;
1118: }
1119: } catch(std::exception const & ex) {
1120: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
1121: }
1122: VecDestroy_SeqViennaCL_Private(v);
1123: return 0;
1124: }
1126: PetscErrorCode VecGetArray_SeqViennaCL(Vec v,PetscScalar **a)
1127: {
1128: if (v->offloadmask == PETSC_OFFLOAD_GPU) {
1129: VecViennaCLCopyFromGPU(v);
1130: } else {
1131: VecViennaCLAllocateCheckHost(v);
1132: }
1133: *a = *((PetscScalar**)v->data);
1134: return 0;
1135: }
1137: PetscErrorCode VecRestoreArray_SeqViennaCL(Vec v,PetscScalar **a)
1138: {
1139: v->offloadmask = PETSC_OFFLOAD_CPU;
1140: return 0;
1141: }
1143: PetscErrorCode VecGetArrayWrite_SeqViennaCL(Vec v,PetscScalar **a)
1144: {
1145: VecViennaCLAllocateCheckHost(v);
1146: *a = *((PetscScalar**)v->data);
1147: return 0;
1148: }
1150: static PetscErrorCode VecBindToCPU_SeqAIJViennaCL(Vec V,PetscBool flg)
1151: {
1152: V->boundtocpu = flg;
1153: if (flg) {
1154: VecViennaCLCopyFromGPU(V);
1155: V->offloadmask = PETSC_OFFLOAD_CPU; /* since the CPU code will likely change values in the vector */
1156: V->ops->dot = VecDot_Seq;
1157: V->ops->norm = VecNorm_Seq;
1158: V->ops->tdot = VecTDot_Seq;
1159: V->ops->scale = VecScale_Seq;
1160: V->ops->copy = VecCopy_Seq;
1161: V->ops->set = VecSet_Seq;
1162: V->ops->swap = VecSwap_Seq;
1163: V->ops->axpy = VecAXPY_Seq;
1164: V->ops->axpby = VecAXPBY_Seq;
1165: V->ops->axpbypcz = VecAXPBYPCZ_Seq;
1166: V->ops->pointwisemult = VecPointwiseMult_Seq;
1167: V->ops->pointwisedivide = VecPointwiseDivide_Seq;
1168: V->ops->setrandom = VecSetRandom_Seq;
1169: V->ops->dot_local = VecDot_Seq;
1170: V->ops->tdot_local = VecTDot_Seq;
1171: V->ops->norm_local = VecNorm_Seq;
1172: V->ops->mdot_local = VecMDot_Seq;
1173: V->ops->mtdot_local = VecMTDot_Seq;
1174: V->ops->maxpy = VecMAXPY_Seq;
1175: V->ops->mdot = VecMDot_Seq;
1176: V->ops->mtdot = VecMTDot_Seq;
1177: V->ops->aypx = VecAYPX_Seq;
1178: V->ops->waxpy = VecWAXPY_Seq;
1179: V->ops->dotnorm2 = NULL;
1180: V->ops->placearray = VecPlaceArray_Seq;
1181: V->ops->replacearray = VecReplaceArray_Seq;
1182: V->ops->resetarray = VecResetArray_Seq;
1183: V->ops->duplicate = VecDuplicate_Seq;
1184: } else {
1185: V->ops->dot = VecDot_SeqViennaCL;
1186: V->ops->norm = VecNorm_SeqViennaCL;
1187: V->ops->tdot = VecTDot_SeqViennaCL;
1188: V->ops->scale = VecScale_SeqViennaCL;
1189: V->ops->copy = VecCopy_SeqViennaCL;
1190: V->ops->set = VecSet_SeqViennaCL;
1191: V->ops->swap = VecSwap_SeqViennaCL;
1192: V->ops->axpy = VecAXPY_SeqViennaCL;
1193: V->ops->axpby = VecAXPBY_SeqViennaCL;
1194: V->ops->axpbypcz = VecAXPBYPCZ_SeqViennaCL;
1195: V->ops->pointwisemult = VecPointwiseMult_SeqViennaCL;
1196: V->ops->pointwisedivide = VecPointwiseDivide_SeqViennaCL;
1197: V->ops->setrandom = VecSetRandom_SeqViennaCL;
1198: V->ops->dot_local = VecDot_SeqViennaCL;
1199: V->ops->tdot_local = VecTDot_SeqViennaCL;
1200: V->ops->norm_local = VecNorm_SeqViennaCL;
1201: V->ops->mdot_local = VecMDot_SeqViennaCL;
1202: V->ops->mtdot_local = VecMTDot_SeqViennaCL;
1203: V->ops->maxpy = VecMAXPY_SeqViennaCL;
1204: V->ops->mdot = VecMDot_SeqViennaCL;
1205: V->ops->mtdot = VecMTDot_SeqViennaCL;
1206: V->ops->aypx = VecAYPX_SeqViennaCL;
1207: V->ops->waxpy = VecWAXPY_SeqViennaCL;
1208: V->ops->dotnorm2 = VecDotNorm2_SeqViennaCL;
1209: V->ops->placearray = VecPlaceArray_SeqViennaCL;
1210: V->ops->replacearray = VecReplaceArray_SeqViennaCL;
1211: V->ops->resetarray = VecResetArray_SeqViennaCL;
1212: V->ops->destroy = VecDestroy_SeqViennaCL;
1213: V->ops->duplicate = VecDuplicate_SeqViennaCL;
1214: V->ops->getarraywrite = VecGetArrayWrite_SeqViennaCL;
1215: V->ops->getarray = VecGetArray_SeqViennaCL;
1216: V->ops->restorearray = VecRestoreArray_SeqViennaCL;
1217: }
1218: return 0;
1219: }
1221: PETSC_EXTERN PetscErrorCode VecCreate_SeqViennaCL(Vec V)
1222: {
1223: PetscMPIInt size;
1225: MPI_Comm_size(PetscObjectComm((PetscObject)V),&size);
1227: VecCreate_Seq_Private(V,0);
1228: PetscObjectChangeTypeName((PetscObject)V,VECSEQVIENNACL);
1230: VecBindToCPU_SeqAIJViennaCL(V,PETSC_FALSE);
1231: V->ops->bindtocpu = VecBindToCPU_SeqAIJViennaCL;
1233: VecViennaCLAllocateCheck(V);
1234: VecSet_SeqViennaCL(V,0.0);
1235: return 0;
1236: }
1238: /*@C
1239: VecViennaCLGetCLContext - Get the OpenCL context in which the Vec resides.
1241: Caller should cast (*ctx) to (const cl_context). Caller is responsible for
1242: invoking clReleaseContext().
1244: Input Parameters:
1245: . v - the vector
1247: Output Parameters:
1248: . ctx - pointer to the underlying CL context
1250: Level: intermediate
1252: .seealso: VecViennaCLGetCLQueue(), VecViennaCLGetCLMemRead()
1253: @*/
1254: PETSC_EXTERN PetscErrorCode VecViennaCLGetCLContext(Vec v, PETSC_UINTPTR_T* ctx)
1255: {
1256: #if !defined(PETSC_HAVE_OPENCL)
1257: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"PETSc must be configured with --with-opencl to get the associated cl_context.");
1258: #else
1261: const ViennaCLVector *v_vcl;
1262: VecViennaCLGetArrayRead(v, &v_vcl);
1263: try{
1264: viennacl::ocl::context vcl_ctx = v_vcl->handle().opencl_handle().context();
1265: const cl_context ocl_ctx = vcl_ctx.handle().get();
1266: clRetainContext(ocl_ctx);
1267: *ctx = (PETSC_UINTPTR_T)(ocl_ctx);
1268: } catch (std::exception const & ex) {
1269: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
1270: }
1272: return 0;
1273: #endif
1274: }
1276: /*@C
1277: VecViennaCLGetCLQueue - Get the OpenCL command queue to which all
1278: operations of the Vec are enqueued.
1280: Caller should cast (*queue) to (const cl_command_queue). Caller is
1281: responsible for invoking clReleaseCommandQueue().
1283: Input Parameters:
1284: . v - the vector
1286: Output Parameters:
1287: . queue - pointer to the CL command queue
1289: Level: intermediate
1291: .seealso: VecViennaCLGetCLContext(), VecViennaCLGetCLMemRead()
1292: @*/
1293: PETSC_EXTERN PetscErrorCode VecViennaCLGetCLQueue(Vec v, PETSC_UINTPTR_T* queue)
1294: {
1295: #if !defined(PETSC_HAVE_OPENCL)
1296: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"PETSc must be configured with --with-opencl to get the associated cl_command_queue.");
1297: #else
1299: const ViennaCLVector *v_vcl;
1300: VecViennaCLGetArrayRead(v, &v_vcl);
1301: try{
1302: viennacl::ocl::context vcl_ctx = v_vcl->handle().opencl_handle().context();
1303: const viennacl::ocl::command_queue& vcl_queue = vcl_ctx.current_queue();
1304: const cl_command_queue ocl_queue = vcl_queue.handle().get();
1305: clRetainCommandQueue(ocl_queue);
1306: *queue = (PETSC_UINTPTR_T)(ocl_queue);
1307: } catch (std::exception const & ex) {
1308: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
1309: }
1311: return 0;
1312: #endif
1313: }
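/*
   Usage sketch (hypothetical caller, OpenCL backend only; x is assumed to be a
   VECSEQVIENNACL Vec): the two accessors above hand out retained OpenCL handles, so the
   caller releases them when done, as stated in the docstrings:

     PETSC_UINTPTR_T ctxh,queueh;
     VecViennaCLGetCLContext(x,&ctxh);
     VecViennaCLGetCLQueue(x,&queueh);
     cl_context       ctx   = (cl_context)ctxh;
     cl_command_queue queue = (cl_command_queue)queueh;
     ...                                    // enqueue custom kernels on queue
     clReleaseCommandQueue(queue);
     clReleaseContext(ctx);
*/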
1315: /*@C
1316: VecViennaCLGetCLMemRead - Provides access to the CL buffer inside a Vec.
1318: Caller should cast (*mem) to (const cl_mem). Caller is responsible for
1319: invoking clReleaseMemObject().
1321: Input Parameters:
1322: . v - the vector
1324: Output Parameters:
1325: . mem - pointer to the device buffer
1327: Level: intermediate
1329: .seealso: VecViennaCLGetCLContext(), VecViennaCLGetCLMemWrite()
1330: @*/
1331: PETSC_EXTERN PetscErrorCode VecViennaCLGetCLMemRead(Vec v, PETSC_UINTPTR_T* mem)
1332: {
1333: #if !defined(PETSC_HAVE_OPENCL)
1334: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"PETSc must be configured with --with-opencl to get a Vec's cl_mem");
1335: #else
1337: const ViennaCLVector *v_vcl;
1338: VecViennaCLGetArrayRead(v, &v_vcl);
1339: try{
1340: const cl_mem ocl_mem = v_vcl->handle().opencl_handle().get();
1341: clRetainMemObject(ocl_mem);
1342: *mem = (PETSC_UINTPTR_T)(ocl_mem);
1343: } catch (std::exception const & ex) {
1344: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
1345: }
1346: return 0;
1347: #endif
1348: }
1350: /*@C
1351: VecViennaCLGetCLMemWrite - Provides access to the CL buffer inside a Vec.
1353: Caller should cast (*mem) to (const cl_mem). Caller is responsible for
1354: invoking clReleaseMemObject().
1356: Write access must be relinquished by calling
1357: VecViennaCLRestoreCLMemWrite(). Upon restoring the vector, the data on
1358: the host will be marked as out of date. A subsequent access of the host data
1359: will thus incur a data transfer from the device to the host.
1361: Input Parameters:
1362: . v - the vector
1364: Output Parameters:
1365: . mem - pointer to the device buffer
1367: Level: intermediate
1369: .seealso: VecViennaCLGetCLContext(), VecViennaCLRestoreCLMemWrite()
1370: @*/
1371: PETSC_EXTERN PetscErrorCode VecViennaCLGetCLMemWrite(Vec v, PETSC_UINTPTR_T* mem)
1372: {
1373: #if !defined(PETSC_HAVE_OPENCL)
1374: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"PETSc must be configured with --with-opencl to get a Vec's cl_mem");
1375: #else
1377: ViennaCLVector *v_vcl;
1378: VecViennaCLGetArrayWrite(v, &v_vcl);
1379: try{
1380: const cl_mem ocl_mem = v_vcl->handle().opencl_handle().get();
1381: clRetainMemObject(ocl_mem);
1382: *mem = (PETSC_UINTPTR_T)(ocl_mem);
1383: } catch (std::exception const & ex) {
1384: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
1385: }
1387: return 0;
1388: #endif
1389: }
1391: /*@C
1392: VecViennaCLRestoreCLMemWrite - Restores a CL buffer pointer previously
1393: acquired with VecViennaCLGetCLMemWrite().
1395: This marks the host data as out of date. Subsequent access to the
1396: vector data on the host side with for instance VecGetArray() incurs a
1397: data transfer.
1399: Input Parameters:
1400: . v - the vector
1402: Level: intermediate
1404: .seealso: VecViennaCLGetCLContext(), VecViennaCLGetCLMemWrite()
1405: @*/
1406: PETSC_EXTERN PetscErrorCode VecViennaCLRestoreCLMemWrite(Vec v)
1407: {
1408: #if !defined(PETSC_HAVE_OPENCL)
1409: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"PETSc must be configured with --with-opencl to restore a Vec's cl_mem");
1410: #else
1412: VecViennaCLRestoreArrayWrite(v, PETSC_NULL);
1414: return 0;
1415: #endif
1416: }
1418: /*@C
1419: VecViennaCLGetCLMem - Provides access to the CL buffer inside a Vec.
1421: Caller should cast (*mem) to (const cl_mem). Caller is responsible for
1422: invoking clReleaseMemObject().
1424: Access must be relinquished by calling VecViennaCLRestoreCLMem().
1425: Upon restoring the vector, the data on the host will be marked as out of
1426: date. A subsequent access of the host data will thus incur a data transfer
1427: from the device to the host.
1429: Input Parameters:
1430: . v - the vector
1432: Output Parameters:
1433: . mem - pointer to the device buffer
1435: Level: intermediate
1437: .seealso: VecViennaCLGetCLContext(), VecViennaCLRestoreCLMem()
1438: @*/
1439: PETSC_EXTERN PetscErrorCode VecViennaCLGetCLMem(Vec v, PETSC_UINTPTR_T* mem)
1440: {
1441: #if !defined(PETSC_HAVE_OPENCL)
1442: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"PETSc must be configured with --with-opencl to get a Vec's cl_mem");
1443: #else
1445: ViennaCLVector *v_vcl;
1446: VecViennaCLGetArray(v, &v_vcl);
1447: try{
1448: const cl_mem ocl_mem = v_vcl->handle().opencl_handle().get();
1449: clRetainMemObject(ocl_mem);
1450: *mem = (PETSC_UINTPTR_T)(ocl_mem);
1451: } catch (std::exception const & ex) {
1452: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
1453: }
1455: return 0;
1456: #endif
1457: }
1459: /*@C
1460: VecViennaCLRestoreCLMem - Restores a CL buffer pointer previously
1461: acquired with VecViennaCLGetCLMem().
1463: This marks the host data as out of date. Subsequent access to the vector
1464: data on the host side with for instance VecGetArray() incurs a data
1465: transfer.
1467: Input Parameters:
1468: . v - the vector
1470: Level: intermediate
1472: .seealso: VecViennaCLGetCLContext(), VecViennaCLGetCLMem()
1473: @*/
1474: PETSC_EXTERN PetscErrorCode VecViennaCLRestoreCLMem(Vec v)
1475: {
1476: #if !defined(PETSC_HAVE_OPENCL)
1477: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"PETSc must be configured with --with-opencl to restore a Vec's cl_mem");
1478: #else
1480: VecViennaCLRestoreArray(v, PETSC_NULL);
1482: return 0;
1483: #endif
1484: }
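/*
   Usage sketch (hypothetical caller; x is assumed to be a VECSEQVIENNACL Vec): read-write
   access to the raw cl_mem buffer pairs VecViennaCLGetCLMem() with VecViennaCLRestoreCLMem(),
   plus a clReleaseMemObject() to balance the retain performed above:

     PETSC_UINTPTR_T memh;
     VecViennaCLGetCLMem(x,&memh);
     cl_mem buf = (cl_mem)memh;
     ...                                    // pass buf to a custom OpenCL kernel
     clReleaseMemObject(buf);
     VecViennaCLRestoreCLMem(x);            // host copy is now marked out of date
*/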
1486: PetscErrorCode VecCreate_SeqViennaCL_Private(Vec V,const ViennaCLVector *array)
1487: {
1488: Vec_ViennaCL *vecviennacl;
1489: PetscMPIInt size;
1491: MPI_Comm_size(PetscObjectComm((PetscObject)V),&size);
1493: VecCreate_Seq_Private(V,0);
1494: PetscObjectChangeTypeName((PetscObject)V,VECSEQVIENNACL);
1495: VecBindToCPU_SeqAIJViennaCL(V,PETSC_FALSE);
1496: V->ops->bindtocpu = VecBindToCPU_SeqAIJViennaCL;
1498: if (array) {
1499: if (!V->spptr) V->spptr = new Vec_ViennaCL;
1500: vecviennacl = (Vec_ViennaCL*)V->spptr;
1501: vecviennacl->GPUarray_allocated = 0;
1502: vecviennacl->GPUarray = (ViennaCLVector*)array;
1503: V->offloadmask = PETSC_OFFLOAD_UNALLOCATED;
1504: }
1506: return 0;
1507: }