Actual source code: vecviennacl.cxx
petsc-3.9.4 2018-09-11
1: /*
2: Implements the sequential ViennaCL vectors.
3: */
5: #include <petscconf.h>
6: #include <petsc/private/vecimpl.h>
7: #include <../src/vec/vec/impls/dvecimpl.h>
8: #include <../src/vec/vec/impls/seq/seqviennacl/viennaclvecimpl.h>
10: #include <vector>
12: #include "viennacl/linalg/inner_prod.hpp"
13: #include "viennacl/linalg/norm_1.hpp"
14: #include "viennacl/linalg/norm_2.hpp"
15: #include "viennacl/linalg/norm_inf.hpp"
17: #ifdef VIENNACL_WITH_OPENCL
18: #include "viennacl/ocl/backend.hpp"
19: #endif
22: PETSC_EXTERN PetscErrorCode VecViennaCLGetArrayReadWrite(Vec v, ViennaCLVector **a)
23: {
28: *a = 0;
29: VecViennaCLCopyToGPU(v);
30: *a = ((Vec_ViennaCL*)v->spptr)->GPUarray;
31: ViennaCLWaitForGPU();
32: return(0);
33: }
35: PETSC_EXTERN PetscErrorCode VecViennaCLRestoreArrayReadWrite(Vec v, ViennaCLVector **a)
36: {
41: v->valid_GPU_array = PETSC_OFFLOAD_GPU;
43: PetscObjectStateIncrease((PetscObject)v);
44: return(0);
45: }
47: PETSC_EXTERN PetscErrorCode VecViennaCLGetArrayRead(Vec v, const ViennaCLVector **a)
48: {
53: *a = 0;
54: VecViennaCLCopyToGPU(v);
55: *a = ((Vec_ViennaCL*)v->spptr)->GPUarray;
56: ViennaCLWaitForGPU();
57: return(0);
58: }
60: PETSC_EXTERN PetscErrorCode VecViennaCLRestoreArrayRead(Vec v, const ViennaCLVector **a)
61: {
64: return(0);
65: }
67: PETSC_EXTERN PetscErrorCode VecViennaCLGetArrayWrite(Vec v, ViennaCLVector **a)
68: {
73: *a = 0;
74: VecViennaCLAllocateCheck(v);
75: *a = ((Vec_ViennaCL*)v->spptr)->GPUarray;
76: ViennaCLWaitForGPU();
77: return(0);
78: }
80: PETSC_EXTERN PetscErrorCode VecViennaCLRestoreArrayWrite(Vec v, ViennaCLVector **a)
81: {
86: v->valid_GPU_array = PETSC_OFFLOAD_GPU;
88: PetscObjectStateIncrease((PetscObject)v);
89: return(0);
90: }
94: PETSC_EXTERN PetscErrorCode PetscViennaCLInit()
95: {
96: PetscErrorCode ierr;
97: char string[20];
98: PetscBool flg,flg_cuda,flg_opencl,flg_openmp;
101: /* ViennaCL backend selection: CUDA, OpenCL, or OpenMP */
102: PetscOptionsGetString(NULL,NULL,"-viennacl_backend",string,12,&flg);
103: if (flg) {
104: try {
105: PetscStrcasecmp(string,"cuda",&flg_cuda);
106: PetscStrcasecmp(string,"opencl",&flg_opencl);
107: PetscStrcasecmp(string,"openmp",&flg_openmp);
109: /* A default (sequential) CPU backend is always available - even if OpenMP is not enabled. */
110: if (flg_openmp) viennacl::backend::default_memory_type(viennacl::MAIN_MEMORY);
111: #if defined(PETSC_HAVE_CUDA)
112: else if (flg_cuda) viennacl::backend::default_memory_type(viennacl::CUDA_MEMORY);
113: #endif
114: #if defined(PETSC_HAVE_OPENCL)
115: else if (flg_opencl) viennacl::backend::default_memory_type(viennacl::OPENCL_MEMORY);
116: #endif
117: else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: Backend not recognized or available: %s.\n Pass -viennacl_view to see available backends for ViennaCL.\n", string);
118: } catch (std::exception const & ex) {
119: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
120: }
121: }
123: #if defined(PETSC_HAVE_OPENCL)
124: /* ViennaCL OpenCL device type configuration */
125: PetscOptionsGetString(NULL,NULL,"-viennacl_opencl_device_type",string,12,&flg);
126: if (flg) {
127: try {
128: PetscStrcasecmp(string,"cpu",&flg);
129: if (flg) viennacl::ocl::set_context_device_type(0, CL_DEVICE_TYPE_CPU);
131: PetscStrcasecmp(string,"gpu",&flg);
132: if (flg) viennacl::ocl::set_context_device_type(0, CL_DEVICE_TYPE_GPU);
134: PetscStrcasecmp(string,"accelerator",&flg);
135: if (flg) viennacl::ocl::set_context_device_type(0, CL_DEVICE_TYPE_ACCELERATOR);
136: } catch (std::exception const & ex) {
137: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
138: }
139: }
140: #endif
142: /* Print available backends */
143: PetscOptionsHasName(NULL,NULL,"-viennacl_view",&flg);
144: if (flg) {
145: PetscPrintf(PETSC_COMM_WORLD, "ViennaCL backends available: ");
146: #if defined(PETSC_HAVE_CUDA)
147: PetscPrintf(PETSC_COMM_WORLD, "CUDA, ");
148: #endif
149: #if defined(PETSC_HAVE_OPENCL)
150: PetscPrintf(PETSC_COMM_WORLD, "OpenCL, ");
151: #endif
152: #if defined(PETSC_HAVE_OPENMP)
153: PetscPrintf(PETSC_COMM_WORLD, "OpenMP ");
154: #else
155: PetscPrintf(PETSC_COMM_WORLD, "OpenMP (1 thread) ");
156: #endif
157: PetscPrintf(PETSC_COMM_WORLD, "\n");
159: /* Print selected backends */
160: PetscPrintf(PETSC_COMM_WORLD, "ViennaCL backend selected: ");
161: #if defined(PETSC_HAVE_CUDA)
162: if (viennacl::backend::default_memory_type() == viennacl::CUDA_MEMORY) {
163: PetscPrintf(PETSC_COMM_WORLD, "CUDA ");
164: }
165: #endif
166: #if defined(PETSC_HAVE_OPENCL)
167: if (viennacl::backend::default_memory_type() == viennacl::OPENCL_MEMORY) {
168: PetscPrintf(PETSC_COMM_WORLD, "OpenCL ");
169: }
170: #endif
171: #if defined(PETSC_HAVE_OPENMP)
172: if (viennacl::backend::default_memory_type() == viennacl::MAIN_MEMORY) {
173: PetscPrintf(PETSC_COMM_WORLD, "OpenMP ");
174: }
175: #else
176: if (viennacl::backend::default_memory_type() == viennacl::MAIN_MEMORY) {
177: PetscPrintf(PETSC_COMM_WORLD, "OpenMP (sequential - consider reconfiguration: --with-openmp=1) ");
178: }
179: #endif
180: PetscPrintf(PETSC_COMM_WORLD, "\n");
181: }
182: return(0);
183: }
185: /*
186: Allocates space for the vector array on the Host if it does not exist.
187: Does NOT change the PetscViennaCLFlag for the vector
188: Does NOT zero the ViennaCL array
189: */
190: PETSC_EXTERN PetscErrorCode VecViennaCLAllocateCheckHost(Vec v)
191: {
193: PetscScalar *array;
194: Vec_Seq *s;
195: PetscInt n = v->map->n;
198: s = (Vec_Seq*)v->data;
199: VecViennaCLAllocateCheck(v);
200: if (s->array == 0) {
201: PetscMalloc1(n,&array);
202: PetscLogObjectMemory((PetscObject)v,n*sizeof(PetscScalar));
203: s->array = array;
204: s->array_allocated = array;
205: }
206: return(0);
207: }
210: /*
211: Allocates space for the vector array on the GPU if it does not exist.
212: Does NOT change the PetscViennaCLFlag for the vector
213: Does NOT zero the ViennaCL array
215: */
216: PetscErrorCode VecViennaCLAllocateCheck(Vec v)
217: {
219: int rank;
222: MPI_Comm_rank(PETSC_COMM_WORLD,&rank);
223: // First allocate memory on the GPU if needed
224: if (!v->spptr) {
225: try {
226: v->spptr = new Vec_ViennaCL;
227: ((Vec_ViennaCL*)v->spptr)->GPUarray = new ViennaCLVector((PetscBLASInt)v->map->n);
229: } catch(std::exception const & ex) {
230: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
231: }
232: }
233: return(0);
234: }
237: /* Copies a vector from the CPU to the GPU unless we already have an up-to-date copy on the GPU */
238: PetscErrorCode VecViennaCLCopyToGPU(Vec v)
239: {
244: VecViennaCLAllocateCheck(v);
245: if (v->map->n > 0) {
246: if (v->valid_GPU_array == PETSC_OFFLOAD_CPU) {
247: PetscLogEventBegin(VEC_ViennaCLCopyToGPU,v,0,0,0);
248: try {
249: ViennaCLVector *vec = ((Vec_ViennaCL*)v->spptr)->GPUarray;
250: viennacl::fast_copy(*(PetscScalar**)v->data, *(PetscScalar**)v->data + v->map->n, vec->begin());
251: ViennaCLWaitForGPU();
252: } catch(std::exception const & ex) {
253: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
254: }
255: PetscLogEventEnd(VEC_ViennaCLCopyToGPU,v,0,0,0);
256: v->valid_GPU_array = PETSC_OFFLOAD_BOTH;
257: }
258: }
259: return(0);
260: }
264: /*
265: VecViennaCLCopyFromGPU - Copies a vector from the GPU to the CPU unless we already have an up-to-date copy on the CPU
266: */
267: PetscErrorCode VecViennaCLCopyFromGPU(Vec v)
268: {
273: VecViennaCLAllocateCheckHost(v);
274: if (v->valid_GPU_array == PETSC_OFFLOAD_GPU) {
275: PetscLogEventBegin(VEC_ViennaCLCopyFromGPU,v,0,0,0);
276: try {
277: ViennaCLVector *vec = ((Vec_ViennaCL*)v->spptr)->GPUarray;
278: viennacl::fast_copy(vec->begin(),vec->end(),*(PetscScalar**)v->data);
279: ViennaCLWaitForGPU();
280: } catch(std::exception const & ex) {
281: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
282: }
283: PetscLogEventEnd(VEC_ViennaCLCopyFromGPU,v,0,0,0);
284: v->valid_GPU_array = PETSC_OFFLOAD_BOTH;
285: }
286: return(0);
287: }
290: /* Copy on CPU */
291: static PetscErrorCode VecCopy_SeqViennaCL_Private(Vec xin,Vec yin)
292: {
293: PetscScalar *ya;
294: const PetscScalar *xa;
295: PetscErrorCode ierr;
298: VecViennaCLAllocateCheckHost(xin);
299: VecViennaCLAllocateCheckHost(yin);
300: if (xin != yin) {
301: VecGetArrayRead(xin,&xa);
302: VecGetArray(yin,&ya);
303: PetscMemcpy(ya,xa,xin->map->n*sizeof(PetscScalar));
304: VecRestoreArrayRead(xin,&xa);
305: VecRestoreArray(yin,&ya);
306: }
307: return(0);
308: }
310: static PetscErrorCode VecSetRandom_SeqViennaCL_Private(Vec xin,PetscRandom r)
311: {
313: PetscInt n = xin->map->n,i;
314: PetscScalar *xx;
317: VecGetArray(xin,&xx);
318: for (i=0; i<n; i++) {PetscRandomGetValue(r,&xx[i]);}
319: VecRestoreArray(xin,&xx);
320: return(0);
321: }
323: static PetscErrorCode VecDestroy_SeqViennaCL_Private(Vec v)
324: {
325: Vec_Seq *vs = (Vec_Seq*)v->data;
329: PetscObjectSAWsViewOff(v);
330: #if defined(PETSC_USE_LOG)
331: PetscLogObjectState((PetscObject)v,"Length=%D",v->map->n);
332: #endif
333: if (vs->array_allocated) PetscFree(vs->array_allocated);
334: PetscFree(vs);
335: return(0);
336: }
338: static PetscErrorCode VecResetArray_SeqViennaCL_Private(Vec vin)
339: {
340: Vec_Seq *v = (Vec_Seq*)vin->data;
343: v->array = v->unplacedarray;
344: v->unplacedarray = 0;
345: return(0);
346: }
349: /*MC
350: VECSEQVIENNACL - VECSEQVIENNACL = "seqviennacl" - The basic sequential vector, modified to use ViennaCL
352: Options Database Keys:
353: . -vec_type seqviennacl - sets the vector type to VECSEQVIENNACL during a call to VecSetFromOptions()
355: Level: beginner
357: .seealso: VecCreate(), VecSetType(), VecSetFromOptions(), VecCreateSeqWithArray(), VECMPI, VecType, VecCreateMPI(), VecCreateSeq()
358: M*/
361: PetscErrorCode VecAYPX_SeqViennaCL(Vec yin, PetscScalar alpha, Vec xin)
362: {
363: const ViennaCLVector *xgpu;
364: ViennaCLVector *ygpu;
365: PetscErrorCode ierr;
368: VecViennaCLGetArrayRead(xin,&xgpu);
369: VecViennaCLGetArrayReadWrite(yin,&ygpu);
370: try {
371: if (alpha != 0.0 && xin->map->n > 0) {
372: *ygpu = *xgpu + alpha * *ygpu;
373: PetscLogFlops(2.0*yin->map->n);
374: } else {
375: *ygpu = *xgpu;
376: }
377: ViennaCLWaitForGPU();
378: } catch(std::exception const & ex) {
379: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
380: }
381: VecViennaCLRestoreArrayRead(xin,&xgpu);
382: VecViennaCLRestoreArrayReadWrite(yin,&ygpu);
383: return(0);
384: }
387: PetscErrorCode VecAXPY_SeqViennaCL(Vec yin,PetscScalar alpha,Vec xin)
388: {
389: const ViennaCLVector *xgpu;
390: ViennaCLVector *ygpu;
391: PetscErrorCode ierr;
394: if (alpha != 0.0 && xin->map->n > 0) {
395: VecViennaCLGetArrayRead(xin,&xgpu);
396: VecViennaCLGetArrayReadWrite(yin,&ygpu);
397: try {
398: *ygpu += alpha * *xgpu;
399: ViennaCLWaitForGPU();
400: } catch(std::exception const & ex) {
401: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
402: }
403: VecViennaCLRestoreArrayRead(xin,&xgpu);
404: VecViennaCLRestoreArrayReadWrite(yin,&ygpu);
405: PetscLogFlops(2.0*yin->map->n);
406: }
407: return(0);
408: }
411: PetscErrorCode VecPointwiseDivide_SeqViennaCL(Vec win, Vec xin, Vec yin)
412: {
413: const ViennaCLVector *xgpu,*ygpu;
414: ViennaCLVector *wgpu;
415: PetscErrorCode ierr;
418: if (xin->map->n > 0) {
419: VecViennaCLGetArrayRead(xin,&xgpu);
420: VecViennaCLGetArrayRead(yin,&ygpu);
421: VecViennaCLGetArrayWrite(win,&wgpu);
422: try {
423: *wgpu = viennacl::linalg::element_div(*xgpu, *ygpu);
424: ViennaCLWaitForGPU();
425: } catch(std::exception const & ex) {
426: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
427: }
428: PetscLogFlops(win->map->n);
429: VecViennaCLRestoreArrayRead(xin,&xgpu);
430: VecViennaCLRestoreArrayRead(yin,&ygpu);
431: VecViennaCLRestoreArrayWrite(win,&wgpu);
432: }
433: return(0);
434: }
437: PetscErrorCode VecWAXPY_SeqViennaCL(Vec win,PetscScalar alpha,Vec xin, Vec yin)
438: {
439: const ViennaCLVector *xgpu,*ygpu;
440: ViennaCLVector *wgpu;
441: PetscErrorCode ierr;
444: if (alpha == 0.0 && xin->map->n > 0) {
445: VecCopy_SeqViennaCL(yin,win);
446: } else {
447: VecViennaCLGetArrayRead(xin,&xgpu);
448: VecViennaCLGetArrayRead(yin,&ygpu);
449: VecViennaCLGetArrayWrite(win,&wgpu);
450: if (alpha == 1.0) {
451: try {
452: *wgpu = *ygpu + *xgpu;
453: } catch(std::exception const & ex) {
454: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
455: }
456: PetscLogFlops(win->map->n);
457: } else if (alpha == -1.0) {
458: try {
459: *wgpu = *ygpu - *xgpu;
460: } catch(std::exception const & ex) {
461: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
462: }
463: PetscLogFlops(win->map->n);
464: } else {
465: try {
466: *wgpu = *ygpu + alpha * *xgpu;
467: } catch(std::exception const & ex) {
468: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
469: }
470: PetscLogFlops(2*win->map->n);
471: }
472: ViennaCLWaitForGPU();
473: VecViennaCLRestoreArrayRead(xin,&xgpu);
474: VecViennaCLRestoreArrayRead(yin,&ygpu);
475: VecViennaCLRestoreArrayWrite(win,&wgpu);
476: }
477: return(0);
478: }
481: /*
482: * Operation x = x + sum_i alpha_i * y_i for vectors x, y_i and scalars alpha_i
483: *
484: * ViennaCL supports a fast evaluation of x += alpha * y and x += alpha * y + beta * z,
485: * hence there is an iterated application of these until the final result is obtained
486: */
487: PetscErrorCode VecMAXPY_SeqViennaCL(Vec xin, PetscInt nv,const PetscScalar *alpha,Vec *y)
488: {
490: PetscInt j;
493: for (j = 0; j < nv; ++j) {
494: if (j+1 < nv) {
495: VecAXPBYPCZ_SeqViennaCL(xin,alpha[j],alpha[j+1],1.0,y[j],y[j+1]);
496: ++j;
497: } else {
498: VecAXPY_SeqViennaCL(xin,alpha[j],y[j]);
499: }
500: }
501: ViennaCLWaitForGPU();
502: return(0);
503: }
506: PetscErrorCode VecDot_SeqViennaCL(Vec xin,Vec yin,PetscScalar *z)
507: {
508: const ViennaCLVector *xgpu,*ygpu;
509: PetscErrorCode ierr;
512: if (xin->map->n > 0) {
513: VecViennaCLGetArrayRead(xin,&xgpu);
514: VecViennaCLGetArrayRead(yin,&ygpu);
515: try {
516: *z = viennacl::linalg::inner_prod(*xgpu,*ygpu);
517: } catch(std::exception const & ex) {
518: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
519: }
520: if (xin->map->n >0) {
521: PetscLogFlops(2.0*xin->map->n-1);
522: }
523: ViennaCLWaitForGPU();
524: VecViennaCLRestoreArrayRead(xin,&xgpu);
525: VecViennaCLRestoreArrayRead(yin,&ygpu);
526: } else *z = 0.0;
527: return(0);
528: }
532: /*
533: * Operation z[j] = dot(x, y[j])
534: *
535: * We use an iterated application of dot() for each j. For small ranges of j this is still faster than an allocation of extra memory in order to use gemv().
536: */
537: PetscErrorCode VecMDot_SeqViennaCL(Vec xin,PetscInt nv,const Vec yin[],PetscScalar *z)
538: {
539: PetscErrorCode ierr;
540: PetscInt n = xin->map->n,i;
541: const ViennaCLVector *xgpu,*ygpu;
542: Vec *yyin = (Vec*)yin;
543: std::vector<viennacl::vector_base<PetscScalar> const *> ygpu_array(nv);
546: if (xin->map->n > 0) {
547: VecViennaCLGetArrayRead(xin,&xgpu);
548: for (i=0; i<nv; i++) {
549: VecViennaCLGetArrayRead(yyin[i],&ygpu);
550: ygpu_array[i] = ygpu;
551: }
553: viennacl::vector_tuple<PetscScalar> y_tuple(ygpu_array);
554: ViennaCLVector result = viennacl::linalg::inner_prod(*xgpu, y_tuple);
556: for (i=0; i<nv; i++) {
557: viennacl::copy(result.begin(), result.end(), z);
558: VecViennaCLRestoreArrayRead(yyin[i],&ygpu);
559: }
561: ViennaCLWaitForGPU();
562: VecViennaCLRestoreArrayRead(xin,&xgpu);
563: PetscLogFlops(PetscMax(nv*(2.0*n-1),0.0));
564: } else {
565: for (i=0; i<nv; i++) z[i] = 0.0;
566: }
567: return(0);
568: }
570: PetscErrorCode VecMTDot_SeqViennaCL(Vec xin,PetscInt nv,const Vec yin[],PetscScalar *z)
571: {
575: /* Since complex case is not supported at the moment, this is the same as VecMDot_SeqViennaCL */
576: VecMDot_SeqViennaCL(xin,nv,yin,z);
577: ViennaCLWaitForGPU();
578: return(0);
579: }
582: PetscErrorCode VecSet_SeqViennaCL(Vec xin,PetscScalar alpha)
583: {
584: ViennaCLVector *xgpu;
588: if (xin->map->n > 0) {
589: VecViennaCLGetArrayWrite(xin,&xgpu);
590: try {
591: *xgpu = viennacl::scalar_vector<PetscScalar>(xgpu->size(), alpha);
592: ViennaCLWaitForGPU();
593: } catch(std::exception const & ex) {
594: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
595: }
596: VecViennaCLRestoreArrayWrite(xin,&xgpu);
597: }
598: return(0);
599: }
601: PetscErrorCode VecScale_SeqViennaCL(Vec xin, PetscScalar alpha)
602: {
603: ViennaCLVector *xgpu;
607: if (alpha == 0.0 && xin->map->n > 0) {
608: VecSet_SeqViennaCL(xin,alpha);
609: PetscLogFlops(xin->map->n);
610: } else if (alpha != 1.0 && xin->map->n > 0) {
611: VecViennaCLGetArrayReadWrite(xin,&xgpu);
612: try {
613: *xgpu *= alpha;
614: ViennaCLWaitForGPU();
615: } catch(std::exception const & ex) {
616: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
617: }
618: VecViennaCLRestoreArrayReadWrite(xin,&xgpu);
619: PetscLogFlops(xin->map->n);
620: }
621: return(0);
622: }
625: PetscErrorCode VecTDot_SeqViennaCL(Vec xin,Vec yin,PetscScalar *z)
626: {
630: /* Since complex case is not supported at the moment, this is the same as VecDot_SeqViennaCL */
631: VecDot_SeqViennaCL(xin, yin, z);
632: ViennaCLWaitForGPU();
633: return(0);
634: }
637: PetscErrorCode VecCopy_SeqViennaCL(Vec xin,Vec yin)
638: {
639: const ViennaCLVector *xgpu;
640: ViennaCLVector *ygpu;
641: PetscErrorCode ierr;
644: if (xin != yin && xin->map->n > 0) {
645: if (xin->valid_GPU_array == PETSC_OFFLOAD_GPU) {
646: VecViennaCLGetArrayRead(xin,&xgpu);
647: VecViennaCLGetArrayWrite(yin,&ygpu);
648: try {
649: *ygpu = *xgpu;
650: ViennaCLWaitForGPU();
651: } catch(std::exception const & ex) {
652: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
653: }
654: VecViennaCLRestoreArrayRead(xin,&xgpu);
655: VecViennaCLRestoreArrayWrite(yin,&ygpu);
657: } else if (xin->valid_GPU_array == PETSC_OFFLOAD_CPU) {
658: /* copy in CPU if we are on the CPU*/
659: VecCopy_SeqViennaCL_Private(xin,yin);
660: ViennaCLWaitForGPU();
661: } else if (xin->valid_GPU_array == PETSC_OFFLOAD_BOTH) {
662: /* if xin is valid in both places, see where yin is and copy there (because it's probably where we'll want to next use it) */
663: if (yin->valid_GPU_array == PETSC_OFFLOAD_CPU) {
664: /* copy in CPU */
665: VecCopy_SeqViennaCL_Private(xin,yin);
666: ViennaCLWaitForGPU();
667: } else if (yin->valid_GPU_array == PETSC_OFFLOAD_GPU) {
668: /* copy in GPU */
669: VecViennaCLGetArrayRead(xin,&xgpu);
670: VecViennaCLGetArrayWrite(yin,&ygpu);
671: try {
672: *ygpu = *xgpu;
673: ViennaCLWaitForGPU();
674: } catch(std::exception const & ex) {
675: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
676: }
677: VecViennaCLRestoreArrayRead(xin,&xgpu);
678: VecViennaCLRestoreArrayWrite(yin,&ygpu);
679: } else if (yin->valid_GPU_array == PETSC_OFFLOAD_BOTH) {
680: /* xin and yin are both valid in both places (or yin was unallocated before the earlier call to allocatecheck
681: default to copy in GPU (this is an arbitrary choice) */
682: VecViennaCLGetArrayRead(xin,&xgpu);
683: VecViennaCLGetArrayWrite(yin,&ygpu);
684: try {
685: *ygpu = *xgpu;
686: ViennaCLWaitForGPU();
687: } catch(std::exception const & ex) {
688: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
689: }
690: VecViennaCLRestoreArrayRead(xin,&xgpu);
691: VecViennaCLRestoreArrayWrite(yin,&ygpu);
692: } else {
693: VecCopy_SeqViennaCL_Private(xin,yin);
694: ViennaCLWaitForGPU();
695: }
696: }
697: }
698: return(0);
699: }
702: PetscErrorCode VecSwap_SeqViennaCL(Vec xin,Vec yin)
703: {
705: ViennaCLVector *xgpu,*ygpu;
708: if (xin != yin && xin->map->n > 0) {
709: VecViennaCLGetArrayReadWrite(xin,&xgpu);
710: VecViennaCLGetArrayReadWrite(yin,&ygpu);
712: try {
713: viennacl::swap(*xgpu, *ygpu);
714: ViennaCLWaitForGPU();
715: } catch(std::exception const & ex) {
716: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
717: }
718: VecViennaCLRestoreArrayReadWrite(xin,&xgpu);
719: VecViennaCLRestoreArrayReadWrite(yin,&ygpu);
720: }
721: return(0);
722: }
725: // y = alpha * x + beta * y
726: PetscErrorCode VecAXPBY_SeqViennaCL(Vec yin,PetscScalar alpha,PetscScalar beta,Vec xin)
727: {
728: PetscErrorCode ierr;
729: PetscScalar a = alpha,b = beta;
730: const ViennaCLVector *xgpu;
731: ViennaCLVector *ygpu;
734: if (a == 0.0 && xin->map->n > 0) {
735: VecScale_SeqViennaCL(yin,beta);
736: } else if (b == 1.0 && xin->map->n > 0) {
737: VecAXPY_SeqViennaCL(yin,alpha,xin);
738: } else if (a == 1.0 && xin->map->n > 0) {
739: VecAYPX_SeqViennaCL(yin,beta,xin);
740: } else if (b == 0.0 && xin->map->n > 0) {
741: VecViennaCLGetArrayRead(xin,&xgpu);
742: VecViennaCLGetArrayReadWrite(yin,&ygpu);
743: try {
744: *ygpu = *xgpu * alpha;
745: ViennaCLWaitForGPU();
746: } catch(std::exception const & ex) {
747: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
748: }
749: PetscLogFlops(xin->map->n);
750: VecViennaCLRestoreArrayRead(xin,&xgpu);
751: VecViennaCLRestoreArrayReadWrite(yin,&ygpu);
752: } else if (xin->map->n > 0) {
753: VecViennaCLGetArrayRead(xin,&xgpu);
754: VecViennaCLGetArrayReadWrite(yin,&ygpu);
755: try {
756: *ygpu = *xgpu * alpha + *ygpu * beta;
757: ViennaCLWaitForGPU();
758: } catch(std::exception const & ex) {
759: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
760: }
761: VecViennaCLRestoreArrayRead(xin,&xgpu);
762: VecViennaCLRestoreArrayReadWrite(yin,&ygpu);
763: PetscLogFlops(3.0*xin->map->n);
764: }
765: return(0);
766: }
769: /* operation z = alpha * x + beta *y + gamma *z*/
770: PetscErrorCode VecAXPBYPCZ_SeqViennaCL(Vec zin,PetscScalar alpha,PetscScalar beta,PetscScalar gamma,Vec xin,Vec yin)
771: {
772: PetscErrorCode ierr;
773: PetscInt n = zin->map->n;
774: const ViennaCLVector *xgpu,*ygpu;
775: ViennaCLVector *zgpu;
778: VecViennaCLGetArrayRead(xin,&xgpu);
779: VecViennaCLGetArrayRead(yin,&ygpu);
780: VecViennaCLGetArrayReadWrite(zin,&zgpu);
781: if (alpha == 0.0 && xin->map->n > 0) {
782: try {
783: if (beta == 0.0) {
784: *zgpu = gamma * *zgpu;
785: ViennaCLWaitForGPU();
786: PetscLogFlops(1.0*n);
787: } else if (gamma == 0.0) {
788: *zgpu = beta * *ygpu;
789: ViennaCLWaitForGPU();
790: PetscLogFlops(1.0*n);
791: } else {
792: *zgpu = beta * *ygpu + gamma * *zgpu;
793: ViennaCLWaitForGPU();
794: PetscLogFlops(3.0*n);
795: }
796: } catch(std::exception const & ex) {
797: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
798: }
799: PetscLogFlops(3.0*n);
800: } else if (beta == 0.0 && xin->map->n > 0) {
801: try {
802: if (gamma == 0.0) {
803: *zgpu = alpha * *xgpu;
804: ViennaCLWaitForGPU();
805: PetscLogFlops(1.0*n);
806: } else {
807: *zgpu = alpha * *xgpu + gamma * *zgpu;
808: ViennaCLWaitForGPU();
809: PetscLogFlops(3.0*n);
810: }
811: } catch(std::exception const & ex) {
812: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
813: }
814: } else if (gamma == 0.0 && xin->map->n > 0) {
815: try {
816: *zgpu = alpha * *xgpu + beta * *ygpu;
817: ViennaCLWaitForGPU();
818: } catch(std::exception const & ex) {
819: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
820: }
821: PetscLogFlops(3.0*n);
822: } else if (xin->map->n > 0) {
823: try {
824: /* Split operation into two steps. This is not completely ideal, but avoids temporaries (which are far worse) */
825: if (gamma != 1.0)
826: *zgpu *= gamma;
827: *zgpu += alpha * *xgpu + beta * *ygpu;
828: ViennaCLWaitForGPU();
829: } catch(std::exception const & ex) {
830: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
831: }
832: VecViennaCLRestoreArrayReadWrite(zin,&zgpu);
833: VecViennaCLRestoreArrayRead(xin,&xgpu);
834: VecViennaCLRestoreArrayRead(yin,&ygpu);
835: PetscLogFlops(5.0*n);
836: }
837: return(0);
838: }
840: PetscErrorCode VecPointwiseMult_SeqViennaCL(Vec win,Vec xin,Vec yin)
841: {
842: PetscErrorCode ierr;
843: PetscInt n = win->map->n;
844: const ViennaCLVector *xgpu,*ygpu;
845: ViennaCLVector *wgpu;
848: if (xin->map->n > 0) {
849: VecViennaCLGetArrayRead(xin,&xgpu);
850: VecViennaCLGetArrayRead(yin,&ygpu);
851: VecViennaCLGetArrayReadWrite(win,&wgpu);
852: try {
853: *wgpu = viennacl::linalg::element_prod(*xgpu, *ygpu);
854: ViennaCLWaitForGPU();
855: } catch(std::exception const & ex) {
856: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
857: }
858: VecViennaCLRestoreArrayRead(xin,&xgpu);
859: VecViennaCLRestoreArrayRead(yin,&ygpu);
860: VecViennaCLRestoreArrayReadWrite(win,&wgpu);
861: PetscLogFlops(n);
862: }
863: return(0);
864: }
867: PetscErrorCode VecNorm_SeqViennaCL(Vec xin,NormType type,PetscReal *z)
868: {
869: PetscErrorCode ierr;
870: PetscInt n = xin->map->n;
871: PetscBLASInt bn;
872: const ViennaCLVector *xgpu;
875: if (xin->map->n > 0) {
876: PetscBLASIntCast(n,&bn);
877: VecViennaCLGetArrayRead(xin,&xgpu);
878: if (type == NORM_2 || type == NORM_FROBENIUS) {
879: try {
880: *z = viennacl::linalg::norm_2(*xgpu);
881: ViennaCLWaitForGPU();
882: } catch(std::exception const & ex) {
883: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
884: }
885: PetscLogFlops(PetscMax(2.0*n-1,0.0));
886: } else if (type == NORM_INFINITY) {
887: VecViennaCLGetArrayRead(xin,&xgpu);
888: try {
889: *z = viennacl::linalg::norm_inf(*xgpu);
890: ViennaCLWaitForGPU();
891: } catch(std::exception const & ex) {
892: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
893: }
894: VecViennaCLRestoreArrayRead(xin,&xgpu);
895: } else if (type == NORM_1) {
896: try {
897: *z = viennacl::linalg::norm_1(*xgpu);
898: ViennaCLWaitForGPU();
899: } catch(std::exception const & ex) {
900: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
901: }
902: PetscLogFlops(PetscMax(n-1.0,0.0));
903: } else if (type == NORM_1_AND_2) {
904: try {
905: *z = viennacl::linalg::norm_1(*xgpu);
906: *(z+1) = viennacl::linalg::norm_2(*xgpu);
907: ViennaCLWaitForGPU();
908: } catch(std::exception const & ex) {
909: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
910: }
911: PetscLogFlops(PetscMax(2.0*n-1,0.0));
912: PetscLogFlops(PetscMax(n-1.0,0.0));
913: }
914: VecViennaCLRestoreArrayRead(xin,&xgpu);
915: } else if (type == NORM_1_AND_2) {
916: *z = 0.0;
917: *(z+1) = 0.0;
918: } else *z = 0.0;
919: return(0);
920: }
923: PetscErrorCode VecSetRandom_SeqViennaCL(Vec xin,PetscRandom r)
924: {
928: VecSetRandom_SeqViennaCL_Private(xin,r);
929: xin->valid_GPU_array = PETSC_OFFLOAD_CPU;
930: return(0);
931: }
933: PetscErrorCode VecResetArray_SeqViennaCL(Vec vin)
934: {
939: VecViennaCLCopyFromGPU(vin);
940: VecResetArray_SeqViennaCL_Private(vin);
941: vin->valid_GPU_array = PETSC_OFFLOAD_CPU;
942: return(0);
943: }
945: PetscErrorCode VecPlaceArray_SeqViennaCL(Vec vin,const PetscScalar *a)
946: {
951: VecViennaCLCopyFromGPU(vin);
952: VecPlaceArray_Seq(vin,a);
953: vin->valid_GPU_array = PETSC_OFFLOAD_CPU;
954: return(0);
955: }
958: PetscErrorCode VecReplaceArray_SeqViennaCL(Vec vin,const PetscScalar *a)
959: {
964: VecViennaCLCopyFromGPU(vin);
965: VecReplaceArray_Seq(vin,a);
966: vin->valid_GPU_array = PETSC_OFFLOAD_CPU;
967: return(0);
968: }
971: /*@
972: VecCreateSeqViennaCL - Creates a standard, sequential array-style vector.
974: Collective on MPI_Comm
976: Input Parameter:
977: + comm - the communicator, should be PETSC_COMM_SELF
978: - n - the vector length
980: Output Parameter:
981: . V - the vector
983: Notes:
984: Use VecDuplicate() or VecDuplicateVecs() to form additional vectors of the
985: same type as an existing vector.
987: Level: intermediate
989: Concepts: vectors^creating sequential
991: .seealso: VecCreateMPI(), VecCreate(), VecDuplicate(), VecDuplicateVecs(), VecCreateGhost()
992: @*/
993: PetscErrorCode VecCreateSeqViennaCL(MPI_Comm comm,PetscInt n,Vec *v)
994: {
998: VecCreate(comm,v);
999: VecSetSizes(*v,n,n);
1000: VecSetType(*v,VECSEQVIENNACL);
1001: return(0);
1002: }
1005: /* VecDotNorm2 - computes the inner product of two vectors and the 2-norm squared of the second vector
1006: *
1007: * Simply reuses VecDot() and VecNorm(). Performance improvement through custom kernel (kernel generator) possible.
1008: */
1009: PetscErrorCode VecDotNorm2_SeqViennaCL(Vec s, Vec t, PetscScalar *dp, PetscScalar *nm)
1010: {
1011: PetscErrorCode ierr;
1014: VecDot_SeqViennaCL(s,t,dp);
1015: VecNorm_SeqViennaCL(t,NORM_2,nm);
1016: *nm *= *nm; //squared norm required
1017: return(0);
1018: }
1020: PetscErrorCode VecDuplicate_SeqViennaCL(Vec win,Vec *V)
1021: {
1025: VecCreateSeqViennaCL(PetscObjectComm((PetscObject)win),win->map->n,V);
1026: PetscLayoutReference(win->map,&(*V)->map);
1027: PetscObjectListDuplicate(((PetscObject)win)->olist,&((PetscObject)(*V))->olist);
1028: PetscFunctionListDuplicate(((PetscObject)win)->qlist,&((PetscObject)(*V))->qlist);
1029: (*V)->stash.ignorenegidx = win->stash.ignorenegidx;
1030: return(0);
1031: }
1033: PetscErrorCode VecDestroy_SeqViennaCL(Vec v)
1034: {
1038: try {
1039: if (v->spptr) {
1040: delete ((Vec_ViennaCL*)v->spptr)->GPUarray;
1041: delete (Vec_ViennaCL*) v->spptr;
1042: }
1043: } catch(char *ex) {
1044: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex);
1045: }
1046: VecDestroy_SeqViennaCL_Private(v);
1047: return(0);
1048: }
1051: PETSC_EXTERN PetscErrorCode VecCreate_SeqViennaCL(Vec V)
1052: {
1054: PetscMPIInt size;
1057: MPI_Comm_size(PetscObjectComm((PetscObject)V),&size);
1058: if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot create VECSEQVIENNACL on more than one process");
1059: VecCreate_Seq_Private(V,0);
1060: PetscObjectChangeTypeName((PetscObject)V,VECSEQVIENNACL);
1062: V->ops->dot = VecDot_SeqViennaCL;
1063: V->ops->norm = VecNorm_SeqViennaCL;
1064: V->ops->tdot = VecTDot_SeqViennaCL;
1065: V->ops->scale = VecScale_SeqViennaCL;
1066: V->ops->copy = VecCopy_SeqViennaCL;
1067: V->ops->set = VecSet_SeqViennaCL;
1068: V->ops->swap = VecSwap_SeqViennaCL;
1069: V->ops->axpy = VecAXPY_SeqViennaCL;
1070: V->ops->axpby = VecAXPBY_SeqViennaCL;
1071: V->ops->axpbypcz = VecAXPBYPCZ_SeqViennaCL;
1072: V->ops->pointwisemult = VecPointwiseMult_SeqViennaCL;
1073: V->ops->pointwisedivide = VecPointwiseDivide_SeqViennaCL;
1074: V->ops->setrandom = VecSetRandom_SeqViennaCL;
1075: V->ops->dot_local = VecDot_SeqViennaCL;
1076: V->ops->tdot_local = VecTDot_SeqViennaCL;
1077: V->ops->norm_local = VecNorm_SeqViennaCL;
1078: V->ops->mdot_local = VecMDot_SeqViennaCL;
1079: V->ops->mtdot_local = VecMTDot_SeqViennaCL;
1080: V->ops->maxpy = VecMAXPY_SeqViennaCL;
1081: V->ops->mdot = VecMDot_SeqViennaCL;
1082: V->ops->mtdot = VecMTDot_SeqViennaCL;
1083: V->ops->aypx = VecAYPX_SeqViennaCL;
1084: V->ops->waxpy = VecWAXPY_SeqViennaCL;
1085: V->ops->dotnorm2 = VecDotNorm2_SeqViennaCL;
1086: V->ops->placearray = VecPlaceArray_SeqViennaCL;
1087: V->ops->replacearray = VecReplaceArray_SeqViennaCL;
1088: V->ops->resetarray = VecResetArray_SeqViennaCL;
1089: V->ops->destroy = VecDestroy_SeqViennaCL;
1090: V->ops->duplicate = VecDuplicate_SeqViennaCL;
1092: VecViennaCLAllocateCheck(V);
1093: V->valid_GPU_array = PETSC_OFFLOAD_GPU;
1094: VecSet(V,0.0);
1095: return(0);
1096: }