Actual source code: vecviennacl.cxx
petsc-3.11.4 2019-09-28
1: /*
2: Implements the sequential ViennaCL vectors.
3: */
5: #include <petscconf.h>
6: #include <petsc/private/vecimpl.h>
7: #include <../src/vec/vec/impls/dvecimpl.h>
8: #include <../src/vec/vec/impls/seq/seqviennacl/viennaclvecimpl.h>
10: #include <vector>
12: #include "viennacl/linalg/inner_prod.hpp"
13: #include "viennacl/linalg/norm_1.hpp"
14: #include "viennacl/linalg/norm_2.hpp"
15: #include "viennacl/linalg/norm_inf.hpp"
17: #ifdef VIENNACL_WITH_OPENCL
18: #include "viennacl/ocl/backend.hpp"
19: #endif
22: PETSC_EXTERN PetscErrorCode VecViennaCLGetArrayReadWrite(Vec v, ViennaCLVector **a)
23: {
28: *a = 0;
29: VecViennaCLCopyToGPU(v);
30: *a = ((Vec_ViennaCL*)v->spptr)->GPUarray;
31: ViennaCLWaitForGPU();
32: return(0);
33: }
35: PETSC_EXTERN PetscErrorCode VecViennaCLRestoreArrayReadWrite(Vec v, ViennaCLVector **a)
36: {
41: v->valid_GPU_array = PETSC_OFFLOAD_GPU;
43: PetscObjectStateIncrease((PetscObject)v);
44: return(0);
45: }
47: PETSC_EXTERN PetscErrorCode VecViennaCLGetArrayRead(Vec v, const ViennaCLVector **a)
48: {
53: *a = 0;
54: VecViennaCLCopyToGPU(v);
55: *a = ((Vec_ViennaCL*)v->spptr)->GPUarray;
56: ViennaCLWaitForGPU();
57: return(0);
58: }
60: PETSC_EXTERN PetscErrorCode VecViennaCLRestoreArrayRead(Vec v, const ViennaCLVector **a)
61: {
64: return(0);
65: }
67: PETSC_EXTERN PetscErrorCode VecViennaCLGetArrayWrite(Vec v, ViennaCLVector **a)
68: {
73: *a = 0;
74: VecViennaCLAllocateCheck(v);
75: *a = ((Vec_ViennaCL*)v->spptr)->GPUarray;
76: ViennaCLWaitForGPU();
77: return(0);
78: }
80: PETSC_EXTERN PetscErrorCode VecViennaCLRestoreArrayWrite(Vec v, ViennaCLVector **a)
81: {
86: v->valid_GPU_array = PETSC_OFFLOAD_GPU;
88: PetscObjectStateIncrease((PetscObject)v);
89: return(0);
90: }
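A minimal usage sketch of the accessor pairs defined above, assuming the same headers as this file; the helper name is hypothetical. Getting the array triggers the host-to-device copy if needed, and restoring it marks the device copy as the valid one.

static PetscErrorCode MyScaleOnDevice(Vec v, PetscScalar alpha)
{
  PetscErrorCode ierr;
  ViennaCLVector *va;

  ierr = VecViennaCLGetArrayReadWrite(v,&va);CHKERRQ(ierr);     /* copies to the device if the host copy is newer */
  try {
    *va *= alpha;                                               /* executed by the selected ViennaCL backend */
  } catch (std::exception const & ex) {
    SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
  }
  ierr = VecViennaCLRestoreArrayReadWrite(v,&va);CHKERRQ(ierr); /* sets PETSC_OFFLOAD_GPU and bumps the object state */
  return(0);
}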
94: PETSC_EXTERN PetscErrorCode PetscViennaCLInit()
95: {
96: PetscErrorCode ierr;
97: char string[20];
98: PetscBool flg,flg_cuda,flg_opencl,flg_openmp;
101: /* ViennaCL backend selection: CUDA, OpenCL, or OpenMP */
102: PetscOptionsGetString(NULL,NULL,"-viennacl_backend",string,12,&flg);
103: if (flg) {
104: try {
105: PetscStrcasecmp(string,"cuda",&flg_cuda);
106: PetscStrcasecmp(string,"opencl",&flg_opencl);
107: PetscStrcasecmp(string,"openmp",&flg_openmp);
109: /* A default (sequential) CPU backend is always available - even if OpenMP is not enabled. */
110: if (flg_openmp) viennacl::backend::default_memory_type(viennacl::MAIN_MEMORY);
111: #if defined(PETSC_HAVE_CUDA)
112: else if (flg_cuda) viennacl::backend::default_memory_type(viennacl::CUDA_MEMORY);
113: #endif
114: #if defined(PETSC_HAVE_OPENCL)
115: else if (flg_opencl) viennacl::backend::default_memory_type(viennacl::OPENCL_MEMORY);
116: #endif
117: else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: Backend not recognized or available: %s.\n Pass -viennacl_view to see available backends for ViennaCL.\n", string);
118: } catch (std::exception const & ex) {
119: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
120: }
121: }
123: #if defined(PETSC_HAVE_OPENCL)
124: /* ViennaCL OpenCL device type configuration */
125: PetscOptionsGetString(NULL,NULL,"-viennacl_opencl_device_type",string,12,&flg);
126: if (flg) {
127: try {
128: PetscStrcasecmp(string,"cpu",&flg);
129: if (flg) viennacl::ocl::set_context_device_type(0, CL_DEVICE_TYPE_CPU);
131: PetscStrcasecmp(string,"gpu",&flg);
132: if (flg) viennacl::ocl::set_context_device_type(0, CL_DEVICE_TYPE_GPU);
134: PetscStrcasecmp(string,"accelerator",&flg);
135: if (flg) viennacl::ocl::set_context_device_type(0, CL_DEVICE_TYPE_ACCELERATOR);
136: } catch (std::exception const & ex) {
137: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
138: }
139: }
140: #endif
142: /* Print available backends */
143: PetscOptionsHasName(NULL,NULL,"-viennacl_view",&flg);
144: if (flg) {
145: PetscPrintf(PETSC_COMM_WORLD, "ViennaCL backends available: ");
146: #if defined(PETSC_HAVE_CUDA)
147: PetscPrintf(PETSC_COMM_WORLD, "CUDA, ");
148: #endif
149: #if defined(PETSC_HAVE_OPENCL)
150: PetscPrintf(PETSC_COMM_WORLD, "OpenCL, ");
151: #endif
152: #if defined(PETSC_HAVE_OPENMP)
153: PetscPrintf(PETSC_COMM_WORLD, "OpenMP ");
154: #else
155: PetscPrintf(PETSC_COMM_WORLD, "OpenMP (1 thread) ");
156: #endif
157: PetscPrintf(PETSC_COMM_WORLD, "\n");
159:   /* Print the selected backend */

160: PetscPrintf(PETSC_COMM_WORLD, "ViennaCL backend selected: ");
161: #if defined(PETSC_HAVE_CUDA)
162: if (viennacl::backend::default_memory_type() == viennacl::CUDA_MEMORY) {
163: PetscPrintf(PETSC_COMM_WORLD, "CUDA ");
164: }
165: #endif
166: #if defined(PETSC_HAVE_OPENCL)
167: if (viennacl::backend::default_memory_type() == viennacl::OPENCL_MEMORY) {
168: PetscPrintf(PETSC_COMM_WORLD, "OpenCL ");
169: }
170: #endif
171: #if defined(PETSC_HAVE_OPENMP)
172: if (viennacl::backend::default_memory_type() == viennacl::MAIN_MEMORY) {
173: PetscPrintf(PETSC_COMM_WORLD, "OpenMP ");
174: }
175: #else
176: if (viennacl::backend::default_memory_type() == viennacl::MAIN_MEMORY) {
177: PetscPrintf(PETSC_COMM_WORLD, "OpenMP (sequential - consider reconfiguration: --with-openmp=1) ");
178: }
179: #endif
180: PetscPrintf(PETSC_COMM_WORLD, "\n");
181: }
182: return(0);
183: }
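The options parsed above are given on the command line (or in an options file). A hedged example invocation, assuming an OpenCL-enabled build (PETSC_HAVE_OPENCL) and where ./app stands for any PETSc application linked against it:

  ./app -viennacl_backend opencl -viennacl_opencl_device_type gpu -viennacl_view

With -viennacl_view, the code above reports which backends were compiled in and which one ends up selected.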
185: /*
186:     Allocates space for the vector array on the host if it does not already exist.
187:     Does NOT change the PetscViennaCLFlag for the vector.
188:     Does NOT zero the newly allocated host array.
189: */
190: PETSC_EXTERN PetscErrorCode VecViennaCLAllocateCheckHost(Vec v)
191: {
193: PetscScalar *array;
194: Vec_Seq *s;
195: PetscInt n = v->map->n;
198: s = (Vec_Seq*)v->data;
199: VecViennaCLAllocateCheck(v);
200: if (s->array == 0) {
201: PetscMalloc1(n,&array);
202: PetscLogObjectMemory((PetscObject)v,n*sizeof(PetscScalar));
203: s->array = array;
204: s->array_allocated = array;
205: }
206: return(0);
207: }
210: /*
211: Allocates space for the vector array on the GPU if it does not exist.
212: Does NOT change the PetscViennaCLFlag for the vector
213: Does NOT zero the ViennaCL array
215: */
216: PetscErrorCode VecViennaCLAllocateCheck(Vec v)
217: {
219: int rank;
222: MPI_Comm_rank(PETSC_COMM_WORLD,&rank);
223: // First allocate memory on the GPU if needed
224: if (!v->spptr) {
225: try {
226: v->spptr = new Vec_ViennaCL;
227: ((Vec_ViennaCL*)v->spptr)->GPUarray = new ViennaCLVector((PetscBLASInt)v->map->n);
229: } catch(std::exception const & ex) {
230: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
231: }
232: }
233: return(0);
234: }
237: /* Copies a vector from the CPU to the GPU unless we already have an up-to-date copy on the GPU */
238: PetscErrorCode VecViennaCLCopyToGPU(Vec v)
239: {
244: VecViennaCLAllocateCheck(v);
245: if (v->map->n > 0) {
246: if (v->valid_GPU_array == PETSC_OFFLOAD_CPU) {
247: PetscLogEventBegin(VEC_ViennaCLCopyToGPU,v,0,0,0);
248: try {
249: ViennaCLVector *vec = ((Vec_ViennaCL*)v->spptr)->GPUarray;
250: viennacl::fast_copy(*(PetscScalar**)v->data, *(PetscScalar**)v->data + v->map->n, vec->begin());
251: ViennaCLWaitForGPU();
252: } catch(std::exception const & ex) {
253: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
254: }
255: PetscLogEventEnd(VEC_ViennaCLCopyToGPU,v,0,0,0);
256: v->valid_GPU_array = PETSC_OFFLOAD_BOTH;
257: }
258: }
259: return(0);
260: }
264: /*
265: VecViennaCLCopyFromGPU - Copies a vector from the GPU to the CPU unless we already have an up-to-date copy on the CPU
266: */
267: PetscErrorCode VecViennaCLCopyFromGPU(Vec v)
268: {
273: VecViennaCLAllocateCheckHost(v);
274: if (v->valid_GPU_array == PETSC_OFFLOAD_GPU) {
275: PetscLogEventBegin(VEC_ViennaCLCopyFromGPU,v,0,0,0);
276: try {
277: ViennaCLVector *vec = ((Vec_ViennaCL*)v->spptr)->GPUarray;
278: viennacl::fast_copy(vec->begin(),vec->end(),*(PetscScalar**)v->data);
279: ViennaCLWaitForGPU();
280: } catch(std::exception const & ex) {
281: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
282: }
283: PetscLogEventEnd(VEC_ViennaCLCopyFromGPU,v,0,0,0);
284: v->valid_GPU_array = PETSC_OFFLOAD_BOTH;
285: }
286: return(0);
287: }
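Taken together, the two copy routines above implement lazy mirroring driven by v->valid_GPU_array. A summary of the transitions implied by the code, for reference:

/* PETSC_OFFLOAD_CPU  --VecViennaCLCopyToGPU()-->   PETSC_OFFLOAD_BOTH  (host values copied into the ViennaCL vector)
   PETSC_OFFLOAD_GPU  --VecViennaCLCopyFromGPU()--> PETSC_OFFLOAD_BOTH  (device values copied back to the host array)
   any other state: both routines return without copying anything
   the write accessors (VecViennaCLRestoreArrayReadWrite()/VecViennaCLRestoreArrayWrite()) set PETSC_OFFLOAD_GPU,
   invalidating the host copy */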
290: /* Copy on CPU */
291: static PetscErrorCode VecCopy_SeqViennaCL_Private(Vec xin,Vec yin)
292: {
293: PetscScalar *ya;
294: const PetscScalar *xa;
295: PetscErrorCode ierr;
298: VecViennaCLAllocateCheckHost(xin);
299: VecViennaCLAllocateCheckHost(yin);
300: if (xin != yin) {
301: VecGetArrayRead(xin,&xa);
302: VecGetArray(yin,&ya);
303: PetscMemcpy(ya,xa,xin->map->n*sizeof(PetscScalar));
304: VecRestoreArrayRead(xin,&xa);
305: VecRestoreArray(yin,&ya);
306: }
307: return(0);
308: }
310: static PetscErrorCode VecSetRandom_SeqViennaCL_Private(Vec xin,PetscRandom r)
311: {
313: PetscInt n = xin->map->n,i;
314: PetscScalar *xx;
317: VecGetArray(xin,&xx);
318: for (i=0; i<n; i++) {PetscRandomGetValue(r,&xx[i]);}
319: VecRestoreArray(xin,&xx);
320: return(0);
321: }
323: static PetscErrorCode VecDestroy_SeqViennaCL_Private(Vec v)
324: {
325: Vec_Seq *vs = (Vec_Seq*)v->data;
329: PetscObjectSAWsViewOff(v);
330: #if defined(PETSC_USE_LOG)
331: PetscLogObjectState((PetscObject)v,"Length=%D",v->map->n);
332: #endif
333: if (vs->array_allocated) PetscFree(vs->array_allocated);
334: PetscFree(vs);
335: return(0);
336: }
338: static PetscErrorCode VecResetArray_SeqViennaCL_Private(Vec vin)
339: {
340: Vec_Seq *v = (Vec_Seq*)vin->data;
343: v->array = v->unplacedarray;
344: v->unplacedarray = 0;
345: return(0);
346: }
349: /*MC
350: VECSEQVIENNACL - VECSEQVIENNACL = "seqviennacl" - The basic sequential vector, modified to use ViennaCL
352: Options Database Keys:
353: . -vec_type seqviennacl - sets the vector type to VECSEQVIENNACL during a call to VecSetFromOptions()
355: Level: beginner
357: .seealso: VecCreate(), VecSetType(), VecSetFromOptions(), VecCreateSeqWithArray(), VECMPI, VecType, VecCreateMPI(), VecCreateSeq()
358: M*/
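A sketch of selecting this vector type at run time through the options database; the helper is hypothetical and error handling is minimal.

static PetscErrorCode MyCreateFromOptions(PetscInt n, Vec *v)
{
  PetscErrorCode ierr;

  ierr = VecCreate(PETSC_COMM_SELF,v);CHKERRQ(ierr);
  ierr = VecSetSizes(*v,n,n);CHKERRQ(ierr);
  ierr = VecSetFromOptions(*v);CHKERRQ(ierr);   /* becomes VECSEQVIENNACL when run with -vec_type seqviennacl */
  return(0);
}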
361: PetscErrorCode VecAYPX_SeqViennaCL(Vec yin, PetscScalar alpha, Vec xin)
362: {
363: const ViennaCLVector *xgpu;
364: ViennaCLVector *ygpu;
365: PetscErrorCode ierr;
368: VecViennaCLGetArrayRead(xin,&xgpu);
369: VecViennaCLGetArrayReadWrite(yin,&ygpu);
370: try {
371: if (alpha != 0.0 && xin->map->n > 0) {
372: *ygpu = *xgpu + alpha * *ygpu;
373: PetscLogFlops(2.0*yin->map->n);
374: } else {
375: *ygpu = *xgpu;
376: }
377: ViennaCLWaitForGPU();
378: } catch(std::exception const & ex) {
379: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
380: }
381: VecViennaCLRestoreArrayRead(xin,&xgpu);
382: VecViennaCLRestoreArrayReadWrite(yin,&ygpu);
383: return(0);
384: }
387: PetscErrorCode VecAXPY_SeqViennaCL(Vec yin,PetscScalar alpha,Vec xin)
388: {
389: const ViennaCLVector *xgpu;
390: ViennaCLVector *ygpu;
391: PetscErrorCode ierr;
394: if (alpha != 0.0 && xin->map->n > 0) {
395: VecViennaCLGetArrayRead(xin,&xgpu);
396: VecViennaCLGetArrayReadWrite(yin,&ygpu);
397: try {
398: *ygpu += alpha * *xgpu;
399: ViennaCLWaitForGPU();
400: } catch(std::exception const & ex) {
401: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
402: }
403: VecViennaCLRestoreArrayRead(xin,&xgpu);
404: VecViennaCLRestoreArrayReadWrite(yin,&ygpu);
405: PetscLogFlops(2.0*yin->map->n);
406: }
407: return(0);
408: }
411: PetscErrorCode VecPointwiseDivide_SeqViennaCL(Vec win, Vec xin, Vec yin)
412: {
413: const ViennaCLVector *xgpu,*ygpu;
414: ViennaCLVector *wgpu;
415: PetscErrorCode ierr;
418: if (xin->map->n > 0) {
419: VecViennaCLGetArrayRead(xin,&xgpu);
420: VecViennaCLGetArrayRead(yin,&ygpu);
421: VecViennaCLGetArrayWrite(win,&wgpu);
422: try {
423: *wgpu = viennacl::linalg::element_div(*xgpu, *ygpu);
424: ViennaCLWaitForGPU();
425: } catch(std::exception const & ex) {
426: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
427: }
428: PetscLogFlops(win->map->n);
429: VecViennaCLRestoreArrayRead(xin,&xgpu);
430: VecViennaCLRestoreArrayRead(yin,&ygpu);
431: VecViennaCLRestoreArrayWrite(win,&wgpu);
432: }
433: return(0);
434: }
437: PetscErrorCode VecWAXPY_SeqViennaCL(Vec win,PetscScalar alpha,Vec xin, Vec yin)
438: {
439: const ViennaCLVector *xgpu,*ygpu;
440: ViennaCLVector *wgpu;
441: PetscErrorCode ierr;
444: if (alpha == 0.0 && xin->map->n > 0) {
445: VecCopy_SeqViennaCL(yin,win);
446: } else {
447: VecViennaCLGetArrayRead(xin,&xgpu);
448: VecViennaCLGetArrayRead(yin,&ygpu);
449: VecViennaCLGetArrayWrite(win,&wgpu);
450: if (alpha == 1.0) {
451: try {
452: *wgpu = *ygpu + *xgpu;
453: } catch(std::exception const & ex) {
454: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
455: }
456: PetscLogFlops(win->map->n);
457: } else if (alpha == -1.0) {
458: try {
459: *wgpu = *ygpu - *xgpu;
460: } catch(std::exception const & ex) {
461: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
462: }
463: PetscLogFlops(win->map->n);
464: } else {
465: try {
466: *wgpu = *ygpu + alpha * *xgpu;
467: } catch(std::exception const & ex) {
468: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
469: }
470: PetscLogFlops(2*win->map->n);
471: }
472: ViennaCLWaitForGPU();
473: VecViennaCLRestoreArrayRead(xin,&xgpu);
474: VecViennaCLRestoreArrayRead(yin,&ygpu);
475: VecViennaCLRestoreArrayWrite(win,&wgpu);
476: }
477: return(0);
478: }
481: /*
482: * Operation x = x + sum_i alpha_i * y_i for vectors x, y_i and scalars alpha_i
483: *
484: * ViennaCL supports a fast evaluation of x += alpha * y and x += alpha * y + beta * z,
485: * hence there is an iterated application of these until the final result is obtained
486: */
487: PetscErrorCode VecMAXPY_SeqViennaCL(Vec xin, PetscInt nv,const PetscScalar *alpha,Vec *y)
488: {
490: PetscInt j;
493: for (j = 0; j < nv; ++j) {
494: if (j+1 < nv) {
495: VecAXPBYPCZ_SeqViennaCL(xin,alpha[j],alpha[j+1],1.0,y[j],y[j+1]);
496: ++j;
497: } else {
498: VecAXPY_SeqViennaCL(xin,alpha[j],y[j]);
499: }
500: }
501: ViennaCLWaitForGPU();
502: return(0);
503: }
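As a concrete illustration of the pairing above: for nv = 5 the loop issues three fused updates instead of five separate AXPYs (this is simply the loop unrolled by hand, not additional PETSc code).

/* x += alpha[0]*y[0] + alpha[1]*y[1];   VecAXPBYPCZ_SeqViennaCL(x, alpha[0], alpha[1], 1.0, y[0], y[1])
   x += alpha[2]*y[2] + alpha[3]*y[3];   VecAXPBYPCZ_SeqViennaCL(x, alpha[2], alpha[3], 1.0, y[2], y[3])
   x += alpha[4]*y[4];                   VecAXPY_SeqViennaCL(x, alpha[4], y[4])                          */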
506: PetscErrorCode VecDot_SeqViennaCL(Vec xin,Vec yin,PetscScalar *z)
507: {
508: const ViennaCLVector *xgpu,*ygpu;
509: PetscErrorCode ierr;
512: if (xin->map->n > 0) {
513: VecViennaCLGetArrayRead(xin,&xgpu);
514: VecViennaCLGetArrayRead(yin,&ygpu);
515: try {
516: *z = viennacl::linalg::inner_prod(*xgpu,*ygpu);
517: } catch(std::exception const & ex) {
518: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
519: }
520: if (xin->map->n >0) {
521: PetscLogFlops(2.0*xin->map->n-1);
522: }
523: ViennaCLWaitForGPU();
524: VecViennaCLRestoreArrayRead(xin,&xgpu);
525: VecViennaCLRestoreArrayRead(yin,&ygpu);
526: } else *z = 0.0;
527: return(0);
528: }
532: /*
533: * Operation z[j] = dot(x, y[j])
534: *
535:  * The y vectors are packed into a viennacl::vector_tuple and all inner products with x are evaluated in a single call to viennacl::linalg::inner_prod(), which avoids one pass over x per j.
536: */
537: PetscErrorCode VecMDot_SeqViennaCL(Vec xin,PetscInt nv,const Vec yin[],PetscScalar *z)
538: {
539: PetscErrorCode ierr;
540: PetscInt n = xin->map->n,i;
541: const ViennaCLVector *xgpu,*ygpu;
542: Vec *yyin = (Vec*)yin;
543: std::vector<viennacl::vector_base<PetscScalar> const *> ygpu_array(nv);
546: if (xin->map->n > 0) {
547: VecViennaCLGetArrayRead(xin,&xgpu);
548: for (i=0; i<nv; i++) {
549: VecViennaCLGetArrayRead(yyin[i],&ygpu);
550: ygpu_array[i] = ygpu;
551: }
553: viennacl::vector_tuple<PetscScalar> y_tuple(ygpu_array);
554: ViennaCLVector result = viennacl::linalg::inner_prod(*xgpu, y_tuple);
556: viennacl::copy(result.begin(), result.end(), z);
558: for (i=0; i<nv; i++) {
559: VecViennaCLRestoreArrayRead(yyin[i],&ygpu);
560: }
562: ViennaCLWaitForGPU();
563: VecViennaCLRestoreArrayRead(xin,&xgpu);
564: PetscLogFlops(PetscMax(nv*(2.0*n-1),0.0));
565: } else {
566: for (i=0; i<nv; i++) z[i] = 0.0;
567: }
568: return(0);
569: }
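A usage sketch (hypothetical helper): the public VecMDot() dispatches to the routine above for VECSEQVIENNACL vectors and evaluates the whole batch of inner products on the device.

static PetscErrorCode MyThreeDots(Vec x, Vec y[3], PetscScalar z[3])
{
  PetscErrorCode ierr;

  ierr = VecMDot(x,3,y,z);CHKERRQ(ierr);   /* z[j] = dot(x, y[j]) for j = 0,1,2 */
  return(0);
}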
571: PetscErrorCode VecMTDot_SeqViennaCL(Vec xin,PetscInt nv,const Vec yin[],PetscScalar *z)
572: {
576: /* Since complex case is not supported at the moment, this is the same as VecMDot_SeqViennaCL */
577: VecMDot_SeqViennaCL(xin,nv,yin,z);
578: ViennaCLWaitForGPU();
579: return(0);
580: }
583: PetscErrorCode VecSet_SeqViennaCL(Vec xin,PetscScalar alpha)
584: {
585: ViennaCLVector *xgpu;
589: if (xin->map->n > 0) {
590: VecViennaCLGetArrayWrite(xin,&xgpu);
591: try {
592: *xgpu = viennacl::scalar_vector<PetscScalar>(xgpu->size(), alpha);
593: ViennaCLWaitForGPU();
594: } catch(std::exception const & ex) {
595: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
596: }
597: VecViennaCLRestoreArrayWrite(xin,&xgpu);
598: }
599: return(0);
600: }
602: PetscErrorCode VecScale_SeqViennaCL(Vec xin, PetscScalar alpha)
603: {
604: ViennaCLVector *xgpu;
608: if (alpha == 0.0 && xin->map->n > 0) {
609: VecSet_SeqViennaCL(xin,alpha);
610: PetscLogFlops(xin->map->n);
611: } else if (alpha != 1.0 && xin->map->n > 0) {
612: VecViennaCLGetArrayReadWrite(xin,&xgpu);
613: try {
614: *xgpu *= alpha;
615: ViennaCLWaitForGPU();
616: } catch(std::exception const & ex) {
617: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
618: }
619: VecViennaCLRestoreArrayReadWrite(xin,&xgpu);
620: PetscLogFlops(xin->map->n);
621: }
622: return(0);
623: }
626: PetscErrorCode VecTDot_SeqViennaCL(Vec xin,Vec yin,PetscScalar *z)
627: {
631: /* Since complex case is not supported at the moment, this is the same as VecDot_SeqViennaCL */
632: VecDot_SeqViennaCL(xin, yin, z);
633: ViennaCLWaitForGPU();
634: return(0);
635: }
638: PetscErrorCode VecCopy_SeqViennaCL(Vec xin,Vec yin)
639: {
640: const ViennaCLVector *xgpu;
641: ViennaCLVector *ygpu;
642: PetscErrorCode ierr;
645: if (xin != yin && xin->map->n > 0) {
646: if (xin->valid_GPU_array == PETSC_OFFLOAD_GPU) {
647: VecViennaCLGetArrayRead(xin,&xgpu);
648: VecViennaCLGetArrayWrite(yin,&ygpu);
649: try {
650: *ygpu = *xgpu;
651: ViennaCLWaitForGPU();
652: } catch(std::exception const & ex) {
653: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
654: }
655: VecViennaCLRestoreArrayRead(xin,&xgpu);
656: VecViennaCLRestoreArrayWrite(yin,&ygpu);
658: } else if (xin->valid_GPU_array == PETSC_OFFLOAD_CPU) {
659:       /* copy on the CPU, since the up-to-date data is on the CPU */
660: VecCopy_SeqViennaCL_Private(xin,yin);
661: ViennaCLWaitForGPU();
662: } else if (xin->valid_GPU_array == PETSC_OFFLOAD_BOTH) {
663:     } else if (xin->valid_GPU_array == PETSC_OFFLOAD_BOTH) {
664:       /* if xin is valid in both places, see where yin is and copy there (because that is probably where we will want to use it next) */
664: if (yin->valid_GPU_array == PETSC_OFFLOAD_CPU) {
665: /* copy in CPU */
666: VecCopy_SeqViennaCL_Private(xin,yin);
667: ViennaCLWaitForGPU();
668: } else if (yin->valid_GPU_array == PETSC_OFFLOAD_GPU) {
669: /* copy in GPU */
670: VecViennaCLGetArrayRead(xin,&xgpu);
671: VecViennaCLGetArrayWrite(yin,&ygpu);
672: try {
673: *ygpu = *xgpu;
674: ViennaCLWaitForGPU();
675: } catch(std::exception const & ex) {
676: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
677: }
678: VecViennaCLRestoreArrayRead(xin,&xgpu);
679: VecViennaCLRestoreArrayWrite(yin,&ygpu);
680: } else if (yin->valid_GPU_array == PETSC_OFFLOAD_BOTH) {
681:         /* xin and yin are both valid in both places (or yin was unallocated before the earlier call to allocatecheck);
682:            default to copying on the GPU (this is an arbitrary choice) */
683: VecViennaCLGetArrayRead(xin,&xgpu);
684: VecViennaCLGetArrayWrite(yin,&ygpu);
685: try {
686: *ygpu = *xgpu;
687: ViennaCLWaitForGPU();
688: } catch(std::exception const & ex) {
689: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
690: }
691: VecViennaCLRestoreArrayRead(xin,&xgpu);
692: VecViennaCLRestoreArrayWrite(yin,&ygpu);
693: } else {
694: VecCopy_SeqViennaCL_Private(xin,yin);
695: ViennaCLWaitForGPU();
696: }
697: }
698: }
699: return(0);
700: }
703: PetscErrorCode VecSwap_SeqViennaCL(Vec xin,Vec yin)
704: {
706: ViennaCLVector *xgpu,*ygpu;
709: if (xin != yin && xin->map->n > 0) {
710: VecViennaCLGetArrayReadWrite(xin,&xgpu);
711: VecViennaCLGetArrayReadWrite(yin,&ygpu);
713: try {
714: viennacl::swap(*xgpu, *ygpu);
715: ViennaCLWaitForGPU();
716: } catch(std::exception const & ex) {
717: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
718: }
719: VecViennaCLRestoreArrayReadWrite(xin,&xgpu);
720: VecViennaCLRestoreArrayReadWrite(yin,&ygpu);
721: }
722: return(0);
723: }
726: // y = alpha * x + beta * y
727: PetscErrorCode VecAXPBY_SeqViennaCL(Vec yin,PetscScalar alpha,PetscScalar beta,Vec xin)
728: {
729: PetscErrorCode ierr;
730: PetscScalar a = alpha,b = beta;
731: const ViennaCLVector *xgpu;
732: ViennaCLVector *ygpu;
735: if (a == 0.0 && xin->map->n > 0) {
736: VecScale_SeqViennaCL(yin,beta);
737: } else if (b == 1.0 && xin->map->n > 0) {
738: VecAXPY_SeqViennaCL(yin,alpha,xin);
739: } else if (a == 1.0 && xin->map->n > 0) {
740: VecAYPX_SeqViennaCL(yin,beta,xin);
741: } else if (b == 0.0 && xin->map->n > 0) {
742: VecViennaCLGetArrayRead(xin,&xgpu);
743: VecViennaCLGetArrayReadWrite(yin,&ygpu);
744: try {
745: *ygpu = *xgpu * alpha;
746: ViennaCLWaitForGPU();
747: } catch(std::exception const & ex) {
748: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
749: }
750: PetscLogFlops(xin->map->n);
751: VecViennaCLRestoreArrayRead(xin,&xgpu);
752: VecViennaCLRestoreArrayReadWrite(yin,&ygpu);
753: } else if (xin->map->n > 0) {
754: VecViennaCLGetArrayRead(xin,&xgpu);
755: VecViennaCLGetArrayReadWrite(yin,&ygpu);
756: try {
757: *ygpu = *xgpu * alpha + *ygpu * beta;
758: ViennaCLWaitForGPU();
759: } catch(std::exception const & ex) {
760: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
761: }
762: VecViennaCLRestoreArrayRead(xin,&xgpu);
763: VecViennaCLRestoreArrayReadWrite(yin,&ygpu);
764: PetscLogFlops(3.0*xin->map->n);
765: }
766: return(0);
767: }
770: /* operation z = alpha * x + beta * y + gamma * z */
771: PetscErrorCode VecAXPBYPCZ_SeqViennaCL(Vec zin,PetscScalar alpha,PetscScalar beta,PetscScalar gamma,Vec xin,Vec yin)
772: {
773: PetscErrorCode ierr;
774: PetscInt n = zin->map->n;
775: const ViennaCLVector *xgpu,*ygpu;
776: ViennaCLVector *zgpu;
779: VecViennaCLGetArrayRead(xin,&xgpu);
780: VecViennaCLGetArrayRead(yin,&ygpu);
781: VecViennaCLGetArrayReadWrite(zin,&zgpu);
782: if (alpha == 0.0 && xin->map->n > 0) {
783: try {
784: if (beta == 0.0) {
785: *zgpu = gamma * *zgpu;
786: ViennaCLWaitForGPU();
787: PetscLogFlops(1.0*n);
788: } else if (gamma == 0.0) {
789: *zgpu = beta * *ygpu;
790: ViennaCLWaitForGPU();
791: PetscLogFlops(1.0*n);
792: } else {
793: *zgpu = beta * *ygpu + gamma * *zgpu;
794: ViennaCLWaitForGPU();
795: PetscLogFlops(3.0*n);
796: }
797: } catch(std::exception const & ex) {
798: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
799: }
801: } else if (beta == 0.0 && xin->map->n > 0) {
802: try {
803: if (gamma == 0.0) {
804: *zgpu = alpha * *xgpu;
805: ViennaCLWaitForGPU();
806: PetscLogFlops(1.0*n);
807: } else {
808: *zgpu = alpha * *xgpu + gamma * *zgpu;
809: ViennaCLWaitForGPU();
810: PetscLogFlops(3.0*n);
811: }
812: } catch(std::exception const & ex) {
813: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
814: }
815: } else if (gamma == 0.0 && xin->map->n > 0) {
816: try {
817: *zgpu = alpha * *xgpu + beta * *ygpu;
818: ViennaCLWaitForGPU();
819: } catch(std::exception const & ex) {
820: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
821: }
822: PetscLogFlops(3.0*n);
823: } else if (xin->map->n > 0) {
824: try {
825: /* Split operation into two steps. This is not completely ideal, but avoids temporaries (which are far worse) */
826: if (gamma != 1.0)
827: *zgpu *= gamma;
828: *zgpu += alpha * *xgpu + beta * *ygpu;
829: ViennaCLWaitForGPU();
830: } catch(std::exception const & ex) {
831: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
832: }
833:     PetscLogFlops(5.0*n);
834:   }
835:   VecViennaCLRestoreArrayReadWrite(zin,&zgpu);  /* restore on every path so zin's offload flag and object state are updated */
836:   VecViennaCLRestoreArrayRead(xin,&xgpu);
837:   VecViennaCLRestoreArrayRead(yin,&ygpu);
838:   return(0);
839: }
841: PetscErrorCode VecPointwiseMult_SeqViennaCL(Vec win,Vec xin,Vec yin)
842: {
843: PetscErrorCode ierr;
844: PetscInt n = win->map->n;
845: const ViennaCLVector *xgpu,*ygpu;
846: ViennaCLVector *wgpu;
849: if (xin->map->n > 0) {
850: VecViennaCLGetArrayRead(xin,&xgpu);
851: VecViennaCLGetArrayRead(yin,&ygpu);
852: VecViennaCLGetArrayReadWrite(win,&wgpu);
853: try {
854: *wgpu = viennacl::linalg::element_prod(*xgpu, *ygpu);
855: ViennaCLWaitForGPU();
856: } catch(std::exception const & ex) {
857: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
858: }
859: VecViennaCLRestoreArrayRead(xin,&xgpu);
860: VecViennaCLRestoreArrayRead(yin,&ygpu);
861: VecViennaCLRestoreArrayReadWrite(win,&wgpu);
862: PetscLogFlops(n);
863: }
864: return(0);
865: }
868: PetscErrorCode VecNorm_SeqViennaCL(Vec xin,NormType type,PetscReal *z)
869: {
870: PetscErrorCode ierr;
871: PetscInt n = xin->map->n;
872: PetscBLASInt bn;
873: const ViennaCLVector *xgpu;
876: if (xin->map->n > 0) {
877: PetscBLASIntCast(n,&bn);
878: VecViennaCLGetArrayRead(xin,&xgpu);
879: if (type == NORM_2 || type == NORM_FROBENIUS) {
880: try {
881: *z = viennacl::linalg::norm_2(*xgpu);
882: ViennaCLWaitForGPU();
883: } catch(std::exception const & ex) {
884: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
885: }
886: PetscLogFlops(PetscMax(2.0*n-1,0.0));
887: } else if (type == NORM_INFINITY) {
888: VecViennaCLGetArrayRead(xin,&xgpu);
889: try {
890: *z = viennacl::linalg::norm_inf(*xgpu);
891: ViennaCLWaitForGPU();
892: } catch(std::exception const & ex) {
893: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
894: }
895: VecViennaCLRestoreArrayRead(xin,&xgpu);
896: } else if (type == NORM_1) {
897: try {
898: *z = viennacl::linalg::norm_1(*xgpu);
899: ViennaCLWaitForGPU();
900: } catch(std::exception const & ex) {
901: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
902: }
903: PetscLogFlops(PetscMax(n-1.0,0.0));
904: } else if (type == NORM_1_AND_2) {
905: try {
906: *z = viennacl::linalg::norm_1(*xgpu);
907: *(z+1) = viennacl::linalg::norm_2(*xgpu);
908: ViennaCLWaitForGPU();
909: } catch(std::exception const & ex) {
910: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
911: }
912: PetscLogFlops(PetscMax(2.0*n-1,0.0));
913: PetscLogFlops(PetscMax(n-1.0,0.0));
914: }
915: VecViennaCLRestoreArrayRead(xin,&xgpu);
916: } else if (type == NORM_1_AND_2) {
917: *z = 0.0;
918: *(z+1) = 0.0;
919: } else *z = 0.0;
920: return(0);
921: }
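As the NORM_1_AND_2 branch above shows, z must point to two PetscReal entries in that case. A hedged usage sketch (hypothetical helper):

static PetscErrorCode MyNorm12(Vec x, PetscReal *n1, PetscReal *n2)
{
  PetscErrorCode ierr;
  PetscReal      z[2];

  ierr = VecNorm(x,NORM_1_AND_2,z);CHKERRQ(ierr);   /* z[0] = 1-norm, z[1] = 2-norm */
  *n1 = z[0];
  *n2 = z[1];
  return(0);
}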
924: PetscErrorCode VecSetRandom_SeqViennaCL(Vec xin,PetscRandom r)
925: {
929: VecSetRandom_SeqViennaCL_Private(xin,r);
930: xin->valid_GPU_array = PETSC_OFFLOAD_CPU;
931: return(0);
932: }
934: PetscErrorCode VecResetArray_SeqViennaCL(Vec vin)
935: {
940: VecViennaCLCopyFromGPU(vin);
941: VecResetArray_SeqViennaCL_Private(vin);
942: vin->valid_GPU_array = PETSC_OFFLOAD_CPU;
943: return(0);
944: }
946: PetscErrorCode VecPlaceArray_SeqViennaCL(Vec vin,const PetscScalar *a)
947: {
952: VecViennaCLCopyFromGPU(vin);
953: VecPlaceArray_Seq(vin,a);
954: vin->valid_GPU_array = PETSC_OFFLOAD_CPU;
955: return(0);
956: }
958: PetscErrorCode VecReplaceArray_SeqViennaCL(Vec vin,const PetscScalar *a)
959: {
964: VecViennaCLCopyFromGPU(vin);
965: VecReplaceArray_Seq(vin,a);
966: vin->valid_GPU_array = PETSC_OFFLOAD_CPU;
967: return(0);
968: }
971: /*@
972: VecCreateSeqViennaCL - Creates a standard, sequential array-style vector.
974: Collective on MPI_Comm
976: Input Parameter:
977: + comm - the communicator, should be PETSC_COMM_SELF
978: - n - the vector length
980: Output Parameter:
981: . V - the vector
983: Notes:
984: Use VecDuplicate() or VecDuplicateVecs() to form additional vectors of the
985: same type as an existing vector.
987: Level: intermediate
989: Concepts: vectors^creating sequential
991: .seealso: VecCreateMPI(), VecCreate(), VecDuplicate(), VecDuplicateVecs(), VecCreateGhost()
992: @*/
993: PetscErrorCode VecCreateSeqViennaCL(MPI_Comm comm,PetscInt n,Vec *v)
994: {
998: VecCreate(comm,v);
999: VecSetSizes(*v,n,n);
1000: VecSetType(*v,VECSEQVIENNACL);
1001: return(0);
1002: }
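Usage sketch for the creation routine documented above (the helper name is hypothetical):

static PetscErrorCode MyMakeOnes(PetscInt n, Vec *v)
{
  PetscErrorCode ierr;

  ierr = VecCreateSeqViennaCL(PETSC_COMM_SELF,n,v);CHKERRQ(ierr);
  ierr = VecSet(*v,1.0);CHKERRQ(ierr);   /* handled by VecSet_SeqViennaCL on the selected backend */
  return(0);
}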
1005: /* VecDotNorm2 - computes the inner product of two vectors and the 2-norm squared of the second vector
1006: *
1007:  * Simply reuses VecDot() and VecNorm(); a performance improvement would be possible with a custom (generated) kernel.
1008: */
1009: PetscErrorCode VecDotNorm2_SeqViennaCL(Vec s, Vec t, PetscScalar *dp, PetscScalar *nm)
1010: {
1011: PetscErrorCode ierr;
1014: VecDot_SeqViennaCL(s,t,dp);
1015: VecNorm_SeqViennaCL(t,NORM_2,nm);
1016: *nm *= *nm; //squared norm required
1017: return(0);
1018: }
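So, up to rounding, the two outputs satisfy dp = s.t and nm = ||t||_2^2. A hedged consistency check (hypothetical helper):

static PetscErrorCode MyCheckDotNorm2(Vec s, Vec t)
{
  PetscErrorCode ierr;
  PetscScalar    dp,nm;
  PetscReal      nrm;

  ierr = VecDotNorm2(s,t,&dp,&nm);CHKERRQ(ierr);
  ierr = VecNorm(t,NORM_2,&nrm);CHKERRQ(ierr);
  /* expect PetscAbsScalar(nm - nrm*nrm) to be small relative to nm */
  return(0);
}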
1020: PetscErrorCode VecDuplicate_SeqViennaCL(Vec win,Vec *V)
1021: {
1025: VecCreateSeqViennaCL(PetscObjectComm((PetscObject)win),win->map->n,V);
1026: PetscLayoutReference(win->map,&(*V)->map);
1027: PetscObjectListDuplicate(((PetscObject)win)->olist,&((PetscObject)(*V))->olist);
1028: PetscFunctionListDuplicate(((PetscObject)win)->qlist,&((PetscObject)(*V))->qlist);
1029: (*V)->stash.ignorenegidx = win->stash.ignorenegidx;
1030: return(0);
1031: }
1033: PetscErrorCode VecDestroy_SeqViennaCL(Vec v)
1034: {
1038: try {
1039: if (v->spptr) {
1040: delete ((Vec_ViennaCL*)v->spptr)->GPUarray;
1041: delete (Vec_ViennaCL*) v->spptr;
1042: }
1043:   } catch(std::exception const & ex) {
1044:     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
1045: }
1046: VecDestroy_SeqViennaCL_Private(v);
1047: return(0);
1048: }
1051: PETSC_EXTERN PetscErrorCode VecCreate_SeqViennaCL(Vec V)
1052: {
1054: PetscMPIInt size;
1057: MPI_Comm_size(PetscObjectComm((PetscObject)V),&size);
1058: if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot create VECSEQVIENNACL on more than one process");
1059: VecCreate_Seq_Private(V,0);
1060: PetscObjectChangeTypeName((PetscObject)V,VECSEQVIENNACL);
1062: V->ops->dot = VecDot_SeqViennaCL;
1063: V->ops->norm = VecNorm_SeqViennaCL;
1064: V->ops->tdot = VecTDot_SeqViennaCL;
1065: V->ops->scale = VecScale_SeqViennaCL;
1066: V->ops->copy = VecCopy_SeqViennaCL;
1067: V->ops->set = VecSet_SeqViennaCL;
1068: V->ops->swap = VecSwap_SeqViennaCL;
1069: V->ops->axpy = VecAXPY_SeqViennaCL;
1070: V->ops->axpby = VecAXPBY_SeqViennaCL;
1071: V->ops->axpbypcz = VecAXPBYPCZ_SeqViennaCL;
1072: V->ops->pointwisemult = VecPointwiseMult_SeqViennaCL;
1073: V->ops->pointwisedivide = VecPointwiseDivide_SeqViennaCL;
1074: V->ops->setrandom = VecSetRandom_SeqViennaCL;
1075: V->ops->dot_local = VecDot_SeqViennaCL;
1076: V->ops->tdot_local = VecTDot_SeqViennaCL;
1077: V->ops->norm_local = VecNorm_SeqViennaCL;
1078: V->ops->mdot_local = VecMDot_SeqViennaCL;
1079: V->ops->mtdot_local = VecMTDot_SeqViennaCL;
1080: V->ops->maxpy = VecMAXPY_SeqViennaCL;
1081: V->ops->mdot = VecMDot_SeqViennaCL;
1082: V->ops->mtdot = VecMTDot_SeqViennaCL;
1083: V->ops->aypx = VecAYPX_SeqViennaCL;
1084: V->ops->waxpy = VecWAXPY_SeqViennaCL;
1085: V->ops->dotnorm2 = VecDotNorm2_SeqViennaCL;
1086: V->ops->placearray = VecPlaceArray_SeqViennaCL;
1087: V->ops->replacearray = VecReplaceArray_SeqViennaCL;
1088: V->ops->resetarray = VecResetArray_SeqViennaCL;
1089: V->ops->destroy = VecDestroy_SeqViennaCL;
1090: V->ops->duplicate = VecDuplicate_SeqViennaCL;
1092: VecViennaCLAllocateCheck(V);
1093: V->valid_GPU_array = PETSC_OFFLOAD_GPU;
1094: VecSet(V,0.0);
1095: return(0);
1096: }
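Because the ops table above is filled in before the vector is handed back, ordinary PETSc calls on a VECSEQVIENNACL vector dispatch to the routines in this file; for example (a hypothetical fragment, not code from PETSc):

/* VecAXPY(y,2.0,x)        -> VecAXPY_SeqViennaCL(), executed by the selected ViennaCL backend
   VecNorm(x,NORM_2,&nrm)  -> VecNorm_SeqViennaCL()
   VecDuplicate(x,&z)      -> VecDuplicate_SeqViennaCL(), producing another VECSEQVIENNACL vector */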