Actual source code: bvec1.c
petsc-3.11.4 2019-09-28
2: /*
3: Defines the BLAS based vector operations. Code shared by parallel
4: and sequential vectors.
5: */
7: #include <../src/vec/vec/impls/dvecimpl.h>
8: #include <petscblaslapack.h>
10: PetscErrorCode VecDot_Seq(Vec xin,Vec yin,PetscScalar *z)
11: {
12: const PetscScalar *ya,*xa;
13: PetscBLASInt one = 1,bn;
14: PetscErrorCode ierr;
17: PetscBLASIntCast(xin->map->n,&bn);
18: VecGetArrayRead(xin,&xa);
19: VecGetArrayRead(yin,&ya);
20: /* arguments ya, xa are reversed because BLAS complex conjugates the first argument, PETSc the second */
21: PetscStackCallBLAS("BLASdot",*z = BLASdot_(&bn,ya,&one,xa,&one));
22: VecRestoreArrayRead(xin,&xa);
23: VecRestoreArrayRead(yin,&ya);
24: if (xin->map->n > 0) {
25: PetscLogFlops(2.0*xin->map->n-1);
26: }
27: return(0);
28: }
30: PetscErrorCode VecTDot_Seq(Vec xin,Vec yin,PetscScalar *z)
31: {
32: const PetscScalar *ya,*xa;
33: PetscBLASInt one = 1,bn;
34: PetscErrorCode ierr;
37: PetscBLASIntCast(xin->map->n,&bn);
38: VecGetArrayRead(xin,&xa);
39: VecGetArrayRead(yin,&ya);
40: PetscStackCallBLAS("BLASdot",*z = BLASdotu_(&bn,xa,&one,ya,&one));
41: VecRestoreArrayRead(xin,&xa);
42: VecRestoreArrayRead(yin,&ya);
43: if (xin->map->n > 0) {
44: PetscLogFlops(2.0*xin->map->n-1);
45: }
46: return(0);
47: }
49: PetscErrorCode VecScale_Seq(Vec xin, PetscScalar alpha)
50: {
52: PetscBLASInt one = 1,bn;
55: PetscBLASIntCast(xin->map->n,&bn);
56: if (alpha == (PetscScalar)0.0) {
57: VecSet_Seq(xin,alpha);
58: } else if (alpha != (PetscScalar)1.0) {
59: PetscScalar a = alpha,*xarray;
60: VecGetArray(xin,&xarray);
61: PetscStackCallBLAS("BLASscal",BLASscal_(&bn,&a,xarray,&one));
62: VecRestoreArray(xin,&xarray);
63: }
64: PetscLogFlops(xin->map->n);
65: return(0);
66: }
68: PetscErrorCode VecAXPY_Seq(Vec yin,PetscScalar alpha,Vec xin)
69: {
70: PetscErrorCode ierr;
71: const PetscScalar *xarray;
72: PetscScalar *yarray;
73: PetscBLASInt one = 1,bn;
76: PetscBLASIntCast(yin->map->n,&bn);
77: /* assume that the BLAS handles alpha == 1.0 efficiently since we have no fast code for it */
78: if (alpha != (PetscScalar)0.0) {
79: VecGetArrayRead(xin,&xarray);
80: VecGetArray(yin,&yarray);
81: PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bn,&alpha,xarray,&one,yarray,&one));
82: VecRestoreArrayRead(xin,&xarray);
83: VecRestoreArray(yin,&yarray);
84: PetscLogFlops(2.0*yin->map->n);
85: }
86: return(0);
87: }
89: PetscErrorCode VecAXPBY_Seq(Vec yin,PetscScalar alpha,PetscScalar beta,Vec xin)
90: {
91: PetscErrorCode ierr;
92: PetscInt n = yin->map->n,i;
93: const PetscScalar *xx;
94: PetscScalar *yy,a = alpha,b = beta;
97: if (a == (PetscScalar)0.0) {
98: VecScale_Seq(yin,beta);
99: } else if (b == (PetscScalar)1.0) {
100: VecAXPY_Seq(yin,alpha,xin);
101: } else if (a == (PetscScalar)1.0) {
102: VecAYPX_Seq(yin,beta,xin);
103: } else if (b == (PetscScalar)0.0) {
104: VecGetArrayRead(xin,&xx);
105: VecGetArray(yin,(PetscScalar**)&yy);
107: for (i=0; i<n; i++) yy[i] = a*xx[i];
109: VecRestoreArrayRead(xin,&xx);
110: VecRestoreArray(yin,(PetscScalar**)&yy);
111: PetscLogFlops(xin->map->n);
112: } else {
113: VecGetArrayRead(xin,&xx);
114: VecGetArray(yin,(PetscScalar**)&yy);
116: for (i=0; i<n; i++) yy[i] = a*xx[i] + b*yy[i];
118: VecRestoreArrayRead(xin,&xx);
119: VecRestoreArray(yin,(PetscScalar**)&yy);
120: PetscLogFlops(3.0*xin->map->n);
121: }
122: return(0);
123: }
125: PetscErrorCode VecAXPBYPCZ_Seq(Vec zin,PetscScalar alpha,PetscScalar beta,PetscScalar gamma,Vec xin,Vec yin)
126: {
127: PetscErrorCode ierr;
128: PetscInt n = zin->map->n,i;
129: const PetscScalar *yy,*xx;
130: PetscScalar *zz;
133: VecGetArrayRead(xin,&xx);
134: VecGetArrayRead(yin,&yy);
135: VecGetArray(zin,&zz);
136: if (alpha == (PetscScalar)1.0) {
137: for (i=0; i<n; i++) zz[i] = xx[i] + beta*yy[i] + gamma*zz[i];
138: PetscLogFlops(4.0*n);
139: } else if (gamma == (PetscScalar)1.0) {
140: for (i=0; i<n; i++) zz[i] = alpha*xx[i] + beta*yy[i] + zz[i];
141: PetscLogFlops(4.0*n);
142: } else if (gamma == (PetscScalar)0.0) {
143: for (i=0; i<n; i++) zz[i] = alpha*xx[i] + beta*yy[i];
144: PetscLogFlops(3.0*n);
145: } else {
146: for (i=0; i<n; i++) zz[i] = alpha*xx[i] + beta*yy[i] + gamma*zz[i];
147: PetscLogFlops(5.0*n);
148: }
149: VecRestoreArrayRead(xin,&xx);
150: VecRestoreArrayRead(yin,&yy);
151: VecRestoreArray(zin,&zz);
152: return(0);
153: }