Actual source code: bvec1.c

petsc-3.3-p6 2013-02-11
  2: /*
  3:    Defines the BLAS based vector operations. Code shared by parallel
  4:   and sequential vectors.
  5: */

  7: #include <petsc-private/vecimpl.h> 
  8: #include <../src/vec/vec/impls/dvecimpl.h> 
  9: #include <petscblaslapack.h>
 10: #include <petscthreadcomm.h>

 12: #if defined(PETSC_THREADCOMM_ACTIVE)
 13: PetscErrorCode VecDot_kernel(PetscInt thread_id,Vec xin,Vec yin,PetscThreadCommRedCtx red)
 14: {
 15:   PetscErrorCode       ierr;
 16:   PetscInt             *trstarts=xin->map->trstarts;
 17:   PetscInt             start,end,n;
 18:   PetscBLASInt         one = 1,bn;
 19:   const PetscScalar    *xa,*ya;
 20:   PetscScalar          z_loc;

 22:   VecGetArrayRead(xin,&xa);
 23:   VecGetArrayRead(yin,&ya);
 24:   start = trstarts[thread_id];
 25:   end   = trstarts[thread_id+1];
 26:   n     = end-start;
 27:   bn = PetscBLASIntCast(n);
 28:   /* arguments ya, xa are reversed because BLAS complex conjugates the first argument, PETSc the second */
 29:   z_loc = BLASdot_(&bn,ya+start,&one,xa+start,&one);

 31:   PetscThreadReductionKernelBegin(thread_id,red,(void*)&z_loc);

 33:   VecRestoreArrayRead(xin,&xa);
 34:   VecRestoreArrayRead(yin,&ya);
 35:   return 0;
 36: }

 40: PetscErrorCode VecDot_Seq(Vec xin,Vec yin,PetscScalar *z)
 41: {
 42:   PetscErrorCode        ierr;
 43:   PetscThreadCommRedCtx red;

 46:   PetscThreadReductionBegin(((PetscObject)xin)->comm,THREADCOMM_SUM,PETSC_SCALAR,&red);
 47:   PetscThreadCommRunKernel(((PetscObject)xin)->comm,(PetscThreadKernel)VecDot_kernel,3,xin,yin,red);
 48:   PetscThreadReductionEnd(red,z);
 49:   if (xin->map->n > 0) {
 50:     PetscLogFlops(2.0*xin->map->n-1);
 51:   }
 52:   return(0);
 53: }
 54: #else
 57: PetscErrorCode VecDot_Seq(Vec xin,Vec yin,PetscScalar *z)
 58: {
 59:   const PetscScalar *ya,*xa;
 60:   PetscBLASInt      one = 1,bn;
 61:   PetscErrorCode    ierr;

 64:   bn = PetscBLASIntCast(xin->map->n);
 65:   VecGetArrayRead(xin,&xa);
 66:   VecGetArrayRead(yin,&ya);
 67:   /* arguments ya, xa are reversed because BLAS complex conjugates the first argument, PETSc the second */
 68:   *z = BLASdot_(&bn,ya,&one,xa,&one);
 69:   VecRestoreArrayRead(xin,&xa);
 70:   VecRestoreArrayRead(yin,&ya);
 71:   if (xin->map->n > 0) {
 72:     PetscLogFlops(2.0*xin->map->n-1);
 73:   }
 74:   return(0);
 75: }
 76: #endif

 80: PetscErrorCode VecTDot_Seq(Vec xin,Vec yin,PetscScalar *z)
 81: {
 82:   const PetscScalar *ya,*xa;
 83:   PetscBLASInt      one = 1,bn;
 84:   PetscErrorCode    ierr;

 87:   bn = PetscBLASIntCast(xin->map->n);
 88:   VecGetArrayRead(xin,&xa);
 89:   VecGetArrayRead(yin,&ya);
 90:   *z = BLASdotu_(&bn,xa,&one,ya,&one);
 91:   VecRestoreArrayRead(xin,&xa);
 92:   VecRestoreArrayRead(yin,&ya);
 93:   if (xin->map->n > 0) {
 94:     PetscLogFlops(2.0*xin->map->n-1);
 95:   }
 96:   return(0);
 97: }

101: PetscErrorCode VecScale_Seq(Vec xin, PetscScalar alpha)
102: {
104:   PetscBLASInt   one = 1,bn = PetscBLASIntCast(xin->map->n);

107:   if (alpha == (PetscScalar)0.0) {
108:     VecSet_Seq(xin,alpha);
109:   } else if (alpha != (PetscScalar)1.0) {
110:     PetscScalar a = alpha,*xarray;
111:     VecGetArray(xin,&xarray);
112:     BLASscal_(&bn,&a,xarray,&one);
113:     VecRestoreArray(xin,&xarray);
114:   }
115:   PetscLogFlops(xin->map->n);
116:   return(0);
117: }

119: #if defined(PETSC_THREADCOMM_ACTIVE)
120: PetscErrorCode VecAXPY_kernel(PetscInt thread_id,Vec yin,PetscScalar *alpha_p,Vec xin)
121: {
122:   PetscErrorCode    ierr;
123:   const PetscScalar *xarray;
124:   PetscScalar       *yarray;
125:   PetscBLASInt      one=1,bn;
126:   PetscInt          *trstarts=yin->map->trstarts,start,end,n;
127:   PetscScalar       alpha = *alpha_p;

129:   start = trstarts[thread_id];
130:   end   = trstarts[thread_id+1];
131:   n     = end - start;
132:   bn = PetscBLASIntCast(n);
133:   VecGetArrayRead(xin,&xarray);
134:   VecGetArray(yin,&yarray);
135:   BLASaxpy_(&bn,&alpha,xarray+start,&one,yarray+start,&one);
136:   VecRestoreArrayRead(xin,&xarray);
137:   VecRestoreArray(yin,&yarray);

139:   return 0;
140: }
143: PetscErrorCode VecAXPY_Seq(Vec yin,PetscScalar alpha,Vec xin)
144: {
145:   PetscErrorCode    ierr;

148:   /* assume that the BLAS handles alpha == 1.0 efficiently since we have no fast code for it */
149:   if (alpha != (PetscScalar)0.0) {
150:     PetscScalar *scalar;
151:     PetscThreadCommGetScalars(((PetscObject)yin)->comm,&scalar,PETSC_NULL,PETSC_NULL);
152:     *scalar = alpha;
153:     PetscThreadCommRunKernel(((PetscObject)yin)->comm,(PetscThreadKernel)VecAXPY_kernel,3,yin,scalar,xin);
154:     PetscLogFlops(2.0*yin->map->n);
155:   }
156:   return(0);
157: }
158: #else
161: PetscErrorCode VecAXPY_Seq(Vec yin,PetscScalar alpha,Vec xin)
162: {
163:   PetscErrorCode    ierr;
164:   const PetscScalar *xarray;
165:   PetscScalar       *yarray;
166:   PetscBLASInt      one = 1,bn = PetscBLASIntCast(yin->map->n);

169:   /* assume that the BLAS handles alpha == 1.0 efficiently since we have no fast code for it */
170:   if (alpha != (PetscScalar)0.0) {
171:     VecGetArrayRead(xin,&xarray);
172:     VecGetArray(yin,&yarray);
173:     BLASaxpy_(&bn,&alpha,xarray,&one,yarray,&one);
174:     VecRestoreArrayRead(xin,&xarray);
175:     VecRestoreArray(yin,&yarray);
176:     PetscLogFlops(2.0*yin->map->n);
177:   }
178:   return(0);
179: }
180: #endif

184: PetscErrorCode VecAXPBY_Seq(Vec yin,PetscScalar alpha,PetscScalar beta,Vec xin)
185: {
186:   PetscErrorCode    ierr;
187:   PetscInt          n = yin->map->n,i;
188:   const PetscScalar *xx;
189:   PetscScalar       *yy,a = alpha,b = beta;

192:   if (a == (PetscScalar)0.0) {
193:     VecScale_Seq(yin,beta);
194:   } else if (b == (PetscScalar)1.0) {
195:     VecAXPY_Seq(yin,alpha,xin);
196:   } else if (a == (PetscScalar)1.0) {
197:     VecAYPX_Seq(yin,beta,xin);
198:   } else if (b == (PetscScalar)0.0) {
199:     VecGetArrayRead(xin,&xx);
200:     VecGetArray(yin,(PetscScalar**)&yy);
201:     for (i=0; i<n; i++) {
202:       yy[i] = a*xx[i];
203:     }
204:     VecRestoreArrayRead(xin,&xx);
205:     VecRestoreArray(yin,(PetscScalar**)&yy);
206:     PetscLogFlops(xin->map->n);
207:   } else {
208:     VecGetArrayRead(xin,&xx);
209:     VecGetArray(yin,(PetscScalar**)&yy);
210:     for (i=0; i<n; i++) {
211:       yy[i] = a*xx[i] + b*yy[i];
212:     }
213:     VecRestoreArrayRead(xin,&xx);
214:     VecRestoreArray(yin,(PetscScalar**)&yy);
215:     PetscLogFlops(3.0*xin->map->n);
216:   }
217:   return(0);
218: }

222: PetscErrorCode VecAXPBYPCZ_Seq(Vec zin,PetscScalar alpha,PetscScalar beta,PetscScalar gamma,Vec xin,Vec yin)
223: {
224:   PetscErrorCode     ierr;
225:   PetscInt           n = zin->map->n,i;
226:   const PetscScalar  *yy,*xx;
227:   PetscScalar        *zz;

230:   VecGetArrayRead(xin,&xx);
231:   VecGetArrayRead(yin,&yy);
232:   VecGetArray(zin,&zz);
233:   if (alpha == (PetscScalar)1.0) {
234:     for (i=0; i<n; i++) {
235:       zz[i] = xx[i] + beta*yy[i] + gamma*zz[i];
236:     }
237:     PetscLogFlops(4.0*n);
238:   } else if (gamma == (PetscScalar)1.0) {
239:     for (i=0; i<n; i++) {
240:       zz[i] = alpha*xx[i] + beta*yy[i] + zz[i];
241:     }
242:     PetscLogFlops(4.0*n);
243:   } else if (gamma == (PetscScalar)0.0) {
244:     for (i=0; i<n; i++) {
245:       zz[i] = alpha*xx[i] + beta*yy[i];
246:     }
247:     PetscLogFlops(3.0*n);
248:   } else {
249:     for (i=0; i<n; i++) {
250:       zz[i] = alpha*xx[i] + beta*yy[i] + gamma*zz[i];
251:     }
252:     PetscLogFlops(5.0*n);
253:   }
254:   VecRestoreArrayRead(xin,&xx);
255:   VecRestoreArrayRead(yin,&yy);
256:   VecRestoreArray(zin,&zz);
257:   return(0);
258: }