Actual source code: dvec2.c

petsc-3.9.4 2018-09-11
Report Typos and Errors

  2: /*
  3:    Defines some vector operation functions that are shared by
  4:    sequential and parallel vectors.
  5: */
  6:  #include <../src/vec/vec/impls/dvecimpl.h>
  7:  #include <petsc/private/kernels/petscaxpy.h>



 11: #if defined(PETSC_USE_FORTRAN_KERNEL_MDOT)
 12: #include <../src/vec/vec/impls/seq/ftn-kernels/fmdot.h>
 13: PetscErrorCode VecMDot_Seq(Vec xin,PetscInt nv,const Vec yin[],PetscScalar *z)
 14: {
 15:   PetscErrorCode    ierr;
 16:   PetscInt          i,nv_rem,n = xin->map->n;
 17:   PetscScalar       sum0,sum1,sum2,sum3;
 18:   const PetscScalar *yy0,*yy1,*yy2,*yy3,*x;
 19:   Vec               *yy;

 22:   sum0 = 0.0;
 23:   sum1 = 0.0;
 24:   sum2 = 0.0;

 26:   i      = nv;
 27:   nv_rem = nv&0x3;
 28:   yy     = (Vec*)yin;
 29:   VecGetArrayRead(xin,&x);

 31:   switch (nv_rem) {
 32:   case 3:
 33:     VecGetArrayRead(yy[0],&yy0);
 34:     VecGetArrayRead(yy[1],&yy1);
 35:     VecGetArrayRead(yy[2],&yy2);
 36:     fortranmdot3_(x,yy0,yy1,yy2,&n,&sum0,&sum1,&sum2);
 37:     VecRestoreArrayRead(yy[0],&yy0);
 38:     VecRestoreArrayRead(yy[1],&yy1);
 39:     VecRestoreArrayRead(yy[2],&yy2);
 40:     z[0] = sum0;
 41:     z[1] = sum1;
 42:     z[2] = sum2;
 43:     break;
 44:   case 2:
 45:     VecGetArrayRead(yy[0],&yy0);
 46:     VecGetArrayRead(yy[1],&yy1);
 47:     fortranmdot2_(x,yy0,yy1,&n,&sum0,&sum1);
 48:     VecRestoreArrayRead(yy[0],&yy0);
 49:     VecRestoreArrayRead(yy[1],&yy1);
 50:     z[0] = sum0;
 51:     z[1] = sum1;
 52:     break;
 53:   case 1:
 54:     VecGetArrayRead(yy[0],&yy0);
 55:     fortranmdot1_(x,yy0,&n,&sum0);
 56:     VecRestoreArrayRead(yy[0],&yy0);
 57:     z[0] = sum0;
 58:     break;
 59:   case 0:
 60:     break;
 61:   }
 62:   z  += nv_rem;
 63:   i  -= nv_rem;
 64:   yy += nv_rem;

 66:   while (i >0) {
 67:     sum0 = 0.;
 68:     sum1 = 0.;
 69:     sum2 = 0.;
 70:     sum3 = 0.;
 71:     VecGetArrayRead(yy[0],&yy0);
 72:     VecGetArrayRead(yy[1],&yy1);
 73:     VecGetArrayRead(yy[2],&yy2);
 74:     VecGetArrayRead(yy[3],&yy3);
 75:     fortranmdot4_(x,yy0,yy1,yy2,yy3,&n,&sum0,&sum1,&sum2,&sum3);
 76:     VecRestoreArrayRead(yy[0],&yy0);
 77:     VecRestoreArrayRead(yy[1],&yy1);
 78:     VecRestoreArrayRead(yy[2],&yy2);
 79:     VecRestoreArrayRead(yy[3],&yy3);
 80:     yy  += 4;
 81:     z[0] = sum0;
 82:     z[1] = sum1;
 83:     z[2] = sum2;
 84:     z[3] = sum3;
 85:     z   += 4;
 86:     i   -= 4;
 87:   }
 88:   VecRestoreArrayRead(xin,&x);
 89:   PetscLogFlops(PetscMax(nv*(2.0*xin->map->n-1),0.0));
 90:   return(0);
 91: }

 93: #else
 94: PetscErrorCode VecMDot_Seq(Vec xin,PetscInt nv,const Vec yin[],PetscScalar *z)
 95: {
 96:   PetscErrorCode    ierr;
 97:   PetscInt          n = xin->map->n,i,j,nv_rem,j_rem;
 98:   PetscScalar       sum0,sum1,sum2,sum3,x0,x1,x2,x3;
 99:   const PetscScalar *yy0,*yy1,*yy2,*yy3,*x,*xbase;
100:   Vec               *yy;

103:   sum0 = 0.;
104:   sum1 = 0.;
105:   sum2 = 0.;

107:   i      = nv;
108:   nv_rem = nv&0x3;
109:   yy     = (Vec*)yin;
110:   j      = n;
111:   VecGetArrayRead(xin,&xbase);
112:   x      = xbase;

114:   switch (nv_rem) {
115:   case 3:
116:     VecGetArrayRead(yy[0],&yy0);
117:     VecGetArrayRead(yy[1],&yy1);
118:     VecGetArrayRead(yy[2],&yy2);
119:     switch (j_rem=j&0x3) {
120:     case 3:
121:       x2    = x[2];
122:       sum0 += x2*PetscConj(yy0[2]); sum1 += x2*PetscConj(yy1[2]);
123:       sum2 += x2*PetscConj(yy2[2]);
124:     case 2:
125:       x1    = x[1];
126:       sum0 += x1*PetscConj(yy0[1]); sum1 += x1*PetscConj(yy1[1]);
127:       sum2 += x1*PetscConj(yy2[1]);
128:     case 1:
129:       x0    = x[0];
130:       sum0 += x0*PetscConj(yy0[0]); sum1 += x0*PetscConj(yy1[0]);
131:       sum2 += x0*PetscConj(yy2[0]);
132:     case 0:
133:       x   += j_rem;
134:       yy0 += j_rem;
135:       yy1 += j_rem;
136:       yy2 += j_rem;
137:       j   -= j_rem;
138:       break;
139:     }
140:     while (j>0) {
141:       x0 = x[0];
142:       x1 = x[1];
143:       x2 = x[2];
144:       x3 = x[3];
145:       x += 4;

147:       sum0 += x0*PetscConj(yy0[0]) + x1*PetscConj(yy0[1]) + x2*PetscConj(yy0[2]) + x3*PetscConj(yy0[3]); yy0+=4;
148:       sum1 += x0*PetscConj(yy1[0]) + x1*PetscConj(yy1[1]) + x2*PetscConj(yy1[2]) + x3*PetscConj(yy1[3]); yy1+=4;
149:       sum2 += x0*PetscConj(yy2[0]) + x1*PetscConj(yy2[1]) + x2*PetscConj(yy2[2]) + x3*PetscConj(yy2[3]); yy2+=4;
150:       j    -= 4;
151:     }
152:     z[0] = sum0;
153:     z[1] = sum1;
154:     z[2] = sum2;
155:     VecRestoreArrayRead(yy[0],&yy0);
156:     VecRestoreArrayRead(yy[1],&yy1);
157:     VecRestoreArrayRead(yy[2],&yy2);
158:     break;
159:   case 2:
160:     VecGetArrayRead(yy[0],&yy0);
161:     VecGetArrayRead(yy[1],&yy1);
162:     switch (j_rem=j&0x3) {
163:     case 3:
164:       x2    = x[2];
165:       sum0 += x2*PetscConj(yy0[2]); sum1 += x2*PetscConj(yy1[2]);
166:     case 2:
167:       x1    = x[1];
168:       sum0 += x1*PetscConj(yy0[1]); sum1 += x1*PetscConj(yy1[1]);
169:     case 1:
170:       x0    = x[0];
171:       sum0 += x0*PetscConj(yy0[0]); sum1 += x0*PetscConj(yy1[0]);
172:     case 0:
173:       x   += j_rem;
174:       yy0 += j_rem;
175:       yy1 += j_rem;
176:       j   -= j_rem;
177:       break;
178:     }
179:     while (j>0) {
180:       x0 = x[0];
181:       x1 = x[1];
182:       x2 = x[2];
183:       x3 = x[3];
184:       x += 4;

186:       sum0 += x0*PetscConj(yy0[0]) + x1*PetscConj(yy0[1]) + x2*PetscConj(yy0[2]) + x3*PetscConj(yy0[3]); yy0+=4;
187:       sum1 += x0*PetscConj(yy1[0]) + x1*PetscConj(yy1[1]) + x2*PetscConj(yy1[2]) + x3*PetscConj(yy1[3]); yy1+=4;
188:       j    -= 4;
189:     }
190:     z[0] = sum0;
191:     z[1] = sum1;

193:     VecRestoreArrayRead(yy[0],&yy0);
194:     VecRestoreArrayRead(yy[1],&yy1);
195:     break;
196:   case 1:
197:     VecGetArrayRead(yy[0],&yy0);
198:     switch (j_rem=j&0x3) {
199:     case 3:
200:       x2 = x[2]; sum0 += x2*PetscConj(yy0[2]);
201:     case 2:
202:       x1 = x[1]; sum0 += x1*PetscConj(yy0[1]);
203:     case 1:
204:       x0 = x[0]; sum0 += x0*PetscConj(yy0[0]);
205:     case 0:
206:       x   += j_rem;
207:       yy0 += j_rem;
208:       j   -= j_rem;
209:       break;
210:     }
211:     while (j>0) {
212:       sum0 += x[0]*PetscConj(yy0[0]) + x[1]*PetscConj(yy0[1])
213:             + x[2]*PetscConj(yy0[2]) + x[3]*PetscConj(yy0[3]);
214:       yy0  +=4;
215:       j    -= 4; x+=4;
216:     }
217:     z[0] = sum0;

219:     VecRestoreArrayRead(yy[0],&yy0);
220:     break;
221:   case 0:
222:     break;
223:   }
224:   z  += nv_rem;
225:   i  -= nv_rem;
226:   yy += nv_rem;

228:   while (i >0) {
229:     sum0 = 0.;
230:     sum1 = 0.;
231:     sum2 = 0.;
232:     sum3 = 0.;
233:     VecGetArrayRead(yy[0],&yy0);
234:     VecGetArrayRead(yy[1],&yy1);
235:     VecGetArrayRead(yy[2],&yy2);
236:     VecGetArrayRead(yy[3],&yy3);

238:     j = n;
239:     x = xbase;
240:     switch (j_rem=j&0x3) {
241:     case 3:
242:       x2    = x[2];
243:       sum0 += x2*PetscConj(yy0[2]); sum1 += x2*PetscConj(yy1[2]);
244:       sum2 += x2*PetscConj(yy2[2]); sum3 += x2*PetscConj(yy3[2]);
245:     case 2:
246:       x1    = x[1];
247:       sum0 += x1*PetscConj(yy0[1]); sum1 += x1*PetscConj(yy1[1]);
248:       sum2 += x1*PetscConj(yy2[1]); sum3 += x1*PetscConj(yy3[1]);
249:     case 1:
250:       x0    = x[0];
251:       sum0 += x0*PetscConj(yy0[0]); sum1 += x0*PetscConj(yy1[0]);
252:       sum2 += x0*PetscConj(yy2[0]); sum3 += x0*PetscConj(yy3[0]);
253:     case 0:
254:       x   += j_rem;
255:       yy0 += j_rem;
256:       yy1 += j_rem;
257:       yy2 += j_rem;
258:       yy3 += j_rem;
259:       j   -= j_rem;
260:       break;
261:     }
262:     while (j>0) {
263:       x0 = x[0];
264:       x1 = x[1];
265:       x2 = x[2];
266:       x3 = x[3];
267:       x += 4;

269:       sum0 += x0*PetscConj(yy0[0]) + x1*PetscConj(yy0[1]) + x2*PetscConj(yy0[2]) + x3*PetscConj(yy0[3]); yy0+=4;
270:       sum1 += x0*PetscConj(yy1[0]) + x1*PetscConj(yy1[1]) + x2*PetscConj(yy1[2]) + x3*PetscConj(yy1[3]); yy1+=4;
271:       sum2 += x0*PetscConj(yy2[0]) + x1*PetscConj(yy2[1]) + x2*PetscConj(yy2[2]) + x3*PetscConj(yy2[3]); yy2+=4;
272:       sum3 += x0*PetscConj(yy3[0]) + x1*PetscConj(yy3[1]) + x2*PetscConj(yy3[2]) + x3*PetscConj(yy3[3]); yy3+=4;
273:       j    -= 4;
274:     }
275:     z[0] = sum0;
276:     z[1] = sum1;
277:     z[2] = sum2;
278:     z[3] = sum3;
279:     z   += 4;
280:     i   -= 4;
281:     VecRestoreArrayRead(yy[0],&yy0);
282:     VecRestoreArrayRead(yy[1],&yy1);
283:     VecRestoreArrayRead(yy[2],&yy2);
284:     VecRestoreArrayRead(yy[3],&yy3);
285:     yy  += 4;
286:   }
287:   VecRestoreArrayRead(xin,&xbase);
288:   PetscLogFlops(PetscMax(nv*(2.0*xin->map->n-1),0.0));
289:   return(0);
290: }
291: #endif

293: /* ----------------------------------------------------------------------------*/
294: PetscErrorCode VecMTDot_Seq(Vec xin,PetscInt nv,const Vec yin[],PetscScalar *z)
295: {
296:   PetscErrorCode    ierr;
297:   PetscInt          n = xin->map->n,i,j,nv_rem,j_rem;
298:   PetscScalar       sum0,sum1,sum2,sum3,x0,x1,x2,x3;
299:   const PetscScalar *yy0,*yy1,*yy2,*yy3,*x,*xbase;
300:   Vec               *yy;

303:   sum0 = 0.;
304:   sum1 = 0.;
305:   sum2 = 0.;

307:   i      = nv;
308:   nv_rem = nv&0x3;
309:   yy     = (Vec*)yin;
310:   j      = n;
311:   VecGetArrayRead(xin,&xbase);
312:   x      = xbase;

314:   switch (nv_rem) {
315:   case 3:
316:     VecGetArrayRead(yy[0],&yy0);
317:     VecGetArrayRead(yy[1],&yy1);
318:     VecGetArrayRead(yy[2],&yy2);
319:     switch (j_rem=j&0x3) {
320:     case 3:
321:       x2    = x[2];
322:       sum0 += x2*yy0[2]; sum1 += x2*yy1[2];
323:       sum2 += x2*yy2[2];
324:     case 2:
325:       x1    = x[1];
326:       sum0 += x1*yy0[1]; sum1 += x1*yy1[1];
327:       sum2 += x1*yy2[1];
328:     case 1:
329:       x0    = x[0];
330:       sum0 += x0*yy0[0]; sum1 += x0*yy1[0];
331:       sum2 += x0*yy2[0];
332:     case 0:
333:       x   += j_rem;
334:       yy0 += j_rem;
335:       yy1 += j_rem;
336:       yy2 += j_rem;
337:       j   -= j_rem;
338:       break;
339:     }
340:     while (j>0) {
341:       x0 = x[0];
342:       x1 = x[1];
343:       x2 = x[2];
344:       x3 = x[3];
345:       x += 4;

347:       sum0 += x0*yy0[0] + x1*yy0[1] + x2*yy0[2] + x3*yy0[3]; yy0+=4;
348:       sum1 += x0*yy1[0] + x1*yy1[1] + x2*yy1[2] + x3*yy1[3]; yy1+=4;
349:       sum2 += x0*yy2[0] + x1*yy2[1] + x2*yy2[2] + x3*yy2[3]; yy2+=4;
350:       j    -= 4;
351:     }
352:     z[0] = sum0;
353:     z[1] = sum1;
354:     z[2] = sum2;
355:     VecRestoreArrayRead(yy[0],&yy0);
356:     VecRestoreArrayRead(yy[1],&yy1);
357:     VecRestoreArrayRead(yy[2],&yy2);
358:     break;
359:   case 2:
360:     VecGetArrayRead(yy[0],&yy0);
361:     VecGetArrayRead(yy[1],&yy1);
362:     switch (j_rem=j&0x3) {
363:     case 3:
364:       x2    = x[2];
365:       sum0 += x2*yy0[2]; sum1 += x2*yy1[2];
366:     case 2:
367:       x1    = x[1];
368:       sum0 += x1*yy0[1]; sum1 += x1*yy1[1];
369:     case 1:
370:       x0    = x[0];
371:       sum0 += x0*yy0[0]; sum1 += x0*yy1[0];
372:     case 0:
373:       x   += j_rem;
374:       yy0 += j_rem;
375:       yy1 += j_rem;
376:       j   -= j_rem;
377:       break;
378:     }
379:     while (j>0) {
380:       x0 = x[0];
381:       x1 = x[1];
382:       x2 = x[2];
383:       x3 = x[3];
384:       x += 4;

386:       sum0 += x0*yy0[0] + x1*yy0[1] + x2*yy0[2] + x3*yy0[3]; yy0+=4;
387:       sum1 += x0*yy1[0] + x1*yy1[1] + x2*yy1[2] + x3*yy1[3]; yy1+=4;
388:       j    -= 4;
389:     }
390:     z[0] = sum0;
391:     z[1] = sum1;

393:     VecRestoreArrayRead(yy[0],&yy0);
394:     VecRestoreArrayRead(yy[1],&yy1);
395:     break;
396:   case 1:
397:     VecGetArrayRead(yy[0],&yy0);
398:     switch (j_rem=j&0x3) {
399:     case 3:
400:       x2 = x[2]; sum0 += x2*yy0[2];
401:     case 2:
402:       x1 = x[1]; sum0 += x1*yy0[1];
403:     case 1:
404:       x0 = x[0]; sum0 += x0*yy0[0];
405:     case 0:
406:       x   += j_rem;
407:       yy0 += j_rem;
408:       j   -= j_rem;
409:       break;
410:     }
411:     while (j>0) {
412:       sum0 += x[0]*yy0[0] + x[1]*yy0[1] + x[2]*yy0[2] + x[3]*yy0[3]; yy0+=4;
413:       j    -= 4; x+=4;
414:     }
415:     z[0] = sum0;

417:     VecRestoreArrayRead(yy[0],&yy0);
418:     break;
419:   case 0:
420:     break;
421:   }
422:   z  += nv_rem;
423:   i  -= nv_rem;
424:   yy += nv_rem;

426:   while (i >0) {
427:     sum0 = 0.;
428:     sum1 = 0.;
429:     sum2 = 0.;
430:     sum3 = 0.;
431:     VecGetArrayRead(yy[0],&yy0);
432:     VecGetArrayRead(yy[1],&yy1);
433:     VecGetArrayRead(yy[2],&yy2);
434:     VecGetArrayRead(yy[3],&yy3);
435:     x    = xbase;

437:     j = n;
438:     switch (j_rem=j&0x3) {
439:     case 3:
440:       x2    = x[2];
441:       sum0 += x2*yy0[2]; sum1 += x2*yy1[2];
442:       sum2 += x2*yy2[2]; sum3 += x2*yy3[2];
443:     case 2:
444:       x1    = x[1];
445:       sum0 += x1*yy0[1]; sum1 += x1*yy1[1];
446:       sum2 += x1*yy2[1]; sum3 += x1*yy3[1];
447:     case 1:
448:       x0    = x[0];
449:       sum0 += x0*yy0[0]; sum1 += x0*yy1[0];
450:       sum2 += x0*yy2[0]; sum3 += x0*yy3[0];
451:     case 0:
452:       x   += j_rem;
453:       yy0 += j_rem;
454:       yy1 += j_rem;
455:       yy2 += j_rem;
456:       yy3 += j_rem;
457:       j   -= j_rem;
458:       break;
459:     }
460:     while (j>0) {
461:       x0 = x[0];
462:       x1 = x[1];
463:       x2 = x[2];
464:       x3 = x[3];
465:       x += 4;

467:       sum0 += x0*yy0[0] + x1*yy0[1] + x2*yy0[2] + x3*yy0[3]; yy0+=4;
468:       sum1 += x0*yy1[0] + x1*yy1[1] + x2*yy1[2] + x3*yy1[3]; yy1+=4;
469:       sum2 += x0*yy2[0] + x1*yy2[1] + x2*yy2[2] + x3*yy2[3]; yy2+=4;
470:       sum3 += x0*yy3[0] + x1*yy3[1] + x2*yy3[2] + x3*yy3[3]; yy3+=4;
471:       j    -= 4;
472:     }
473:     z[0] = sum0;
474:     z[1] = sum1;
475:     z[2] = sum2;
476:     z[3] = sum3;
477:     z   += 4;
478:     i   -= 4;
479:     VecRestoreArrayRead(yy[0],&yy0);
480:     VecRestoreArrayRead(yy[1],&yy1);
481:     VecRestoreArrayRead(yy[2],&yy2);
482:     VecRestoreArrayRead(yy[3],&yy3);
483:     yy  += 4;
484:   }
485:   VecRestoreArrayRead(xin,&xbase);
486:   PetscLogFlops(PetscMax(nv*(2.0*xin->map->n-1),0.0));
487:   return(0);
488: }


491: PetscErrorCode VecMax_Seq(Vec xin,PetscInt *idx,PetscReal *z)
492: {
493:   PetscInt          i,j=0,n = xin->map->n;
494:   PetscReal         max,tmp;
495:   const PetscScalar *xx;
496:   PetscErrorCode    ierr;

499:   VecGetArrayRead(xin,&xx);
500:   if (!n) {
501:     max = PETSC_MIN_REAL;
502:     j   = -1;
503:   } else {
504:     max = PetscRealPart(*xx++); j = 0;
505:     for (i=1; i<n; i++) {
506:       if ((tmp = PetscRealPart(*xx++)) > max) { j = i; max = tmp;}
507:     }
508:   }
509:   *z = max;
510:   if (idx) *idx = j;
511:   VecRestoreArrayRead(xin,&xx);
512:   return(0);
513: }

515: PetscErrorCode VecMin_Seq(Vec xin,PetscInt *idx,PetscReal *z)
516: {
517:   PetscInt          i,j=0,n = xin->map->n;
518:   PetscReal         min,tmp;
519:   const PetscScalar *xx;
520:   PetscErrorCode    ierr;

523:   VecGetArrayRead(xin,&xx);
524:   if (!n) {
525:     min = PETSC_MAX_REAL;
526:     j   = -1;
527:   } else {
528:     min = PetscRealPart(*xx++); j = 0;
529:     for (i=1; i<n; i++) {
530:       if ((tmp = PetscRealPart(*xx++)) < min) { j = i; min = tmp;}
531:     }
532:   }
533:   *z = min;
534:   if (idx) *idx = j;
535:   VecRestoreArrayRead(xin,&xx);
536:   return(0);
537: }

539: PetscErrorCode VecSet_Seq(Vec xin,PetscScalar alpha)
540: {
541:   PetscInt       i,n = xin->map->n;
542:   PetscScalar    *xx;

546:   VecGetArray(xin,&xx);
547:   if (alpha == (PetscScalar)0.0) {
548:     PetscMemzero(xx,n*sizeof(PetscScalar));
549:   } else {
550:     for (i=0; i<n; i++) xx[i] = alpha;
551:   }
552:   VecRestoreArray(xin,&xx);
553:   return(0);
554: }

556: PetscErrorCode VecMAXPY_Seq(Vec xin, PetscInt nv,const PetscScalar *alpha,Vec *y)
557: {
558:   PetscErrorCode    ierr;
559:   PetscInt          n = xin->map->n,j,j_rem;
560:   const PetscScalar *yy0,*yy1,*yy2,*yy3;
561:   PetscScalar       *xx,alpha0,alpha1,alpha2,alpha3;

563: #if defined(PETSC_HAVE_PRAGMA_DISJOINT)
564: #pragma disjoint(*xx,*yy0,*yy1,*yy2,*yy3,*alpha)
565: #endif

568:   PetscLogFlops(nv*2.0*n);
569:   VecGetArray(xin,&xx);
570:   switch (j_rem=nv&0x3) {
571:   case 3:
572:     VecGetArrayRead(y[0],&yy0);
573:     VecGetArrayRead(y[1],&yy1);
574:     VecGetArrayRead(y[2],&yy2);
575:     alpha0 = alpha[0];
576:     alpha1 = alpha[1];
577:     alpha2 = alpha[2];
578:     alpha += 3;
579:     PetscKernelAXPY3(xx,alpha0,alpha1,alpha2,yy0,yy1,yy2,n);
580:     VecRestoreArrayRead(y[0],&yy0);
581:     VecRestoreArrayRead(y[1],&yy1);
582:     VecRestoreArrayRead(y[2],&yy2);
583:     y   += 3;
584:     break;
585:   case 2:
586:     VecGetArrayRead(y[0],&yy0);
587:     VecGetArrayRead(y[1],&yy1);
588:     alpha0 = alpha[0];
589:     alpha1 = alpha[1];
590:     alpha +=2;
591:     PetscKernelAXPY2(xx,alpha0,alpha1,yy0,yy1,n);
592:     VecRestoreArrayRead(y[0],&yy0);
593:     VecRestoreArrayRead(y[1],&yy1);
594:     y   +=2;
595:     break;
596:   case 1:
597:     VecGetArrayRead(y[0],&yy0);
598:     alpha0 = *alpha++;
599:     PetscKernelAXPY(xx,alpha0,yy0,n);
600:     VecRestoreArrayRead(y[0],&yy0);
601:     y   +=1;
602:     break;
603:   }
604:   for (j=j_rem; j<nv; j+=4) {
605:     VecGetArrayRead(y[0],&yy0);
606:     VecGetArrayRead(y[1],&yy1);
607:     VecGetArrayRead(y[2],&yy2);
608:     VecGetArrayRead(y[3],&yy3);
609:     alpha0 = alpha[0];
610:     alpha1 = alpha[1];
611:     alpha2 = alpha[2];
612:     alpha3 = alpha[3];
613:     alpha += 4;

615:     PetscKernelAXPY4(xx,alpha0,alpha1,alpha2,alpha3,yy0,yy1,yy2,yy3,n);
616:     VecRestoreArrayRead(y[0],&yy0);
617:     VecRestoreArrayRead(y[1],&yy1);
618:     VecRestoreArrayRead(y[2],&yy2);
619:     VecRestoreArrayRead(y[3],&yy3);
620:     y   += 4;
621:   }
622:   VecRestoreArray(xin,&xx);
623:   return(0);
624: }

626: #include <../src/vec/vec/impls/seq/ftn-kernels/faypx.h>

628: PetscErrorCode VecAYPX_Seq(Vec yin,PetscScalar alpha,Vec xin)
629: {
630:   PetscErrorCode    ierr;
631:   PetscInt          n = yin->map->n;
632:   PetscScalar       *yy;
633:   const PetscScalar *xx;

636:   if (alpha == (PetscScalar)0.0) {
637:     VecCopy(xin,yin);
638:   } else if (alpha == (PetscScalar)1.0) {
639:     VecAXPY_Seq(yin,alpha,xin);
640:   } else if (alpha == (PetscScalar)-1.0) {
641:     PetscInt i;
642:     VecGetArrayRead(xin,&xx);
643:     VecGetArray(yin,&yy);

645:     for (i=0; i<n; i++) yy[i] = xx[i] - yy[i];

647:     VecRestoreArrayRead(xin,&xx);
648:     VecRestoreArray(yin,&yy);
649:     PetscLogFlops(1.0*n);
650:   } else {
651:     VecGetArrayRead(xin,&xx);
652:     VecGetArray(yin,&yy);
653: #if defined(PETSC_USE_FORTRAN_KERNEL_AYPX)
654:     {
655:       PetscScalar oalpha = alpha;
656:       fortranaypx_(&n,&oalpha,xx,yy);
657:     }
658: #else
659:     {
660:       PetscInt i;

662:       for (i=0; i<n; i++) yy[i] = xx[i] + alpha*yy[i];
663:     }
664: #endif
665:     VecRestoreArrayRead(xin,&xx);
666:     VecRestoreArray(yin,&yy);
667:     PetscLogFlops(2.0*n);
668:   }
669:   return(0);
670: }

672: #include <../src/vec/vec/impls/seq/ftn-kernels/fwaxpy.h>
673: /*
674:    IBM ESSL contains a routine dzaxpy() that is our WAXPY() but it appears
675:    to be slower than a regular C loop.  Hence,we do not include it.
676:    void ?zaxpy(int*,PetscScalar*,PetscScalar*,int*,PetscScalar*,int*,PetscScalar*,int*);
677: */

679: PetscErrorCode VecWAXPY_Seq(Vec win, PetscScalar alpha,Vec xin,Vec yin)
680: {
681:   PetscErrorCode     ierr;
682:   PetscInt           i,n = win->map->n;
683:   PetscScalar        *ww;
684:   const PetscScalar  *yy,*xx;

687:   VecGetArrayRead(xin,&xx);
688:   VecGetArrayRead(yin,&yy);
689:   VecGetArray(win,&ww);
690:   if (alpha == (PetscScalar)1.0) {
691:     PetscLogFlops(n);
692:     /* could call BLAS axpy after call to memcopy, but may be slower */
693:     for (i=0; i<n; i++) ww[i] = yy[i] + xx[i];
694:   } else if (alpha == (PetscScalar)-1.0) {
695:     PetscLogFlops(n);
696:     for (i=0; i<n; i++) ww[i] = yy[i] - xx[i];
697:   } else if (alpha == (PetscScalar)0.0) {
698:     PetscMemcpy(ww,yy,n*sizeof(PetscScalar));
699:   } else {
700:     PetscScalar oalpha = alpha;
701: #if defined(PETSC_USE_FORTRAN_KERNEL_WAXPY)
702:     fortranwaxpy_(&n,&oalpha,xx,yy,ww);
703: #else
704:     for (i=0; i<n; i++) ww[i] = yy[i] + oalpha * xx[i];
705: #endif
706:     PetscLogFlops(2.0*n);
707:   }
708:   VecRestoreArrayRead(xin,&xx);
709:   VecRestoreArrayRead(yin,&yy);
710:   VecRestoreArray(win,&ww);
711:   return(0);
712: }

714: PetscErrorCode VecMaxPointwiseDivide_Seq(Vec xin,Vec yin,PetscReal *max)
715: {
716:   PetscErrorCode    ierr;
717:   PetscInt          n = xin->map->n,i;
718:   const PetscScalar *xx,*yy;
719:   PetscReal         m = 0.0;

722:   VecGetArrayRead(xin,&xx);
723:   VecGetArrayRead(yin,&yy);
724:   for (i = 0; i < n; i++) {
725:     if (yy[i] != (PetscScalar)0.0) {
726:       m = PetscMax(PetscAbsScalar(xx[i]/yy[i]), m);
727:     } else {
728:       m = PetscMax(PetscAbsScalar(xx[i]), m);
729:     }
730:   }
731:   VecRestoreArrayRead(xin,&xx);
732:   VecRestoreArrayRead(yin,&yy);
733:   MPIU_Allreduce(&m,max,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)xin));
734:   PetscLogFlops(n);
735:   return(0);
736: }

738: PetscErrorCode VecPlaceArray_Seq(Vec vin,const PetscScalar *a)
739: {
740:   Vec_Seq *v = (Vec_Seq*)vin->data;

743:   if (v->unplacedarray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"VecPlaceArray() was already called on this vector, without a call to VecResetArray()");
744:   v->unplacedarray = v->array;  /* save previous array so reset can bring it back */
745:   v->array         = (PetscScalar*)a;
746:   return(0);
747: }

749: PetscErrorCode VecReplaceArray_Seq(Vec vin,const PetscScalar *a)
750: {
751:   Vec_Seq        *v = (Vec_Seq*)vin->data;

755:   PetscFree(v->array_allocated);
756:   v->array_allocated = v->array = (PetscScalar*)a;
757:   return(0);
758: }