Actual source code: dvec2.c
petsc-3.13.6 2020-09-29
2: /*
3: Defines some vector operation functions that are shared by
4: sequential and parallel vectors.
5: */
6: #include <../src/vec/vec/impls/dvecimpl.h>
7: #include <petsc/private/kernels/petscaxpy.h>
11: #if defined(PETSC_USE_FORTRAN_KERNEL_MDOT)
12: #include <../src/vec/vec/impls/seq/ftn-kernels/fmdot.h>
13: PetscErrorCode VecMDot_Seq(Vec xin,PetscInt nv,const Vec yin[],PetscScalar *z)
14: {
15: PetscErrorCode ierr;
16: PetscInt i,nv_rem,n = xin->map->n;
17: PetscScalar sum0,sum1,sum2,sum3;
18: const PetscScalar *yy0,*yy1,*yy2,*yy3,*x;
19: Vec *yy;
22: sum0 = 0.0;
23: sum1 = 0.0;
24: sum2 = 0.0;
26: i = nv;
27: nv_rem = nv&0x3;
28: yy = (Vec*)yin;
29: VecGetArrayRead(xin,&x);
31: switch (nv_rem) {
32: case 3:
33: VecGetArrayRead(yy[0],&yy0);
34: VecGetArrayRead(yy[1],&yy1);
35: VecGetArrayRead(yy[2],&yy2);
36: fortranmdot3_(x,yy0,yy1,yy2,&n,&sum0,&sum1,&sum2);
37: VecRestoreArrayRead(yy[0],&yy0);
38: VecRestoreArrayRead(yy[1],&yy1);
39: VecRestoreArrayRead(yy[2],&yy2);
40: z[0] = sum0;
41: z[1] = sum1;
42: z[2] = sum2;
43: break;
44: case 2:
45: VecGetArrayRead(yy[0],&yy0);
46: VecGetArrayRead(yy[1],&yy1);
47: fortranmdot2_(x,yy0,yy1,&n,&sum0,&sum1);
48: VecRestoreArrayRead(yy[0],&yy0);
49: VecRestoreArrayRead(yy[1],&yy1);
50: z[0] = sum0;
51: z[1] = sum1;
52: break;
53: case 1:
54: VecGetArrayRead(yy[0],&yy0);
55: fortranmdot1_(x,yy0,&n,&sum0);
56: VecRestoreArrayRead(yy[0],&yy0);
57: z[0] = sum0;
58: break;
59: case 0:
60: break;
61: }
62: z += nv_rem;
63: i -= nv_rem;
64: yy += nv_rem;
66: while (i >0) {
67: sum0 = 0.;
68: sum1 = 0.;
69: sum2 = 0.;
70: sum3 = 0.;
71: VecGetArrayRead(yy[0],&yy0);
72: VecGetArrayRead(yy[1],&yy1);
73: VecGetArrayRead(yy[2],&yy2);
74: VecGetArrayRead(yy[3],&yy3);
75: fortranmdot4_(x,yy0,yy1,yy2,yy3,&n,&sum0,&sum1,&sum2,&sum3);
76: VecRestoreArrayRead(yy[0],&yy0);
77: VecRestoreArrayRead(yy[1],&yy1);
78: VecRestoreArrayRead(yy[2],&yy2);
79: VecRestoreArrayRead(yy[3],&yy3);
80: yy += 4;
81: z[0] = sum0;
82: z[1] = sum1;
83: z[2] = sum2;
84: z[3] = sum3;
85: z += 4;
86: i -= 4;
87: }
88: VecRestoreArrayRead(xin,&x);
89: PetscLogFlops(PetscMax(nv*(2.0*xin->map->n-1),0.0));
90: return(0);
91: }
93: #else
94: PetscErrorCode VecMDot_Seq(Vec xin,PetscInt nv,const Vec yin[],PetscScalar *z)
95: {
96: PetscErrorCode ierr;
97: PetscInt n = xin->map->n,i,j,nv_rem,j_rem;
98: PetscScalar sum0,sum1,sum2,sum3,x0,x1,x2,x3;
99: const PetscScalar *yy0,*yy1,*yy2,*yy3,*x,*xbase;
100: Vec *yy;
103: sum0 = 0.;
104: sum1 = 0.;
105: sum2 = 0.;
107: i = nv;
108: nv_rem = nv&0x3;
109: yy = (Vec*)yin;
110: j = n;
111: VecGetArrayRead(xin,&xbase);
112: x = xbase;
114: switch (nv_rem) {
115: case 3:
116: VecGetArrayRead(yy[0],&yy0);
117: VecGetArrayRead(yy[1],&yy1);
118: VecGetArrayRead(yy[2],&yy2);
119: switch (j_rem=j&0x3) {
120: case 3:
121: x2 = x[2];
122: sum0 += x2*PetscConj(yy0[2]); sum1 += x2*PetscConj(yy1[2]);
123: sum2 += x2*PetscConj(yy2[2]);
124: case 2:
125: x1 = x[1];
126: sum0 += x1*PetscConj(yy0[1]); sum1 += x1*PetscConj(yy1[1]);
127: sum2 += x1*PetscConj(yy2[1]);
128: case 1:
129: x0 = x[0];
130: sum0 += x0*PetscConj(yy0[0]); sum1 += x0*PetscConj(yy1[0]);
131: sum2 += x0*PetscConj(yy2[0]);
132: case 0:
133: x += j_rem;
134: yy0 += j_rem;
135: yy1 += j_rem;
136: yy2 += j_rem;
137: j -= j_rem;
138: break;
139: }
140: while (j>0) {
141: x0 = x[0];
142: x1 = x[1];
143: x2 = x[2];
144: x3 = x[3];
145: x += 4;
147: sum0 += x0*PetscConj(yy0[0]) + x1*PetscConj(yy0[1]) + x2*PetscConj(yy0[2]) + x3*PetscConj(yy0[3]); yy0+=4;
148: sum1 += x0*PetscConj(yy1[0]) + x1*PetscConj(yy1[1]) + x2*PetscConj(yy1[2]) + x3*PetscConj(yy1[3]); yy1+=4;
149: sum2 += x0*PetscConj(yy2[0]) + x1*PetscConj(yy2[1]) + x2*PetscConj(yy2[2]) + x3*PetscConj(yy2[3]); yy2+=4;
150: j -= 4;
151: }
152: z[0] = sum0;
153: z[1] = sum1;
154: z[2] = sum2;
155: VecRestoreArrayRead(yy[0],&yy0);
156: VecRestoreArrayRead(yy[1],&yy1);
157: VecRestoreArrayRead(yy[2],&yy2);
158: break;
159: case 2:
160: VecGetArrayRead(yy[0],&yy0);
161: VecGetArrayRead(yy[1],&yy1);
162: switch (j_rem=j&0x3) {
163: case 3:
164: x2 = x[2];
165: sum0 += x2*PetscConj(yy0[2]); sum1 += x2*PetscConj(yy1[2]);
166: case 2:
167: x1 = x[1];
168: sum0 += x1*PetscConj(yy0[1]); sum1 += x1*PetscConj(yy1[1]);
169: case 1:
170: x0 = x[0];
171: sum0 += x0*PetscConj(yy0[0]); sum1 += x0*PetscConj(yy1[0]);
172: case 0:
173: x += j_rem;
174: yy0 += j_rem;
175: yy1 += j_rem;
176: j -= j_rem;
177: break;
178: }
179: while (j>0) {
180: x0 = x[0];
181: x1 = x[1];
182: x2 = x[2];
183: x3 = x[3];
184: x += 4;
186: sum0 += x0*PetscConj(yy0[0]) + x1*PetscConj(yy0[1]) + x2*PetscConj(yy0[2]) + x3*PetscConj(yy0[3]); yy0+=4;
187: sum1 += x0*PetscConj(yy1[0]) + x1*PetscConj(yy1[1]) + x2*PetscConj(yy1[2]) + x3*PetscConj(yy1[3]); yy1+=4;
188: j -= 4;
189: }
190: z[0] = sum0;
191: z[1] = sum1;
193: VecRestoreArrayRead(yy[0],&yy0);
194: VecRestoreArrayRead(yy[1],&yy1);
195: break;
196: case 1:
197: VecGetArrayRead(yy[0],&yy0);
198: switch (j_rem=j&0x3) {
199: case 3:
200: x2 = x[2]; sum0 += x2*PetscConj(yy0[2]);
201: case 2:
202: x1 = x[1]; sum0 += x1*PetscConj(yy0[1]);
203: case 1:
204: x0 = x[0]; sum0 += x0*PetscConj(yy0[0]);
205: case 0:
206: x += j_rem;
207: yy0 += j_rem;
208: j -= j_rem;
209: break;
210: }
211: while (j>0) {
212: sum0 += x[0]*PetscConj(yy0[0]) + x[1]*PetscConj(yy0[1])
213: + x[2]*PetscConj(yy0[2]) + x[3]*PetscConj(yy0[3]);
214: yy0 +=4;
215: j -= 4; x+=4;
216: }
217: z[0] = sum0;
219: VecRestoreArrayRead(yy[0],&yy0);
220: break;
221: case 0:
222: break;
223: }
224: z += nv_rem;
225: i -= nv_rem;
226: yy += nv_rem;
228: while (i >0) {
229: sum0 = 0.;
230: sum1 = 0.;
231: sum2 = 0.;
232: sum3 = 0.;
233: VecGetArrayRead(yy[0],&yy0);
234: VecGetArrayRead(yy[1],&yy1);
235: VecGetArrayRead(yy[2],&yy2);
236: VecGetArrayRead(yy[3],&yy3);
238: j = n;
239: x = xbase;
240: switch (j_rem=j&0x3) {
241: case 3:
242: x2 = x[2];
243: sum0 += x2*PetscConj(yy0[2]); sum1 += x2*PetscConj(yy1[2]);
244: sum2 += x2*PetscConj(yy2[2]); sum3 += x2*PetscConj(yy3[2]);
245: case 2:
246: x1 = x[1];
247: sum0 += x1*PetscConj(yy0[1]); sum1 += x1*PetscConj(yy1[1]);
248: sum2 += x1*PetscConj(yy2[1]); sum3 += x1*PetscConj(yy3[1]);
249: case 1:
250: x0 = x[0];
251: sum0 += x0*PetscConj(yy0[0]); sum1 += x0*PetscConj(yy1[0]);
252: sum2 += x0*PetscConj(yy2[0]); sum3 += x0*PetscConj(yy3[0]);
253: case 0:
254: x += j_rem;
255: yy0 += j_rem;
256: yy1 += j_rem;
257: yy2 += j_rem;
258: yy3 += j_rem;
259: j -= j_rem;
260: break;
261: }
262: while (j>0) {
263: x0 = x[0];
264: x1 = x[1];
265: x2 = x[2];
266: x3 = x[3];
267: x += 4;
269: sum0 += x0*PetscConj(yy0[0]) + x1*PetscConj(yy0[1]) + x2*PetscConj(yy0[2]) + x3*PetscConj(yy0[3]); yy0+=4;
270: sum1 += x0*PetscConj(yy1[0]) + x1*PetscConj(yy1[1]) + x2*PetscConj(yy1[2]) + x3*PetscConj(yy1[3]); yy1+=4;
271: sum2 += x0*PetscConj(yy2[0]) + x1*PetscConj(yy2[1]) + x2*PetscConj(yy2[2]) + x3*PetscConj(yy2[3]); yy2+=4;
272: sum3 += x0*PetscConj(yy3[0]) + x1*PetscConj(yy3[1]) + x2*PetscConj(yy3[2]) + x3*PetscConj(yy3[3]); yy3+=4;
273: j -= 4;
274: }
275: z[0] = sum0;
276: z[1] = sum1;
277: z[2] = sum2;
278: z[3] = sum3;
279: z += 4;
280: i -= 4;
281: VecRestoreArrayRead(yy[0],&yy0);
282: VecRestoreArrayRead(yy[1],&yy1);
283: VecRestoreArrayRead(yy[2],&yy2);
284: VecRestoreArrayRead(yy[3],&yy3);
285: yy += 4;
286: }
287: VecRestoreArrayRead(xin,&xbase);
288: PetscLogFlops(PetscMax(nv*(2.0*xin->map->n-1),0.0));
289: return(0);
290: }
291: #endif
293: /* ----------------------------------------------------------------------------*/
294: PetscErrorCode VecMTDot_Seq(Vec xin,PetscInt nv,const Vec yin[],PetscScalar *z)
295: {
296: PetscErrorCode ierr;
297: PetscInt n = xin->map->n,i,j,nv_rem,j_rem;
298: PetscScalar sum0,sum1,sum2,sum3,x0,x1,x2,x3;
299: const PetscScalar *yy0,*yy1,*yy2,*yy3,*x,*xbase;
300: Vec *yy;
303: sum0 = 0.;
304: sum1 = 0.;
305: sum2 = 0.;
307: i = nv;
308: nv_rem = nv&0x3;
309: yy = (Vec*)yin;
310: j = n;
311: VecGetArrayRead(xin,&xbase);
312: x = xbase;
314: switch (nv_rem) {
315: case 3:
316: VecGetArrayRead(yy[0],&yy0);
317: VecGetArrayRead(yy[1],&yy1);
318: VecGetArrayRead(yy[2],&yy2);
319: switch (j_rem=j&0x3) {
320: case 3:
321: x2 = x[2];
322: sum0 += x2*yy0[2]; sum1 += x2*yy1[2];
323: sum2 += x2*yy2[2];
324: case 2:
325: x1 = x[1];
326: sum0 += x1*yy0[1]; sum1 += x1*yy1[1];
327: sum2 += x1*yy2[1];
328: case 1:
329: x0 = x[0];
330: sum0 += x0*yy0[0]; sum1 += x0*yy1[0];
331: sum2 += x0*yy2[0];
332: case 0:
333: x += j_rem;
334: yy0 += j_rem;
335: yy1 += j_rem;
336: yy2 += j_rem;
337: j -= j_rem;
338: break;
339: }
340: while (j>0) {
341: x0 = x[0];
342: x1 = x[1];
343: x2 = x[2];
344: x3 = x[3];
345: x += 4;
347: sum0 += x0*yy0[0] + x1*yy0[1] + x2*yy0[2] + x3*yy0[3]; yy0+=4;
348: sum1 += x0*yy1[0] + x1*yy1[1] + x2*yy1[2] + x3*yy1[3]; yy1+=4;
349: sum2 += x0*yy2[0] + x1*yy2[1] + x2*yy2[2] + x3*yy2[3]; yy2+=4;
350: j -= 4;
351: }
352: z[0] = sum0;
353: z[1] = sum1;
354: z[2] = sum2;
355: VecRestoreArrayRead(yy[0],&yy0);
356: VecRestoreArrayRead(yy[1],&yy1);
357: VecRestoreArrayRead(yy[2],&yy2);
358: break;
359: case 2:
360: VecGetArrayRead(yy[0],&yy0);
361: VecGetArrayRead(yy[1],&yy1);
362: switch (j_rem=j&0x3) {
363: case 3:
364: x2 = x[2];
365: sum0 += x2*yy0[2]; sum1 += x2*yy1[2];
366: case 2:
367: x1 = x[1];
368: sum0 += x1*yy0[1]; sum1 += x1*yy1[1];
369: case 1:
370: x0 = x[0];
371: sum0 += x0*yy0[0]; sum1 += x0*yy1[0];
372: case 0:
373: x += j_rem;
374: yy0 += j_rem;
375: yy1 += j_rem;
376: j -= j_rem;
377: break;
378: }
379: while (j>0) {
380: x0 = x[0];
381: x1 = x[1];
382: x2 = x[2];
383: x3 = x[3];
384: x += 4;
386: sum0 += x0*yy0[0] + x1*yy0[1] + x2*yy0[2] + x3*yy0[3]; yy0+=4;
387: sum1 += x0*yy1[0] + x1*yy1[1] + x2*yy1[2] + x3*yy1[3]; yy1+=4;
388: j -= 4;
389: }
390: z[0] = sum0;
391: z[1] = sum1;
393: VecRestoreArrayRead(yy[0],&yy0);
394: VecRestoreArrayRead(yy[1],&yy1);
395: break;
396: case 1:
397: VecGetArrayRead(yy[0],&yy0);
398: switch (j_rem=j&0x3) {
399: case 3:
400: x2 = x[2]; sum0 += x2*yy0[2];
401: case 2:
402: x1 = x[1]; sum0 += x1*yy0[1];
403: case 1:
404: x0 = x[0]; sum0 += x0*yy0[0];
405: case 0:
406: x += j_rem;
407: yy0 += j_rem;
408: j -= j_rem;
409: break;
410: }
411: while (j>0) {
412: sum0 += x[0]*yy0[0] + x[1]*yy0[1] + x[2]*yy0[2] + x[3]*yy0[3]; yy0+=4;
413: j -= 4; x+=4;
414: }
415: z[0] = sum0;
417: VecRestoreArrayRead(yy[0],&yy0);
418: break;
419: case 0:
420: break;
421: }
422: z += nv_rem;
423: i -= nv_rem;
424: yy += nv_rem;
426: while (i >0) {
427: sum0 = 0.;
428: sum1 = 0.;
429: sum2 = 0.;
430: sum3 = 0.;
431: VecGetArrayRead(yy[0],&yy0);
432: VecGetArrayRead(yy[1],&yy1);
433: VecGetArrayRead(yy[2],&yy2);
434: VecGetArrayRead(yy[3],&yy3);
435: x = xbase;
437: j = n;
438: switch (j_rem=j&0x3) {
439: case 3:
440: x2 = x[2];
441: sum0 += x2*yy0[2]; sum1 += x2*yy1[2];
442: sum2 += x2*yy2[2]; sum3 += x2*yy3[2];
443: case 2:
444: x1 = x[1];
445: sum0 += x1*yy0[1]; sum1 += x1*yy1[1];
446: sum2 += x1*yy2[1]; sum3 += x1*yy3[1];
447: case 1:
448: x0 = x[0];
449: sum0 += x0*yy0[0]; sum1 += x0*yy1[0];
450: sum2 += x0*yy2[0]; sum3 += x0*yy3[0];
451: case 0:
452: x += j_rem;
453: yy0 += j_rem;
454: yy1 += j_rem;
455: yy2 += j_rem;
456: yy3 += j_rem;
457: j -= j_rem;
458: break;
459: }
460: while (j>0) {
461: x0 = x[0];
462: x1 = x[1];
463: x2 = x[2];
464: x3 = x[3];
465: x += 4;
467: sum0 += x0*yy0[0] + x1*yy0[1] + x2*yy0[2] + x3*yy0[3]; yy0+=4;
468: sum1 += x0*yy1[0] + x1*yy1[1] + x2*yy1[2] + x3*yy1[3]; yy1+=4;
469: sum2 += x0*yy2[0] + x1*yy2[1] + x2*yy2[2] + x3*yy2[3]; yy2+=4;
470: sum3 += x0*yy3[0] + x1*yy3[1] + x2*yy3[2] + x3*yy3[3]; yy3+=4;
471: j -= 4;
472: }
473: z[0] = sum0;
474: z[1] = sum1;
475: z[2] = sum2;
476: z[3] = sum3;
477: z += 4;
478: i -= 4;
479: VecRestoreArrayRead(yy[0],&yy0);
480: VecRestoreArrayRead(yy[1],&yy1);
481: VecRestoreArrayRead(yy[2],&yy2);
482: VecRestoreArrayRead(yy[3],&yy3);
483: yy += 4;
484: }
485: VecRestoreArrayRead(xin,&xbase);
486: PetscLogFlops(PetscMax(nv*(2.0*xin->map->n-1),0.0));
487: return(0);
488: }
491: PetscErrorCode VecMax_Seq(Vec xin,PetscInt *idx,PetscReal *z)
492: {
493: PetscInt i,j=0,n = xin->map->n;
494: PetscReal max,tmp;
495: const PetscScalar *xx;
496: PetscErrorCode ierr;
499: VecGetArrayRead(xin,&xx);
500: if (!n) {
501: max = PETSC_MIN_REAL;
502: j = -1;
503: } else {
504: max = PetscRealPart(*xx++); j = 0;
505: for (i=1; i<n; i++) {
506: if ((tmp = PetscRealPart(*xx++)) > max) { j = i; max = tmp;}
507: }
508: }
509: *z = max;
510: if (idx) *idx = j;
511: VecRestoreArrayRead(xin,&xx);
512: return(0);
513: }
515: PetscErrorCode VecMin_Seq(Vec xin,PetscInt *idx,PetscReal *z)
516: {
517: PetscInt i,j=0,n = xin->map->n;
518: PetscReal min,tmp;
519: const PetscScalar *xx;
520: PetscErrorCode ierr;
523: VecGetArrayRead(xin,&xx);
524: if (!n) {
525: min = PETSC_MAX_REAL;
526: j = -1;
527: } else {
528: min = PetscRealPart(*xx++); j = 0;
529: for (i=1; i<n; i++) {
530: if ((tmp = PetscRealPart(*xx++)) < min) { j = i; min = tmp;}
531: }
532: }
533: *z = min;
534: if (idx) *idx = j;
535: VecRestoreArrayRead(xin,&xx);
536: return(0);
537: }
539: PetscErrorCode VecSet_Seq(Vec xin,PetscScalar alpha)
540: {
541: PetscInt i,n = xin->map->n;
542: PetscScalar *xx;
546: VecGetArrayWrite(xin,&xx);
547: if (alpha == (PetscScalar)0.0) {
548: PetscArrayzero(xx,n);
549: } else {
550: for (i=0; i<n; i++) xx[i] = alpha;
551: }
552: VecRestoreArrayWrite(xin,&xx);
553: return(0);
554: }
556: PetscErrorCode VecMAXPY_Seq(Vec xin, PetscInt nv,const PetscScalar *alpha,Vec *y)
557: {
558: PetscErrorCode ierr;
559: PetscInt n = xin->map->n,j,j_rem;
560: const PetscScalar *yy0,*yy1,*yy2,*yy3;
561: PetscScalar *xx,alpha0,alpha1,alpha2,alpha3;
563: #if defined(PETSC_HAVE_PRAGMA_DISJOINT)
564: #pragma disjoint(*xx,*yy0,*yy1,*yy2,*yy3,*alpha)
565: #endif
568: PetscLogFlops(nv*2.0*n);
569: VecGetArray(xin,&xx);
570: switch (j_rem=nv&0x3) {
571: case 3:
572: VecGetArrayRead(y[0],&yy0);
573: VecGetArrayRead(y[1],&yy1);
574: VecGetArrayRead(y[2],&yy2);
575: alpha0 = alpha[0];
576: alpha1 = alpha[1];
577: alpha2 = alpha[2];
578: alpha += 3;
579: PetscKernelAXPY3(xx,alpha0,alpha1,alpha2,yy0,yy1,yy2,n);
580: VecRestoreArrayRead(y[0],&yy0);
581: VecRestoreArrayRead(y[1],&yy1);
582: VecRestoreArrayRead(y[2],&yy2);
583: y += 3;
584: break;
585: case 2:
586: VecGetArrayRead(y[0],&yy0);
587: VecGetArrayRead(y[1],&yy1);
588: alpha0 = alpha[0];
589: alpha1 = alpha[1];
590: alpha +=2;
591: PetscKernelAXPY2(xx,alpha0,alpha1,yy0,yy1,n);
592: VecRestoreArrayRead(y[0],&yy0);
593: VecRestoreArrayRead(y[1],&yy1);
594: y +=2;
595: break;
596: case 1:
597: VecGetArrayRead(y[0],&yy0);
598: alpha0 = *alpha++;
599: PetscKernelAXPY(xx,alpha0,yy0,n);
600: VecRestoreArrayRead(y[0],&yy0);
601: y +=1;
602: break;
603: }
604: for (j=j_rem; j<nv; j+=4) {
605: VecGetArrayRead(y[0],&yy0);
606: VecGetArrayRead(y[1],&yy1);
607: VecGetArrayRead(y[2],&yy2);
608: VecGetArrayRead(y[3],&yy3);
609: alpha0 = alpha[0];
610: alpha1 = alpha[1];
611: alpha2 = alpha[2];
612: alpha3 = alpha[3];
613: alpha += 4;
615: PetscKernelAXPY4(xx,alpha0,alpha1,alpha2,alpha3,yy0,yy1,yy2,yy3,n);
616: VecRestoreArrayRead(y[0],&yy0);
617: VecRestoreArrayRead(y[1],&yy1);
618: VecRestoreArrayRead(y[2],&yy2);
619: VecRestoreArrayRead(y[3],&yy3);
620: y += 4;
621: }
622: VecRestoreArray(xin,&xx);
623: return(0);
624: }
626: #include <../src/vec/vec/impls/seq/ftn-kernels/faypx.h>
628: PetscErrorCode VecAYPX_Seq(Vec yin,PetscScalar alpha,Vec xin)
629: {
630: PetscErrorCode ierr;
631: PetscInt n = yin->map->n;
632: PetscScalar *yy;
633: const PetscScalar *xx;
636: if (alpha == (PetscScalar)0.0) {
637: VecCopy(xin,yin);
638: } else if (alpha == (PetscScalar)1.0) {
639: VecAXPY_Seq(yin,alpha,xin);
640: } else if (alpha == (PetscScalar)-1.0) {
641: PetscInt i;
642: VecGetArrayRead(xin,&xx);
643: VecGetArray(yin,&yy);
645: for (i=0; i<n; i++) yy[i] = xx[i] - yy[i];
647: VecRestoreArrayRead(xin,&xx);
648: VecRestoreArray(yin,&yy);
649: PetscLogFlops(1.0*n);
650: } else {
651: VecGetArrayRead(xin,&xx);
652: VecGetArray(yin,&yy);
653: #if defined(PETSC_USE_FORTRAN_KERNEL_AYPX)
654: {
655: PetscScalar oalpha = alpha;
656: fortranaypx_(&n,&oalpha,xx,yy);
657: }
658: #else
659: {
660: PetscInt i;
662: for (i=0; i<n; i++) yy[i] = xx[i] + alpha*yy[i];
663: }
664: #endif
665: VecRestoreArrayRead(xin,&xx);
666: VecRestoreArray(yin,&yy);
667: PetscLogFlops(2.0*n);
668: }
669: return(0);
670: }
672: #include <../src/vec/vec/impls/seq/ftn-kernels/fwaxpy.h>
673: /*
674: IBM ESSL contains a routine dzaxpy() that is our WAXPY() but it appears
675: to be slower than a regular C loop. Hence,we do not include it.
676: void ?zaxpy(int*,PetscScalar*,PetscScalar*,int*,PetscScalar*,int*,PetscScalar*,int*);
677: */
679: PetscErrorCode VecWAXPY_Seq(Vec win, PetscScalar alpha,Vec xin,Vec yin)
680: {
681: PetscErrorCode ierr;
682: PetscInt i,n = win->map->n;
683: PetscScalar *ww;
684: const PetscScalar *yy,*xx;
687: VecGetArrayRead(xin,&xx);
688: VecGetArrayRead(yin,&yy);
689: VecGetArray(win,&ww);
690: if (alpha == (PetscScalar)1.0) {
691: PetscLogFlops(n);
692: /* could call BLAS axpy after call to memcopy, but may be slower */
693: for (i=0; i<n; i++) ww[i] = yy[i] + xx[i];
694: } else if (alpha == (PetscScalar)-1.0) {
695: PetscLogFlops(n);
696: for (i=0; i<n; i++) ww[i] = yy[i] - xx[i];
697: } else if (alpha == (PetscScalar)0.0) {
698: PetscArraycpy(ww,yy,n);
699: } else {
700: PetscScalar oalpha = alpha;
701: #if defined(PETSC_USE_FORTRAN_KERNEL_WAXPY)
702: fortranwaxpy_(&n,&oalpha,xx,yy,ww);
703: #else
704: for (i=0; i<n; i++) ww[i] = yy[i] + oalpha * xx[i];
705: #endif
706: PetscLogFlops(2.0*n);
707: }
708: VecRestoreArrayRead(xin,&xx);
709: VecRestoreArrayRead(yin,&yy);
710: VecRestoreArray(win,&ww);
711: return(0);
712: }
714: PetscErrorCode VecMaxPointwiseDivide_Seq(Vec xin,Vec yin,PetscReal *max)
715: {
716: PetscErrorCode ierr;
717: PetscInt n = xin->map->n,i;
718: const PetscScalar *xx,*yy;
719: PetscReal m = 0.0;
722: VecGetArrayRead(xin,&xx);
723: VecGetArrayRead(yin,&yy);
724: for (i = 0; i < n; i++) {
725: if (yy[i] != (PetscScalar)0.0) {
726: m = PetscMax(PetscAbsScalar(xx[i]/yy[i]), m);
727: } else {
728: m = PetscMax(PetscAbsScalar(xx[i]), m);
729: }
730: }
731: VecRestoreArrayRead(xin,&xx);
732: VecRestoreArrayRead(yin,&yy);
733: MPIU_Allreduce(&m,max,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)xin));
734: PetscLogFlops(n);
735: return(0);
736: }
738: PetscErrorCode VecPlaceArray_Seq(Vec vin,const PetscScalar *a)
739: {
740: Vec_Seq *v = (Vec_Seq*)vin->data;
743: if (v->unplacedarray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"VecPlaceArray() was already called on this vector, without a call to VecResetArray()");
744: v->unplacedarray = v->array; /* save previous array so reset can bring it back */
745: v->array = (PetscScalar*)a;
746: return(0);
747: }
749: PetscErrorCode VecReplaceArray_Seq(Vec vin,const PetscScalar *a)
750: {
751: Vec_Seq *v = (Vec_Seq*)vin->data;
755: PetscFree(v->array_allocated);
756: v->array_allocated = v->array = (PetscScalar*)a;
757: return(0);
758: }