Actual source code: baijsolvtrannat.c
petsc-3.7.3 2016-08-01
1: #include <../src/mat/impls/baij/seq/baij.h>
5: PetscErrorCode MatSolveTranspose_SeqBAIJ_1_NaturalOrdering(Mat A,Vec bb,Vec xx)
6: {
7: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
8: PetscErrorCode ierr;
9: const PetscInt *adiag = a->diag,*ai = a->i,*aj = a->j,*vi;
10: PetscInt i,n = a->mbs,j;
11: PetscInt nz;
12: PetscScalar *x,*tmp,s1;
13: const MatScalar *aa = a->a,*v;
14: const PetscScalar *b;
17: VecGetArrayRead(bb,&b);
18: VecGetArray(xx,&x);
19: tmp = a->solve_work;
22: /* copy the b into temp work space according to permutation */
23: for (i=0; i<n; i++) tmp[i] = b[i];
25: /* forward solve the U^T */
26: for (i=0; i<n; i++) {
27: v = aa + adiag[i+1] + 1;
28: vi = aj + adiag[i+1] + 1;
29: nz = adiag[i] - adiag[i+1] - 1;
30: s1 = tmp[i];
31: s1 *= v[nz]; /* multiply by inverse of diagonal entry */
32: for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j];
33: tmp[i] = s1;
34: }
36: /* backward solve the L^T */
37: for (i=n-1; i>=0; i--) {
38: v = aa + ai[i];
39: vi = aj + ai[i];
40: nz = ai[i+1] - ai[i];
41: s1 = tmp[i];
42: for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j];
43: }
45: /* copy tmp into x according to permutation */
46: for (i=0; i<n; i++) x[i] = tmp[i];
48: VecRestoreArrayRead(bb,&b);
49: VecRestoreArray(xx,&x);
51: PetscLogFlops(2.0*a->nz-A->cmap->n);
52: return(0);
53: }
57: PetscErrorCode MatSolveTranspose_SeqBAIJ_1_NaturalOrdering_inplace(Mat A,Vec bb,Vec xx)
58: {
59: Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data;
60: PetscErrorCode ierr;
61: PetscInt i,nz;
62: const PetscInt *diag = a->diag,n=a->mbs,*vi,*ai=a->i,*aj=a->j;
63: const MatScalar *aa =a->a,*v;
64: PetscScalar s1,*x;
67: VecCopy(bb,xx);
68: VecGetArray(xx,&x);
70: /* forward solve the U^T */
71: for (i=0; i<n; i++) {
73: v = aa + diag[i];
74: /* multiply by the inverse of the block diagonal */
75: s1 = (*v++)*x[i];
76: vi = aj + diag[i] + 1;
77: nz = ai[i+1] - diag[i] - 1;
78: while (nz--) {
79: x[*vi++] -= (*v++)*s1;
80: }
81: x[i] = s1;
82: }
83: /* backward solve the L^T */
84: for (i=n-1; i>=0; i--) {
85: v = aa + diag[i] - 1;
86: vi = aj + diag[i] - 1;
87: nz = diag[i] - ai[i];
88: s1 = x[i];
89: while (nz--) {
90: x[*vi--] -= (*v--)*s1;
91: }
92: }
93: VecRestoreArray(xx,&x);
94: PetscLogFlops(2.0*(a->nz) - A->cmap->n);
95: return(0);
96: }
100: PetscErrorCode MatSolveTranspose_SeqBAIJ_2_NaturalOrdering_inplace(Mat A,Vec bb,Vec xx)
101: {
102: Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data;
103: PetscErrorCode ierr;
104: PetscInt i,nz,idx,idt,oidx;
105: const PetscInt *diag = a->diag,*vi,n=a->mbs,*ai=a->i,*aj=a->j;
106: const MatScalar *aa =a->a,*v;
107: PetscScalar s1,s2,x1,x2,*x;
110: VecCopy(bb,xx);
111: VecGetArray(xx,&x);
113: /* forward solve the U^T */
114: idx = 0;
115: for (i=0; i<n; i++) {
117: v = aa + 4*diag[i];
118: /* multiply by the inverse of the block diagonal */
119: x1 = x[idx]; x2 = x[1+idx];
120: s1 = v[0]*x1 + v[1]*x2;
121: s2 = v[2]*x1 + v[3]*x2;
122: v += 4;
124: vi = aj + diag[i] + 1;
125: nz = ai[i+1] - diag[i] - 1;
126: while (nz--) {
127: oidx = 2*(*vi++);
128: x[oidx] -= v[0]*s1 + v[1]*s2;
129: x[oidx+1] -= v[2]*s1 + v[3]*s2;
130: v += 4;
131: }
132: x[idx] = s1;x[1+idx] = s2;
133: idx += 2;
134: }
135: /* backward solve the L^T */
136: for (i=n-1; i>=0; i--) {
137: v = aa + 4*diag[i] - 4;
138: vi = aj + diag[i] - 1;
139: nz = diag[i] - ai[i];
140: idt = 2*i;
141: s1 = x[idt]; s2 = x[1+idt];
142: while (nz--) {
143: idx = 2*(*vi--);
144: x[idx] -= v[0]*s1 + v[1]*s2;
145: x[idx+1] -= v[2]*s1 + v[3]*s2;
146: v -= 4;
147: }
148: }
149: VecRestoreArray(xx,&x);
150: PetscLogFlops(2.0*4.0*(a->nz) - 2.0*A->cmap->n);
151: return(0);
152: }
156: PetscErrorCode MatSolveTranspose_SeqBAIJ_2_NaturalOrdering(Mat A,Vec bb,Vec xx)
157: {
158: Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data;
159: PetscErrorCode ierr;
160: const PetscInt n=a->mbs,*vi,*ai=a->i,*aj=a->j,*diag=a->diag;
161: PetscInt nz,idx,idt,j,i,oidx;
162: const PetscInt bs =A->rmap->bs,bs2=a->bs2;
163: const MatScalar *aa=a->a,*v;
164: PetscScalar s1,s2,x1,x2,*x;
167: VecCopy(bb,xx);
168: VecGetArray(xx,&x);
170: /* forward solve the U^T */
171: idx = 0;
172: for (i=0; i<n; i++) {
173: v = aa + bs2*diag[i];
174: /* multiply by the inverse of the block diagonal */
175: x1 = x[idx]; x2 = x[1+idx];
176: s1 = v[0]*x1 + v[1]*x2;
177: s2 = v[2]*x1 + v[3]*x2;
178: v -= bs2;
180: vi = aj + diag[i] - 1;
181: nz = diag[i] - diag[i+1] - 1;
182: for (j=0; j>-nz; j--) {
183: oidx = bs*vi[j];
184: x[oidx] -= v[0]*s1 + v[1]*s2;
185: x[oidx+1] -= v[2]*s1 + v[3]*s2;
186: v -= bs2;
187: }
188: x[idx] = s1;x[1+idx] = s2;
189: idx += bs;
190: }
191: /* backward solve the L^T */
192: for (i=n-1; i>=0; i--) {
193: v = aa + bs2*ai[i];
194: vi = aj + ai[i];
195: nz = ai[i+1] - ai[i];
196: idt = bs*i;
197: s1 = x[idt]; s2 = x[1+idt];
198: for (j=0; j<nz; j++) {
199: idx = bs*vi[j];
200: x[idx] -= v[0]*s1 + v[1]*s2;
201: x[idx+1] -= v[2]*s1 + v[3]*s2;
202: v += bs2;
203: }
204: }
205: VecRestoreArray(xx,&x);
206: PetscLogFlops(2.0*bs2*(a->nz) - bs*A->cmap->n);
207: return(0);
208: }
212: PetscErrorCode MatSolveTranspose_SeqBAIJ_3_NaturalOrdering_inplace(Mat A,Vec bb,Vec xx)
213: {
214: Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data;
215: PetscErrorCode ierr;
216: const PetscInt n=a->mbs,*vi,*ai=a->i,*aj=a->j,*diag=a->diag;
217: PetscInt i,nz,idx,idt,oidx;
218: const MatScalar *aa=a->a,*v;
219: PetscScalar s1,s2,s3,x1,x2,x3,*x;
222: VecCopy(bb,xx);
223: VecGetArray(xx,&x);
225: /* forward solve the U^T */
226: idx = 0;
227: for (i=0; i<n; i++) {
229: v = aa + 9*diag[i];
230: /* multiply by the inverse of the block diagonal */
231: x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx];
232: s1 = v[0]*x1 + v[1]*x2 + v[2]*x3;
233: s2 = v[3]*x1 + v[4]*x2 + v[5]*x3;
234: s3 = v[6]*x1 + v[7]*x2 + v[8]*x3;
235: v += 9;
237: vi = aj + diag[i] + 1;
238: nz = ai[i+1] - diag[i] - 1;
239: while (nz--) {
240: oidx = 3*(*vi++);
241: x[oidx] -= v[0]*s1 + v[1]*s2 + v[2]*s3;
242: x[oidx+1] -= v[3]*s1 + v[4]*s2 + v[5]*s3;
243: x[oidx+2] -= v[6]*s1 + v[7]*s2 + v[8]*s3;
244: v += 9;
245: }
246: x[idx] = s1;x[1+idx] = s2; x[2+idx] = s3;
247: idx += 3;
248: }
249: /* backward solve the L^T */
250: for (i=n-1; i>=0; i--) {
251: v = aa + 9*diag[i] - 9;
252: vi = aj + diag[i] - 1;
253: nz = diag[i] - ai[i];
254: idt = 3*i;
255: s1 = x[idt]; s2 = x[1+idt]; s3 = x[2+idt];
256: while (nz--) {
257: idx = 3*(*vi--);
258: x[idx] -= v[0]*s1 + v[1]*s2 + v[2]*s3;
259: x[idx+1] -= v[3]*s1 + v[4]*s2 + v[5]*s3;
260: x[idx+2] -= v[6]*s1 + v[7]*s2 + v[8]*s3;
261: v -= 9;
262: }
263: }
264: VecRestoreArray(xx,&x);
265: PetscLogFlops(2.0*9.0*(a->nz) - 3.0*A->cmap->n);
266: return(0);
267: }
271: PetscErrorCode MatSolveTranspose_SeqBAIJ_3_NaturalOrdering(Mat A,Vec bb,Vec xx)
272: {
273: Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data;
274: PetscErrorCode ierr;
275: const PetscInt n=a->mbs,*vi,*ai=a->i,*aj=a->j,*diag=a->diag;
276: PetscInt nz,idx,idt,j,i,oidx;
277: const PetscInt bs =A->rmap->bs,bs2=a->bs2;
278: const MatScalar *aa=a->a,*v;
279: PetscScalar s1,s2,s3,x1,x2,x3,*x;
282: VecCopy(bb,xx);
283: VecGetArray(xx,&x);
285: /* forward solve the U^T */
286: idx = 0;
287: for (i=0; i<n; i++) {
288: v = aa + bs2*diag[i];
289: /* multiply by the inverse of the block diagonal */
290: x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx];
291: s1 = v[0]*x1 + v[1]*x2 + v[2]*x3;
292: s2 = v[3]*x1 + v[4]*x2 + v[5]*x3;
293: s3 = v[6]*x1 + v[7]*x2 + v[8]*x3;
294: v -= bs2;
296: vi = aj + diag[i] - 1;
297: nz = diag[i] - diag[i+1] - 1;
298: for (j=0; j>-nz; j--) {
299: oidx = bs*vi[j];
300: x[oidx] -= v[0]*s1 + v[1]*s2 + v[2]*s3;
301: x[oidx+1] -= v[3]*s1 + v[4]*s2 + v[5]*s3;
302: x[oidx+2] -= v[6]*s1 + v[7]*s2 + v[8]*s3;
303: v -= bs2;
304: }
305: x[idx] = s1;x[1+idx] = s2; x[2+idx] = s3;
306: idx += bs;
307: }
308: /* backward solve the L^T */
309: for (i=n-1; i>=0; i--) {
310: v = aa + bs2*ai[i];
311: vi = aj + ai[i];
312: nz = ai[i+1] - ai[i];
313: idt = bs*i;
314: s1 = x[idt]; s2 = x[1+idt]; s3 = x[2+idt];
315: for (j=0; j<nz; j++) {
316: idx = bs*vi[j];
317: x[idx] -= v[0]*s1 + v[1]*s2 + v[2]*s3;
318: x[idx+1] -= v[3]*s1 + v[4]*s2 + v[5]*s3;
319: x[idx+2] -= v[6]*s1 + v[7]*s2 + v[8]*s3;
320: v += bs2;
321: }
322: }
323: VecRestoreArray(xx,&x);
324: PetscLogFlops(2.0*bs2*(a->nz) - bs*A->cmap->n);
325: return(0);
326: }
330: PetscErrorCode MatSolveTranspose_SeqBAIJ_4_NaturalOrdering_inplace(Mat A,Vec bb,Vec xx)
331: {
332: Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data;
333: PetscErrorCode ierr;
334: const PetscInt *diag=a->diag,n=a->mbs,*vi,*ai=a->i,*aj=a->j;
335: PetscInt i,nz,idx,idt,oidx;
336: const MatScalar *aa=a->a,*v;
337: PetscScalar s1,s2,s3,s4,x1,x2,x3,x4,*x;
340: VecCopy(bb,xx);
341: VecGetArray(xx,&x);
343: /* forward solve the U^T */
344: idx = 0;
345: for (i=0; i<n; i++) {
347: v = aa + 16*diag[i];
348: /* multiply by the inverse of the block diagonal */
349: x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx];
350: s1 = v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4;
351: s2 = v[4]*x1 + v[5]*x2 + v[6]*x3 + v[7]*x4;
352: s3 = v[8]*x1 + v[9]*x2 + v[10]*x3 + v[11]*x4;
353: s4 = v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4;
354: v += 16;
356: vi = aj + diag[i] + 1;
357: nz = ai[i+1] - diag[i] - 1;
358: while (nz--) {
359: oidx = 4*(*vi++);
360: x[oidx] -= v[0]*s1 + v[1]*s2 + v[2]*s3 + v[3]*s4;
361: x[oidx+1] -= v[4]*s1 + v[5]*s2 + v[6]*s3 + v[7]*s4;
362: x[oidx+2] -= v[8]*s1 + v[9]*s2 + v[10]*s3 + v[11]*s4;
363: x[oidx+3] -= v[12]*s1 + v[13]*s2 + v[14]*s3 + v[15]*s4;
364: v += 16;
365: }
366: x[idx] = s1;x[1+idx] = s2; x[2+idx] = s3;x[3+idx] = s4;
367: idx += 4;
368: }
369: /* backward solve the L^T */
370: for (i=n-1; i>=0; i--) {
371: v = aa + 16*diag[i] - 16;
372: vi = aj + diag[i] - 1;
373: nz = diag[i] - ai[i];
374: idt = 4*i;
375: s1 = x[idt]; s2 = x[1+idt]; s3 = x[2+idt];s4 = x[3+idt];
376: while (nz--) {
377: idx = 4*(*vi--);
378: x[idx] -= v[0]*s1 + v[1]*s2 + v[2]*s3 + v[3]*s4;
379: x[idx+1] -= v[4]*s1 + v[5]*s2 + v[6]*s3 + v[7]*s4;
380: x[idx+2] -= v[8]*s1 + v[9]*s2 + v[10]*s3 + v[11]*s4;
381: x[idx+3] -= v[12]*s1 + v[13]*s2 + v[14]*s3 + v[15]*s4;
382: v -= 16;
383: }
384: }
385: VecRestoreArray(xx,&x);
386: PetscLogFlops(2.0*16*(a->nz) - 4.0*A->cmap->n);
387: return(0);
388: }
392: PetscErrorCode MatSolveTranspose_SeqBAIJ_4_NaturalOrdering(Mat A,Vec bb,Vec xx)
393: {
394: Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data;
395: PetscErrorCode ierr;
396: const PetscInt n=a->mbs,*vi,*ai=a->i,*aj=a->j,*diag=a->diag;
397: PetscInt nz,idx,idt,j,i,oidx;
398: const PetscInt bs =A->rmap->bs,bs2=a->bs2;
399: const MatScalar *aa=a->a,*v;
400: PetscScalar s1,s2,s3,s4,x1,x2,x3,x4,*x;
403: VecCopy(bb,xx);
404: VecGetArray(xx,&x);
406: /* forward solve the U^T */
407: idx = 0;
408: for (i=0; i<n; i++) {
409: v = aa + bs2*diag[i];
410: /* multiply by the inverse of the block diagonal */
411: x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx];
412: s1 = v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4;
413: s2 = v[4]*x1 + v[5]*x2 + v[6]*x3 + v[7]*x4;
414: s3 = v[8]*x1 + v[9]*x2 + v[10]*x3 + v[11]*x4;
415: s4 = v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4;
416: v -= bs2;
418: vi = aj + diag[i] - 1;
419: nz = diag[i] - diag[i+1] - 1;
420: for (j=0; j>-nz; j--) {
421: oidx = bs*vi[j];
422: x[oidx] -= v[0]*s1 + v[1]*s2 + v[2]*s3 + v[3]*s4;
423: x[oidx+1] -= v[4]*s1 + v[5]*s2 + v[6]*s3 + v[7]*s4;
424: x[oidx+2] -= v[8]*s1 + v[9]*s2 + v[10]*s3 + v[11]*s4;
425: x[oidx+3] -= v[12]*s1 + v[13]*s2 + v[14]*s3 + v[15]*s4;
426: v -= bs2;
427: }
428: x[idx] = s1;x[1+idx] = s2; x[2+idx] = s3; x[3+idx] = s4;
429: idx += bs;
430: }
431: /* backward solve the L^T */
432: for (i=n-1; i>=0; i--) {
433: v = aa + bs2*ai[i];
434: vi = aj + ai[i];
435: nz = ai[i+1] - ai[i];
436: idt = bs*i;
437: s1 = x[idt]; s2 = x[1+idt]; s3 = x[2+idt]; s4 = x[3+idt];
438: for (j=0; j<nz; j++) {
439: idx = bs*vi[j];
440: x[idx] -= v[0]*s1 + v[1]*s2 + v[2]*s3 + v[3]*s4;
441: x[idx+1] -= v[4]*s1 + v[5]*s2 + v[6]*s3 + v[7]*s4;
442: x[idx+2] -= v[8]*s1 + v[9]*s2 + v[10]*s3 + v[11]*s4;
443: x[idx+3] -= v[12]*s1 + v[13]*s2 + v[14]*s3 + v[15]*s4;
444: v += bs2;
445: }
446: }
447: VecRestoreArray(xx,&x);
448: PetscLogFlops(2.0*bs2*(a->nz) - bs*A->cmap->n);
449: return(0);
450: }
454: PetscErrorCode MatSolveTranspose_SeqBAIJ_5_NaturalOrdering_inplace(Mat A,Vec bb,Vec xx)
455: {
456: Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data;
457: PetscErrorCode ierr;
458: const PetscInt *diag=a->diag,n=a->mbs,*vi,*ai=a->i,*aj=a->j;
459: PetscInt i,nz,idx,idt,oidx;
460: const MatScalar *aa=a->a,*v;
461: PetscScalar s1,s2,s3,s4,s5,x1,x2,x3,x4,x5,*x;
464: VecCopy(bb,xx);
465: VecGetArray(xx,&x);
467: /* forward solve the U^T */
468: idx = 0;
469: for (i=0; i<n; i++) {
471: v = aa + 25*diag[i];
472: /* multiply by the inverse of the block diagonal */
473: x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx];
474: s1 = v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4 + v[4]*x5;
475: s2 = v[5]*x1 + v[6]*x2 + v[7]*x3 + v[8]*x4 + v[9]*x5;
476: s3 = v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4 + v[14]*x5;
477: s4 = v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4 + v[19]*x5;
478: s5 = v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4 + v[24]*x5;
479: v += 25;
481: vi = aj + diag[i] + 1;
482: nz = ai[i+1] - diag[i] - 1;
483: while (nz--) {
484: oidx = 5*(*vi++);
485: x[oidx] -= v[0]*s1 + v[1]*s2 + v[2]*s3 + v[3]*s4 + v[4]*s5;
486: x[oidx+1] -= v[5]*s1 + v[6]*s2 + v[7]*s3 + v[8]*s4 + v[9]*s5;
487: x[oidx+2] -= v[10]*s1 + v[11]*s2 + v[12]*s3 + v[13]*s4 + v[14]*s5;
488: x[oidx+3] -= v[15]*s1 + v[16]*s2 + v[17]*s3 + v[18]*s4 + v[19]*s5;
489: x[oidx+4] -= v[20]*s1 + v[21]*s2 + v[22]*s3 + v[23]*s4 + v[24]*s5;
490: v += 25;
491: }
492: x[idx] = s1;x[1+idx] = s2; x[2+idx] = s3;x[3+idx] = s4; x[4+idx] = s5;
493: idx += 5;
494: }
495: /* backward solve the L^T */
496: for (i=n-1; i>=0; i--) {
497: v = aa + 25*diag[i] - 25;
498: vi = aj + diag[i] - 1;
499: nz = diag[i] - ai[i];
500: idt = 5*i;
501: s1 = x[idt]; s2 = x[1+idt]; s3 = x[2+idt];s4 = x[3+idt]; s5 = x[4+idt];
502: while (nz--) {
503: idx = 5*(*vi--);
504: x[idx] -= v[0]*s1 + v[1]*s2 + v[2]*s3 + v[3]*s4 + v[4]*s5;
505: x[idx+1] -= v[5]*s1 + v[6]*s2 + v[7]*s3 + v[8]*s4 + v[9]*s5;
506: x[idx+2] -= v[10]*s1 + v[11]*s2 + v[12]*s3 + v[13]*s4 + v[14]*s5;
507: x[idx+3] -= v[15]*s1 + v[16]*s2 + v[17]*s3 + v[18]*s4 + v[19]*s5;
508: x[idx+4] -= v[20]*s1 + v[21]*s2 + v[22]*s3 + v[23]*s4 + v[24]*s5;
509: v -= 25;
510: }
511: }
512: VecRestoreArray(xx,&x);
513: PetscLogFlops(2.0*25*(a->nz) - 5.0*A->cmap->n);
514: return(0);
515: }
519: PetscErrorCode MatSolveTranspose_SeqBAIJ_5_NaturalOrdering(Mat A,Vec bb,Vec xx)
520: {
521: Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data;
522: PetscErrorCode ierr;
523: const PetscInt n=a->mbs,*vi,*ai=a->i,*aj=a->j,*diag=a->diag;
524: PetscInt nz,idx,idt,j,i,oidx;
525: const PetscInt bs =A->rmap->bs,bs2=a->bs2;
526: const MatScalar *aa=a->a,*v;
527: PetscScalar s1,s2,s3,s4,s5,x1,x2,x3,x4,x5,*x;
530: VecCopy(bb,xx);
531: VecGetArray(xx,&x);
533: /* forward solve the U^T */
534: idx = 0;
535: for (i=0; i<n; i++) {
536: v = aa + bs2*diag[i];
537: /* multiply by the inverse of the block diagonal */
538: x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx];
539: x5 = x[4+idx];
540: s1 = v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4 + v[4]*x5;
541: s2 = v[5]*x1 + v[6]*x2 + v[7]*x3 + v[8]*x4 + v[9]*x5;
542: s3 = v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4 + v[14]*x5;
543: s4 = v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4 + v[19]*x5;
544: s5 = v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4 + v[24]*x5;
545: v -= bs2;
547: vi = aj + diag[i] - 1;
548: nz = diag[i] - diag[i+1] - 1;
549: for (j=0; j>-nz; j--) {
550: oidx = bs*vi[j];
551: x[oidx] -= v[0]*s1 + v[1]*s2 + v[2]*s3 + v[3]*s4 + v[4]*s5;
552: x[oidx+1] -= v[5]*s1 + v[6]*s2 + v[7]*s3 + v[8]*s4 + v[9]*s5;
553: x[oidx+2] -= v[10]*s1 + v[11]*s2 + v[12]*s3 + v[13]*s4 + v[14]*s5;
554: x[oidx+3] -= v[15]*s1 + v[16]*s2 + v[17]*s3 + v[18]*s4 + v[19]*s5;
555: x[oidx+4] -= v[20]*s1 + v[21]*s2 + v[22]*s3 + v[23]*s4 + v[24]*s5;
556: v -= bs2;
557: }
558: x[idx] = s1;x[1+idx] = s2; x[2+idx] = s3; x[3+idx] = s4; x[4+idx] = s5;
559: idx += bs;
560: }
561: /* backward solve the L^T */
562: for (i=n-1; i>=0; i--) {
563: v = aa + bs2*ai[i];
564: vi = aj + ai[i];
565: nz = ai[i+1] - ai[i];
566: idt = bs*i;
567: s1 = x[idt]; s2 = x[1+idt]; s3 = x[2+idt]; s4 = x[3+idt]; s5 = x[4+idt];
568: for (j=0; j<nz; j++) {
569: idx = bs*vi[j];
570: x[idx] -= v[0]*s1 + v[1]*s2 + v[2]*s3 + v[3]*s4 + v[4]*s5;
571: x[idx+1] -= v[5]*s1 + v[6]*s2 + v[7]*s3 + v[8]*s4 + v[9]*s5;
572: x[idx+2] -= v[10]*s1 + v[11]*s2 + v[12]*s3 + v[13]*s4 + v[14]*s5;
573: x[idx+3] -= v[15]*s1 + v[16]*s2 + v[17]*s3 + v[18]*s4 + v[19]*s5;
574: x[idx+4] -= v[20]*s1 + v[21]*s2 + v[22]*s3 + v[23]*s4 + v[24]*s5;
575: v += bs2;
576: }
577: }
578: VecRestoreArray(xx,&x);
579: PetscLogFlops(2.0*bs2*(a->nz) - bs*A->cmap->n);
580: return(0);
581: }
585: PetscErrorCode MatSolveTranspose_SeqBAIJ_6_NaturalOrdering_inplace(Mat A,Vec bb,Vec xx)
586: {
587: Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data;
588: PetscErrorCode ierr;
589: const PetscInt *diag=a->diag,n=a->mbs,*vi,*ai=a->i,*aj=a->j;
590: PetscInt i,nz,idx,idt,oidx;
591: const MatScalar *aa=a->a,*v;
592: PetscScalar s1,s2,s3,s4,s5,s6,x1,x2,x3,x4,x5,x6,*x;
595: VecCopy(bb,xx);
596: VecGetArray(xx,&x);
598: /* forward solve the U^T */
599: idx = 0;
600: for (i=0; i<n; i++) {
602: v = aa + 36*diag[i];
603: /* multiply by the inverse of the block diagonal */
604: x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx];
605: x6 = x[5+idx];
606: s1 = v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4 + v[4]*x5 + v[5]*x6;
607: s2 = v[6]*x1 + v[7]*x2 + v[8]*x3 + v[9]*x4 + v[10]*x5 + v[11]*x6;
608: s3 = v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4 + v[16]*x5 + v[17]*x6;
609: s4 = v[18]*x1 + v[19]*x2 + v[20]*x3 + v[21]*x4 + v[22]*x5 + v[23]*x6;
610: s5 = v[24]*x1 + v[25]*x2 + v[26]*x3 + v[27]*x4 + v[28]*x5 + v[29]*x6;
611: s6 = v[30]*x1 + v[31]*x2 + v[32]*x3 + v[33]*x4 + v[34]*x5 + v[35]*x6;
612: v += 36;
614: vi = aj + diag[i] + 1;
615: nz = ai[i+1] - diag[i] - 1;
616: while (nz--) {
617: oidx = 6*(*vi++);
618: x[oidx] -= v[0]*s1 + v[1]*s2 + v[2]*s3 + v[3]*s4 + v[4]*s5 + v[5]*s6;
619: x[oidx+1] -= v[6]*s1 + v[7]*s2 + v[8]*s3 + v[9]*s4 + v[10]*s5 + v[11]*s6;
620: x[oidx+2] -= v[12]*s1 + v[13]*s2 + v[14]*s3 + v[15]*s4 + v[16]*s5 + v[17]*s6;
621: x[oidx+3] -= v[18]*s1 + v[19]*s2 + v[20]*s3 + v[21]*s4 + v[22]*s5 + v[23]*s6;
622: x[oidx+4] -= v[24]*s1 + v[25]*s2 + v[26]*s3 + v[27]*s4 + v[28]*s5 + v[29]*s6;
623: x[oidx+5] -= v[30]*s1 + v[31]*s2 + v[32]*s3 + v[33]*s4 + v[34]*s5 + v[35]*s6;
624: v += 36;
625: }
626: x[idx] = s1;x[1+idx] = s2; x[2+idx] = s3;x[3+idx] = s4; x[4+idx] = s5;
627: x[5+idx] = s6;
628: idx += 6;
629: }
630: /* backward solve the L^T */
631: for (i=n-1; i>=0; i--) {
632: v = aa + 36*diag[i] - 36;
633: vi = aj + diag[i] - 1;
634: nz = diag[i] - ai[i];
635: idt = 6*i;
636: s1 = x[idt]; s2 = x[1+idt]; s3 = x[2+idt];s4 = x[3+idt]; s5 = x[4+idt];
637: s6 = x[5+idt];
638: while (nz--) {
639: idx = 6*(*vi--);
640: x[idx] -= v[0]*s1 + v[1]*s2 + v[2]*s3 + v[3]*s4 + v[4]*s5 + v[5]*s6;
641: x[idx+1] -= v[6]*s1 + v[7]*s2 + v[8]*s3 + v[9]*s4 + v[10]*s5 + v[11]*s6;
642: x[idx+2] -= v[12]*s1 + v[13]*s2 + v[14]*s3 + v[15]*s4 + v[16]*s5 + v[17]*s6;
643: x[idx+3] -= v[18]*s1 + v[19]*s2 + v[20]*s3 + v[21]*s4 + v[22]*s5 + v[23]*s6;
644: x[idx+4] -= v[24]*s1 + v[25]*s2 + v[26]*s3 + v[27]*s4 + v[28]*s5 + v[29]*s6;
645: x[idx+5] -= v[30]*s1 + v[31]*s2 + v[32]*s3 + v[33]*s4 + v[34]*s5 + v[35]*s6;
646: v -= 36;
647: }
648: }
649: VecRestoreArray(xx,&x);
650: PetscLogFlops(2.0*36*(a->nz) - 6.0*A->cmap->n);
651: return(0);
652: }
656: PetscErrorCode MatSolveTranspose_SeqBAIJ_6_NaturalOrdering(Mat A,Vec bb,Vec xx)
657: {
658: Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data;
659: PetscErrorCode ierr;
660: const PetscInt n=a->mbs,*vi,*ai=a->i,*aj=a->j,*diag=a->diag;
661: PetscInt nz,idx,idt,j,i,oidx;
662: const PetscInt bs =A->rmap->bs,bs2=a->bs2;
663: const MatScalar *aa=a->a,*v;
664: PetscScalar s1,s2,s3,s4,s5,s6,x1,x2,x3,x4,x5,x6,*x;
667: VecCopy(bb,xx);
668: VecGetArray(xx,&x);
670: /* forward solve the U^T */
671: idx = 0;
672: for (i=0; i<n; i++) {
673: v = aa + bs2*diag[i];
674: /* multiply by the inverse of the block diagonal */
675: x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx];
676: x5 = x[4+idx]; x6 = x[5+idx];
677: s1 = v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4 + v[4]*x5 + v[5]*x6;
678: s2 = v[6]*x1 + v[7]*x2 + v[8]*x3 + v[9]*x4 + v[10]*x5 + v[11]*x6;
679: s3 = v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4 + v[16]*x5 + v[17]*x6;
680: s4 = v[18]*x1 + v[19]*x2 + v[20]*x3 + v[21]*x4 + v[22]*x5 + v[23]*x6;
681: s5 = v[24]*x1 + v[25]*x2 + v[26]*x3 + v[27]*x4 + v[28]*x5 + v[29]*x6;
682: s6 = v[30]*x1 + v[31]*x2 + v[32]*x3 + v[33]*x4 + v[34]*x5 + v[35]*x6;
683: v -= bs2;
685: vi = aj + diag[i] - 1;
686: nz = diag[i] - diag[i+1] - 1;
687: for (j=0; j>-nz; j--) {
688: oidx = bs*vi[j];
689: x[oidx] -= v[0]*s1 + v[1]*s2 + v[2]*s3 + v[3]*s4 + v[4]*s5 + v[5]*s6;
690: x[oidx+1] -= v[6]*s1 + v[7]*s2 + v[8]*s3 + v[9]*s4 + v[10]*s5 + v[11]*s6;
691: x[oidx+2] -= v[12]*s1 + v[13]*s2 + v[14]*s3 + v[15]*s4 + v[16]*s5 + v[17]*s6;
692: x[oidx+3] -= v[18]*s1 + v[19]*s2 + v[20]*s3 + v[21]*s4 + v[22]*s5 + v[23]*s6;
693: x[oidx+4] -= v[24]*s1 + v[25]*s2 + v[26]*s3 + v[27]*s4 + v[28]*s5 + v[29]*s6;
694: x[oidx+5] -= v[30]*s1 + v[31]*s2 + v[32]*s3 + v[33]*s4 + v[34]*s5 + v[35]*s6;
695: v -= bs2;
696: }
697: x[idx] = s1;x[1+idx] = s2; x[2+idx] = s3; x[3+idx] = s4; x[4+idx] = s5;
698: x[5+idx] = s6;
699: idx += bs;
700: }
701: /* backward solve the L^T */
702: for (i=n-1; i>=0; i--) {
703: v = aa + bs2*ai[i];
704: vi = aj + ai[i];
705: nz = ai[i+1] - ai[i];
706: idt = bs*i;
707: s1 = x[idt]; s2 = x[1+idt]; s3 = x[2+idt]; s4 = x[3+idt]; s5 = x[4+idt];
708: s6 = x[5+idt];
709: for (j=0; j<nz; j++) {
710: idx = bs*vi[j];
711: x[idx] -= v[0]*s1 + v[1]*s2 + v[2]*s3 + v[3]*s4 + v[4]*s5 + v[5]*s6;
712: x[idx+1] -= v[6]*s1 + v[7]*s2 + v[8]*s3 + v[9]*s4 + v[10]*s5 + v[11]*s6;
713: x[idx+2] -= v[12]*s1 + v[13]*s2 + v[14]*s3 + v[15]*s4 + v[16]*s5 + v[17]*s6;
714: x[idx+3] -= v[18]*s1 + v[19]*s2 + v[20]*s3 + v[21]*s4 + v[22]*s5 + v[23]*s6;
715: x[idx+4] -= v[24]*s1 + v[25]*s2 + v[26]*s3 + v[27]*s4 + v[28]*s5 + v[29]*s6;
716: x[idx+5] -= v[30]*s1 + v[31]*s2 + v[32]*s3 + v[33]*s4 + v[34]*s5 + v[35]*s6;
717: v += bs2;
718: }
719: }
720: VecRestoreArray(xx,&x);
721: PetscLogFlops(2.0*bs2*(a->nz) - bs*A->cmap->n);
722: return(0);
723: }
727: PetscErrorCode MatSolveTranspose_SeqBAIJ_7_NaturalOrdering_inplace(Mat A,Vec bb,Vec xx)
728: {
729: Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data;
730: PetscErrorCode ierr;
731: const PetscInt *diag=a->diag,n=a->mbs,*vi,*ai=a->i,*aj=a->j;
732: PetscInt i,nz,idx,idt,oidx;
733: const MatScalar *aa=a->a,*v;
734: PetscScalar s1,s2,s3,s4,s5,s6,s7,x1,x2,x3,x4,x5,x6,x7,*x;
737: VecCopy(bb,xx);
738: VecGetArray(xx,&x);
740: /* forward solve the U^T */
741: idx = 0;
742: for (i=0; i<n; i++) {
744: v = aa + 49*diag[i];
745: /* multiply by the inverse of the block diagonal */
746: x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx];
747: x6 = x[5+idx]; x7 = x[6+idx];
748: s1 = v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4 + v[4]*x5 + v[5]*x6 + v[6]*x7;
749: s2 = v[7]*x1 + v[8]*x2 + v[9]*x3 + v[10]*x4 + v[11]*x5 + v[12]*x6 + v[13]*x7;
750: s3 = v[14]*x1 + v[15]*x2 + v[16]*x3 + v[17]*x4 + v[18]*x5 + v[19]*x6 + v[20]*x7;
751: s4 = v[21]*x1 + v[22]*x2 + v[23]*x3 + v[24]*x4 + v[25]*x5 + v[26]*x6 + v[27]*x7;
752: s5 = v[28]*x1 + v[29]*x2 + v[30]*x3 + v[31]*x4 + v[32]*x5 + v[33]*x6 + v[34]*x7;
753: s6 = v[35]*x1 + v[36]*x2 + v[37]*x3 + v[38]*x4 + v[39]*x5 + v[40]*x6 + v[41]*x7;
754: s7 = v[42]*x1 + v[43]*x2 + v[44]*x3 + v[45]*x4 + v[46]*x5 + v[47]*x6 + v[48]*x7;
755: v += 49;
757: vi = aj + diag[i] + 1;
758: nz = ai[i+1] - diag[i] - 1;
759: while (nz--) {
760: oidx = 7*(*vi++);
761: x[oidx] -= v[0]*s1 + v[1]*s2 + v[2]*s3 + v[3]*s4 + v[4]*s5 + v[5]*s6 + v[6]*s7;
762: x[oidx+1] -= v[7]*s1 + v[8]*s2 + v[9]*s3 + v[10]*s4 + v[11]*s5 + v[12]*s6 + v[13]*s7;
763: x[oidx+2] -= v[14]*s1 + v[15]*s2 + v[16]*s3 + v[17]*s4 + v[18]*s5 + v[19]*s6 + v[20]*s7;
764: x[oidx+3] -= v[21]*s1 + v[22]*s2 + v[23]*s3 + v[24]*s4 + v[25]*s5 + v[26]*s6 + v[27]*s7;
765: x[oidx+4] -= v[28]*s1 + v[29]*s2 + v[30]*s3 + v[31]*s4 + v[32]*s5 + v[33]*s6 + v[34]*s7;
766: x[oidx+5] -= v[35]*s1 + v[36]*s2 + v[37]*s3 + v[38]*s4 + v[39]*s5 + v[40]*s6 + v[41]*s7;
767: x[oidx+6] -= v[42]*s1 + v[43]*s2 + v[44]*s3 + v[45]*s4 + v[46]*s5 + v[47]*s6 + v[48]*s7;
768: v += 49;
769: }
770: x[idx] = s1;x[1+idx] = s2; x[2+idx] = s3;x[3+idx] = s4; x[4+idx] = s5;
771: x[5+idx] = s6;x[6+idx] = s7;
772: idx += 7;
773: }
774: /* backward solve the L^T */
775: for (i=n-1; i>=0; i--) {
776: v = aa + 49*diag[i] - 49;
777: vi = aj + diag[i] - 1;
778: nz = diag[i] - ai[i];
779: idt = 7*i;
780: s1 = x[idt]; s2 = x[1+idt]; s3 = x[2+idt];s4 = x[3+idt]; s5 = x[4+idt];
781: s6 = x[5+idt];s7 = x[6+idt];
782: while (nz--) {
783: idx = 7*(*vi--);
784: x[idx] -= v[0]*s1 + v[1]*s2 + v[2]*s3 + v[3]*s4 + v[4]*s5 + v[5]*s6 + v[6]*s7;
785: x[idx+1] -= v[7]*s1 + v[8]*s2 + v[9]*s3 + v[10]*s4 + v[11]*s5 + v[12]*s6 + v[13]*s7;
786: x[idx+2] -= v[14]*s1 + v[15]*s2 + v[16]*s3 + v[17]*s4 + v[18]*s5 + v[19]*s6 + v[20]*s7;
787: x[idx+3] -= v[21]*s1 + v[22]*s2 + v[23]*s3 + v[24]*s4 + v[25]*s5 + v[26]*s6 + v[27]*s7;
788: x[idx+4] -= v[28]*s1 + v[29]*s2 + v[30]*s3 + v[31]*s4 + v[32]*s5 + v[33]*s6 + v[34]*s7;
789: x[idx+5] -= v[35]*s1 + v[36]*s2 + v[37]*s3 + v[38]*s4 + v[39]*s5 + v[40]*s6 + v[41]*s7;
790: x[idx+6] -= v[42]*s1 + v[43]*s2 + v[44]*s3 + v[45]*s4 + v[46]*s5 + v[47]*s6 + v[48]*s7;
791: v -= 49;
792: }
793: }
794: VecRestoreArray(xx,&x);
795: PetscLogFlops(2.0*49*(a->nz) - 7.0*A->cmap->n);
796: return(0);
797: }
800: PetscErrorCode MatSolveTranspose_SeqBAIJ_7_NaturalOrdering(Mat A,Vec bb,Vec xx)
801: {
802: Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data;
803: PetscErrorCode ierr;
804: const PetscInt n=a->mbs,*vi,*ai=a->i,*aj=a->j,*diag=a->diag;
805: PetscInt nz,idx,idt,j,i,oidx;
806: const PetscInt bs =A->rmap->bs,bs2=a->bs2;
807: const MatScalar *aa=a->a,*v;
808: PetscScalar s1,s2,s3,s4,s5,s6,s7,x1,x2,x3,x4,x5,x6,x7,*x;
811: VecCopy(bb,xx);
812: VecGetArray(xx,&x);
814: /* forward solve the U^T */
815: idx = 0;
816: for (i=0; i<n; i++) {
817: v = aa + bs2*diag[i];
818: /* multiply by the inverse of the block diagonal */
819: x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx];
820: x5 = x[4+idx]; x6 = x[5+idx]; x7 = x[6+idx];
821: s1 = v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4 + v[4]*x5 + v[5]*x6 + v[6]*x7;
822: s2 = v[7]*x1 + v[8]*x2 + v[9]*x3 + v[10]*x4 + v[11]*x5 + v[12]*x6 + v[13]*x7;
823: s3 = v[14]*x1 + v[15]*x2 + v[16]*x3 + v[17]*x4 + v[18]*x5 + v[19]*x6 + v[20]*x7;
824: s4 = v[21]*x1 + v[22]*x2 + v[23]*x3 + v[24]*x4 + v[25]*x5 + v[26]*x6 + v[27]*x7;
825: s5 = v[28]*x1 + v[29]*x2 + v[30]*x3 + v[31]*x4 + v[32]*x5 + v[33]*x6 + v[34]*x7;
826: s6 = v[35]*x1 + v[36]*x2 + v[37]*x3 + v[38]*x4 + v[39]*x5 + v[40]*x6 + v[41]*x7;
827: s7 = v[42]*x1 + v[43]*x2 + v[44]*x3 + v[45]*x4 + v[46]*x5 + v[47]*x6 + v[48]*x7;
828: v -= bs2;
829: vi = aj + diag[i] - 1;
830: nz = diag[i] - diag[i+1] - 1;
831: for (j=0; j>-nz; j--) {
832: oidx = bs*vi[j];
833: x[oidx] -= v[0]*s1 + v[1]*s2 + v[2]*s3 + v[3]*s4 + v[4]*s5 + v[5]*s6 + v[6]*s7;
834: x[oidx+1] -= v[7]*s1 + v[8]*s2 + v[9]*s3 + v[10]*s4 + v[11]*s5 + v[12]*s6 + v[13]*s7;
835: x[oidx+2] -= v[14]*s1 + v[15]*s2 + v[16]*s3 + v[17]*s4 + v[18]*s5 + v[19]*s6 + v[20]*s7;
836: x[oidx+3] -= v[21]*s1 + v[22]*s2 + v[23]*s3 + v[24]*s4 + v[25]*s5 + v[26]*s6 + v[27]*s7;
837: x[oidx+4] -= v[28]*s1 + v[29]*s2 + v[30]*s3 + v[31]*s4 + v[32]*s5 + v[33]*s6 + v[34]*s7;
838: x[oidx+5] -= v[35]*s1 + v[36]*s2 + v[37]*s3 + v[38]*s4 + v[39]*s5 + v[40]*s6 + v[41]*s7;
839: x[oidx+6] -= v[42]*s1 + v[43]*s2 + v[44]*s3 + v[45]*s4 + v[46]*s5 + v[47]*s6 + v[48]*s7;
840: v -= bs2;
841: }
842: x[idx] = s1; x[1+idx] = s2; x[2+idx] = s3; x[3+idx] = s4; x[4+idx] = s5;
843: x[5+idx] = s6; x[6+idx] = s7;
844: idx += bs;
845: }
846: /* backward solve the L^T */
847: for (i=n-1; i>=0; i--) {
848: v = aa + bs2*ai[i];
849: vi = aj + ai[i];
850: nz = ai[i+1] - ai[i];
851: idt = bs*i;
852: s1 = x[idt]; s2 = x[1+idt]; s3 = x[2+idt]; s4 = x[3+idt]; s5 = x[4+idt];
853: s6 = x[5+idt]; s7 = x[6+idt];
854: for (j=0; j<nz; j++) {
855: idx = bs*vi[j];
856: x[idx] -= v[0]*s1 + v[1]*s2 + v[2]*s3 + v[3]*s4 + v[4]*s5 + v[5]*s6 + v[6]*s7;
857: x[idx+1] -= v[7]*s1 + v[8]*s2 + v[9]*s3 + v[10]*s4 + v[11]*s5 + v[12]*s6 + v[13]*s7;
858: x[idx+2] -= v[14]*s1 + v[15]*s2 + v[16]*s3 + v[17]*s4 + v[18]*s5 + v[19]*s6 + v[20]*s7;
859: x[idx+3] -= v[21]*s1 + v[22]*s2 + v[23]*s3 + v[24]*s4 + v[25]*s5 + v[26]*s6 + v[27]*s7;
860: x[idx+4] -= v[28]*s1 + v[29]*s2 + v[30]*s3 + v[31]*s4 + v[32]*s5 + v[33]*s6 + v[34]*s7;
861: x[idx+5] -= v[35]*s1 + v[36]*s2 + v[37]*s3 + v[38]*s4 + v[39]*s5 + v[40]*s6 + v[41]*s7;
862: x[idx+6] -= v[42]*s1 + v[43]*s2 + v[44]*s3 + v[45]*s4 + v[46]*s5 + v[47]*s6 + v[48]*s7;
863: v += bs2;
864: }
865: }
866: VecRestoreArray(xx,&x);
867: PetscLogFlops(2.0*bs2*(a->nz) - bs*A->cmap->n);
868: return(0);
869: }