Actual source code: baijsolvtrannat.c
petsc-3.9.4 2018-09-11
1: #include <../src/mat/impls/baij/seq/baij.h>
3: PetscErrorCode MatSolveTranspose_SeqBAIJ_1_NaturalOrdering(Mat A,Vec bb,Vec xx)
4: {
5: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
6: PetscErrorCode ierr;
7: const PetscInt *adiag = a->diag,*ai = a->i,*aj = a->j,*vi;
8: PetscInt i,n = a->mbs,j;
9: PetscInt nz;
10: PetscScalar *x,*tmp,s1;
11: const MatScalar *aa = a->a,*v;
12: const PetscScalar *b;
15: VecGetArrayRead(bb,&b);
16: VecGetArray(xx,&x);
17: tmp = a->solve_work;
20: /* copy the b into temp work space according to permutation */
21: for (i=0; i<n; i++) tmp[i] = b[i];
23: /* forward solve the U^T */
24: for (i=0; i<n; i++) {
25: v = aa + adiag[i+1] + 1;
26: vi = aj + adiag[i+1] + 1;
27: nz = adiag[i] - adiag[i+1] - 1;
28: s1 = tmp[i];
29: s1 *= v[nz]; /* multiply by inverse of diagonal entry */
30: for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j];
31: tmp[i] = s1;
32: }
34: /* backward solve the L^T */
35: for (i=n-1; i>=0; i--) {
36: v = aa + ai[i];
37: vi = aj + ai[i];
38: nz = ai[i+1] - ai[i];
39: s1 = tmp[i];
40: for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j];
41: }
43: /* copy tmp into x according to permutation */
44: for (i=0; i<n; i++) x[i] = tmp[i];
46: VecRestoreArrayRead(bb,&b);
47: VecRestoreArray(xx,&x);
49: PetscLogFlops(2.0*a->nz-A->cmap->n);
50: return(0);
51: }
53: PetscErrorCode MatSolveTranspose_SeqBAIJ_1_NaturalOrdering_inplace(Mat A,Vec bb,Vec xx)
54: {
55: Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data;
56: PetscErrorCode ierr;
57: PetscInt i,nz;
58: const PetscInt *diag = a->diag,n=a->mbs,*vi,*ai=a->i,*aj=a->j;
59: const MatScalar *aa =a->a,*v;
60: PetscScalar s1,*x;
63: VecCopy(bb,xx);
64: VecGetArray(xx,&x);
66: /* forward solve the U^T */
67: for (i=0; i<n; i++) {
69: v = aa + diag[i];
70: /* multiply by the inverse of the block diagonal */
71: s1 = (*v++)*x[i];
72: vi = aj + diag[i] + 1;
73: nz = ai[i+1] - diag[i] - 1;
74: while (nz--) {
75: x[*vi++] -= (*v++)*s1;
76: }
77: x[i] = s1;
78: }
79: /* backward solve the L^T */
80: for (i=n-1; i>=0; i--) {
81: v = aa + diag[i] - 1;
82: vi = aj + diag[i] - 1;
83: nz = diag[i] - ai[i];
84: s1 = x[i];
85: while (nz--) {
86: x[*vi--] -= (*v--)*s1;
87: }
88: }
89: VecRestoreArray(xx,&x);
90: PetscLogFlops(2.0*(a->nz) - A->cmap->n);
91: return(0);
92: }
94: PetscErrorCode MatSolveTranspose_SeqBAIJ_2_NaturalOrdering_inplace(Mat A,Vec bb,Vec xx)
95: {
96: Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data;
97: PetscErrorCode ierr;
98: PetscInt i,nz,idx,idt,oidx;
99: const PetscInt *diag = a->diag,*vi,n=a->mbs,*ai=a->i,*aj=a->j;
100: const MatScalar *aa =a->a,*v;
101: PetscScalar s1,s2,x1,x2,*x;
104: VecCopy(bb,xx);
105: VecGetArray(xx,&x);
107: /* forward solve the U^T */
108: idx = 0;
109: for (i=0; i<n; i++) {
111: v = aa + 4*diag[i];
112: /* multiply by the inverse of the block diagonal */
113: x1 = x[idx]; x2 = x[1+idx];
114: s1 = v[0]*x1 + v[1]*x2;
115: s2 = v[2]*x1 + v[3]*x2;
116: v += 4;
118: vi = aj + diag[i] + 1;
119: nz = ai[i+1] - diag[i] - 1;
120: while (nz--) {
121: oidx = 2*(*vi++);
122: x[oidx] -= v[0]*s1 + v[1]*s2;
123: x[oidx+1] -= v[2]*s1 + v[3]*s2;
124: v += 4;
125: }
126: x[idx] = s1;x[1+idx] = s2;
127: idx += 2;
128: }
129: /* backward solve the L^T */
130: for (i=n-1; i>=0; i--) {
131: v = aa + 4*diag[i] - 4;
132: vi = aj + diag[i] - 1;
133: nz = diag[i] - ai[i];
134: idt = 2*i;
135: s1 = x[idt]; s2 = x[1+idt];
136: while (nz--) {
137: idx = 2*(*vi--);
138: x[idx] -= v[0]*s1 + v[1]*s2;
139: x[idx+1] -= v[2]*s1 + v[3]*s2;
140: v -= 4;
141: }
142: }
143: VecRestoreArray(xx,&x);
144: PetscLogFlops(2.0*4.0*(a->nz) - 2.0*A->cmap->n);
145: return(0);
146: }
148: PetscErrorCode MatSolveTranspose_SeqBAIJ_2_NaturalOrdering(Mat A,Vec bb,Vec xx)
149: {
150: Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data;
151: PetscErrorCode ierr;
152: const PetscInt n=a->mbs,*vi,*ai=a->i,*aj=a->j,*diag=a->diag;
153: PetscInt nz,idx,idt,j,i,oidx;
154: const PetscInt bs =A->rmap->bs,bs2=a->bs2;
155: const MatScalar *aa=a->a,*v;
156: PetscScalar s1,s2,x1,x2,*x;
159: VecCopy(bb,xx);
160: VecGetArray(xx,&x);
162: /* forward solve the U^T */
163: idx = 0;
164: for (i=0; i<n; i++) {
165: v = aa + bs2*diag[i];
166: /* multiply by the inverse of the block diagonal */
167: x1 = x[idx]; x2 = x[1+idx];
168: s1 = v[0]*x1 + v[1]*x2;
169: s2 = v[2]*x1 + v[3]*x2;
170: v -= bs2;
172: vi = aj + diag[i] - 1;
173: nz = diag[i] - diag[i+1] - 1;
174: for (j=0; j>-nz; j--) {
175: oidx = bs*vi[j];
176: x[oidx] -= v[0]*s1 + v[1]*s2;
177: x[oidx+1] -= v[2]*s1 + v[3]*s2;
178: v -= bs2;
179: }
180: x[idx] = s1;x[1+idx] = s2;
181: idx += bs;
182: }
183: /* backward solve the L^T */
184: for (i=n-1; i>=0; i--) {
185: v = aa + bs2*ai[i];
186: vi = aj + ai[i];
187: nz = ai[i+1] - ai[i];
188: idt = bs*i;
189: s1 = x[idt]; s2 = x[1+idt];
190: for (j=0; j<nz; j++) {
191: idx = bs*vi[j];
192: x[idx] -= v[0]*s1 + v[1]*s2;
193: x[idx+1] -= v[2]*s1 + v[3]*s2;
194: v += bs2;
195: }
196: }
197: VecRestoreArray(xx,&x);
198: PetscLogFlops(2.0*bs2*(a->nz) - bs*A->cmap->n);
199: return(0);
200: }
202: PetscErrorCode MatSolveTranspose_SeqBAIJ_3_NaturalOrdering_inplace(Mat A,Vec bb,Vec xx)
203: {
204: Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data;
205: PetscErrorCode ierr;
206: const PetscInt n=a->mbs,*vi,*ai=a->i,*aj=a->j,*diag=a->diag;
207: PetscInt i,nz,idx,idt,oidx;
208: const MatScalar *aa=a->a,*v;
209: PetscScalar s1,s2,s3,x1,x2,x3,*x;
212: VecCopy(bb,xx);
213: VecGetArray(xx,&x);
215: /* forward solve the U^T */
216: idx = 0;
217: for (i=0; i<n; i++) {
219: v = aa + 9*diag[i];
220: /* multiply by the inverse of the block diagonal */
221: x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx];
222: s1 = v[0]*x1 + v[1]*x2 + v[2]*x3;
223: s2 = v[3]*x1 + v[4]*x2 + v[5]*x3;
224: s3 = v[6]*x1 + v[7]*x2 + v[8]*x3;
225: v += 9;
227: vi = aj + diag[i] + 1;
228: nz = ai[i+1] - diag[i] - 1;
229: while (nz--) {
230: oidx = 3*(*vi++);
231: x[oidx] -= v[0]*s1 + v[1]*s2 + v[2]*s3;
232: x[oidx+1] -= v[3]*s1 + v[4]*s2 + v[5]*s3;
233: x[oidx+2] -= v[6]*s1 + v[7]*s2 + v[8]*s3;
234: v += 9;
235: }
236: x[idx] = s1;x[1+idx] = s2; x[2+idx] = s3;
237: idx += 3;
238: }
239: /* backward solve the L^T */
240: for (i=n-1; i>=0; i--) {
241: v = aa + 9*diag[i] - 9;
242: vi = aj + diag[i] - 1;
243: nz = diag[i] - ai[i];
244: idt = 3*i;
245: s1 = x[idt]; s2 = x[1+idt]; s3 = x[2+idt];
246: while (nz--) {
247: idx = 3*(*vi--);
248: x[idx] -= v[0]*s1 + v[1]*s2 + v[2]*s3;
249: x[idx+1] -= v[3]*s1 + v[4]*s2 + v[5]*s3;
250: x[idx+2] -= v[6]*s1 + v[7]*s2 + v[8]*s3;
251: v -= 9;
252: }
253: }
254: VecRestoreArray(xx,&x);
255: PetscLogFlops(2.0*9.0*(a->nz) - 3.0*A->cmap->n);
256: return(0);
257: }
259: PetscErrorCode MatSolveTranspose_SeqBAIJ_3_NaturalOrdering(Mat A,Vec bb,Vec xx)
260: {
261: Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data;
262: PetscErrorCode ierr;
263: const PetscInt n=a->mbs,*vi,*ai=a->i,*aj=a->j,*diag=a->diag;
264: PetscInt nz,idx,idt,j,i,oidx;
265: const PetscInt bs =A->rmap->bs,bs2=a->bs2;
266: const MatScalar *aa=a->a,*v;
267: PetscScalar s1,s2,s3,x1,x2,x3,*x;
270: VecCopy(bb,xx);
271: VecGetArray(xx,&x);
273: /* forward solve the U^T */
274: idx = 0;
275: for (i=0; i<n; i++) {
276: v = aa + bs2*diag[i];
277: /* multiply by the inverse of the block diagonal */
278: x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx];
279: s1 = v[0]*x1 + v[1]*x2 + v[2]*x3;
280: s2 = v[3]*x1 + v[4]*x2 + v[5]*x3;
281: s3 = v[6]*x1 + v[7]*x2 + v[8]*x3;
282: v -= bs2;
284: vi = aj + diag[i] - 1;
285: nz = diag[i] - diag[i+1] - 1;
286: for (j=0; j>-nz; j--) {
287: oidx = bs*vi[j];
288: x[oidx] -= v[0]*s1 + v[1]*s2 + v[2]*s3;
289: x[oidx+1] -= v[3]*s1 + v[4]*s2 + v[5]*s3;
290: x[oidx+2] -= v[6]*s1 + v[7]*s2 + v[8]*s3;
291: v -= bs2;
292: }
293: x[idx] = s1;x[1+idx] = s2; x[2+idx] = s3;
294: idx += bs;
295: }
296: /* backward solve the L^T */
297: for (i=n-1; i>=0; i--) {
298: v = aa + bs2*ai[i];
299: vi = aj + ai[i];
300: nz = ai[i+1] - ai[i];
301: idt = bs*i;
302: s1 = x[idt]; s2 = x[1+idt]; s3 = x[2+idt];
303: for (j=0; j<nz; j++) {
304: idx = bs*vi[j];
305: x[idx] -= v[0]*s1 + v[1]*s2 + v[2]*s3;
306: x[idx+1] -= v[3]*s1 + v[4]*s2 + v[5]*s3;
307: x[idx+2] -= v[6]*s1 + v[7]*s2 + v[8]*s3;
308: v += bs2;
309: }
310: }
311: VecRestoreArray(xx,&x);
312: PetscLogFlops(2.0*bs2*(a->nz) - bs*A->cmap->n);
313: return(0);
314: }
316: PetscErrorCode MatSolveTranspose_SeqBAIJ_4_NaturalOrdering_inplace(Mat A,Vec bb,Vec xx)
317: {
318: Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data;
319: PetscErrorCode ierr;
320: const PetscInt *diag=a->diag,n=a->mbs,*vi,*ai=a->i,*aj=a->j;
321: PetscInt i,nz,idx,idt,oidx;
322: const MatScalar *aa=a->a,*v;
323: PetscScalar s1,s2,s3,s4,x1,x2,x3,x4,*x;
326: VecCopy(bb,xx);
327: VecGetArray(xx,&x);
329: /* forward solve the U^T */
330: idx = 0;
331: for (i=0; i<n; i++) {
333: v = aa + 16*diag[i];
334: /* multiply by the inverse of the block diagonal */
335: x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx];
336: s1 = v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4;
337: s2 = v[4]*x1 + v[5]*x2 + v[6]*x3 + v[7]*x4;
338: s3 = v[8]*x1 + v[9]*x2 + v[10]*x3 + v[11]*x4;
339: s4 = v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4;
340: v += 16;
342: vi = aj + diag[i] + 1;
343: nz = ai[i+1] - diag[i] - 1;
344: while (nz--) {
345: oidx = 4*(*vi++);
346: x[oidx] -= v[0]*s1 + v[1]*s2 + v[2]*s3 + v[3]*s4;
347: x[oidx+1] -= v[4]*s1 + v[5]*s2 + v[6]*s3 + v[7]*s4;
348: x[oidx+2] -= v[8]*s1 + v[9]*s2 + v[10]*s3 + v[11]*s4;
349: x[oidx+3] -= v[12]*s1 + v[13]*s2 + v[14]*s3 + v[15]*s4;
350: v += 16;
351: }
352: x[idx] = s1;x[1+idx] = s2; x[2+idx] = s3;x[3+idx] = s4;
353: idx += 4;
354: }
355: /* backward solve the L^T */
356: for (i=n-1; i>=0; i--) {
357: v = aa + 16*diag[i] - 16;
358: vi = aj + diag[i] - 1;
359: nz = diag[i] - ai[i];
360: idt = 4*i;
361: s1 = x[idt]; s2 = x[1+idt]; s3 = x[2+idt];s4 = x[3+idt];
362: while (nz--) {
363: idx = 4*(*vi--);
364: x[idx] -= v[0]*s1 + v[1]*s2 + v[2]*s3 + v[3]*s4;
365: x[idx+1] -= v[4]*s1 + v[5]*s2 + v[6]*s3 + v[7]*s4;
366: x[idx+2] -= v[8]*s1 + v[9]*s2 + v[10]*s3 + v[11]*s4;
367: x[idx+3] -= v[12]*s1 + v[13]*s2 + v[14]*s3 + v[15]*s4;
368: v -= 16;
369: }
370: }
371: VecRestoreArray(xx,&x);
372: PetscLogFlops(2.0*16*(a->nz) - 4.0*A->cmap->n);
373: return(0);
374: }
376: PetscErrorCode MatSolveTranspose_SeqBAIJ_4_NaturalOrdering(Mat A,Vec bb,Vec xx)
377: {
378: Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data;
379: PetscErrorCode ierr;
380: const PetscInt n=a->mbs,*vi,*ai=a->i,*aj=a->j,*diag=a->diag;
381: PetscInt nz,idx,idt,j,i,oidx;
382: const PetscInt bs =A->rmap->bs,bs2=a->bs2;
383: const MatScalar *aa=a->a,*v;
384: PetscScalar s1,s2,s3,s4,x1,x2,x3,x4,*x;
387: VecCopy(bb,xx);
388: VecGetArray(xx,&x);
390: /* forward solve the U^T */
391: idx = 0;
392: for (i=0; i<n; i++) {
393: v = aa + bs2*diag[i];
394: /* multiply by the inverse of the block diagonal */
395: x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx];
396: s1 = v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4;
397: s2 = v[4]*x1 + v[5]*x2 + v[6]*x3 + v[7]*x4;
398: s3 = v[8]*x1 + v[9]*x2 + v[10]*x3 + v[11]*x4;
399: s4 = v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4;
400: v -= bs2;
402: vi = aj + diag[i] - 1;
403: nz = diag[i] - diag[i+1] - 1;
404: for (j=0; j>-nz; j--) {
405: oidx = bs*vi[j];
406: x[oidx] -= v[0]*s1 + v[1]*s2 + v[2]*s3 + v[3]*s4;
407: x[oidx+1] -= v[4]*s1 + v[5]*s2 + v[6]*s3 + v[7]*s4;
408: x[oidx+2] -= v[8]*s1 + v[9]*s2 + v[10]*s3 + v[11]*s4;
409: x[oidx+3] -= v[12]*s1 + v[13]*s2 + v[14]*s3 + v[15]*s4;
410: v -= bs2;
411: }
412: x[idx] = s1;x[1+idx] = s2; x[2+idx] = s3; x[3+idx] = s4;
413: idx += bs;
414: }
415: /* backward solve the L^T */
416: for (i=n-1; i>=0; i--) {
417: v = aa + bs2*ai[i];
418: vi = aj + ai[i];
419: nz = ai[i+1] - ai[i];
420: idt = bs*i;
421: s1 = x[idt]; s2 = x[1+idt]; s3 = x[2+idt]; s4 = x[3+idt];
422: for (j=0; j<nz; j++) {
423: idx = bs*vi[j];
424: x[idx] -= v[0]*s1 + v[1]*s2 + v[2]*s3 + v[3]*s4;
425: x[idx+1] -= v[4]*s1 + v[5]*s2 + v[6]*s3 + v[7]*s4;
426: x[idx+2] -= v[8]*s1 + v[9]*s2 + v[10]*s3 + v[11]*s4;
427: x[idx+3] -= v[12]*s1 + v[13]*s2 + v[14]*s3 + v[15]*s4;
428: v += bs2;
429: }
430: }
431: VecRestoreArray(xx,&x);
432: PetscLogFlops(2.0*bs2*(a->nz) - bs*A->cmap->n);
433: return(0);
434: }
436: PetscErrorCode MatSolveTranspose_SeqBAIJ_5_NaturalOrdering_inplace(Mat A,Vec bb,Vec xx)
437: {
438: Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data;
439: PetscErrorCode ierr;
440: const PetscInt *diag=a->diag,n=a->mbs,*vi,*ai=a->i,*aj=a->j;
441: PetscInt i,nz,idx,idt,oidx;
442: const MatScalar *aa=a->a,*v;
443: PetscScalar s1,s2,s3,s4,s5,x1,x2,x3,x4,x5,*x;
446: VecCopy(bb,xx);
447: VecGetArray(xx,&x);
449: /* forward solve the U^T */
450: idx = 0;
451: for (i=0; i<n; i++) {
453: v = aa + 25*diag[i];
454: /* multiply by the inverse of the block diagonal */
455: x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx];
456: s1 = v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4 + v[4]*x5;
457: s2 = v[5]*x1 + v[6]*x2 + v[7]*x3 + v[8]*x4 + v[9]*x5;
458: s3 = v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4 + v[14]*x5;
459: s4 = v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4 + v[19]*x5;
460: s5 = v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4 + v[24]*x5;
461: v += 25;
463: vi = aj + diag[i] + 1;
464: nz = ai[i+1] - diag[i] - 1;
465: while (nz--) {
466: oidx = 5*(*vi++);
467: x[oidx] -= v[0]*s1 + v[1]*s2 + v[2]*s3 + v[3]*s4 + v[4]*s5;
468: x[oidx+1] -= v[5]*s1 + v[6]*s2 + v[7]*s3 + v[8]*s4 + v[9]*s5;
469: x[oidx+2] -= v[10]*s1 + v[11]*s2 + v[12]*s3 + v[13]*s4 + v[14]*s5;
470: x[oidx+3] -= v[15]*s1 + v[16]*s2 + v[17]*s3 + v[18]*s4 + v[19]*s5;
471: x[oidx+4] -= v[20]*s1 + v[21]*s2 + v[22]*s3 + v[23]*s4 + v[24]*s5;
472: v += 25;
473: }
474: x[idx] = s1;x[1+idx] = s2; x[2+idx] = s3;x[3+idx] = s4; x[4+idx] = s5;
475: idx += 5;
476: }
477: /* backward solve the L^T */
478: for (i=n-1; i>=0; i--) {
479: v = aa + 25*diag[i] - 25;
480: vi = aj + diag[i] - 1;
481: nz = diag[i] - ai[i];
482: idt = 5*i;
483: s1 = x[idt]; s2 = x[1+idt]; s3 = x[2+idt];s4 = x[3+idt]; s5 = x[4+idt];
484: while (nz--) {
485: idx = 5*(*vi--);
486: x[idx] -= v[0]*s1 + v[1]*s2 + v[2]*s3 + v[3]*s4 + v[4]*s5;
487: x[idx+1] -= v[5]*s1 + v[6]*s2 + v[7]*s3 + v[8]*s4 + v[9]*s5;
488: x[idx+2] -= v[10]*s1 + v[11]*s2 + v[12]*s3 + v[13]*s4 + v[14]*s5;
489: x[idx+3] -= v[15]*s1 + v[16]*s2 + v[17]*s3 + v[18]*s4 + v[19]*s5;
490: x[idx+4] -= v[20]*s1 + v[21]*s2 + v[22]*s3 + v[23]*s4 + v[24]*s5;
491: v -= 25;
492: }
493: }
494: VecRestoreArray(xx,&x);
495: PetscLogFlops(2.0*25*(a->nz) - 5.0*A->cmap->n);
496: return(0);
497: }
499: PetscErrorCode MatSolveTranspose_SeqBAIJ_5_NaturalOrdering(Mat A,Vec bb,Vec xx)
500: {
501: Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data;
502: PetscErrorCode ierr;
503: const PetscInt n=a->mbs,*vi,*ai=a->i,*aj=a->j,*diag=a->diag;
504: PetscInt nz,idx,idt,j,i,oidx;
505: const PetscInt bs =A->rmap->bs,bs2=a->bs2;
506: const MatScalar *aa=a->a,*v;
507: PetscScalar s1,s2,s3,s4,s5,x1,x2,x3,x4,x5,*x;
510: VecCopy(bb,xx);
511: VecGetArray(xx,&x);
513: /* forward solve the U^T */
514: idx = 0;
515: for (i=0; i<n; i++) {
516: v = aa + bs2*diag[i];
517: /* multiply by the inverse of the block diagonal */
518: x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx];
519: x5 = x[4+idx];
520: s1 = v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4 + v[4]*x5;
521: s2 = v[5]*x1 + v[6]*x2 + v[7]*x3 + v[8]*x4 + v[9]*x5;
522: s3 = v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4 + v[14]*x5;
523: s4 = v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4 + v[19]*x5;
524: s5 = v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4 + v[24]*x5;
525: v -= bs2;
527: vi = aj + diag[i] - 1;
528: nz = diag[i] - diag[i+1] - 1;
529: for (j=0; j>-nz; j--) {
530: oidx = bs*vi[j];
531: x[oidx] -= v[0]*s1 + v[1]*s2 + v[2]*s3 + v[3]*s4 + v[4]*s5;
532: x[oidx+1] -= v[5]*s1 + v[6]*s2 + v[7]*s3 + v[8]*s4 + v[9]*s5;
533: x[oidx+2] -= v[10]*s1 + v[11]*s2 + v[12]*s3 + v[13]*s4 + v[14]*s5;
534: x[oidx+3] -= v[15]*s1 + v[16]*s2 + v[17]*s3 + v[18]*s4 + v[19]*s5;
535: x[oidx+4] -= v[20]*s1 + v[21]*s2 + v[22]*s3 + v[23]*s4 + v[24]*s5;
536: v -= bs2;
537: }
538: x[idx] = s1;x[1+idx] = s2; x[2+idx] = s3; x[3+idx] = s4; x[4+idx] = s5;
539: idx += bs;
540: }
541: /* backward solve the L^T */
542: for (i=n-1; i>=0; i--) {
543: v = aa + bs2*ai[i];
544: vi = aj + ai[i];
545: nz = ai[i+1] - ai[i];
546: idt = bs*i;
547: s1 = x[idt]; s2 = x[1+idt]; s3 = x[2+idt]; s4 = x[3+idt]; s5 = x[4+idt];
548: for (j=0; j<nz; j++) {
549: idx = bs*vi[j];
550: x[idx] -= v[0]*s1 + v[1]*s2 + v[2]*s3 + v[3]*s4 + v[4]*s5;
551: x[idx+1] -= v[5]*s1 + v[6]*s2 + v[7]*s3 + v[8]*s4 + v[9]*s5;
552: x[idx+2] -= v[10]*s1 + v[11]*s2 + v[12]*s3 + v[13]*s4 + v[14]*s5;
553: x[idx+3] -= v[15]*s1 + v[16]*s2 + v[17]*s3 + v[18]*s4 + v[19]*s5;
554: x[idx+4] -= v[20]*s1 + v[21]*s2 + v[22]*s3 + v[23]*s4 + v[24]*s5;
555: v += bs2;
556: }
557: }
558: VecRestoreArray(xx,&x);
559: PetscLogFlops(2.0*bs2*(a->nz) - bs*A->cmap->n);
560: return(0);
561: }
563: PetscErrorCode MatSolveTranspose_SeqBAIJ_6_NaturalOrdering_inplace(Mat A,Vec bb,Vec xx)
564: {
565: Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data;
566: PetscErrorCode ierr;
567: const PetscInt *diag=a->diag,n=a->mbs,*vi,*ai=a->i,*aj=a->j;
568: PetscInt i,nz,idx,idt,oidx;
569: const MatScalar *aa=a->a,*v;
570: PetscScalar s1,s2,s3,s4,s5,s6,x1,x2,x3,x4,x5,x6,*x;
573: VecCopy(bb,xx);
574: VecGetArray(xx,&x);
576: /* forward solve the U^T */
577: idx = 0;
578: for (i=0; i<n; i++) {
580: v = aa + 36*diag[i];
581: /* multiply by the inverse of the block diagonal */
582: x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx];
583: x6 = x[5+idx];
584: s1 = v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4 + v[4]*x5 + v[5]*x6;
585: s2 = v[6]*x1 + v[7]*x2 + v[8]*x3 + v[9]*x4 + v[10]*x5 + v[11]*x6;
586: s3 = v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4 + v[16]*x5 + v[17]*x6;
587: s4 = v[18]*x1 + v[19]*x2 + v[20]*x3 + v[21]*x4 + v[22]*x5 + v[23]*x6;
588: s5 = v[24]*x1 + v[25]*x2 + v[26]*x3 + v[27]*x4 + v[28]*x5 + v[29]*x6;
589: s6 = v[30]*x1 + v[31]*x2 + v[32]*x3 + v[33]*x4 + v[34]*x5 + v[35]*x6;
590: v += 36;
592: vi = aj + diag[i] + 1;
593: nz = ai[i+1] - diag[i] - 1;
594: while (nz--) {
595: oidx = 6*(*vi++);
596: x[oidx] -= v[0]*s1 + v[1]*s2 + v[2]*s3 + v[3]*s4 + v[4]*s5 + v[5]*s6;
597: x[oidx+1] -= v[6]*s1 + v[7]*s2 + v[8]*s3 + v[9]*s4 + v[10]*s5 + v[11]*s6;
598: x[oidx+2] -= v[12]*s1 + v[13]*s2 + v[14]*s3 + v[15]*s4 + v[16]*s5 + v[17]*s6;
599: x[oidx+3] -= v[18]*s1 + v[19]*s2 + v[20]*s3 + v[21]*s4 + v[22]*s5 + v[23]*s6;
600: x[oidx+4] -= v[24]*s1 + v[25]*s2 + v[26]*s3 + v[27]*s4 + v[28]*s5 + v[29]*s6;
601: x[oidx+5] -= v[30]*s1 + v[31]*s2 + v[32]*s3 + v[33]*s4 + v[34]*s5 + v[35]*s6;
602: v += 36;
603: }
604: x[idx] = s1;x[1+idx] = s2; x[2+idx] = s3;x[3+idx] = s4; x[4+idx] = s5;
605: x[5+idx] = s6;
606: idx += 6;
607: }
608: /* backward solve the L^T */
609: for (i=n-1; i>=0; i--) {
610: v = aa + 36*diag[i] - 36;
611: vi = aj + diag[i] - 1;
612: nz = diag[i] - ai[i];
613: idt = 6*i;
614: s1 = x[idt]; s2 = x[1+idt]; s3 = x[2+idt];s4 = x[3+idt]; s5 = x[4+idt];
615: s6 = x[5+idt];
616: while (nz--) {
617: idx = 6*(*vi--);
618: x[idx] -= v[0]*s1 + v[1]*s2 + v[2]*s3 + v[3]*s4 + v[4]*s5 + v[5]*s6;
619: x[idx+1] -= v[6]*s1 + v[7]*s2 + v[8]*s3 + v[9]*s4 + v[10]*s5 + v[11]*s6;
620: x[idx+2] -= v[12]*s1 + v[13]*s2 + v[14]*s3 + v[15]*s4 + v[16]*s5 + v[17]*s6;
621: x[idx+3] -= v[18]*s1 + v[19]*s2 + v[20]*s3 + v[21]*s4 + v[22]*s5 + v[23]*s6;
622: x[idx+4] -= v[24]*s1 + v[25]*s2 + v[26]*s3 + v[27]*s4 + v[28]*s5 + v[29]*s6;
623: x[idx+5] -= v[30]*s1 + v[31]*s2 + v[32]*s3 + v[33]*s4 + v[34]*s5 + v[35]*s6;
624: v -= 36;
625: }
626: }
627: VecRestoreArray(xx,&x);
628: PetscLogFlops(2.0*36*(a->nz) - 6.0*A->cmap->n);
629: return(0);
630: }
632: PetscErrorCode MatSolveTranspose_SeqBAIJ_6_NaturalOrdering(Mat A,Vec bb,Vec xx)
633: {
634: Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data;
635: PetscErrorCode ierr;
636: const PetscInt n=a->mbs,*vi,*ai=a->i,*aj=a->j,*diag=a->diag;
637: PetscInt nz,idx,idt,j,i,oidx;
638: const PetscInt bs =A->rmap->bs,bs2=a->bs2;
639: const MatScalar *aa=a->a,*v;
640: PetscScalar s1,s2,s3,s4,s5,s6,x1,x2,x3,x4,x5,x6,*x;
643: VecCopy(bb,xx);
644: VecGetArray(xx,&x);
646: /* forward solve the U^T */
647: idx = 0;
648: for (i=0; i<n; i++) {
649: v = aa + bs2*diag[i];
650: /* multiply by the inverse of the block diagonal */
651: x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx];
652: x5 = x[4+idx]; x6 = x[5+idx];
653: s1 = v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4 + v[4]*x5 + v[5]*x6;
654: s2 = v[6]*x1 + v[7]*x2 + v[8]*x3 + v[9]*x4 + v[10]*x5 + v[11]*x6;
655: s3 = v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4 + v[16]*x5 + v[17]*x6;
656: s4 = v[18]*x1 + v[19]*x2 + v[20]*x3 + v[21]*x4 + v[22]*x5 + v[23]*x6;
657: s5 = v[24]*x1 + v[25]*x2 + v[26]*x3 + v[27]*x4 + v[28]*x5 + v[29]*x6;
658: s6 = v[30]*x1 + v[31]*x2 + v[32]*x3 + v[33]*x4 + v[34]*x5 + v[35]*x6;
659: v -= bs2;
661: vi = aj + diag[i] - 1;
662: nz = diag[i] - diag[i+1] - 1;
663: for (j=0; j>-nz; j--) {
664: oidx = bs*vi[j];
665: x[oidx] -= v[0]*s1 + v[1]*s2 + v[2]*s3 + v[3]*s4 + v[4]*s5 + v[5]*s6;
666: x[oidx+1] -= v[6]*s1 + v[7]*s2 + v[8]*s3 + v[9]*s4 + v[10]*s5 + v[11]*s6;
667: x[oidx+2] -= v[12]*s1 + v[13]*s2 + v[14]*s3 + v[15]*s4 + v[16]*s5 + v[17]*s6;
668: x[oidx+3] -= v[18]*s1 + v[19]*s2 + v[20]*s3 + v[21]*s4 + v[22]*s5 + v[23]*s6;
669: x[oidx+4] -= v[24]*s1 + v[25]*s2 + v[26]*s3 + v[27]*s4 + v[28]*s5 + v[29]*s6;
670: x[oidx+5] -= v[30]*s1 + v[31]*s2 + v[32]*s3 + v[33]*s4 + v[34]*s5 + v[35]*s6;
671: v -= bs2;
672: }
673: x[idx] = s1;x[1+idx] = s2; x[2+idx] = s3; x[3+idx] = s4; x[4+idx] = s5;
674: x[5+idx] = s6;
675: idx += bs;
676: }
677: /* backward solve the L^T */
678: for (i=n-1; i>=0; i--) {
679: v = aa + bs2*ai[i];
680: vi = aj + ai[i];
681: nz = ai[i+1] - ai[i];
682: idt = bs*i;
683: s1 = x[idt]; s2 = x[1+idt]; s3 = x[2+idt]; s4 = x[3+idt]; s5 = x[4+idt];
684: s6 = x[5+idt];
685: for (j=0; j<nz; j++) {
686: idx = bs*vi[j];
687: x[idx] -= v[0]*s1 + v[1]*s2 + v[2]*s3 + v[3]*s4 + v[4]*s5 + v[5]*s6;
688: x[idx+1] -= v[6]*s1 + v[7]*s2 + v[8]*s3 + v[9]*s4 + v[10]*s5 + v[11]*s6;
689: x[idx+2] -= v[12]*s1 + v[13]*s2 + v[14]*s3 + v[15]*s4 + v[16]*s5 + v[17]*s6;
690: x[idx+3] -= v[18]*s1 + v[19]*s2 + v[20]*s3 + v[21]*s4 + v[22]*s5 + v[23]*s6;
691: x[idx+4] -= v[24]*s1 + v[25]*s2 + v[26]*s3 + v[27]*s4 + v[28]*s5 + v[29]*s6;
692: x[idx+5] -= v[30]*s1 + v[31]*s2 + v[32]*s3 + v[33]*s4 + v[34]*s5 + v[35]*s6;
693: v += bs2;
694: }
695: }
696: VecRestoreArray(xx,&x);
697: PetscLogFlops(2.0*bs2*(a->nz) - bs*A->cmap->n);
698: return(0);
699: }
701: PetscErrorCode MatSolveTranspose_SeqBAIJ_7_NaturalOrdering_inplace(Mat A,Vec bb,Vec xx)
702: {
703: Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data;
704: PetscErrorCode ierr;
705: const PetscInt *diag=a->diag,n=a->mbs,*vi,*ai=a->i,*aj=a->j;
706: PetscInt i,nz,idx,idt,oidx;
707: const MatScalar *aa=a->a,*v;
708: PetscScalar s1,s2,s3,s4,s5,s6,s7,x1,x2,x3,x4,x5,x6,x7,*x;
711: VecCopy(bb,xx);
712: VecGetArray(xx,&x);
714: /* forward solve the U^T */
715: idx = 0;
716: for (i=0; i<n; i++) {
718: v = aa + 49*diag[i];
719: /* multiply by the inverse of the block diagonal */
720: x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx];
721: x6 = x[5+idx]; x7 = x[6+idx];
722: s1 = v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4 + v[4]*x5 + v[5]*x6 + v[6]*x7;
723: s2 = v[7]*x1 + v[8]*x2 + v[9]*x3 + v[10]*x4 + v[11]*x5 + v[12]*x6 + v[13]*x7;
724: s3 = v[14]*x1 + v[15]*x2 + v[16]*x3 + v[17]*x4 + v[18]*x5 + v[19]*x6 + v[20]*x7;
725: s4 = v[21]*x1 + v[22]*x2 + v[23]*x3 + v[24]*x4 + v[25]*x5 + v[26]*x6 + v[27]*x7;
726: s5 = v[28]*x1 + v[29]*x2 + v[30]*x3 + v[31]*x4 + v[32]*x5 + v[33]*x6 + v[34]*x7;
727: s6 = v[35]*x1 + v[36]*x2 + v[37]*x3 + v[38]*x4 + v[39]*x5 + v[40]*x6 + v[41]*x7;
728: s7 = v[42]*x1 + v[43]*x2 + v[44]*x3 + v[45]*x4 + v[46]*x5 + v[47]*x6 + v[48]*x7;
729: v += 49;
731: vi = aj + diag[i] + 1;
732: nz = ai[i+1] - diag[i] - 1;
733: while (nz--) {
734: oidx = 7*(*vi++);
735: x[oidx] -= v[0]*s1 + v[1]*s2 + v[2]*s3 + v[3]*s4 + v[4]*s5 + v[5]*s6 + v[6]*s7;
736: x[oidx+1] -= v[7]*s1 + v[8]*s2 + v[9]*s3 + v[10]*s4 + v[11]*s5 + v[12]*s6 + v[13]*s7;
737: x[oidx+2] -= v[14]*s1 + v[15]*s2 + v[16]*s3 + v[17]*s4 + v[18]*s5 + v[19]*s6 + v[20]*s7;
738: x[oidx+3] -= v[21]*s1 + v[22]*s2 + v[23]*s3 + v[24]*s4 + v[25]*s5 + v[26]*s6 + v[27]*s7;
739: x[oidx+4] -= v[28]*s1 + v[29]*s2 + v[30]*s3 + v[31]*s4 + v[32]*s5 + v[33]*s6 + v[34]*s7;
740: x[oidx+5] -= v[35]*s1 + v[36]*s2 + v[37]*s3 + v[38]*s4 + v[39]*s5 + v[40]*s6 + v[41]*s7;
741: x[oidx+6] -= v[42]*s1 + v[43]*s2 + v[44]*s3 + v[45]*s4 + v[46]*s5 + v[47]*s6 + v[48]*s7;
742: v += 49;
743: }
744: x[idx] = s1;x[1+idx] = s2; x[2+idx] = s3;x[3+idx] = s4; x[4+idx] = s5;
745: x[5+idx] = s6;x[6+idx] = s7;
746: idx += 7;
747: }
748: /* backward solve the L^T */
749: for (i=n-1; i>=0; i--) {
750: v = aa + 49*diag[i] - 49;
751: vi = aj + diag[i] - 1;
752: nz = diag[i] - ai[i];
753: idt = 7*i;
754: s1 = x[idt]; s2 = x[1+idt]; s3 = x[2+idt];s4 = x[3+idt]; s5 = x[4+idt];
755: s6 = x[5+idt];s7 = x[6+idt];
756: while (nz--) {
757: idx = 7*(*vi--);
758: x[idx] -= v[0]*s1 + v[1]*s2 + v[2]*s3 + v[3]*s4 + v[4]*s5 + v[5]*s6 + v[6]*s7;
759: x[idx+1] -= v[7]*s1 + v[8]*s2 + v[9]*s3 + v[10]*s4 + v[11]*s5 + v[12]*s6 + v[13]*s7;
760: x[idx+2] -= v[14]*s1 + v[15]*s2 + v[16]*s3 + v[17]*s4 + v[18]*s5 + v[19]*s6 + v[20]*s7;
761: x[idx+3] -= v[21]*s1 + v[22]*s2 + v[23]*s3 + v[24]*s4 + v[25]*s5 + v[26]*s6 + v[27]*s7;
762: x[idx+4] -= v[28]*s1 + v[29]*s2 + v[30]*s3 + v[31]*s4 + v[32]*s5 + v[33]*s6 + v[34]*s7;
763: x[idx+5] -= v[35]*s1 + v[36]*s2 + v[37]*s3 + v[38]*s4 + v[39]*s5 + v[40]*s6 + v[41]*s7;
764: x[idx+6] -= v[42]*s1 + v[43]*s2 + v[44]*s3 + v[45]*s4 + v[46]*s5 + v[47]*s6 + v[48]*s7;
765: v -= 49;
766: }
767: }
768: VecRestoreArray(xx,&x);
769: PetscLogFlops(2.0*49*(a->nz) - 7.0*A->cmap->n);
770: return(0);
771: }
772: PetscErrorCode MatSolveTranspose_SeqBAIJ_7_NaturalOrdering(Mat A,Vec bb,Vec xx)
773: {
774: Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data;
775: PetscErrorCode ierr;
776: const PetscInt n=a->mbs,*vi,*ai=a->i,*aj=a->j,*diag=a->diag;
777: PetscInt nz,idx,idt,j,i,oidx;
778: const PetscInt bs =A->rmap->bs,bs2=a->bs2;
779: const MatScalar *aa=a->a,*v;
780: PetscScalar s1,s2,s3,s4,s5,s6,s7,x1,x2,x3,x4,x5,x6,x7,*x;
783: VecCopy(bb,xx);
784: VecGetArray(xx,&x);
786: /* forward solve the U^T */
787: idx = 0;
788: for (i=0; i<n; i++) {
789: v = aa + bs2*diag[i];
790: /* multiply by the inverse of the block diagonal */
791: x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx];
792: x5 = x[4+idx]; x6 = x[5+idx]; x7 = x[6+idx];
793: s1 = v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4 + v[4]*x5 + v[5]*x6 + v[6]*x7;
794: s2 = v[7]*x1 + v[8]*x2 + v[9]*x3 + v[10]*x4 + v[11]*x5 + v[12]*x6 + v[13]*x7;
795: s3 = v[14]*x1 + v[15]*x2 + v[16]*x3 + v[17]*x4 + v[18]*x5 + v[19]*x6 + v[20]*x7;
796: s4 = v[21]*x1 + v[22]*x2 + v[23]*x3 + v[24]*x4 + v[25]*x5 + v[26]*x6 + v[27]*x7;
797: s5 = v[28]*x1 + v[29]*x2 + v[30]*x3 + v[31]*x4 + v[32]*x5 + v[33]*x6 + v[34]*x7;
798: s6 = v[35]*x1 + v[36]*x2 + v[37]*x3 + v[38]*x4 + v[39]*x5 + v[40]*x6 + v[41]*x7;
799: s7 = v[42]*x1 + v[43]*x2 + v[44]*x3 + v[45]*x4 + v[46]*x5 + v[47]*x6 + v[48]*x7;
800: v -= bs2;
801: vi = aj + diag[i] - 1;
802: nz = diag[i] - diag[i+1] - 1;
803: for (j=0; j>-nz; j--) {
804: oidx = bs*vi[j];
805: x[oidx] -= v[0]*s1 + v[1]*s2 + v[2]*s3 + v[3]*s4 + v[4]*s5 + v[5]*s6 + v[6]*s7;
806: x[oidx+1] -= v[7]*s1 + v[8]*s2 + v[9]*s3 + v[10]*s4 + v[11]*s5 + v[12]*s6 + v[13]*s7;
807: x[oidx+2] -= v[14]*s1 + v[15]*s2 + v[16]*s3 + v[17]*s4 + v[18]*s5 + v[19]*s6 + v[20]*s7;
808: x[oidx+3] -= v[21]*s1 + v[22]*s2 + v[23]*s3 + v[24]*s4 + v[25]*s5 + v[26]*s6 + v[27]*s7;
809: x[oidx+4] -= v[28]*s1 + v[29]*s2 + v[30]*s3 + v[31]*s4 + v[32]*s5 + v[33]*s6 + v[34]*s7;
810: x[oidx+5] -= v[35]*s1 + v[36]*s2 + v[37]*s3 + v[38]*s4 + v[39]*s5 + v[40]*s6 + v[41]*s7;
811: x[oidx+6] -= v[42]*s1 + v[43]*s2 + v[44]*s3 + v[45]*s4 + v[46]*s5 + v[47]*s6 + v[48]*s7;
812: v -= bs2;
813: }
814: x[idx] = s1; x[1+idx] = s2; x[2+idx] = s3; x[3+idx] = s4; x[4+idx] = s5;
815: x[5+idx] = s6; x[6+idx] = s7;
816: idx += bs;
817: }
818: /* backward solve the L^T */
819: for (i=n-1; i>=0; i--) {
820: v = aa + bs2*ai[i];
821: vi = aj + ai[i];
822: nz = ai[i+1] - ai[i];
823: idt = bs*i;
824: s1 = x[idt]; s2 = x[1+idt]; s3 = x[2+idt]; s4 = x[3+idt]; s5 = x[4+idt];
825: s6 = x[5+idt]; s7 = x[6+idt];
826: for (j=0; j<nz; j++) {
827: idx = bs*vi[j];
828: x[idx] -= v[0]*s1 + v[1]*s2 + v[2]*s3 + v[3]*s4 + v[4]*s5 + v[5]*s6 + v[6]*s7;
829: x[idx+1] -= v[7]*s1 + v[8]*s2 + v[9]*s3 + v[10]*s4 + v[11]*s5 + v[12]*s6 + v[13]*s7;
830: x[idx+2] -= v[14]*s1 + v[15]*s2 + v[16]*s3 + v[17]*s4 + v[18]*s5 + v[19]*s6 + v[20]*s7;
831: x[idx+3] -= v[21]*s1 + v[22]*s2 + v[23]*s3 + v[24]*s4 + v[25]*s5 + v[26]*s6 + v[27]*s7;
832: x[idx+4] -= v[28]*s1 + v[29]*s2 + v[30]*s3 + v[31]*s4 + v[32]*s5 + v[33]*s6 + v[34]*s7;
833: x[idx+5] -= v[35]*s1 + v[36]*s2 + v[37]*s3 + v[38]*s4 + v[39]*s5 + v[40]*s6 + v[41]*s7;
834: x[idx+6] -= v[42]*s1 + v[43]*s2 + v[44]*s3 + v[45]*s4 + v[46]*s5 + v[47]*s6 + v[48]*s7;
835: v += bs2;
836: }
837: }
838: VecRestoreArray(xx,&x);
839: PetscLogFlops(2.0*bs2*(a->nz) - bs*A->cmap->n);
840: return(0);
841: }