Actual source code: sbstrmfact.c
petsc-3.6.1 2015-08-06
1: #define PETSCMAT_DLL
3: #include <../src/mat/impls/sbaij/seq/sbaij.h>
4: #include <../src/mat/impls/sbaij/seq/sbstream/sbstream.h>
6: extern PetscErrorCode MatDestroy_SeqSBSTRM(Mat A);
11: PetscErrorCode MatSolve_SeqSBSTRM_4_inplace(Mat A,Vec bb,Vec xx)
12: {
13: Mat_SeqSBAIJ *a =(Mat_SeqSBAIJ*)A->data;
14: IS isrow=a->row;
15: PetscInt mbs =a->mbs,*ai=a->i,*aj=a->j,bs=A->rmap->bs,bs2=a->bs2;
17: const PetscInt *r;
18: PetscInt nz,*vj,k,idx;
19: PetscScalar *x,*b,x0,x1,x2,x3,*t,*tp;
21: Mat_SeqSBSTRM *sbstrm = (Mat_SeqSBSTRM*)A->spptr;
22: MatScalar *as =sbstrm->as,*diag;
23: PetscScalar tp0, tp1, tp2, tp3;
24: const MatScalar *v0, *v1, *v2, *v3;
25: PetscInt slen;
28: slen = 4*(ai[mbs]-ai[0]);
29: v0 = as + 16*ai[0];
30: v1 = v0 + slen;
31: v2 = v1 + slen;
32: v3 = v2 + slen;
34: VecGetArray(bb,&b);
35: VecGetArray(xx,&x);
36: t = a->solve_work;
37: ISGetIndices(isrow,&r);
39: /* solve U^T * D * y = b by forward substitution */
40: tp = t;
41: for (k=0; k<mbs; k++) { /* t <- perm(b) */
42: idx = 4*r[k];
43: tp[0] = b[idx];
44: tp[1] = b[idx+1];
45: tp[2] = b[idx+2];
46: tp[3] = b[idx+3];
47: tp += 4;
48: }
50: for (k=0; k<mbs; k++) {
51: vj = aj + ai[k];
52: tp = t + k*4;
53: x0 = tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3];
54: nz = ai[k+1] - ai[k];
56: tp = t + (*vj)*4;
57: while (nz--) {
58: tp[0] += v0[0]*x0 + v1[0]*x1 + v2[0]*x2 + v3[0]*x3;
59: tp[1] += v0[1]*x0 + v1[1]*x1 + v2[1]*x2 + v3[1]*x3;
60: tp[2] += v0[2]*x0 + v1[2]*x1 + v2[2]*x2 + v3[2]*x3;
61: tp[3] += v0[3]*x0 + v1[3]*x1 + v2[3]*x2 + v3[3]*x3;
62: vj++; tp = t + (*vj)*4;
64: v0 += 4; v1 += 4; v2 += 4; v3 += 4;
65: }
67: /* xk = inv(Dk)*(Dk*xk) */
68: diag = as+k*16; /* ptr to inv(Dk) */
69: tp = t + k*4;
70: tp[0] = diag[0]*x0 + diag[4]*x1 + diag[8]*x2 + diag[12]*x3;
71: tp[1] = diag[1]*x0 + diag[5]*x1 + diag[9]*x2 + diag[13]*x3;
72: tp[2] = diag[2]*x0 + diag[6]*x1 + diag[10]*x2+ diag[14]*x3;
73: tp[3] = diag[3]*x0 + diag[7]*x1 + diag[11]*x2+ diag[15]*x3;
74: }
76: /* solve U*x = y by back substitution */
77: for (k=mbs-1; k>=0; k--) {
78: vj = aj + ai[k+1];
79: tp = t + k*4;
80: x0 = tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; /* xk */
81: nz = ai[k+1] - ai[k];
83: tp = t + (*vj)*4;
84: while (nz--) {
85: /* xk += U(k,* */
86: v0 -= 4; v1 -= 4; v2 -= 4; v3 -= 4;
88: vj--; tp = t + (*vj)*4;
90: tp0 = tp[0]; tp1 = tp[1]; tp2 = tp[2]; tp3 = tp[3];
91: x0 += v0[3]*tp3 + v0[2]*tp2 + v0[1]*tp1 + v0[0]*tp0;
92: x1 += v1[3]*tp3 + v1[2]*tp2 + v1[1]*tp1 + v1[0]*tp0;
93: x2 += v2[3]*tp3 + v2[2]*tp2 + v2[1]*tp1 + v2[0]*tp0;
94: x3 += v3[3]*tp3 + v3[2]*tp2 + v3[1]*tp1 + v3[0]*tp0;
95: }
96: tp = t + k*4;
97: tp[0] = x0; tp[1]=x1; tp[2]=x2; tp[3]=x3;
99: idx = 4*r[k];
100: x[idx] = x0;
101: x[idx+1] = x1;
102: x[idx+2] = x2;
103: x[idx+3] = x3;
104: }
106: ISRestoreIndices(isrow,&r);
107: VecRestoreArray(bb,&b);
108: VecRestoreArray(xx,&x);
109: PetscLogFlops(4.0*bs2*a->nz - (bs+2.0*bs2)*mbs);
110: return(0);
111: }
115: PetscErrorCode MatForwardSolve_SeqSBSTRM_4_NaturalOrdering(PetscInt *ai,PetscInt *aj,MatScalar *aa,PetscInt mbs,PetscScalar *x)
116: {
117: MatScalar *diag;
118: PetscScalar *xp,x0,x1,x2,x3;
119: PetscInt nz,*vj,k;
121: const MatScalar *v0, *v1, *v2, *v3;
122: PetscInt slen;
125: slen = 4*(ai[mbs]-ai[0]);
126: v0 = aa + 16*ai[0];
127: v1 = v0 + slen;
128: v2 = v1 + slen;
129: v3 = v2 + slen;
131: for (k=0; k<mbs; k++) {
132: xp = x + k*4;
134: x0=xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; /* Dk*xk = k-th block of x */
136: nz = ai[k+1] - ai[k];
137: vj = aj + ai[k];
138: xp = x + (*vj)*4;
139: while (nz--) {
140: /* += U(k,^T*(Dk*xk) */
141: xp[0] += v0[0]*x0 + v1[0]*x1 + v2[0]*x2 + v3[0]*x3;
142: xp[1] += v0[1]*x0 + v1[1]*x1 + v2[1]*x2 + v3[1]*x3;
143: xp[2] += v0[2]*x0 + v1[2]*x1 + v2[2]*x2 + v3[2]*x3;
144: xp[3] += v0[3]*x0 + v1[3]*x1 + v2[3]*x2 + v3[3]*x3;
145: vj++; xp = x + (*vj)*4;
147: v0 += 4; v1 += 4; v2 += 4; v3 += 4;
148: }
149: /* xk = inv(Dk)*(Dk*xk) */
150: diag = aa+k*16; /* ptr to inv(Dk) */
151: xp = x + k*4;
152: xp[0] = diag[0]*x0 + diag[4]*x1 + diag[8]*x2 + diag[12]*x3;
153: xp[1] = diag[1]*x0 + diag[5]*x1 + diag[9]*x2 + diag[13]*x3;
154: xp[2] = diag[2]*x0 + diag[6]*x1 + diag[10]*x2+ diag[14]*x3;
155: xp[3] = diag[3]*x0 + diag[7]*x1 + diag[11]*x2+ diag[15]*x3;
156: }
157: return(0);
158: }
162: PetscErrorCode MatBackwardSolve_SeqSBSTRM_4_NaturalOrdering(PetscInt *ai,PetscInt *aj,MatScalar *aa,PetscInt mbs,PetscScalar *x)
163: {
164: PetscScalar *xp,x0,x1,x2,x3;
165: PetscInt nz,*vj,k;
167: PetscScalar xp0, xp1, xp2, xp3;
168: const MatScalar *v0, *v1, *v2, *v3;
169: PetscInt slen;
172: slen = 4*(ai[mbs]-ai[0]);
173: v0 = aa + 16*ai[0]+4*(ai[mbs]-ai[0]);
174: v1 = v0 + slen;
175: v2 = v1 + slen;
176: v3 = v2 + slen;
178: for (k=mbs-1; k>=0; k--) {
179: xp = x + k*4;
180: x0=xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; /* xk */
181: nz = ai[k+1] - ai[k];
182: vj = aj + ai[k+1];
183: xp = x + (*vj)*4;
184: while (nz--) {
185: /* xk += U(k,* */
186: v0 -= 4; v1 -= 4; v2 -= 4; v3 -=4;
188: vj--; xp = x + (*vj)*4;
190: xp0 = xp[0]; xp1 = xp[1]; xp2 = xp[2]; xp3 = xp[3];
191: x0 += v0[3]*xp3 + v0[2]*xp2 + v0[1]*xp1 + v0[0]*xp0;
192: x1 += v1[3]*xp3 + v1[2]*xp2 + v1[1]*xp1 + v1[0]*xp0;
193: x2 += v2[3]*xp3 + v2[2]*xp2 + v2[1]*xp1 + v2[0]*xp0;
194: x3 += v3[3]*xp3 + v3[2]*xp2 + v3[1]*xp1 + v3[0]*xp0;
195: }
196: xp = x + k*4;
198: xp[0] = x0; xp[1] = x1; xp[2] = x2; xp[3] = x3;
199: }
200: return(0);
201: }
205: PetscErrorCode MatSolve_SeqSBSTRM_4_NaturalOrdering_inplace(Mat A,Vec bb,Vec xx)
206: {
207: Mat_SeqSBAIJ *a =(Mat_SeqSBAIJ*)A->data;
208: PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j,bs=A->rmap->bs,bs2=a->bs2;
209: PetscScalar *x,*b;
212: Mat_SeqSBSTRM *sbstrm = (Mat_SeqSBSTRM*)A->spptr;
213: MatScalar *as =sbstrm->as;
216: #if 0
217: MatSolve_SeqSBSTRM_4_inplace(A, bb, xx);
218: #endif
219: VecGetArray(bb,&b);
220: VecGetArray(xx,&x);
221: /* solve U^T * D * y = b by forward substitution */
222: PetscMemcpy(x,b,4*mbs*sizeof(PetscScalar)); /* x <- b */
223: MatForwardSolve_SeqSBSTRM_4_NaturalOrdering(ai,aj,as,mbs,x);
224: /* solve U*x = y by back substitution */
225: MatBackwardSolve_SeqSBSTRM_4_NaturalOrdering(ai,aj,as,mbs,x);
226: VecRestoreArray(bb,&b);
227: VecRestoreArray(xx,&x);
228: PetscLogFlops(4.0*bs2*a->nz - (bs+2.0*bs2)*mbs);
229: return(0);
230: }
234: PetscErrorCode MatForwardSolve_SeqSBSTRM_4_NaturalOrdering_inplace(Mat A,Vec bb,Vec xx)
235: {
236: Mat_SeqSBAIJ *a =(Mat_SeqSBAIJ*)A->data;
237: PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j,bs=A->rmap->bs,bs2=a->bs2;
238: PetscScalar *x,*b;
241: Mat_SeqSBSTRM *sbstrm = (Mat_SeqSBSTRM*)A->spptr;
242: MatScalar *as =sbstrm->as;
245: VecGetArray(bb,&b);
246: VecGetArray(xx,&x);
247: PetscMemcpy(x,b,4*mbs*sizeof(PetscScalar)); /* x <- b */
248: MatForwardSolve_SeqSBSTRM_4_NaturalOrdering(ai,aj,as,mbs,x);
249: VecRestoreArray(bb,&b);
250: VecRestoreArray(xx,&x);
251: PetscLogFlops(2.0*bs2*a->nz - bs*mbs);
252: return(0);
253: }
257: PetscErrorCode MatBackwardSolve_SeqSBSTRM_4_NaturalOrdering_inplace(Mat A,Vec bb,Vec xx)
258: {
259: Mat_SeqSBAIJ *a =(Mat_SeqSBAIJ*)A->data;
260: PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j,bs2=a->bs2;
261: PetscScalar *x,*b;
264: Mat_SeqSBSTRM *sbstrm = (Mat_SeqSBSTRM*)A->spptr;
265: MatScalar *as =sbstrm->as;
268: VecGetArray(bb,&b);
269: VecGetArray(xx,&x);
270: PetscMemcpy(x,b,4*mbs*sizeof(PetscScalar));
271: MatBackwardSolve_SeqSBSTRM_4_NaturalOrdering(ai,aj,as,mbs,x);
272: VecRestoreArray(bb,&b);
273: VecRestoreArray(xx,&x);
274: PetscLogFlops(2.0*bs2*(a->nz-mbs));
275: return(0);
276: }
280: PetscErrorCode MatSolve_SeqSBSTRM_5_inplace(Mat A,Vec bb,Vec xx)
281: {
282: Mat_SeqSBAIJ *a =(Mat_SeqSBAIJ*)A->data;
283: IS isrow=a->row;
284: PetscInt mbs =a->mbs,*ai=a->i,*aj=a->j,bs=A->rmap->bs,bs2 = a->bs2;
286: const PetscInt *r;
287: PetscInt nz,*vj,k,idx;
288: PetscScalar *x,*b,x0,x1,x2,x3,x4,*t,*tp;
290: Mat_SeqSBSTRM *sbstrm = (Mat_SeqSBSTRM*)A->spptr;
291: MatScalar *as =sbstrm->as,*diag;
292: PetscScalar tp0, tp1, tp2, tp3, tp4;
293: const MatScalar *v0, *v1, *v2, *v3, *v4;
294: PetscInt slen;
297: VecGetArray(bb,&b);
298: VecGetArray(xx,&x);
299: t = a->solve_work;
300: ISGetIndices(isrow,&r);
303: slen = 5*(ai[mbs]-ai[0]);
304: v0 = as + 25*ai[0];
305: v1 = v0 + slen;
306: v2 = v1 + slen;
307: v3 = v2 + slen;
308: v4 = v3 + slen;
310: /* solve U^T * D * y = b by forward substitution */
311: tp = t;
312: for (k=0; k<mbs; k++) { /* t <- perm(b) */
313: idx = 5*r[k];
314: tp[0] = b[idx];
315: tp[1] = b[idx+1];
316: tp[2] = b[idx+2];
317: tp[3] = b[idx+3];
318: tp[4] = b[idx+4];
319: tp += 5;
320: }
322: for (k=0; k<mbs; k++) {
323: vj = aj + ai[k];
324: tp = t + k*5;
325: x0 =tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; x4=tp[4];
326: nz = ai[k+1] - ai[k];
328: tp = t + (*vj)*5;
329: while (nz--) {
330: tp[0] += v0[0]*x0 + v1[0]*x1 + v2[0]*x2 + v3[0]*x3 + v4[0]*x4;
331: tp[1] += v0[1]*x0 + v1[1]*x1 + v2[1]*x2 + v3[1]*x3 + v4[1]*x4;
332: tp[2] += v0[2]*x0 + v1[2]*x1 + v2[2]*x2 + v3[2]*x3 + v4[2]*x4;
333: tp[3] += v0[3]*x0 + v1[3]*x1 + v2[3]*x2 + v3[3]*x3 + v4[3]*x4;
334: tp[4] += v0[4]*x0 + v1[4]*x1 + v2[4]*x2 + v3[4]*x3 + v4[4]*x4;
335: vj++; tp = t + (*vj)*5;
337: v0 += 5; v1 += 5; v2 += 5; v3 +=5; v4 += 5;
338: }
340: /* xk = inv(Dk)*(Dk*xk) */
341: diag = as+k*25; /* ptr to inv(Dk) */
342: tp = t + k*5;
343: tp[0] = diag[0]*x0 + diag[5]*x1 + diag[10]*x2 + diag[15]*x3 + diag[20]*x4;
344: tp[1] = diag[1]*x0 + diag[6]*x1 + diag[11]*x2 + diag[16]*x3 + diag[21]*x4;
345: tp[2] = diag[2]*x0 + diag[7]*x1 + diag[12]*x2 + diag[17]*x3 + diag[22]*x4;
346: tp[3] = diag[3]*x0 + diag[8]*x1 + diag[13]*x2 + diag[18]*x3 + diag[23]*x4;
347: tp[4] = diag[4]*x0 + diag[9]*x1 + diag[14]*x2 + diag[19]*x3 + diag[24]*x4;
348: }
350: /* solve U*x = y by back substitution */
351: for (k=mbs-1; k>=0; k--) {
352: vj = aj + ai[k+1];
353: tp = t + k*5;
354: x0 =tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; x4=tp[4]; /* xk */
355: nz = ai[k+1] - ai[k];
357: tp = t + (*vj)*5;
358: while (nz--) {
359: /* xk += U(k,* */
360: v0 -= 5; v1 -= 5; v2 -= 5; v3 -=5; v4 -= 5;
362: vj--; tp = t + (*vj)*5;
363: tp0 = tp[0]; tp1 = tp[1]; tp2 = tp[2]; tp3 = tp[3]; tp4 = tp[4];
364: x0 += v0[4]*tp4 + v0[3]*tp3 + v0[2]*tp2 + v0[1]*tp1 + v0[0]*tp0;
365: x1 += v1[4]*tp4 + v1[3]*tp3 + v1[2]*tp2 + v1[1]*tp1 + v1[0]*tp0;
366: x2 += v2[4]*tp4 + v2[3]*tp3 + v2[2]*tp2 + v2[1]*tp1 + v2[0]*tp0;
367: x3 += v3[4]*tp4 + v3[3]*tp3 + v3[2]*tp2 + v3[1]*tp1 + v3[0]*tp0;
368: x4 += v4[4]*tp4 + v4[3]*tp3 + v4[2]*tp2 + v4[1]*tp1 + v4[0]*tp0;
369: }
370: tp = t + k*5;
371: tp[0] = x0; tp[1]=x1; tp[2]=x2; tp[3]=x3; tp[4]=x4;
372: idx = 5*r[k];
374: x[idx] = x0;
375: x[idx+1] = x1;
376: x[idx+2] = x2;
377: x[idx+3] = x3;
378: x[idx+4] = x4;
379: }
381: ISRestoreIndices(isrow,&r);
382: VecRestoreArray(bb,&b);
383: VecRestoreArray(xx,&x);
384: PetscLogFlops(4.0*bs2*a->nz - (bs+2.0*bs2)*mbs);
385: return(0);
386: }
390: PetscErrorCode MatForwardSolve_SeqSBSTRM_5_NaturalOrdering(PetscInt *ai,PetscInt *aj,MatScalar *aa,PetscInt mbs,PetscScalar *x)
391: {
392: MatScalar *diag;
393: PetscScalar *xp,x0,x1,x2,x3,x4;
394: PetscInt nz,*vj,k;
396: const MatScalar *v0, *v1, *v2, *v3, *v4;
397: PetscInt slen;
400: slen = 5*(ai[mbs]-ai[0]);
401: v0 = aa + 25*ai[0];
402: v1 = v0 + slen;
403: v2 = v1 + slen;
404: v3 = v2 + slen;
405: v4 = v3 + slen;
407: for (k=0; k<mbs; k++) {
408: xp = x + k*5;
409: x0 = xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; x4=xp[4]; /* Dk*xk = k-th block of x */
410: nz = ai[k+1] - ai[k];
411: vj = aj + ai[k];
412: xp = x + (*vj)*5;
413: while (nz--) {
414: /* += U(k,^T*(Dk*xk) */
415: xp[0] += v0[0]*x0 + v1[0]*x1 + v2[0]*x2 + v3[0]*x3 + v4[0]*x4;
416: xp[1] += v0[1]*x0 + v1[1]*x1 + v2[1]*x2 + v3[1]*x3 + v4[1]*x4;
417: xp[2] += v0[2]*x0 + v1[2]*x1 + v2[2]*x2 + v3[2]*x3 + v4[2]*x4;
418: xp[3] += v0[3]*x0 + v1[3]*x1 + v2[3]*x2 + v3[3]*x3 + v4[3]*x4;
419: xp[4] += v0[4]*x0 + v1[4]*x1 + v2[4]*x2 + v3[4]*x3 + v4[4]*x4;
421: vj++; xp = x + (*vj)*5;
423: v0 += 5; v1 += 5; v2 += 5; v3 +=5; v4 += 5;
424: }
425: /* xk = inv(Dk)*(Dk*xk) */
426: diag = aa+k*25; /* ptr to inv(Dk) */
427: xp = x + k*5;
428: xp[0] = diag[0]*x0 + diag[5]*x1 + diag[10]*x2 + diag[15]*x3 + diag[20]*x4;
429: xp[1] = diag[1]*x0 + diag[6]*x1 + diag[11]*x2 + diag[16]*x3 + diag[21]*x4;
430: xp[2] = diag[2]*x0 + diag[7]*x1 + diag[12]*x2 + diag[17]*x3 + diag[22]*x4;
431: xp[3] = diag[3]*x0 + diag[8]*x1 + diag[13]*x2 + diag[18]*x3 + diag[23]*x4;
432: xp[4] = diag[4]*x0 + diag[9]*x1 + diag[14]*x2 + diag[19]*x3 + diag[24]*x4;
433: }
434: return(0);
435: }
439: PetscErrorCode MatBackwardSolve_SeqSBSTRM_5_NaturalOrdering(PetscInt *ai,PetscInt *aj,MatScalar *aa,PetscInt mbs,PetscScalar *x)
440: {
441: PetscScalar *xp,x0,x1,x2,x3,x4;
442: PetscInt nz,*vj,k;
444: PetscScalar xp0, xp1, xp2, xp3, xp4;
445: const MatScalar *v0, *v1, *v2, *v3, *v4;
446: PetscInt slen;
449: slen = 5*(ai[mbs]-ai[0]);
450: v0 = aa + 25*ai[0]+5*(ai[mbs]-ai[0]);
451: v1 = v0 + slen;
452: v2 = v1 + slen;
453: v3 = v2 + slen;
454: v4 = v3 + slen;
456: for (k=mbs-1; k>=0; k--) {
458: xp = x + k*5;
459: x0 = xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; x4=xp[4]; /* xk */
460: nz = ai[k+1] - ai[k];
462: vj = aj + ai[k+1];
463: xp = x + (*vj)*5;
465: while (nz--) {
466: /* xk += U(k,* */
467: v0 -= 5; v1 -= 5; v2 -= 5; v3 -=5; v4 -= 5;
469: vj--; xp = x + (*vj)*5;
471: xp4 = xp[4]; xp3 = xp[3]; xp2 = xp[2]; xp1 = xp[1]; xp0 = xp[0];
472: x0 += v0[4]*xp4 + v0[3]*xp3 + v0[2]*xp2 + v0[1]*xp1 + v0[0]*xp0;
473: x1 += v1[4]*xp4 + v1[3]*xp3 + v1[2]*xp2 + v1[1]*xp1 + v1[0]*xp0;
474: x2 += v2[4]*xp4 + v2[3]*xp3 + v2[2]*xp2 + v2[1]*xp1 + v2[0]*xp0;
475: x3 += v3[4]*xp4 + v3[3]*xp3 + v3[2]*xp2 + v3[1]*xp1 + v3[0]*xp0;
476: x4 += v4[4]*xp4 + v4[3]*xp3 + v4[2]*xp2 + v4[1]*xp1 + v4[0]*xp0;
477: }
478: xp = x + k*5;
480: xp[0]=x0; xp[1]=x1; xp[2]=x2; xp[3]=x3; xp[4]=x4;
481: }
482: return(0);
483: }
487: PetscErrorCode MatSolve_SeqSBSTRM_5_NaturalOrdering_inplace(Mat A,Vec bb,Vec xx)
488: {
489: Mat_SeqSBAIJ *a =(Mat_SeqSBAIJ*)A->data;
490: PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j,bs=A->rmap->bs,bs2 = a->bs2;
491: PetscScalar *x,*b;
494: Mat_SeqSBSTRM *sbstrm = (Mat_SeqSBSTRM*)A->spptr;
495: MatScalar *as =sbstrm->as;
498: #if 0
499: #endif
500: VecGetArray(bb,&b);
501: VecGetArray(xx,&x);
503: /* solve U^T * D * y = b by forward substitution */
504: PetscMemcpy(x,b,5*mbs*sizeof(PetscScalar)); /* x <- b */
505: MatForwardSolve_SeqSBSTRM_5_NaturalOrdering(ai,aj,as,mbs,x);
507: /* solve U*x = y by back substitution */
508: MatBackwardSolve_SeqSBSTRM_5_NaturalOrdering(ai,aj,as,mbs,x);
510: VecRestoreArray(bb,&b);
511: VecRestoreArray(xx,&x);
512: PetscLogFlops(4.0*bs2*a->nz - (bs+2.0*bs2)*mbs);
513: return(0);
514: }
518: PetscErrorCode MatForwardSolve_SeqSBSTRM_5_NaturalOrdering_inplace(Mat A,Vec bb,Vec xx)
519: {
520: Mat_SeqSBAIJ *a =(Mat_SeqSBAIJ*)A->data;
521: PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j,bs=A->rmap->bs,bs2=a->bs2;
522: PetscScalar *x,*b;
525: Mat_SeqSBSTRM *sbstrm = (Mat_SeqSBSTRM*)A->spptr;
526: MatScalar *as =sbstrm->as;
529: VecGetArray(bb,&b);
530: VecGetArray(xx,&x);
531: PetscMemcpy(x,b,5*mbs*sizeof(PetscScalar)); /* x <- b */
532: MatForwardSolve_SeqSBSTRM_5_NaturalOrdering(ai,aj,as,mbs,x);
533: VecRestoreArray(bb,&b);
534: VecRestoreArray(xx,&x);
535: PetscLogFlops(2.0*bs2*a->nz - bs*mbs);
536: return(0);
537: }
541: PetscErrorCode MatBackwardSolve_SeqSBSTRM_5_NaturalOrdering_inplace(Mat A,Vec bb,Vec xx)
542: {
543: Mat_SeqSBAIJ *a =(Mat_SeqSBAIJ*)A->data;
544: PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j,bs2=a->bs2;
545: PetscScalar *x,*b;
547: Mat_SeqSBSTRM *sbstrm = (Mat_SeqSBSTRM*)A->spptr;
548: MatScalar *as =sbstrm->as;
551: VecGetArray(bb,&b);
552: VecGetArray(xx,&x);
553: PetscMemcpy(x,b,5*mbs*sizeof(PetscScalar));
554: MatBackwardSolve_SeqSBSTRM_5_NaturalOrdering(ai,aj,as,mbs,x);
555: VecRestoreArray(bb,&b);
556: VecRestoreArray(xx,&x);
557: PetscLogFlops(2.0*bs2*(a->nz-mbs));
558: return(0);
559: }
564: PetscErrorCode SeqSBSTRM_convertFact_sbstrm(Mat F)
565: {
566: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*) F->data;
567: Mat_SeqSBSTRM *sbstrm = (Mat_SeqSBSTRM*) F->spptr;
568: PetscInt m = a->mbs, bs = F->rmap->bs;
569: PetscInt *ai = a->i;
570: PetscInt i,j,ib,jb;
571: MatScalar *aa = a->a, *aau, *asu;
573: PetscInt bs2, rbs, cbs, blen, slen;
574: PetscScalar **asp;
577: sbstrm->rbs = bs;
578: sbstrm->cbs = bs;
580: rbs = cbs = bs;
581: bs2 = rbs*cbs;
582: blen = ai[m]-ai[0];
583: slen = blen*cbs;
585: if (sbstrm->as) {
586: PetscFree(sbstrm->as);
587: }
588: PetscMalloc1(bs2*ai[m], &sbstrm->as);
589: PetscMalloc1(rbs, &asp);
591: asu = sbstrm->as;
592: for (i=0; i<m*bs2; i++) asu[i] = aa[i];
594: asu = sbstrm->as + ai[0]*bs2;
595: aau = aa + ai[0]*bs2;
597: for (i=0; i<rbs; i++) asp[i] = asu + i*slen;
599: for (j=0;j<blen;j++) {
600: for (jb=0; jb<cbs; jb++) {
601: for (ib=0; ib<rbs; ib++) asp[ib][j*cbs+jb] = aau[j*bs2+jb*rbs+ib];
602: }
603: }
605: switch (bs) {
606: case 4:
607: F->ops->forwardsolve = MatForwardSolve_SeqSBSTRM_4_NaturalOrdering_inplace;
608: F->ops->backwardsolve = MatBackwardSolve_SeqSBSTRM_4_NaturalOrdering_inplace;
609: F->ops->solve = MatSolve_SeqSBSTRM_4_NaturalOrdering_inplace;
610: break;
611: case 5:
612: F->ops->forwardsolve = MatForwardSolve_SeqSBSTRM_5_NaturalOrdering_inplace;
613: F->ops->backwardsolve = MatBackwardSolve_SeqSBSTRM_5_NaturalOrdering_inplace;
614: F->ops->solve = MatSolve_SeqSBSTRM_5_NaturalOrdering_inplace;
615: break;
616: default:
617: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"not supported for block size %D",bs);
618: }
619: #if 0
620: #endif
622: PetscFree(asp);
623: return(0);
624: }
626: /*=========================================================*/
630: PetscErrorCode MatFactorGetSolverPackage_seqsbaij_sbstrm(Mat A,const MatSolverPackage *type)
631: {
633: *type = MATSOLVERSBSTRM;
634: return(0);
635: }
639: PetscErrorCode MatCholeskyFactorNumeric_sbstrm(Mat F,Mat A,const MatFactorInfo *info)
640: {
641: PetscInt bs = A->rmap->bs;
645: switch (bs) {
646: case 4:
647: MatCholeskyFactorNumeric_SeqSBAIJ_4_NaturalOrdering(F,A,info);
648: break;
649: case 5:
650: MatCholeskyFactorNumeric_SeqSBAIJ_5_NaturalOrdering(F,A,info);
651: break;
652: default:
653: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"not supported for block size %D",bs);
654: }
656: SeqSBSTRM_convertFact_sbstrm(F);
657: return(0);
658: }
659: /*=========================================================*/
662: PetscErrorCode MatICCFactorSymbolic_sbstrm(Mat B,Mat A,IS perm,const MatFactorInfo *info)
663: {
664: PetscInt ierr;
667: (MatICCFactorSymbolic_SeqSBAIJ)(B,A,perm,info);
669: B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_sbstrm;
670: return(0);
671: }
672: /*=========================================================*/
675: PetscErrorCode MatCholeskyFactorSymbolic_sbstrm(Mat B,Mat A,IS perm,const MatFactorInfo *info)
676: {
677: PetscInt ierr;
680: (MatCholeskyFactorSymbolic_SeqSBAIJ)(B,A,perm,info);
682: B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_sbstrm;
683: return(0);
684: }
685: /*=========================================================*/
689: PETSC_EXTERN PetscErrorCode MatGetFactor_seqsbaij_sbstrm(Mat A,MatFactorType ftype,Mat *F)
690: {
691: Mat B;
692: PetscInt bs = A->rmap->bs;
693: Mat_SeqSBSTRM *sbstrm;
697: if (A->cmap->N != A->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n);
698: MatCreate(PetscObjectComm((PetscObject)A),&B);
699: MatSetSizes(B,A->rmap->n,A->cmap->n,PETSC_DETERMINE,PETSC_DETERMINE);
700: MatSetType(B,((PetscObject)A)->type_name);
701: MatSeqSBAIJSetPreallocation_SeqSBAIJ(B,bs,MAT_SKIP_ALLOCATION,NULL);
704: B->ops->iccfactorsymbolic = MatICCFactorSymbolic_sbstrm;
705: B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_sbstrm;
706: B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_sbstrm;
708: B->ops->destroy = MatDestroy_SeqSBSTRM;
709: B->factortype = ftype;
710: B->assembled = PETSC_TRUE; /* required by -ksp_view */
711: B->preallocated = PETSC_TRUE;
713: PetscNewLog(B,&sbstrm);
715: PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverPackage_C",MatFactorGetSolverPackage_seqsbaij_sbstrm);
717: B->spptr = sbstrm;
718: *F = B;
719: return(0);
720: }