Actual source code: sbstrmfact.c
petsc-3.7.3 2016-08-01
1: #define PETSCMAT_DLL
3: #include <../src/mat/impls/sbaij/seq/sbaij.h>
4: #include <../src/mat/impls/sbaij/seq/sbstream/sbstream.h>
6: extern PetscErrorCode MatDestroy_SeqSBSTRM(Mat A);
11: PetscErrorCode MatSolve_SeqSBSTRM_4_inplace(Mat A,Vec bb,Vec xx)
12: {
13: Mat_SeqSBAIJ *a =(Mat_SeqSBAIJ*)A->data;
14: IS isrow=a->row;
15: PetscInt mbs =a->mbs,*ai=a->i,*aj=a->j;
17: const PetscInt *r;
18: PetscInt nz,*vj,k,idx;
19: PetscScalar *x,*b,x0,x1,x2,x3,*t,*tp;
21: Mat_SeqSBSTRM *sbstrm = (Mat_SeqSBSTRM*)A->spptr;
22: MatScalar *as =sbstrm->as,*diag;
23: PetscScalar tp0, tp1, tp2, tp3;
24: const MatScalar *v0, *v1, *v2, *v3;
25: PetscInt slen;
28: slen = 4*(ai[mbs]-ai[0]);
29: v0 = as + 16*ai[0];
30: v1 = v0 + slen;
31: v2 = v1 + slen;
32: v3 = v2 + slen;
34: VecGetArray(bb,&b);
35: VecGetArray(xx,&x);
36: t = a->solve_work;
37: ISGetIndices(isrow,&r);
39: /* solve U^T * D * y = b by forward substitution */
40: tp = t;
41: for (k=0; k<mbs; k++) { /* t <- perm(b) */
42: idx = 4*r[k];
43: tp[0] = b[idx];
44: tp[1] = b[idx+1];
45: tp[2] = b[idx+2];
46: tp[3] = b[idx+3];
47: tp += 4;
48: }
50: for (k=0; k<mbs; k++) {
51: vj = aj + ai[k];
52: tp = t + k*4;
53: x0 = tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3];
54: nz = ai[k+1] - ai[k];
56: tp = t + (*vj)*4;
57: while (nz--) {
58: tp[0] += v0[0]*x0 + v1[0]*x1 + v2[0]*x2 + v3[0]*x3;
59: tp[1] += v0[1]*x0 + v1[1]*x1 + v2[1]*x2 + v3[1]*x3;
60: tp[2] += v0[2]*x0 + v1[2]*x1 + v2[2]*x2 + v3[2]*x3;
61: tp[3] += v0[3]*x0 + v1[3]*x1 + v2[3]*x2 + v3[3]*x3;
62: vj++; tp = t + (*vj)*4;
64: v0 += 4; v1 += 4; v2 += 4; v3 += 4;
65: }
67: /* xk = inv(Dk)*(Dk*xk) */
68: diag = as+k*16; /* ptr to inv(Dk) */
69: tp = t + k*4;
70: tp[0] = diag[0]*x0 + diag[4]*x1 + diag[8]*x2 + diag[12]*x3;
71: tp[1] = diag[1]*x0 + diag[5]*x1 + diag[9]*x2 + diag[13]*x3;
72: tp[2] = diag[2]*x0 + diag[6]*x1 + diag[10]*x2+ diag[14]*x3;
73: tp[3] = diag[3]*x0 + diag[7]*x1 + diag[11]*x2+ diag[15]*x3;
74: }
76: /* solve U*x = y by back substitution */
77: for (k=mbs-1; k>=0; k--) {
78: vj = aj + ai[k+1];
79: tp = t + k*4;
80: x0 = tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; /* xk */
81: nz = ai[k+1] - ai[k];
83: while (nz--) {
84: /* xk += U(k,* */
85: v0 -= 4; v1 -= 4; v2 -= 4; v3 -= 4;
87: vj--; tp = t + (*vj)*4;
89: tp0 = tp[0]; tp1 = tp[1]; tp2 = tp[2]; tp3 = tp[3];
90: x0 += v0[3]*tp3 + v0[2]*tp2 + v0[1]*tp1 + v0[0]*tp0;
91: x1 += v1[3]*tp3 + v1[2]*tp2 + v1[1]*tp1 + v1[0]*tp0;
92: x2 += v2[3]*tp3 + v2[2]*tp2 + v2[1]*tp1 + v2[0]*tp0;
93: x3 += v3[3]*tp3 + v3[2]*tp2 + v3[1]*tp1 + v3[0]*tp0;
94: }
95: tp = t + k*4;
96: tp[0] = x0; tp[1]=x1; tp[2]=x2; tp[3]=x3;
98: idx = 4*r[k];
99: x[idx] = x0;
100: x[idx+1] = x1;
101: x[idx+2] = x2;
102: x[idx+3] = x3;
103: }
105: ISRestoreIndices(isrow,&r);
106: VecRestoreArray(bb,&b);
107: VecRestoreArray(xx,&x);
108: PetscLogFlops(4.0*a->bs2*a->nz - (A->rmap->bs+2.0*a->bs2)*mbs);
109: return(0);
110: }
114: PetscErrorCode MatForwardSolve_SeqSBSTRM_4_NaturalOrdering(PetscInt *ai,PetscInt *aj,MatScalar *aa,PetscInt mbs,PetscScalar *x)
115: {
116: MatScalar *diag;
117: PetscScalar *xp,x0,x1,x2,x3;
118: PetscInt nz,*vj,k;
120: const MatScalar *v0, *v1, *v2, *v3;
121: PetscInt slen;
124: slen = 4*(ai[mbs]-ai[0]);
125: v0 = aa + 16*ai[0];
126: v1 = v0 + slen;
127: v2 = v1 + slen;
128: v3 = v2 + slen;
130: for (k=0; k<mbs; k++) {
131: xp = x + k*4;
133: x0=xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; /* Dk*xk = k-th block of x */
135: nz = ai[k+1] - ai[k];
136: vj = aj + ai[k];
137: xp = x + (*vj)*4;
138: while (nz--) {
139: /* += U(k,^T*(Dk*xk) */
140: xp[0] += v0[0]*x0 + v1[0]*x1 + v2[0]*x2 + v3[0]*x3;
141: xp[1] += v0[1]*x0 + v1[1]*x1 + v2[1]*x2 + v3[1]*x3;
142: xp[2] += v0[2]*x0 + v1[2]*x1 + v2[2]*x2 + v3[2]*x3;
143: xp[3] += v0[3]*x0 + v1[3]*x1 + v2[3]*x2 + v3[3]*x3;
144: vj++; xp = x + (*vj)*4;
146: v0 += 4; v1 += 4; v2 += 4; v3 += 4;
147: }
148: /* xk = inv(Dk)*(Dk*xk) */
149: diag = aa+k*16; /* ptr to inv(Dk) */
150: xp = x + k*4;
151: xp[0] = diag[0]*x0 + diag[4]*x1 + diag[8]*x2 + diag[12]*x3;
152: xp[1] = diag[1]*x0 + diag[5]*x1 + diag[9]*x2 + diag[13]*x3;
153: xp[2] = diag[2]*x0 + diag[6]*x1 + diag[10]*x2+ diag[14]*x3;
154: xp[3] = diag[3]*x0 + diag[7]*x1 + diag[11]*x2+ diag[15]*x3;
155: }
156: return(0);
157: }
161: PetscErrorCode MatBackwardSolve_SeqSBSTRM_4_NaturalOrdering(PetscInt *ai,PetscInt *aj,MatScalar *aa,PetscInt mbs,PetscScalar *x)
162: {
163: PetscScalar *xp,x0,x1,x2,x3;
164: PetscInt nz,*vj,k;
166: PetscScalar xp0, xp1, xp2, xp3;
167: const MatScalar *v0, *v1, *v2, *v3;
168: PetscInt slen;
171: slen = 4*(ai[mbs]-ai[0]);
172: v0 = aa + 16*ai[0]+4*(ai[mbs]-ai[0]);
173: v1 = v0 + slen;
174: v2 = v1 + slen;
175: v3 = v2 + slen;
177: for (k=mbs-1; k>=0; k--) {
178: xp = x + k*4;
179: x0=xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; /* xk */
180: nz = ai[k+1] - ai[k];
181: vj = aj + ai[k+1];
182: while (nz--) {
183: /* xk += U(k,* */
184: v0 -= 4; v1 -= 4; v2 -= 4; v3 -=4;
186: vj--; xp = x + (*vj)*4;
188: xp0 = xp[0]; xp1 = xp[1]; xp2 = xp[2]; xp3 = xp[3];
189: x0 += v0[3]*xp3 + v0[2]*xp2 + v0[1]*xp1 + v0[0]*xp0;
190: x1 += v1[3]*xp3 + v1[2]*xp2 + v1[1]*xp1 + v1[0]*xp0;
191: x2 += v2[3]*xp3 + v2[2]*xp2 + v2[1]*xp1 + v2[0]*xp0;
192: x3 += v3[3]*xp3 + v3[2]*xp2 + v3[1]*xp1 + v3[0]*xp0;
193: }
194: xp = x + k*4;
196: xp[0] = x0; xp[1] = x1; xp[2] = x2; xp[3] = x3;
197: }
198: return(0);
199: }
203: PetscErrorCode MatSolve_SeqSBSTRM_4_NaturalOrdering_inplace(Mat A,Vec bb,Vec xx)
204: {
205: Mat_SeqSBAIJ *a =(Mat_SeqSBAIJ*)A->data;
206: PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j;
207: PetscScalar *x,*b;
210: Mat_SeqSBSTRM *sbstrm = (Mat_SeqSBSTRM*)A->spptr;
211: MatScalar *as =sbstrm->as;
214: #if 0
215: MatSolve_SeqSBSTRM_4_inplace(A, bb, xx);
216: #endif
217: VecGetArray(bb,&b);
218: VecGetArray(xx,&x);
219: /* solve U^T * D * y = b by forward substitution */
220: PetscMemcpy(x,b,4*mbs*sizeof(PetscScalar)); /* x <- b */
221: MatForwardSolve_SeqSBSTRM_4_NaturalOrdering(ai,aj,as,mbs,x);
222: /* solve U*x = y by back substitution */
223: MatBackwardSolve_SeqSBSTRM_4_NaturalOrdering(ai,aj,as,mbs,x);
224: VecRestoreArray(bb,&b);
225: VecRestoreArray(xx,&x);
226: PetscLogFlops(4.0*a->bs2*a->nz - (A->rmap->bs+2.0*a->bs2)*mbs);
227: return(0);
228: }
232: PetscErrorCode MatForwardSolve_SeqSBSTRM_4_NaturalOrdering_inplace(Mat A,Vec bb,Vec xx)
233: {
234: Mat_SeqSBAIJ *a =(Mat_SeqSBAIJ*)A->data;
235: PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j;
236: PetscScalar *x,*b;
239: Mat_SeqSBSTRM *sbstrm = (Mat_SeqSBSTRM*)A->spptr;
240: MatScalar *as =sbstrm->as;
243: VecGetArray(bb,&b);
244: VecGetArray(xx,&x);
245: PetscMemcpy(x,b,4*mbs*sizeof(PetscScalar)); /* x <- b */
246: MatForwardSolve_SeqSBSTRM_4_NaturalOrdering(ai,aj,as,mbs,x);
247: VecRestoreArray(bb,&b);
248: VecRestoreArray(xx,&x);
249: PetscLogFlops(2.0*a->bs2*a->nz - A->rmap->bs*mbs);
250: return(0);
251: }
255: PetscErrorCode MatBackwardSolve_SeqSBSTRM_4_NaturalOrdering_inplace(Mat A,Vec bb,Vec xx)
256: {
257: Mat_SeqSBAIJ *a =(Mat_SeqSBAIJ*)A->data;
258: PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j;
259: PetscScalar *x,*b;
262: Mat_SeqSBSTRM *sbstrm = (Mat_SeqSBSTRM*)A->spptr;
263: MatScalar *as =sbstrm->as;
266: VecGetArray(bb,&b);
267: VecGetArray(xx,&x);
268: PetscMemcpy(x,b,4*mbs*sizeof(PetscScalar));
269: MatBackwardSolve_SeqSBSTRM_4_NaturalOrdering(ai,aj,as,mbs,x);
270: VecRestoreArray(bb,&b);
271: VecRestoreArray(xx,&x);
272: PetscLogFlops(2.0*a->bs2*(a->nz-mbs));
273: return(0);
274: }
278: PetscErrorCode MatSolve_SeqSBSTRM_5_inplace(Mat A,Vec bb,Vec xx)
279: {
280: Mat_SeqSBAIJ *a =(Mat_SeqSBAIJ*)A->data;
281: IS isrow=a->row;
282: PetscInt mbs =a->mbs,*ai=a->i,*aj=a->j;
284: const PetscInt *r;
285: PetscInt nz,*vj,k,idx;
286: PetscScalar *x,*b,x0,x1,x2,x3,x4,*t,*tp;
288: Mat_SeqSBSTRM *sbstrm = (Mat_SeqSBSTRM*)A->spptr;
289: MatScalar *as =sbstrm->as,*diag;
290: PetscScalar tp0, tp1, tp2, tp3, tp4;
291: const MatScalar *v0, *v1, *v2, *v3, *v4;
292: PetscInt slen;
295: VecGetArray(bb,&b);
296: VecGetArray(xx,&x);
297: t = a->solve_work;
298: ISGetIndices(isrow,&r);
301: slen = 5*(ai[mbs]-ai[0]);
302: v0 = as + 25*ai[0];
303: v1 = v0 + slen;
304: v2 = v1 + slen;
305: v3 = v2 + slen;
306: v4 = v3 + slen;
308: /* solve U^T * D * y = b by forward substitution */
309: tp = t;
310: for (k=0; k<mbs; k++) { /* t <- perm(b) */
311: idx = 5*r[k];
312: tp[0] = b[idx];
313: tp[1] = b[idx+1];
314: tp[2] = b[idx+2];
315: tp[3] = b[idx+3];
316: tp[4] = b[idx+4];
317: tp += 5;
318: }
320: for (k=0; k<mbs; k++) {
321: vj = aj + ai[k];
322: tp = t + k*5;
323: x0 =tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; x4=tp[4];
324: nz = ai[k+1] - ai[k];
326: tp = t + (*vj)*5;
327: while (nz--) {
328: tp[0] += v0[0]*x0 + v1[0]*x1 + v2[0]*x2 + v3[0]*x3 + v4[0]*x4;
329: tp[1] += v0[1]*x0 + v1[1]*x1 + v2[1]*x2 + v3[1]*x3 + v4[1]*x4;
330: tp[2] += v0[2]*x0 + v1[2]*x1 + v2[2]*x2 + v3[2]*x3 + v4[2]*x4;
331: tp[3] += v0[3]*x0 + v1[3]*x1 + v2[3]*x2 + v3[3]*x3 + v4[3]*x4;
332: tp[4] += v0[4]*x0 + v1[4]*x1 + v2[4]*x2 + v3[4]*x3 + v4[4]*x4;
333: vj++; tp = t + (*vj)*5;
335: v0 += 5; v1 += 5; v2 += 5; v3 +=5; v4 += 5;
336: }
338: /* xk = inv(Dk)*(Dk*xk) */
339: diag = as+k*25; /* ptr to inv(Dk) */
340: tp = t + k*5;
341: tp[0] = diag[0]*x0 + diag[5]*x1 + diag[10]*x2 + diag[15]*x3 + diag[20]*x4;
342: tp[1] = diag[1]*x0 + diag[6]*x1 + diag[11]*x2 + diag[16]*x3 + diag[21]*x4;
343: tp[2] = diag[2]*x0 + diag[7]*x1 + diag[12]*x2 + diag[17]*x3 + diag[22]*x4;
344: tp[3] = diag[3]*x0 + diag[8]*x1 + diag[13]*x2 + diag[18]*x3 + diag[23]*x4;
345: tp[4] = diag[4]*x0 + diag[9]*x1 + diag[14]*x2 + diag[19]*x3 + diag[24]*x4;
346: }
348: /* solve U*x = y by back substitution */
349: for (k=mbs-1; k>=0; k--) {
350: vj = aj + ai[k+1];
351: tp = t + k*5;
352: x0 =tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; x4=tp[4]; /* xk */
353: nz = ai[k+1] - ai[k];
355: while (nz--) {
356: /* xk += U(k,* */
357: v0 -= 5; v1 -= 5; v2 -= 5; v3 -=5; v4 -= 5;
359: vj--; tp = t + (*vj)*5;
360: tp0 = tp[0]; tp1 = tp[1]; tp2 = tp[2]; tp3 = tp[3]; tp4 = tp[4];
361: x0 += v0[4]*tp4 + v0[3]*tp3 + v0[2]*tp2 + v0[1]*tp1 + v0[0]*tp0;
362: x1 += v1[4]*tp4 + v1[3]*tp3 + v1[2]*tp2 + v1[1]*tp1 + v1[0]*tp0;
363: x2 += v2[4]*tp4 + v2[3]*tp3 + v2[2]*tp2 + v2[1]*tp1 + v2[0]*tp0;
364: x3 += v3[4]*tp4 + v3[3]*tp3 + v3[2]*tp2 + v3[1]*tp1 + v3[0]*tp0;
365: x4 += v4[4]*tp4 + v4[3]*tp3 + v4[2]*tp2 + v4[1]*tp1 + v4[0]*tp0;
366: }
367: tp = t + k*5;
368: tp[0] = x0; tp[1]=x1; tp[2]=x2; tp[3]=x3; tp[4]=x4;
369: idx = 5*r[k];
371: x[idx] = x0;
372: x[idx+1] = x1;
373: x[idx+2] = x2;
374: x[idx+3] = x3;
375: x[idx+4] = x4;
376: }
378: ISRestoreIndices(isrow,&r);
379: VecRestoreArray(bb,&b);
380: VecRestoreArray(xx,&x);
381: PetscLogFlops(4.0*a->bs2*a->nz - (A->rmap->bs+2.0*a->bs2)*mbs);
382: return(0);
383: }
387: PetscErrorCode MatForwardSolve_SeqSBSTRM_5_NaturalOrdering(PetscInt *ai,PetscInt *aj,MatScalar *aa,PetscInt mbs,PetscScalar *x)
388: {
389: MatScalar *diag;
390: PetscScalar *xp,x0,x1,x2,x3,x4;
391: PetscInt nz,*vj,k;
393: const MatScalar *v0, *v1, *v2, *v3, *v4;
394: PetscInt slen;
397: slen = 5*(ai[mbs]-ai[0]);
398: v0 = aa + 25*ai[0];
399: v1 = v0 + slen;
400: v2 = v1 + slen;
401: v3 = v2 + slen;
402: v4 = v3 + slen;
404: for (k=0; k<mbs; k++) {
405: xp = x + k*5;
406: x0 = xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; x4=xp[4]; /* Dk*xk = k-th block of x */
407: nz = ai[k+1] - ai[k];
408: vj = aj + ai[k];
409: xp = x + (*vj)*5;
410: while (nz--) {
411: /* += U(k,^T*(Dk*xk) */
412: xp[0] += v0[0]*x0 + v1[0]*x1 + v2[0]*x2 + v3[0]*x3 + v4[0]*x4;
413: xp[1] += v0[1]*x0 + v1[1]*x1 + v2[1]*x2 + v3[1]*x3 + v4[1]*x4;
414: xp[2] += v0[2]*x0 + v1[2]*x1 + v2[2]*x2 + v3[2]*x3 + v4[2]*x4;
415: xp[3] += v0[3]*x0 + v1[3]*x1 + v2[3]*x2 + v3[3]*x3 + v4[3]*x4;
416: xp[4] += v0[4]*x0 + v1[4]*x1 + v2[4]*x2 + v3[4]*x3 + v4[4]*x4;
418: vj++; xp = x + (*vj)*5;
420: v0 += 5; v1 += 5; v2 += 5; v3 +=5; v4 += 5;
421: }
422: /* xk = inv(Dk)*(Dk*xk) */
423: diag = aa+k*25; /* ptr to inv(Dk) */
424: xp = x + k*5;
425: xp[0] = diag[0]*x0 + diag[5]*x1 + diag[10]*x2 + diag[15]*x3 + diag[20]*x4;
426: xp[1] = diag[1]*x0 + diag[6]*x1 + diag[11]*x2 + diag[16]*x3 + diag[21]*x4;
427: xp[2] = diag[2]*x0 + diag[7]*x1 + diag[12]*x2 + diag[17]*x3 + diag[22]*x4;
428: xp[3] = diag[3]*x0 + diag[8]*x1 + diag[13]*x2 + diag[18]*x3 + diag[23]*x4;
429: xp[4] = diag[4]*x0 + diag[9]*x1 + diag[14]*x2 + diag[19]*x3 + diag[24]*x4;
430: }
431: return(0);
432: }
436: PetscErrorCode MatBackwardSolve_SeqSBSTRM_5_NaturalOrdering(PetscInt *ai,PetscInt *aj,MatScalar *aa,PetscInt mbs,PetscScalar *x)
437: {
438: PetscScalar *xp,x0,x1,x2,x3,x4;
439: PetscInt nz,*vj,k;
441: PetscScalar xp0, xp1, xp2, xp3, xp4;
442: const MatScalar *v0, *v1, *v2, *v3, *v4;
443: PetscInt slen;
446: slen = 5*(ai[mbs]-ai[0]);
447: v0 = aa + 25*ai[0]+5*(ai[mbs]-ai[0]);
448: v1 = v0 + slen;
449: v2 = v1 + slen;
450: v3 = v2 + slen;
451: v4 = v3 + slen;
453: for (k=mbs-1; k>=0; k--) {
455: xp = x + k*5;
456: x0 = xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; x4=xp[4]; /* xk */
457: nz = ai[k+1] - ai[k];
459: vj = aj + ai[k+1];
461: while (nz--) {
462: /* xk += U(k,* */
463: v0 -= 5; v1 -= 5; v2 -= 5; v3 -=5; v4 -= 5;
465: vj--; xp = x + (*vj)*5;
467: xp4 = xp[4]; xp3 = xp[3]; xp2 = xp[2]; xp1 = xp[1]; xp0 = xp[0];
468: x0 += v0[4]*xp4 + v0[3]*xp3 + v0[2]*xp2 + v0[1]*xp1 + v0[0]*xp0;
469: x1 += v1[4]*xp4 + v1[3]*xp3 + v1[2]*xp2 + v1[1]*xp1 + v1[0]*xp0;
470: x2 += v2[4]*xp4 + v2[3]*xp3 + v2[2]*xp2 + v2[1]*xp1 + v2[0]*xp0;
471: x3 += v3[4]*xp4 + v3[3]*xp3 + v3[2]*xp2 + v3[1]*xp1 + v3[0]*xp0;
472: x4 += v4[4]*xp4 + v4[3]*xp3 + v4[2]*xp2 + v4[1]*xp1 + v4[0]*xp0;
473: }
474: xp = x + k*5;
476: xp[0]=x0; xp[1]=x1; xp[2]=x2; xp[3]=x3; xp[4]=x4;
477: }
478: return(0);
479: }
483: PetscErrorCode MatSolve_SeqSBSTRM_5_NaturalOrdering_inplace(Mat A,Vec bb,Vec xx)
484: {
485: Mat_SeqSBAIJ *a =(Mat_SeqSBAIJ*)A->data;
486: PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j;
487: PetscScalar *x,*b;
490: Mat_SeqSBSTRM *sbstrm = (Mat_SeqSBSTRM*)A->spptr;
491: MatScalar *as =sbstrm->as;
494: VecGetArray(bb,&b);
495: VecGetArray(xx,&x);
497: /* solve U^T * D * y = b by forward substitution */
498: PetscMemcpy(x,b,5*mbs*sizeof(PetscScalar)); /* x <- b */
499: MatForwardSolve_SeqSBSTRM_5_NaturalOrdering(ai,aj,as,mbs,x);
501: /* solve U*x = y by back substitution */
502: MatBackwardSolve_SeqSBSTRM_5_NaturalOrdering(ai,aj,as,mbs,x);
504: VecRestoreArray(bb,&b);
505: VecRestoreArray(xx,&x);
506: PetscLogFlops(4.0*a->bs2*a->nz - (A->rmap->bs+2.0*a->bs2)*mbs);
507: return(0);
508: }
512: PetscErrorCode MatForwardSolve_SeqSBSTRM_5_NaturalOrdering_inplace(Mat A,Vec bb,Vec xx)
513: {
514: Mat_SeqSBAIJ *a =(Mat_SeqSBAIJ*)A->data;
515: PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j;
516: PetscScalar *x,*b;
519: Mat_SeqSBSTRM *sbstrm = (Mat_SeqSBSTRM*)A->spptr;
520: MatScalar *as =sbstrm->as;
523: VecGetArray(bb,&b);
524: VecGetArray(xx,&x);
525: PetscMemcpy(x,b,5*mbs*sizeof(PetscScalar)); /* x <- b */
526: MatForwardSolve_SeqSBSTRM_5_NaturalOrdering(ai,aj,as,mbs,x);
527: VecRestoreArray(bb,&b);
528: VecRestoreArray(xx,&x);
529: PetscLogFlops(2.0*a->bs2*a->nz - A->rmap->bs*mbs);
530: return(0);
531: }
535: PetscErrorCode MatBackwardSolve_SeqSBSTRM_5_NaturalOrdering_inplace(Mat A,Vec bb,Vec xx)
536: {
537: Mat_SeqSBAIJ *a =(Mat_SeqSBAIJ*)A->data;
538: PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j;
539: PetscScalar *x,*b;
541: Mat_SeqSBSTRM *sbstrm = (Mat_SeqSBSTRM*)A->spptr;
542: MatScalar *as =sbstrm->as;
545: VecGetArray(bb,&b);
546: VecGetArray(xx,&x);
547: PetscMemcpy(x,b,5*mbs*sizeof(PetscScalar));
548: MatBackwardSolve_SeqSBSTRM_5_NaturalOrdering(ai,aj,as,mbs,x);
549: VecRestoreArray(bb,&b);
550: VecRestoreArray(xx,&x);
551: PetscLogFlops(2.0*a->bs2*(a->nz-mbs));
552: return(0);
553: }
558: PetscErrorCode SeqSBSTRM_convertFact_sbstrm(Mat F)
559: {
560: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*) F->data;
561: Mat_SeqSBSTRM *sbstrm = (Mat_SeqSBSTRM*) F->spptr;
562: PetscInt m = a->mbs, bs = F->rmap->bs;
563: PetscInt *ai = a->i;
564: PetscInt i,j,ib,jb;
565: MatScalar *aa = a->a, *aau, *asu;
567: PetscInt bs2, rbs, cbs, blen, slen;
568: PetscScalar **asp;
571: sbstrm->rbs = bs;
572: sbstrm->cbs = bs;
574: rbs = cbs = bs;
575: bs2 = rbs*cbs;
576: blen = ai[m]-ai[0];
577: slen = blen*cbs;
579: if (sbstrm->as) {
580: PetscFree(sbstrm->as);
581: }
582: PetscMalloc1(bs2*ai[m], &sbstrm->as);
583: PetscMalloc1(rbs, &asp);
585: asu = sbstrm->as;
586: for (i=0; i<m*bs2; i++) asu[i] = aa[i];
588: asu = sbstrm->as + ai[0]*bs2;
589: aau = aa + ai[0]*bs2;
591: for (i=0; i<rbs; i++) asp[i] = asu + i*slen;
593: for (j=0;j<blen;j++) {
594: for (jb=0; jb<cbs; jb++) {
595: for (ib=0; ib<rbs; ib++) asp[ib][j*cbs+jb] = aau[j*bs2+jb*rbs+ib];
596: }
597: }
599: switch (bs) {
600: case 4:
601: F->ops->forwardsolve = MatForwardSolve_SeqSBSTRM_4_NaturalOrdering_inplace;
602: F->ops->backwardsolve = MatBackwardSolve_SeqSBSTRM_4_NaturalOrdering_inplace;
603: F->ops->solve = MatSolve_SeqSBSTRM_4_NaturalOrdering_inplace;
604: break;
605: case 5:
606: F->ops->forwardsolve = MatForwardSolve_SeqSBSTRM_5_NaturalOrdering_inplace;
607: F->ops->backwardsolve = MatBackwardSolve_SeqSBSTRM_5_NaturalOrdering_inplace;
608: F->ops->solve = MatSolve_SeqSBSTRM_5_NaturalOrdering_inplace;
609: break;
610: default:
611: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"not supported for block size %D",bs);
612: }
613: #if 0
614: #endif
616: PetscFree(asp);
617: return(0);
618: }
620: /*=========================================================*/
624: PetscErrorCode MatFactorGetSolverPackage_seqsbaij_sbstrm(Mat A,const MatSolverPackage *type)
625: {
627: *type = MATSOLVERSBSTRM;
628: return(0);
629: }
633: PetscErrorCode MatCholeskyFactorNumeric_sbstrm(Mat F,Mat A,const MatFactorInfo *info)
634: {
635: PetscInt bs = A->rmap->bs;
639: switch (bs) {
640: case 4:
641: MatCholeskyFactorNumeric_SeqSBAIJ_4_NaturalOrdering(F,A,info);
642: break;
643: case 5:
644: MatCholeskyFactorNumeric_SeqSBAIJ_5_NaturalOrdering(F,A,info);
645: break;
646: default:
647: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"not supported for block size %D",bs);
648: }
650: SeqSBSTRM_convertFact_sbstrm(F);
651: return(0);
652: }
653: /*=========================================================*/
656: PetscErrorCode MatICCFactorSymbolic_sbstrm(Mat B,Mat A,IS perm,const MatFactorInfo *info)
657: {
658: PetscInt ierr;
661: (MatICCFactorSymbolic_SeqSBAIJ)(B,A,perm,info);
663: B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_sbstrm;
664: return(0);
665: }
666: /*=========================================================*/
669: PetscErrorCode MatCholeskyFactorSymbolic_sbstrm(Mat B,Mat A,IS perm,const MatFactorInfo *info)
670: {
671: PetscInt ierr;
674: (MatCholeskyFactorSymbolic_SeqSBAIJ)(B,A,perm,info);
676: B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_sbstrm;
677: return(0);
678: }
679: /*=========================================================*/
683: PETSC_INTERN PetscErrorCode MatGetFactor_seqsbaij_sbstrm(Mat A,MatFactorType ftype,Mat *F)
684: {
685: Mat B;
686: PetscInt bs = A->rmap->bs;
687: Mat_SeqSBSTRM *sbstrm;
691: if (A->cmap->N != A->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n);
692: MatCreate(PetscObjectComm((PetscObject)A),&B);
693: MatSetSizes(B,A->rmap->n,A->cmap->n,PETSC_DETERMINE,PETSC_DETERMINE);
694: MatSetType(B,((PetscObject)A)->type_name);
695: MatSeqSBAIJSetPreallocation_SeqSBAIJ(B,bs,MAT_SKIP_ALLOCATION,NULL);
698: B->ops->iccfactorsymbolic = MatICCFactorSymbolic_sbstrm;
699: B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_sbstrm;
700: B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_sbstrm;
702: B->ops->destroy = MatDestroy_SeqSBSTRM;
703: B->factortype = ftype;
704: B->assembled = PETSC_TRUE; /* required by -ksp_view */
705: B->preallocated = PETSC_TRUE;
707: PetscNewLog(B,&sbstrm);
709: PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverPackage_C",MatFactorGetSolverPackage_seqsbaij_sbstrm);
711: PetscFree(B->solvertype);
712: PetscStrallocpy(MATSOLVERSBSTRM,&B->solvertype);
714: B->spptr = sbstrm;
715: *F = B;
716: return(0);
717: }