Actual source code: sbstrmfact.c
petsc-3.3-p7 2013-05-11
1: #define PETSCMAT_DLL
3: #include ../src/mat/impls/sbaij/seq/sbaij.h
4: #include ../src/mat/impls/sbaij/seq/sbstream/sbstream.h
6: extern PetscErrorCode MatDestroy_SeqSBSTRM(Mat A);
11: PetscErrorCode MatSolve_SeqSBSTRM_4_inplace(Mat A,Vec bb,Vec xx)
12: {
13: Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data;
14: IS isrow=a->row;
15: PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j,bs=A->rmap->bs,bs2=a->bs2;
17: const PetscInt *r;
18: PetscInt nz,*vj,k,idx;
19: PetscScalar *x,*b,x0,x1,x2,x3,*t,*tp;
21: Mat_SeqSBSTRM *sbstrm = (Mat_SeqSBSTRM *)A->spptr;
22: MatScalar *as=sbstrm->as,*diag;
23: PetscScalar tp0, tp1, tp2, tp3;
24: const MatScalar *v0, *v1, *v2, *v3;
25: PetscInt slen;
29: slen = 4*(ai[mbs]-ai[0]);
30: v0 = as + 16*ai[0];
31: v1 = v0 + slen;
32: v2 = v1 + slen;
33: v3 = v2 + slen;
35: VecGetArray(bb,&b);
36: VecGetArray(xx,&x);
37: t = a->solve_work;
38: ISGetIndices(isrow,&r);
40: /* solve U^T * D * y = b by forward substitution */
41: tp = t;
42: for (k=0; k<mbs; k++) { /* t <- perm(b) */
43: idx = 4*r[k];
44: tp[0] = b[idx];
45: tp[1] = b[idx+1];
46: tp[2] = b[idx+2];
47: tp[3] = b[idx+3];
48: tp += 4;
49: }
50:
51: for (k=0; k<mbs; k++){
52: vj = aj + ai[k];
53: tp = t + k*4;
54: x0=tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3];
55: nz = ai[k+1] - ai[k];
57: tp = t + (*vj)*4;
58: while (nz--) {
59: tp[0] += v0[0]*x0 + v1[0]*x1 + v2[0]*x2 + v3[0]*x3;
60: tp[1] += v0[1]*x0 + v1[1]*x1 + v2[1]*x2 + v3[1]*x3;
61: tp[2] += v0[2]*x0 + v1[2]*x1 + v2[2]*x2 + v3[2]*x3;
62: tp[3] += v0[3]*x0 + v1[3]*x1 + v2[3]*x2 + v3[3]*x3;
63: vj++; tp = t + (*vj)*4;
64: v0 += 4; v1 += 4; v2 += 4; v3 += 4;
65: }
67: /* xk = inv(Dk)*(Dk*xk) */
68: diag = as+k*16; /* ptr to inv(Dk) */
69: tp = t + k*4;
70: tp[0] = diag[0]*x0 + diag[4]*x1 + diag[8]*x2 + diag[12]*x3;
71: tp[1] = diag[1]*x0 + diag[5]*x1 + diag[9]*x2 + diag[13]*x3;
72: tp[2] = diag[2]*x0 + diag[6]*x1 + diag[10]*x2+ diag[14]*x3;
73: tp[3] = diag[3]*x0 + diag[7]*x1 + diag[11]*x2+ diag[15]*x3;
74: }
76: /* solve U*x = y by back substitution */
77: for (k=mbs-1; k>=0; k--){
78: vj = aj + ai[k+1];
79: tp = t + k*4;
80: x0=tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; /* xk */
81: nz = ai[k+1] - ai[k];
82:
83: tp = t + (*vj)*4;
84: while (nz--) {
85: /* xk += U(k,* */
86: v0 -= 4; v1 -= 4; v2 -= 4; v3 -= 4;
87: vj--; tp = t + (*vj)*4;
88: tp0 = tp[0]; tp1 = tp[1]; tp2 = tp[2]; tp3 = tp[3];
89: x0 += v0[3]*tp3 + v0[2]*tp2 + v0[1]*tp1 + v0[0]*tp0;
90: x1 += v1[3]*tp3 + v1[2]*tp2 + v1[1]*tp1 + v1[0]*tp0;
91: x2 += v2[3]*tp3 + v2[2]*tp2 + v2[1]*tp1 + v2[0]*tp0;
92: x3 += v3[3]*tp3 + v3[2]*tp2 + v3[1]*tp1 + v3[0]*tp0;
93: }
94: tp = t + k*4;
95: tp[0]=x0; tp[1]=x1; tp[2]=x2; tp[3]=x3;
96: idx = 4*r[k];
97: x[idx] = x0;
98: x[idx+1] = x1;
99: x[idx+2] = x2;
100: x[idx+3] = x3;
101: }
103: ISRestoreIndices(isrow,&r);
104: VecRestoreArray(bb,&b);
105: VecRestoreArray(xx,&x);
106: PetscLogFlops(4.0*bs2*a->nz - (bs+2.0*bs2)*mbs);
107: return(0);
108: }
112: PetscErrorCode MatForwardSolve_SeqSBSTRM_4_NaturalOrdering(PetscInt *ai,PetscInt *aj,MatScalar *aa,PetscInt mbs,PetscScalar *x)
113: {
114: MatScalar *diag;
115: PetscScalar *xp,x0,x1,x2,x3;
116: PetscInt nz,*vj,k;
118: const MatScalar *v0, *v1, *v2, *v3;
119: PetscInt slen;
123: slen = 4*(ai[mbs]-ai[0]);
124: v0 = aa + 16*ai[0];
125: v1 = v0 + slen;
126: v2 = v1 + slen;
127: v3 = v2 + slen;
129: for (k=0; k<mbs; k++){
130: xp = x + k*4;
131: x0=xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; /* Dk*xk = k-th block of x */
132: nz = ai[k+1] - ai[k];
133: vj = aj + ai[k];
134: xp = x + (*vj)*4;
135: while (nz--) {
136: /* += U(k,^T*(Dk*xk) */
137: xp[0] += v0[0]*x0 + v1[0]*x1 + v2[0]*x2 + v3[0]*x3;
138: xp[1] += v0[1]*x0 + v1[1]*x1 + v2[1]*x2 + v3[1]*x3;
139: xp[2] += v0[2]*x0 + v1[2]*x1 + v2[2]*x2 + v3[2]*x3;
140: xp[3] += v0[3]*x0 + v1[3]*x1 + v2[3]*x2 + v3[3]*x3;
141: vj++; xp = x + (*vj)*4;
142: v0 += 4; v1 += 4; v2 += 4; v3 += 4;
143: }
144: /* xk = inv(Dk)*(Dk*xk) */
145: diag = aa+k*16; /* ptr to inv(Dk) */
146: xp = x + k*4;
147: xp[0] = diag[0]*x0 + diag[4]*x1 + diag[8]*x2 + diag[12]*x3;
148: xp[1] = diag[1]*x0 + diag[5]*x1 + diag[9]*x2 + diag[13]*x3;
149: xp[2] = diag[2]*x0 + diag[6]*x1 + diag[10]*x2+ diag[14]*x3;
150: xp[3] = diag[3]*x0 + diag[7]*x1 + diag[11]*x2+ diag[15]*x3;
151: }
152: return(0);
153: }
157: PetscErrorCode MatBackwardSolve_SeqSBSTRM_4_NaturalOrdering(PetscInt *ai,PetscInt *aj,MatScalar *aa,PetscInt mbs,PetscScalar *x)
158: {
159: PetscScalar *xp,x0,x1,x2,x3;
160: PetscInt nz,*vj,k;
162: PetscScalar xp0, xp1, xp2, xp3;
163: const MatScalar *v0, *v1, *v2, *v3;
164: PetscInt slen;
167: slen = 4*(ai[mbs]-ai[0]);
168: v0 = aa + 16*ai[0]+4*(ai[mbs]-ai[0]);
169: v1 = v0 + slen;
170: v2 = v1 + slen;
171: v3 = v2 + slen;
173: for (k=mbs-1; k>=0; k--){
174: xp = x + k*4;
175: x0=xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; /* xk */
176: nz = ai[k+1] - ai[k];
177: vj = aj + ai[k+1];
178: xp = x + (*vj)*4;
179: while (nz--) {
180: /* xk += U(k,* */
181: v0 -= 4; v1 -= 4; v2 -= 4; v3 -=4;
182: vj--; xp = x + (*vj)*4;
183: xp0 = xp[0]; xp1 = xp[1]; xp2 = xp[2]; xp3 = xp[3];
184: x0 += v0[3]*xp3 + v0[2]*xp2 + v0[1]*xp1 + v0[0]*xp0;
185: x1 += v1[3]*xp3 + v1[2]*xp2 + v1[1]*xp1 + v1[0]*xp0;
186: x2 += v2[3]*xp3 + v2[2]*xp2 + v2[1]*xp1 + v2[0]*xp0;
187: x3 += v3[3]*xp3 + v3[2]*xp2 + v3[1]*xp1 + v3[0]*xp0;
188: }
189: xp = x + k*4;
190: xp[0] = x0; xp[1] = x1; xp[2] = x2; xp[3] = x3;
191: }
192: return(0);
193: }
197: PetscErrorCode MatSolve_SeqSBSTRM_4_NaturalOrdering_inplace(Mat A,Vec bb,Vec xx)
198: {
199: Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data;
200: PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j,bs=A->rmap->bs,bs2=a->bs2;
201: PetscScalar *x,*b;
204: Mat_SeqSBSTRM *sbstrm = (Mat_SeqSBSTRM *)A->spptr;
205: MatScalar *as=sbstrm->as;
208: #if 0
209: MatSolve_SeqSBSTRM_4_inplace(A, bb, xx);
210: #endif
211: VecGetArray(bb,&b);
212: VecGetArray(xx,&x);
213: /* solve U^T * D * y = b by forward substitution */
214: PetscMemcpy(x,b,4*mbs*sizeof(PetscScalar)); /* x <- b */
215: MatForwardSolve_SeqSBSTRM_4_NaturalOrdering(ai,aj,as,mbs,x);
216: /* solve U*x = y by back substitution */
217: MatBackwardSolve_SeqSBSTRM_4_NaturalOrdering(ai,aj,as,mbs,x);
218: VecRestoreArray(bb,&b);
219: VecRestoreArray(xx,&x);
220: PetscLogFlops(4.0*bs2*a->nz - (bs+2.0*bs2)*mbs);
221: return(0);
222: }
226: PetscErrorCode MatForwardSolve_SeqSBSTRM_4_NaturalOrdering_inplace(Mat A,Vec bb,Vec xx)
227: {
228: Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data;
229: PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j,bs=A->rmap->bs,bs2=a->bs2;
230: PetscScalar *x,*b;
233: Mat_SeqSBSTRM *sbstrm = (Mat_SeqSBSTRM *)A->spptr;
234: MatScalar *as=sbstrm->as;
237: VecGetArray(bb,&b);
238: VecGetArray(xx,&x);
239: PetscMemcpy(x,b,4*mbs*sizeof(PetscScalar)); /* x <- b */
240: MatForwardSolve_SeqSBSTRM_4_NaturalOrdering(ai,aj,as,mbs,x);
241: VecRestoreArray(bb,&b);
242: VecRestoreArray(xx,&x);
243: PetscLogFlops(2.0*bs2*a->nz - bs*mbs);
244: return(0);
245: }
249: PetscErrorCode MatBackwardSolve_SeqSBSTRM_4_NaturalOrdering_inplace(Mat A,Vec bb,Vec xx)
250: {
251: Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data;
252: PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j,bs2=a->bs2;
253: PetscScalar *x,*b;
256: Mat_SeqSBSTRM *sbstrm = (Mat_SeqSBSTRM *)A->spptr;
257: MatScalar *as=sbstrm->as;
260: VecGetArray(bb,&b);
261: VecGetArray(xx,&x);
262: PetscMemcpy(x,b,4*mbs*sizeof(PetscScalar));
263: MatBackwardSolve_SeqSBSTRM_4_NaturalOrdering(ai,aj,as,mbs,x);
264: VecRestoreArray(bb,&b);
265: VecRestoreArray(xx,&x);
266: PetscLogFlops(2.0*bs2*(a->nz-mbs));
267: return(0);
268: }
272: PetscErrorCode MatSolve_SeqSBSTRM_5_inplace(Mat A,Vec bb,Vec xx)
273: {
274: Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data;
275: IS isrow=a->row;
276: PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j,bs=A->rmap->bs,bs2 = a->bs2;
278: const PetscInt *r;
279: PetscInt nz,*vj,k,idx;
280: PetscScalar *x,*b,x0,x1,x2,x3,x4,*t,*tp;
282: Mat_SeqSBSTRM *sbstrm = (Mat_SeqSBSTRM *)A->spptr;
283: MatScalar *as=sbstrm->as,*diag;
284: PetscScalar tp0, tp1, tp2, tp3, tp4;
285: const MatScalar *v0, *v1, *v2, *v3, *v4;
286: PetscInt slen;
289: VecGetArray(bb,&b);
290: VecGetArray(xx,&x);
291: t = a->solve_work;
292: ISGetIndices(isrow,&r);
295: slen = 5*(ai[mbs]-ai[0]);
296: v0 = as + 25*ai[0];
297: v1 = v0 + slen;
298: v2 = v1 + slen;
299: v3 = v2 + slen;
300: v4 = v3 + slen;
302: /* solve U^T * D * y = b by forward substitution */
303: tp = t;
304: for (k=0; k<mbs; k++) { /* t <- perm(b) */
305: idx = 5*r[k];
306: tp[0] = b[idx];
307: tp[1] = b[idx+1];
308: tp[2] = b[idx+2];
309: tp[3] = b[idx+3];
310: tp[4] = b[idx+4];
311: tp += 5;
312: }
313:
314: for (k=0; k<mbs; k++){
315: vj = aj + ai[k];
316: tp = t + k*5;
317: x0=tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; x4=tp[4];
318: nz = ai[k+1] - ai[k];
320: tp = t + (*vj)*5;
321: while (nz--) {
322: tp[0] += v0[0]*x0 + v1[0]*x1 + v2[0]*x2 + v3[0]*x3 + v4[0]*x4;
323: tp[1] += v0[1]*x0 + v1[1]*x1 + v2[1]*x2 + v3[1]*x3 + v4[1]*x4;
324: tp[2] += v0[2]*x0 + v1[2]*x1 + v2[2]*x2 + v3[2]*x3 + v4[2]*x4;
325: tp[3] += v0[3]*x0 + v1[3]*x1 + v2[3]*x2 + v3[3]*x3 + v4[3]*x4;
326: tp[4] += v0[4]*x0 + v1[4]*x1 + v2[4]*x2 + v3[4]*x3 + v4[4]*x4;
327: vj++; tp = t + (*vj)*5;
328: v0 += 5; v1 += 5; v2 += 5; v3 +=5; v4 += 5;
329: }
331: /* xk = inv(Dk)*(Dk*xk) */
332: diag = as+k*25; /* ptr to inv(Dk) */
333: tp = t + k*5;
334: tp[0] = diag[0]*x0 + diag[5]*x1 + diag[10]*x2 + diag[15]*x3 + diag[20]*x4;
335: tp[1] = diag[1]*x0 + diag[6]*x1 + diag[11]*x2 + diag[16]*x3 + diag[21]*x4;
336: tp[2] = diag[2]*x0 + diag[7]*x1 + diag[12]*x2 + diag[17]*x3 + diag[22]*x4;
337: tp[3] = diag[3]*x0 + diag[8]*x1 + diag[13]*x2 + diag[18]*x3 + diag[23]*x4;
338: tp[4] = diag[4]*x0 + diag[9]*x1 + diag[14]*x2 + diag[19]*x3 + diag[24]*x4;
339: }
341: /* solve U*x = y by back substitution */
342: for (k=mbs-1; k>=0; k--){
343: vj = aj + ai[k+1];
344: tp = t + k*5;
345: x0=tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; x4=tp[4];/* xk */
346: nz = ai[k+1] - ai[k];
347:
348: tp = t + (*vj)*5;
349: while (nz--) {
350: /* xk += U(k,* */
351: v0 -= 5; v1 -= 5; v2 -= 5; v3 -=5; v4 -= 5;
352: vj--; tp = t + (*vj)*5;
353: tp0 = tp[0]; tp1 = tp[1]; tp2 = tp[2]; tp3 = tp[3]; tp4 = tp[4];
354: x0 += v0[4]*tp4 + v0[3]*tp3 + v0[2]*tp2 + v0[1]*tp1 + v0[0]*tp0;
355: x1 += v1[4]*tp4 + v1[3]*tp3 + v1[2]*tp2 + v1[1]*tp1 + v1[0]*tp0;
356: x2 += v2[4]*tp4 + v2[3]*tp3 + v2[2]*tp2 + v2[1]*tp1 + v2[0]*tp0;
357: x3 += v3[4]*tp4 + v3[3]*tp3 + v3[2]*tp2 + v3[1]*tp1 + v3[0]*tp0;
358: x4 += v4[4]*tp4 + v4[3]*tp3 + v4[2]*tp2 + v4[1]*tp1 + v4[0]*tp0;
359: }
360: tp = t + k*5;
361: tp[0]=x0; tp[1]=x1; tp[2]=x2; tp[3]=x3; tp[4]=x4;
362: idx = 5*r[k];
363: x[idx] = x0;
364: x[idx+1] = x1;
365: x[idx+2] = x2;
366: x[idx+3] = x3;
367: x[idx+4] = x4;
368: }
370: ISRestoreIndices(isrow,&r);
371: VecRestoreArray(bb,&b);
372: VecRestoreArray(xx,&x);
373: PetscLogFlops(4.0*bs2*a->nz - (bs+2.0*bs2)*mbs);
374: return(0);
375: }
379: PetscErrorCode MatForwardSolve_SeqSBSTRM_5_NaturalOrdering(PetscInt *ai,PetscInt *aj,MatScalar *aa,PetscInt mbs,PetscScalar *x)
380: {
381: MatScalar *diag;
382: PetscScalar *xp,x0,x1,x2,x3,x4;
383: PetscInt nz,*vj,k;
385: const MatScalar *v0, *v1, *v2, *v3, *v4;
386: PetscInt slen;
389:
390:
392: slen = 5*(ai[mbs]-ai[0]);
393: v0 = aa + 25*ai[0];
394: v1 = v0 + slen;
395: v2 = v1 + slen;
396: v3 = v2 + slen;
397: v4 = v3 + slen;
399: for (k=0; k<mbs; k++){
400: xp = x + k*5;
401: x0=xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; x4=xp[4];/* Dk*xk = k-th block of x */
402: nz = ai[k+1] - ai[k];
403: vj = aj + ai[k];
404: xp = x + (*vj)*5;
405: while (nz--) {
406: /* += U(k,^T*(Dk*xk) */
407: xp[0] += v0[0]*x0 + v1[0]*x1 + v2[0]*x2 + v3[0]*x3 + v4[0]*x4;
408: xp[1] += v0[1]*x0 + v1[1]*x1 + v2[1]*x2 + v3[1]*x3 + v4[1]*x4;
409: xp[2] += v0[2]*x0 + v1[2]*x1 + v2[2]*x2 + v3[2]*x3 + v4[2]*x4;
410: xp[3] += v0[3]*x0 + v1[3]*x1 + v2[3]*x2 + v3[3]*x3 + v4[3]*x4;
411: xp[4] += v0[4]*x0 + v1[4]*x1 + v2[4]*x2 + v3[4]*x3 + v4[4]*x4;
412: vj++; xp = x + (*vj)*5;
413: v0 += 5; v1 += 5; v2 += 5; v3 +=5; v4 += 5;
414: }
415: /* xk = inv(Dk)*(Dk*xk) */
416: diag = aa+k*25; /* ptr to inv(Dk) */
417: xp = x + k*5;
418: xp[0] = diag[0]*x0 + diag[5]*x1 + diag[10]*x2 + diag[15]*x3 + diag[20]*x4;
419: xp[1] = diag[1]*x0 + diag[6]*x1 + diag[11]*x2 + diag[16]*x3 + diag[21]*x4;
420: xp[2] = diag[2]*x0 + diag[7]*x1 + diag[12]*x2 + diag[17]*x3 + diag[22]*x4;
421: xp[3] = diag[3]*x0 + diag[8]*x1 + diag[13]*x2 + diag[18]*x3 + diag[23]*x4;
422: xp[4] = diag[4]*x0 + diag[9]*x1 + diag[14]*x2 + diag[19]*x3 + diag[24]*x4;
423: }
424: return(0);
425: }
429: PetscErrorCode MatBackwardSolve_SeqSBSTRM_5_NaturalOrdering(PetscInt *ai,PetscInt *aj,MatScalar *aa,PetscInt mbs,PetscScalar *x)
430: {
431: PetscScalar *xp,x0,x1,x2,x3,x4;
432: PetscInt nz,*vj,k;
434: PetscScalar xp0, xp1, xp2, xp3, xp4;
435: const MatScalar *v0, *v1, *v2, *v3, *v4;
436: PetscInt slen;
441: slen = 5*(ai[mbs]-ai[0]);
442: v0 = aa + 25*ai[0]+5*(ai[mbs]-ai[0]);
443: v1 = v0 + slen;
444: v2 = v1 + slen;
445: v3 = v2 + slen;
446: v4 = v3 + slen;
448: for (k=mbs-1; k>=0; k--){
450: xp = x + k*5;
451: x0=xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; x4=xp[4];/* xk */
452: nz = ai[k+1] - ai[k];
454: vj = aj + ai[k+1];
455: xp = x + (*vj)*5;
457: while (nz--) {
458: /* xk += U(k,* */
459: v0 -= 5; v1 -= 5; v2 -= 5; v3 -=5; v4 -= 5;
460: vj--; xp = x + (*vj)*5;
461: xp4 = xp[4]; xp3 = xp[3]; xp2 = xp[2]; xp1 = xp[1]; xp0 = xp[0];
462: x0 += v0[4]*xp4 + v0[3]*xp3 + v0[2]*xp2 + v0[1]*xp1 + v0[0]*xp0;
463: x1 += v1[4]*xp4 + v1[3]*xp3 + v1[2]*xp2 + v1[1]*xp1 + v1[0]*xp0;
464: x2 += v2[4]*xp4 + v2[3]*xp3 + v2[2]*xp2 + v2[1]*xp1 + v2[0]*xp0;
465: x3 += v3[4]*xp4 + v3[3]*xp3 + v3[2]*xp2 + v3[1]*xp1 + v3[0]*xp0;
466: x4 += v4[4]*xp4 + v4[3]*xp3 + v4[2]*xp2 + v4[1]*xp1 + v4[0]*xp0;
467: }
468: xp = x + k*5;
469: xp[0]=x0; xp[1]=x1; xp[2]=x2; xp[3]=x3; xp[4]=x4;
470: }
471: return(0);
472: }
476: PetscErrorCode MatSolve_SeqSBSTRM_5_NaturalOrdering_inplace(Mat A,Vec bb,Vec xx)
477: {
478: Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data;
479: PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j,bs=A->rmap->bs,bs2 = a->bs2;
480: PetscScalar *x,*b;
483: Mat_SeqSBSTRM *sbstrm = (Mat_SeqSBSTRM *)A->spptr;
484: MatScalar *as=sbstrm->as;
487: #if 0
488: #endif
489: VecGetArray(bb,&b);
490: VecGetArray(xx,&x);
492: /* solve U^T * D * y = b by forward substitution */
493: PetscMemcpy(x,b,5*mbs*sizeof(PetscScalar)); /* x <- b */
494: MatForwardSolve_SeqSBSTRM_5_NaturalOrdering(ai,aj,as,mbs,x);
496: /* solve U*x = y by back substitution */
497: MatBackwardSolve_SeqSBSTRM_5_NaturalOrdering(ai,aj,as,mbs,x);
499: VecRestoreArray(bb,&b);
500: VecRestoreArray(xx,&x);
501: PetscLogFlops(4.0*bs2*a->nz - (bs+2.0*bs2)*mbs);
502: return(0);
503: }
507: PetscErrorCode MatForwardSolve_SeqSBSTRM_5_NaturalOrdering_inplace(Mat A,Vec bb,Vec xx)
508: {
509: Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data;
510: PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j,bs=A->rmap->bs,bs2=a->bs2;
511: PetscScalar *x,*b;
514: Mat_SeqSBSTRM *sbstrm = (Mat_SeqSBSTRM *)A->spptr;
515: MatScalar *as=sbstrm->as;
518: VecGetArray(bb,&b);
519: VecGetArray(xx,&x);
520: PetscMemcpy(x,b,5*mbs*sizeof(PetscScalar)); /* x <- b */
521: MatForwardSolve_SeqSBSTRM_5_NaturalOrdering(ai,aj,as,mbs,x);
522: VecRestoreArray(bb,&b);
523: VecRestoreArray(xx,&x);
524: PetscLogFlops(2.0*bs2*a->nz - bs*mbs);
525: return(0);
526: }
530: PetscErrorCode MatBackwardSolve_SeqSBSTRM_5_NaturalOrdering_inplace(Mat A,Vec bb,Vec xx)
531: {
532: Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data;
533: PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j,bs2=a->bs2;
534: PetscScalar *x,*b;
536: Mat_SeqSBSTRM *sbstrm = (Mat_SeqSBSTRM *)A->spptr;
537: MatScalar *as=sbstrm->as;
540: VecGetArray(bb,&b);
541: VecGetArray(xx,&x);
542: PetscMemcpy(x,b,5*mbs*sizeof(PetscScalar));
543: MatBackwardSolve_SeqSBSTRM_5_NaturalOrdering(ai,aj,as,mbs,x);
544: VecRestoreArray(bb,&b);
545: VecRestoreArray(xx,&x);
546: PetscLogFlops(2.0*bs2*(a->nz-mbs));
547: return(0);
548: }
553: PetscErrorCode SeqSBSTRM_convertFact_sbstrm(Mat F)
554: {
555: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *) F->data;
556: Mat_SeqSBSTRM *sbstrm = (Mat_SeqSBSTRM*) F->spptr;
557: PetscInt m = a->mbs, bs = F->rmap->bs;
558: PetscInt *ai = a->i;
559: PetscInt i,j,ib,jb;
560: MatScalar *aa = a->a, *aau, *asu;
562: PetscInt bs2, rbs, cbs, blen, slen;
563: PetscScalar **asp ;
566: sbstrm->rbs = bs;
567: sbstrm->cbs = bs;
569: rbs = cbs = bs;
570: bs2 = rbs*cbs;
571: blen = ai[m]-ai[0];
572: slen = blen*cbs;
574: if(sbstrm->as) {
575: PetscFree(sbstrm->as);
576: }
577: PetscMalloc(bs2*ai[m]*sizeof(MatScalar), &sbstrm->as);
578: PetscMalloc(rbs*sizeof(MatScalar *), &asp);
580: asu = sbstrm->as;
581: for (i=0; i<m*bs2; i++) asu[i] = aa[i];
583: asu = sbstrm->as + ai[0]*bs2;
584: aau = aa + ai[0]*bs2;
586: for(i=0;i<rbs;i++) asp[i] = asu + i*slen;
588: for(j=0;j<blen;j++) {
589: for (jb=0; jb<cbs; jb++){
590: for (ib=0; ib<rbs; ib++){
591: asp[ib][j*cbs+jb] = aau[j*bs2+jb*rbs+ib];
592: }}
593: }
595: switch (bs){
596: case 4:
597: F->ops->forwardsolve = MatForwardSolve_SeqSBSTRM_4_NaturalOrdering_inplace;
598: F->ops->backwardsolve = MatBackwardSolve_SeqSBSTRM_4_NaturalOrdering_inplace;
599: F->ops->solve = MatSolve_SeqSBSTRM_4_NaturalOrdering_inplace;
600: break;
601: case 5:
602: F->ops->forwardsolve = MatForwardSolve_SeqSBSTRM_5_NaturalOrdering_inplace;
603: F->ops->backwardsolve = MatBackwardSolve_SeqSBSTRM_5_NaturalOrdering_inplace;
604: F->ops->solve = MatSolve_SeqSBSTRM_5_NaturalOrdering_inplace;
605: break;
606: default:
607: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"not supported for block size %D",bs);
608: }
609: #if 0
610: #endif
612: PetscFree(asp);
613: return(0);
614: }
616: /*=========================================================*/
618: EXTERN_C_BEGIN
621: PetscErrorCode MatFactorGetSolverPackage_seqsbaij_sbstrm(Mat A,const MatSolverPackage *type)
622: {
624: *type = MATSOLVERSBSTRM;
625: return(0);
626: }
627: EXTERN_C_END
631: PetscErrorCode MatCholeskyFactorNumeric_sbstrm(Mat F,Mat A,const MatFactorInfo *info)
632: {
633: PetscInt bs = A->rmap->bs;
637: switch (bs){
638: case 4:
639: MatCholeskyFactorNumeric_SeqSBAIJ_4_NaturalOrdering(F,A,info);
640: break;
641: case 5:
642: MatCholeskyFactorNumeric_SeqSBAIJ_5_NaturalOrdering(F,A,info);
643: break;
644: default:
645: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"not supported for block size %D",bs);
646: }
647:
648: SeqSBSTRM_convertFact_sbstrm(F);
650: return(0);
651: }
652: /*=========================================================*/
655: PetscErrorCode MatICCFactorSymbolic_sbstrm(Mat B,Mat A,IS perm,const MatFactorInfo *info)
656: {
657: PetscInt ierr;
659: (MatICCFactorSymbolic_SeqSBAIJ)(B,A,perm,info);
660: B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_sbstrm;
661: return(0);
662: }
663: /*=========================================================*/
666: PetscErrorCode MatCholeskyFactorSymbolic_sbstrm(Mat B,Mat A,IS perm,const MatFactorInfo *info)
667: {
668: PetscInt ierr;
670: (MatCholeskyFactorSymbolic_SeqSBAIJ)(B,A,perm,info);
671: B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_sbstrm;
672: return(0);
673: }
674: /*=========================================================*/
676: EXTERN_C_BEGIN
679: PetscErrorCode MatGetFactor_seqsbaij_sbstrm(Mat A,MatFactorType ftype,Mat *F)
680: {
681: Mat B;
682: PetscInt bs = A->rmap->bs;
683: Mat_SeqSBSTRM *sbstrm;
687: if (A->cmap->N != A->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n);
688: MatCreate(((PetscObject)A)->comm,&B);
689: MatSetSizes(B,A->rmap->n,A->cmap->n,PETSC_DETERMINE,PETSC_DETERMINE);
690: MatSetType(B,((PetscObject)A)->type_name);
691: MatSeqSBAIJSetPreallocation_SeqSBAIJ(B,bs,MAT_SKIP_ALLOCATION,PETSC_NULL);
694: B->ops->iccfactorsymbolic = MatICCFactorSymbolic_sbstrm;
695: B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_sbstrm;
696: B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_sbstrm;
698: B->ops->destroy = MatDestroy_SeqSBSTRM;
699: B->factortype = ftype;
700: B->assembled = PETSC_TRUE; /* required by -ksp_view */
701: B->preallocated = PETSC_TRUE;
703: PetscNewLog(B,Mat_SeqSBSTRM,&sbstrm);
705: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatFactorGetSolverPackage_C","MatFactorGetSolverPackage_seqsbaij_sbstrm",MatFactorGetSolverPackage_seqsbaij_sbstrm);
707: B->spptr = sbstrm;
708: *F = B;
709: return(0);
710: }
711: EXTERN_C_END