Actual source code: bstrmfact.c
petsc-3.3-p7 2013-05-11
1: #define PETSCMAT_DLL
3: #include ../src/mat/impls/baij/seq/baij.h
4: #include ../src/mat/impls/baij/seq/bstream/bstream.h
6: extern PetscErrorCode MatDestroy_SeqBSTRM(Mat A);
7: extern PetscErrorCode MatSeqBSTRM_convert_bstrm(Mat A);
8: /*=========================================================*/
11: PetscErrorCode MatSolve_SeqBSTRM_4(Mat A,Vec bb,Vec xx)
12: {
13: Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
14: Mat_SeqBSTRM *bstrm = (Mat_SeqBSTRM *)A->spptr;
15: PetscErrorCode ierr;
16: PetscInt i,j,n=a->mbs,*ai=a->i,*aj=a->j, *diag=a->diag,idx,jdx;
17: PetscScalar *x,s1,s2,s3,s4,x1,x2,x3,x4;
18: PetscScalar *v1, *v2, *v3, *v4;
19: const PetscScalar *b;
20: PetscInt slen;
23: VecGetArray(bb,(PetscScalar**)&b);
24: VecGetArray(xx,&x);
25: slen = 4*(ai[n]-ai[0]+diag[0]-diag[n]);
27: v1 = bstrm->as;
28: v2 = v1 + slen;
29: v3 = v2 + slen;
30: v4 = v3 + slen;
32: /* forward solve the lower triangular */
33: x[0] = b[0];
34: x[1] = b[1];
35: x[2] = b[2];
36: x[3] = b[3];
38: for (i=1; i<n; i++) {
39: idx = 4*i;
40: s1 = b[idx ];
41: s2 = b[idx+1];
42: s3 = b[idx+2];
43: s4 = b[idx+3];
44: for (j=ai[i]; j<ai[i+1]; j++) {
45: jdx = 4*aj[j];
46: x1 = x[jdx];x2 = x[1+jdx];x3 = x[2+jdx];x4 = x[3+jdx];
47: s1 -= v1[0]*x1 + v1[1]*x2 + v1[2]*x3 + v1[3]*x4;
48: s2 -= v2[0]*x1 + v2[1]*x2 + v2[2]*x3 + v2[3]*x4;
49: s3 -= v3[0]*x1 + v3[1]*x2 + v3[2]*x3 + v3[3]*x4;
50: s4 -= v4[0]*x1 + v4[1]*x2 + v4[2]*x3 + v4[3]*x4;
51: v1 += 4; v2 += 4; v3 += 4; v4 += 4;
52: }
53: x[idx ] = s1;
54: x[idx+1] = s2;
55: x[idx+2] = s3;
56: x[idx+3] = s4;
57: }
59: /* backward solve the upper triangular */
60: for (i=n-1;i>=0;i--){
61: idx = 4*i;
62: s1 = x[idx ];
63: s2 = x[idx+1];
64: s3 = x[idx+2];
65: s4 = x[idx+3];
66: for (j=diag[i+1]+1; j<diag[i]; j++) {
67: jdx = 4*aj[j];
68: x1 = x[jdx];x2 = x[1+jdx];x3 = x[2+jdx];x4 = x[3+jdx];
69: s1 -= v1[0]*x1 + v1[1]*x2 + v1[2]*x3 + v1[3]*x4;
70: s2 -= v2[0]*x1 + v2[1]*x2 + v2[2]*x3 + v2[3]*x4;
71: s3 -= v3[0]*x1 + v3[1]*x2 + v3[2]*x3 + v3[3]*x4;
72: s4 -= v4[0]*x1 + v4[1]*x2 + v4[2]*x3 + v4[3]*x4;
73: v1 += 4; v2 += 4; v3 += 4; v4 += 4;
74: }
75: x[idx ] = v1[0]*s1 + v1[1]*s2 + v1[2]*s3 + v1[3]*s4;
76: x[idx+1] = v2[0]*s1 + v2[1]*s2 + v2[2]*s3 + v2[3]*s4;
77: x[idx+2] = v3[0]*s1 + v3[1]*s2 + v3[2]*s3 + v3[3]*s4;
78: x[idx+3] = v4[0]*s1 + v4[1]*s2 + v4[2]*s3 + v4[3]*s4;
79: v1 += 4; v2 += 4; v3 += 4; v4 += 4;
80: }
82: VecRestoreArray(bb,(PetscScalar**)&b);
83: VecRestoreArray(xx,&x);
84: PetscLogFlops(2.0*16*(a->nz) - 4.0*A->cmap->n);
85: return(0);
86: }
88: /*=========================================================*/
91: PetscErrorCode MatSolve_SeqBSTRM_5(Mat A,Vec bb,Vec xx)
92: {
93: Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
94: Mat_SeqBSTRM *bstrm = (Mat_SeqBSTRM *)A->spptr;
95: PetscErrorCode ierr;
96: PetscInt i,j,n=a->mbs,*ai=a->i,*aj=a->j,*diag = a->diag,idx,jdx;
97: PetscScalar *x,s1,s2,s3,s4,s5,x1,x2,x3,x4,x5;
98: PetscScalar *v1, *v2, *v3, *v4, *v5;
99: const PetscScalar *b;
100: PetscInt slen;
103: VecGetArray(bb,(PetscScalar**)&b);
104: VecGetArray(xx,&x);
106: slen = 5*(ai[n]-ai[0]+diag[0]-diag[n]);
107: v1 = bstrm->as;
108: v2 = v1 + slen;
109: v3 = v2 + slen;
110: v4 = v3 + slen;
111: v5 = v4 + slen;
114: /* forward solve the lower triangular */
115: x[0] = b[0];
116: x[1] = b[1];
117: x[2] = b[2];
118: x[3] = b[3];
119: x[4] = b[4];
121: for (i=1; i<n; i++) {
122: idx = 5*i;
123: s1 = b[idx ];
124: s2 = b[idx+1];
125: s3 = b[idx+2];
126: s4 = b[idx+3];
127: s5 = b[idx+4];
128: for (j=ai[i]; j<ai[i+1]; j++) {
129: jdx = 5*aj[j];
130: x1 = x[jdx];x2 = x[1+jdx];x3 = x[2+jdx];x4 = x[3+jdx]; x5 = x[4+jdx];
131: s1 -= v1[0]*x1 + v1[1]*x2 + v1[2]*x3 + v1[3]*x4 + v1[4]*x5;
132: s2 -= v2[0]*x1 + v2[1]*x2 + v2[2]*x3 + v2[3]*x4 + v2[4]*x5;
133: s3 -= v3[0]*x1 + v3[1]*x2 + v3[2]*x3 + v3[3]*x4 + v3[4]*x5;
134: s4 -= v4[0]*x1 + v4[1]*x2 + v4[2]*x3 + v4[3]*x4 + v4[4]*x5;
135: s5 -= v5[0]*x1 + v5[1]*x2 + v5[2]*x3 + v5[3]*x4 + v5[4]*x5;
136: v1 += 5; v2 += 5; v3 += 5; v4 += 5; v5 += 5;
137: }
138: x[idx ] = s1;
139: x[idx+1] = s2;
140: x[idx+2] = s3;
141: x[idx+3] = s4;
142: x[idx+4] = s5;
143: }
145: /* backward solve the upper triangular */
146: for (i=n-1;i>=0;i--){
147: idx = 5*i;
148: s1 = x[idx ];
149: s2 = x[idx+1];
150: s3 = x[idx+2];
151: s4 = x[idx+3];
152: s5 = x[idx+4];
153: for (j=diag[i+1]+1; j<diag[i]; j++) {
154: jdx = 5*aj[j];
155: x1 = x[jdx];x2 = x[1+jdx];x3 = x[2+jdx];x4 = x[3+jdx]; x5 = x[4+jdx];
156: s1 -= v1[0]*x1 + v1[1]*x2 + v1[2]*x3 + v1[3]*x4 + v1[4]*x5;
157: s2 -= v2[0]*x1 + v2[1]*x2 + v2[2]*x3 + v2[3]*x4 + v2[4]*x5;
158: s3 -= v3[0]*x1 + v3[1]*x2 + v3[2]*x3 + v3[3]*x4 + v3[4]*x5;
159: s4 -= v4[0]*x1 + v4[1]*x2 + v4[2]*x3 + v4[3]*x4 + v4[4]*x5;
160: s5 -= v5[0]*x1 + v5[1]*x2 + v5[2]*x3 + v5[3]*x4 + v5[4]*x5;
161: v1 += 5; v2 += 5; v3 += 5; v4 += 5; v5 += 5;
162: }
163: x[idx ] = v1[0]*s1 + v1[1]*s2 + v1[2]*s3 + v1[3]*s4 + v1[4]*s5;
164: x[idx+1] = v2[0]*s1 + v2[1]*s2 + v2[2]*s3 + v2[3]*s4 + v2[4]*s5;
165: x[idx+2] = v3[0]*s1 + v3[1]*s2 + v3[2]*s3 + v3[3]*s4 + v3[4]*s5;
166: x[idx+3] = v4[0]*s1 + v4[1]*s2 + v4[2]*s3 + v4[3]*s4 + v4[4]*s5;
167: x[idx+4] = v5[0]*s1 + v5[1]*s2 + v5[2]*s3 + v5[3]*s4 + v5[4]*s5;
168: v1 += 5; v2 += 5; v3 += 5; v4 += 5; v5 += 5;
169: }
171: VecRestoreArray(bb,(PetscScalar**)&b);
172: VecRestoreArray(xx,&x);
173: PetscLogFlops(2.0*25*(a->nz) - 5.0*A->cmap->n);
174: return(0);
175: }
177: /*=========================================================*/
178: EXTERN_C_BEGIN
181: PetscErrorCode MatFactorGetSolverPackage_bstrm(Mat A,const MatSolverPackage *type)
182: {
184: *type = MATSOLVERBSTRM;
185: return(0);
186: }
187: EXTERN_C_END
189: /*=========================================================*/
190: EXTERN_C_BEGIN
193: PetscErrorCode MatLUFactorNumeric_bstrm(Mat F,Mat A,const MatFactorInfo *info)
194: {
195: /* Mat_SeqBSTRM *bstrm = (Mat_SeqBSTRM *) F->spptr; */
196: PetscInt bs = A->rmap->bs;
198: Mat_SeqBSTRM *bstrm;
201: /*(*bstrm ->MatLUFactorNumeric)(F,A,info); */
202: switch (bs){
203: case 4:
204: MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering(F,A,info);
205: break;
206: case 5:
207: MatLUFactorNumeric_SeqBAIJ_5_NaturalOrdering(F,A,info);
208: /* MatLUFactorNumeric_SeqBAIJ_5(F,A,info); */
209: break;
210: default:
211: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"not supported for block size %D",bs);
212: }
213:
214: PetscNewLog(F,Mat_SeqBSTRM,&bstrm);
215: F->spptr = (void *) bstrm;
216: MatSeqBSTRM_convert_bstrm(F);
217: /*.........................................................
218: F->ops->solve = MatSolve_SeqBSTRM_5;
219: .........................................................*/
222: return(0);
223: }
224: EXTERN_C_END
225: /*=========================================================*/
226: EXTERN_C_BEGIN
229: PetscErrorCode MatILUFactorSymbolic_bstrm(Mat B,Mat A,IS r,IS c,const MatFactorInfo *info)
230: {
231: PetscInt ierr;
233: (MatILUFactorSymbolic_SeqBAIJ)(B,A,r,c,info);
234: B->ops->lufactornumeric = MatLUFactorNumeric_bstrm;
235: return(0);
236: }
237: EXTERN_C_END
238: /*=========================================================*/
239: EXTERN_C_BEGIN
242: PetscErrorCode MatLUFactorSymbolic_bstrm(Mat B,Mat A,IS r,IS c,const MatFactorInfo *info)
243: {
244: PetscInt ierr;
246: /* (*bstrm ->MatLUFactorSymbolic)(B,A,r,c,info); */
247: (MatLUFactorSymbolic_SeqBAIJ)(B,A,r,c,info);
248: B->ops->lufactornumeric = MatLUFactorNumeric_bstrm;
249: return(0);
250: }
251: EXTERN_C_END
252: /*=========================================================*/
253: EXTERN_C_BEGIN
256: PetscErrorCode MatGetFactor_seqbaij_bstrm(Mat A,MatFactorType ftype,Mat *B)
257: {
258: PetscInt n = A->rmap->n;
259: Mat_SeqBSTRM *bstrm;
263: if (A->cmap->N != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"matrix must be square");
264: MatCreate(((PetscObject)A)->comm,B);
265: MatSetSizes(*B,n,n,n,n);
266: MatSetType(*B,((PetscObject)A)->type_name);
267: /* MatSeqBAIJSetPreallocation(*B,bs,0,PETSC_NULL); */
269: (*B)->ops->ilufactorsymbolic = MatILUFactorSymbolic_bstrm;
270: (*B)->ops->lufactorsymbolic = MatLUFactorSymbolic_bstrm;
271: (*B)->ops->lufactornumeric = MatLUFactorNumeric_bstrm;
272: (*B)->ops->destroy = MatDestroy_SeqBSTRM;
273: (*B)->factortype = ftype;
274: (*B)->assembled = PETSC_TRUE; /* required by -ksp_view */
275: (*B)->preallocated = PETSC_TRUE;
276: PetscNewLog(*B,Mat_SeqBSTRM,&bstrm);
277: (*B)->spptr = (void *) bstrm;
278: PetscObjectComposeFunctionDynamic((PetscObject)*B,"MatFactorGetSolverPackage_C","MatFactorGetSolverPackage_bstrm",MatFactorGetSolverPackage_bstrm);
279: return(0);
280: }
281: EXTERN_C_END