Actual source code: bstrmfact.c
petsc-3.6.1 2015-08-06
1: #define PETSCMAT_DLL
3: #include <../src/mat/impls/baij/seq/baij.h>
4: #include <../src/mat/impls/baij/seq/bstream/bstream.h>
6: extern PetscErrorCode MatDestroy_SeqBSTRM(Mat A);
7: extern PetscErrorCode MatSeqBSTRM_convert_bstrm(Mat A);
8: /*=========================================================*/
11: PetscErrorCode MatSolve_SeqBSTRM_4(Mat A,Vec bb,Vec xx)
12: {
13: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
14: Mat_SeqBSTRM *bstrm = (Mat_SeqBSTRM*)A->spptr;
15: PetscErrorCode ierr;
16: PetscInt i,j,n=a->mbs,*ai=a->i,*aj=a->j, *diag=a->diag,idx,jdx;
17: PetscScalar *x,s1,s2,s3,s4,x1,x2,x3,x4;
18: PetscScalar *v1, *v2, *v3, *v4;
19: const PetscScalar *b;
20: PetscInt slen;
23: VecGetArray(bb,(PetscScalar**)&b);
24: VecGetArray(xx,&x);
25: slen = 4*(ai[n]-ai[0]+diag[0]-diag[n]);
27: v1 = bstrm->as;
28: v2 = v1 + slen;
29: v3 = v2 + slen;
30: v4 = v3 + slen;
32: /* forward solve the lower triangular */
33: x[0] = b[0];
34: x[1] = b[1];
35: x[2] = b[2];
36: x[3] = b[3];
38: for (i=1; i<n; i++) {
39: idx = 4*i;
40: s1 = b[idx];
41: s2 = b[idx+1];
42: s3 = b[idx+2];
43: s4 = b[idx+3];
44: for (j=ai[i]; j<ai[i+1]; j++) {
45: jdx = 4*aj[j];
46: x1 = x[jdx];x2 = x[1+jdx];x3 = x[2+jdx];x4 = x[3+jdx];
47: s1 -= v1[0]*x1 + v1[1]*x2 + v1[2]*x3 + v1[3]*x4;
48: s2 -= v2[0]*x1 + v2[1]*x2 + v2[2]*x3 + v2[3]*x4;
49: s3 -= v3[0]*x1 + v3[1]*x2 + v3[2]*x3 + v3[3]*x4;
50: s4 -= v4[0]*x1 + v4[1]*x2 + v4[2]*x3 + v4[3]*x4;
51: v1 += 4; v2 += 4; v3 += 4; v4 += 4;
52: }
53: x[idx] = s1;
54: x[idx+1] = s2;
55: x[idx+2] = s3;
56: x[idx+3] = s4;
57: }
59: /* backward solve the upper triangular */
60: for (i=n-1; i>=0; i--) {
61: idx = 4*i;
62: s1 = x[idx];
63: s2 = x[idx+1];
64: s3 = x[idx+2];
65: s4 = x[idx+3];
66: for (j=diag[i+1]+1; j<diag[i]; j++) {
67: jdx = 4*aj[j];
68: x1 = x[jdx];x2 = x[1+jdx];x3 = x[2+jdx];x4 = x[3+jdx];
69: s1 -= v1[0]*x1 + v1[1]*x2 + v1[2]*x3 + v1[3]*x4;
70: s2 -= v2[0]*x1 + v2[1]*x2 + v2[2]*x3 + v2[3]*x4;
71: s3 -= v3[0]*x1 + v3[1]*x2 + v3[2]*x3 + v3[3]*x4;
72: s4 -= v4[0]*x1 + v4[1]*x2 + v4[2]*x3 + v4[3]*x4;
73: v1 += 4; v2 += 4; v3 += 4; v4 += 4;
74: }
75: x[idx] = v1[0]*s1 + v1[1]*s2 + v1[2]*s3 + v1[3]*s4;
76: x[idx+1] = v2[0]*s1 + v2[1]*s2 + v2[2]*s3 + v2[3]*s4;
77: x[idx+2] = v3[0]*s1 + v3[1]*s2 + v3[2]*s3 + v3[3]*s4;
78: x[idx+3] = v4[0]*s1 + v4[1]*s2 + v4[2]*s3 + v4[3]*s4;
79: v1 += 4; v2 += 4; v3 += 4; v4 += 4;
80: }
82: VecRestoreArray(bb,(PetscScalar**)&b);
83: VecRestoreArray(xx,&x);
84: PetscLogFlops(2.0*16*(a->nz) - 4.0*A->cmap->n);
85: return(0);
86: }
88: /*=========================================================*/
91: PetscErrorCode MatSolve_SeqBSTRM_5(Mat A,Vec bb,Vec xx)
92: {
93: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
94: Mat_SeqBSTRM *bstrm = (Mat_SeqBSTRM*)A->spptr;
95: PetscErrorCode ierr;
96: PetscInt i,j,n=a->mbs,*ai=a->i,*aj=a->j,*diag = a->diag,idx,jdx;
97: PetscScalar *x,s1,s2,s3,s4,s5,x1,x2,x3,x4,x5;
98: PetscScalar *v1, *v2, *v3, *v4, *v5;
99: const PetscScalar *b;
100: PetscInt slen;
103: VecGetArray(bb,(PetscScalar**)&b);
104: VecGetArray(xx,&x);
106: slen = 5*(ai[n]-ai[0]+diag[0]-diag[n]);
107: v1 = bstrm->as;
108: v2 = v1 + slen;
109: v3 = v2 + slen;
110: v4 = v3 + slen;
111: v5 = v4 + slen;
114: /* forward solve the lower triangular */
115: x[0] = b[0];
116: x[1] = b[1];
117: x[2] = b[2];
118: x[3] = b[3];
119: x[4] = b[4];
121: for (i=1; i<n; i++) {
122: idx = 5*i;
123: s1 = b[idx];
124: s2 = b[idx+1];
125: s3 = b[idx+2];
126: s4 = b[idx+3];
127: s5 = b[idx+4];
128: for (j=ai[i]; j<ai[i+1]; j++) {
129: jdx = 5*aj[j];
130: x1 = x[jdx];x2 = x[1+jdx];x3 = x[2+jdx];x4 = x[3+jdx]; x5 = x[4+jdx];
131: s1 -= v1[0]*x1 + v1[1]*x2 + v1[2]*x3 + v1[3]*x4 + v1[4]*x5;
132: s2 -= v2[0]*x1 + v2[1]*x2 + v2[2]*x3 + v2[3]*x4 + v2[4]*x5;
133: s3 -= v3[0]*x1 + v3[1]*x2 + v3[2]*x3 + v3[3]*x4 + v3[4]*x5;
134: s4 -= v4[0]*x1 + v4[1]*x2 + v4[2]*x3 + v4[3]*x4 + v4[4]*x5;
135: s5 -= v5[0]*x1 + v5[1]*x2 + v5[2]*x3 + v5[3]*x4 + v5[4]*x5;
136: v1 += 5; v2 += 5; v3 += 5; v4 += 5; v5 += 5;
137: }
138: x[idx] = s1;
139: x[idx+1] = s2;
140: x[idx+2] = s3;
141: x[idx+3] = s4;
142: x[idx+4] = s5;
143: }
145: /* backward solve the upper triangular */
146: for (i=n-1; i>=0; i--) {
147: idx = 5*i;
148: s1 = x[idx];
149: s2 = x[idx+1];
150: s3 = x[idx+2];
151: s4 = x[idx+3];
152: s5 = x[idx+4];
153: for (j=diag[i+1]+1; j<diag[i]; j++) {
154: jdx = 5*aj[j];
155: x1 = x[jdx];x2 = x[1+jdx];x3 = x[2+jdx];x4 = x[3+jdx]; x5 = x[4+jdx];
156: s1 -= v1[0]*x1 + v1[1]*x2 + v1[2]*x3 + v1[3]*x4 + v1[4]*x5;
157: s2 -= v2[0]*x1 + v2[1]*x2 + v2[2]*x3 + v2[3]*x4 + v2[4]*x5;
158: s3 -= v3[0]*x1 + v3[1]*x2 + v3[2]*x3 + v3[3]*x4 + v3[4]*x5;
159: s4 -= v4[0]*x1 + v4[1]*x2 + v4[2]*x3 + v4[3]*x4 + v4[4]*x5;
160: s5 -= v5[0]*x1 + v5[1]*x2 + v5[2]*x3 + v5[3]*x4 + v5[4]*x5;
161: v1 += 5; v2 += 5; v3 += 5; v4 += 5; v5 += 5;
162: }
163: x[idx] = v1[0]*s1 + v1[1]*s2 + v1[2]*s3 + v1[3]*s4 + v1[4]*s5;
164: x[idx+1] = v2[0]*s1 + v2[1]*s2 + v2[2]*s3 + v2[3]*s4 + v2[4]*s5;
165: x[idx+2] = v3[0]*s1 + v3[1]*s2 + v3[2]*s3 + v3[3]*s4 + v3[4]*s5;
166: x[idx+3] = v4[0]*s1 + v4[1]*s2 + v4[2]*s3 + v4[3]*s4 + v4[4]*s5;
167: x[idx+4] = v5[0]*s1 + v5[1]*s2 + v5[2]*s3 + v5[3]*s4 + v5[4]*s5;
168: v1 += 5; v2 += 5; v3 += 5; v4 += 5; v5 += 5;
169: }
171: VecRestoreArray(bb,(PetscScalar**)&b);
172: VecRestoreArray(xx,&x);
173: PetscLogFlops(2.0*25*(a->nz) - 5.0*A->cmap->n);
174: return(0);
175: }
177: /*=========================================================*/
180: PetscErrorCode MatFactorGetSolverPackage_bstrm(Mat A,const MatSolverPackage *type)
181: {
183: *type = MATSOLVERBSTRM;
184: return(0);
185: }
187: /*=========================================================*/
190: PetscErrorCode MatLUFactorNumeric_bstrm(Mat F,Mat A,const MatFactorInfo *info)
191: {
192: /* Mat_SeqBSTRM *bstrm = (Mat_SeqBSTRM*) F->spptr; */
193: PetscInt bs = A->rmap->bs;
195: Mat_SeqBSTRM *bstrm;
198: /*(*bstrm ->MatLUFactorNumeric)(F,A,info); */
199: switch (bs) {
200: case 4:
201: MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering(F,A,info);
202: break;
203: case 5:
204: MatLUFactorNumeric_SeqBAIJ_5_NaturalOrdering(F,A,info);
205: /* MatLUFactorNumeric_SeqBAIJ_5(F,A,info); */
206: break;
207: default:
208: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"not supported for block size %D",bs);
209: }
211: PetscNewLog(F,&bstrm);
212: F->spptr = (void*) bstrm;
213: MatSeqBSTRM_convert_bstrm(F);
214: /*.........................................................
215: F->ops->solve = MatSolve_SeqBSTRM_5;
216: .........................................................*/
217: return(0);
218: }
219: /*=========================================================*/
222: PetscErrorCode MatILUFactorSymbolic_bstrm(Mat B,Mat A,IS r,IS c,const MatFactorInfo *info)
223: {
224: PetscInt ierr;
227: (MatILUFactorSymbolic_SeqBAIJ)(B,A,r,c,info);
228: B->ops->lufactornumeric = MatLUFactorNumeric_bstrm;
229: return(0);
230: }
231: /*=========================================================*/
234: PetscErrorCode MatLUFactorSymbolic_bstrm(Mat B,Mat A,IS r,IS c,const MatFactorInfo *info)
235: {
236: PetscInt ierr;
239: /* (*bstrm ->MatLUFactorSymbolic)(B,A,r,c,info); */
240: (MatLUFactorSymbolic_SeqBAIJ)(B,A,r,c,info);
241: B->ops->lufactornumeric = MatLUFactorNumeric_bstrm;
242: return(0);
243: }
244: /*=========================================================*/
247: PETSC_EXTERN PetscErrorCode MatGetFactor_seqbaij_bstrm(Mat A,MatFactorType ftype,Mat *B)
248: {
249: PetscInt n = A->rmap->n;
250: Mat_SeqBSTRM *bstrm;
254: if (A->cmap->N != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"matrix must be square");
255: MatCreate(PetscObjectComm((PetscObject)A),B);
256: MatSetSizes(*B,n,n,n,n);
257: MatSetType(*B,((PetscObject)A)->type_name);
258: /* MatSeqBAIJSetPreallocation(*B,bs,0,NULL); */
260: (*B)->ops->ilufactorsymbolic = MatILUFactorSymbolic_bstrm;
261: (*B)->ops->lufactorsymbolic = MatLUFactorSymbolic_bstrm;
262: (*B)->ops->lufactornumeric = MatLUFactorNumeric_bstrm;
263: (*B)->ops->destroy = MatDestroy_SeqBSTRM;
264: (*B)->factortype = ftype;
265: (*B)->assembled = PETSC_TRUE; /* required by -ksp_view */
266: (*B)->preallocated = PETSC_TRUE;
268: PetscNewLog(*B,&bstrm);
269: (*B)->spptr = (void*) bstrm;
270: PetscObjectComposeFunction((PetscObject)*B,"MatFactorGetSolverPackage_C",MatFactorGetSolverPackage_bstrm);
271: return(0);
272: }