Actual source code: bstrmfact.c

petsc-3.3-p7 2013-05-11
  1: #define PETSCMAT_DLL

 3:  #include ../src/mat/impls/baij/seq/baij.h
 4:  #include ../src/mat/impls/baij/seq/bstream/bstream.h

  6: extern PetscErrorCode MatDestroy_SeqBSTRM(Mat A);
  7: extern PetscErrorCode MatSeqBSTRM_convert_bstrm(Mat A);
  8: /*=========================================================*/
 11: PetscErrorCode MatSolve_SeqBSTRM_4(Mat A,Vec bb,Vec xx)
 12: {
 13:   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ *)A->data;
 14:   Mat_SeqBSTRM      *bstrm = (Mat_SeqBSTRM *)A->spptr;
 15:   PetscErrorCode    ierr;
 16:   PetscInt          i,j,n=a->mbs,*ai=a->i,*aj=a->j, *diag=a->diag,idx,jdx;
 17:   PetscScalar       *x,s1,s2,s3,s4,x1,x2,x3,x4;
 18:   PetscScalar       *v1, *v2, *v3, *v4;
 19:   const PetscScalar *b;
 20:   PetscInt slen;

 23:   VecGetArray(bb,(PetscScalar**)&b);
 24:   VecGetArray(xx,&x);
 25:   slen = 4*(ai[n]-ai[0]+diag[0]-diag[n]);

 27:   v1  = bstrm->as;
 28:   v2  = v1 + slen;
 29:   v3  = v2 + slen;
 30:   v4  = v3 + slen;

 32:   /* forward solve the lower triangular */
 33:   x[0] = b[0];
 34:   x[1] = b[1];
 35:   x[2] = b[2];
 36:   x[3] = b[3];

 38:   for (i=1; i<n; i++) {
 39:     idx  = 4*i;
 40:     s1 = b[idx  ];
 41:     s2 = b[idx+1];
 42:     s3 = b[idx+2];
 43:     s4 = b[idx+3];
 44:     for (j=ai[i]; j<ai[i+1]; j++) {
 45:       jdx   = 4*aj[j];
 46:       x1    = x[jdx];x2 = x[1+jdx];x3 = x[2+jdx];x4 = x[3+jdx];
 47:       s1 -= v1[0]*x1 + v1[1]*x2 + v1[2]*x3  + v1[3]*x4;
 48:       s2 -= v2[0]*x1 + v2[1]*x2 + v2[2]*x3  + v2[3]*x4;
 49:       s3 -= v3[0]*x1 + v3[1]*x2 + v3[2]*x3  + v3[3]*x4;
 50:       s4 -= v4[0]*x1 + v4[1]*x2 + v4[2]*x3  + v4[3]*x4;
 51:       v1 += 4; v2 += 4; v3 += 4; v4 += 4;
 52:     }
 53:     x[idx  ] = s1;
 54:     x[idx+1] = s2;
 55:     x[idx+2] = s3;
 56:     x[idx+3] = s4;
 57:   }

 59:   /* backward solve the upper triangular */
 60:   for (i=n-1;i>=0;i--){
 61:     idx  = 4*i;
 62:     s1 = x[idx  ];
 63:     s2 = x[idx+1];
 64:     s3 = x[idx+2];
 65:     s4 = x[idx+3];
 66:     for (j=diag[i+1]+1; j<diag[i]; j++) {
 67:       jdx = 4*aj[j];
 68:       x1  = x[jdx];x2 = x[1+jdx];x3 = x[2+jdx];x4 = x[3+jdx];
 69:       s1 -= v1[0]*x1 + v1[1]*x2 + v1[2]*x3  + v1[3]*x4;
 70:       s2 -= v2[0]*x1 + v2[1]*x2 + v2[2]*x3  + v2[3]*x4;
 71:       s3 -= v3[0]*x1 + v3[1]*x2 + v3[2]*x3  + v3[3]*x4;
 72:       s4 -= v4[0]*x1 + v4[1]*x2 + v4[2]*x3  + v4[3]*x4;
 73:       v1 += 4; v2 += 4; v3 += 4; v4 += 4;
 74:     }
 75:     x[idx  ]  =  v1[0]*s1 + v1[1]*s2 + v1[2]*s3  + v1[3]*s4;
 76:     x[idx+1]  =  v2[0]*s1 + v2[1]*s2 + v2[2]*s3  + v2[3]*s4;
 77:     x[idx+2]  =  v3[0]*s1 + v3[1]*s2 + v3[2]*s3  + v3[3]*s4;
 78:     x[idx+3]  =  v4[0]*s1 + v4[1]*s2 + v4[2]*s3  + v4[3]*s4;
 79:     v1 += 4; v2 += 4; v3 += 4; v4 += 4;
 80:   }

 82:   VecRestoreArray(bb,(PetscScalar**)&b);
 83:   VecRestoreArray(xx,&x);
 84:   PetscLogFlops(2.0*16*(a->nz) - 4.0*A->cmap->n);
 85:   return(0);
 86: }

 88: /*=========================================================*/
 91: PetscErrorCode MatSolve_SeqBSTRM_5(Mat A,Vec bb,Vec xx)
 92: {
 93:   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ *)A->data;
 94:   Mat_SeqBSTRM      *bstrm = (Mat_SeqBSTRM *)A->spptr;
 95:   PetscErrorCode    ierr;
 96:   PetscInt          i,j,n=a->mbs,*ai=a->i,*aj=a->j,*diag = a->diag,idx,jdx;
 97:   PetscScalar       *x,s1,s2,s3,s4,s5,x1,x2,x3,x4,x5;
 98:   PetscScalar       *v1, *v2, *v3, *v4, *v5;
 99:   const PetscScalar *b;
100:   PetscInt slen;

103:   VecGetArray(bb,(PetscScalar**)&b);
104:   VecGetArray(xx,&x);

106:   slen = 5*(ai[n]-ai[0]+diag[0]-diag[n]);
107:   v1  = bstrm->as;
108:   v2  = v1 + slen;
109:   v3  = v2 + slen;
110:   v4  = v3 + slen;
111:   v5  = v4 + slen;


114:   /* forward solve the lower triangular */
115:   x[0] = b[0];
116:   x[1] = b[1];
117:   x[2] = b[2];
118:   x[3] = b[3];
119:   x[4] = b[4];

121:   for (i=1; i<n; i++) {
122:     idx  = 5*i;
123:     s1 = b[idx  ];
124:     s2 = b[idx+1];
125:     s3 = b[idx+2];
126:     s4 = b[idx+3];
127:     s5 = b[idx+4];
128:     for (j=ai[i]; j<ai[i+1]; j++) {
129:       jdx = 5*aj[j];
130:       x1  = x[jdx];x2 = x[1+jdx];x3 = x[2+jdx];x4 = x[3+jdx]; x5 = x[4+jdx];
131:       s1 -= v1[0]*x1 + v1[1]*x2 + v1[2]*x3 + v1[3]*x4 + v1[4]*x5;
132:       s2 -= v2[0]*x1 + v2[1]*x2 + v2[2]*x3 + v2[3]*x4 + v2[4]*x5;
133:       s3 -= v3[0]*x1 + v3[1]*x2 + v3[2]*x3 + v3[3]*x4 + v3[4]*x5;
134:       s4 -= v4[0]*x1 + v4[1]*x2 + v4[2]*x3 + v4[3]*x4 + v4[4]*x5;
135:       s5 -= v5[0]*x1 + v5[1]*x2 + v5[2]*x3 + v5[3]*x4 + v5[4]*x5;
136:       v1 += 5; v2 += 5; v3 += 5; v4 += 5; v5 += 5;
137:     }
138:     x[idx  ] = s1;
139:     x[idx+1] = s2;
140:     x[idx+2] = s3;
141:     x[idx+3] = s4;
142:     x[idx+4] = s5;
143:   }

145:   /* backward solve the upper triangular */
146:   for (i=n-1;i>=0;i--){
147:     idx  = 5*i;
148:     s1 = x[idx  ];
149:     s2 = x[idx+1];
150:     s3 = x[idx+2];
151:     s4 = x[idx+3];
152:     s5 = x[idx+4];
153:     for (j=diag[i+1]+1; j<diag[i]; j++) {
154:       jdx = 5*aj[j];
155:       x1  = x[jdx];x2 = x[1+jdx];x3 = x[2+jdx];x4 = x[3+jdx]; x5 = x[4+jdx];
156:       s1 -= v1[0]*x1 + v1[1]*x2 + v1[2]*x3 + v1[3]*x4 + v1[4]*x5;
157:       s2 -= v2[0]*x1 + v2[1]*x2 + v2[2]*x3 + v2[3]*x4 + v2[4]*x5;
158:       s3 -= v3[0]*x1 + v3[1]*x2 + v3[2]*x3 + v3[3]*x4 + v3[4]*x5;
159:       s4 -= v4[0]*x1 + v4[1]*x2 + v4[2]*x3 + v4[3]*x4 + v4[4]*x5;
160:       s5 -= v5[0]*x1 + v5[1]*x2 + v5[2]*x3 + v5[3]*x4 + v5[4]*x5;
161:       v1 += 5; v2 += 5; v3 += 5; v4 += 5; v5 += 5;
162:     }
163:     x[idx  ] = v1[0]*s1 + v1[1]*s2 + v1[2]*s3 + v1[3]*s4 + v1[4]*s5;
164:     x[idx+1] = v2[0]*s1 + v2[1]*s2 + v2[2]*s3 + v2[3]*s4 + v2[4]*s5;
165:     x[idx+2] = v3[0]*s1 + v3[1]*s2 + v3[2]*s3 + v3[3]*s4 + v3[4]*s5;
166:     x[idx+3] = v4[0]*s1 + v4[1]*s2 + v4[2]*s3 + v4[3]*s4 + v4[4]*s5;
167:     x[idx+4] = v5[0]*s1 + v5[1]*s2 + v5[2]*s3 + v5[3]*s4 + v5[4]*s5;
168:     v1 += 5; v2 += 5; v3 += 5; v4 += 5; v5 += 5;
169:   }

171:   VecRestoreArray(bb,(PetscScalar**)&b);
172:   VecRestoreArray(xx,&x);
173:   PetscLogFlops(2.0*25*(a->nz) - 5.0*A->cmap->n);
174:   return(0);
175: }

177: /*=========================================================*/
178: EXTERN_C_BEGIN
181: PetscErrorCode MatFactorGetSolverPackage_bstrm(Mat A,const MatSolverPackage *type)
182: {
184:   *type = MATSOLVERBSTRM;
185:   return(0);
186: }
187: EXTERN_C_END

189: /*=========================================================*/
190: EXTERN_C_BEGIN
193: PetscErrorCode MatLUFactorNumeric_bstrm(Mat F,Mat A,const MatFactorInfo *info)
194: {
195:   /* Mat_SeqBSTRM     *bstrm = (Mat_SeqBSTRM *) F->spptr; */
196:   PetscInt          bs = A->rmap->bs;
198:   Mat_SeqBSTRM  *bstrm;

201:   /*(*bstrm ->MatLUFactorNumeric)(F,A,info); */
202:   switch (bs){
203:     case 4:
204:        MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering(F,A,info);
205:        break;
206:     case 5:
207:        MatLUFactorNumeric_SeqBAIJ_5_NaturalOrdering(F,A,info);
208:        /* MatLUFactorNumeric_SeqBAIJ_5(F,A,info); */
209:        break;
210:     default:
211:       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"not supported for block size %D",bs);
212:   }
213: 
214:   PetscNewLog(F,Mat_SeqBSTRM,&bstrm);
215:   F->spptr = (void *) bstrm;
216:   MatSeqBSTRM_convert_bstrm(F);
217: /*.........................................................
218:   F->ops->solve          = MatSolve_SeqBSTRM_5;
219:   .........................................................*/


222:   return(0);
223: }
224: EXTERN_C_END
225: /*=========================================================*/
226: EXTERN_C_BEGIN
229: PetscErrorCode MatILUFactorSymbolic_bstrm(Mat B,Mat A,IS r,IS c,const MatFactorInfo *info)
230: {
231:   PetscInt ierr;
233:   (MatILUFactorSymbolic_SeqBAIJ)(B,A,r,c,info);
234:   B->ops->lufactornumeric  = MatLUFactorNumeric_bstrm;
235:   return(0);
236: }
237: EXTERN_C_END
238: /*=========================================================*/
239: EXTERN_C_BEGIN
242: PetscErrorCode MatLUFactorSymbolic_bstrm(Mat B,Mat A,IS r,IS c,const MatFactorInfo *info)
243: {
244:   PetscInt ierr;
246:   /* (*bstrm ->MatLUFactorSymbolic)(B,A,r,c,info); */
247:   (MatLUFactorSymbolic_SeqBAIJ)(B,A,r,c,info);
248:   B->ops->lufactornumeric  = MatLUFactorNumeric_bstrm;
249:   return(0);
250: }
251: EXTERN_C_END
252: /*=========================================================*/
253: EXTERN_C_BEGIN
256: PetscErrorCode MatGetFactor_seqbaij_bstrm(Mat A,MatFactorType ftype,Mat *B)
257: {
258:   PetscInt       n = A->rmap->n;
259:   Mat_SeqBSTRM   *bstrm;

263:   if (A->cmap->N != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"matrix must be square");
264:   MatCreate(((PetscObject)A)->comm,B);
265:   MatSetSizes(*B,n,n,n,n);
266:   MatSetType(*B,((PetscObject)A)->type_name);
267:   /* MatSeqBAIJSetPreallocation(*B,bs,0,PETSC_NULL); */

269:   (*B)->ops->ilufactorsymbolic = MatILUFactorSymbolic_bstrm;
270:   (*B)->ops->lufactorsymbolic  = MatLUFactorSymbolic_bstrm;
271:   (*B)->ops->lufactornumeric   = MatLUFactorNumeric_bstrm;
272:   (*B)->ops->destroy           = MatDestroy_SeqBSTRM;
273:   (*B)->factortype             = ftype;
274:   (*B)->assembled              = PETSC_TRUE;  /* required by -ksp_view */
275:   (*B)->preallocated           = PETSC_TRUE;
276:   PetscNewLog(*B,Mat_SeqBSTRM,&bstrm);
277:   (*B)->spptr = (void *) bstrm;
278:   PetscObjectComposeFunctionDynamic((PetscObject)*B,"MatFactorGetSolverPackage_C","MatFactorGetSolverPackage_bstrm",MatFactorGetSolverPackage_bstrm);
279:   return(0);
280: }
281: EXTERN_C_END