Actual source code: bstrmfact.c

petsc-3.6.1 2015-08-06
Report Typos and Errors
  1: #define PETSCMAT_DLL

  3: #include <../src/mat/impls/baij/seq/baij.h>
  4: #include <../src/mat/impls/baij/seq/bstream/bstream.h>

  6: extern PetscErrorCode MatDestroy_SeqBSTRM(Mat A);
  7: extern PetscErrorCode MatSeqBSTRM_convert_bstrm(Mat A);
  8: /*=========================================================*/
 11: PetscErrorCode MatSolve_SeqBSTRM_4(Mat A,Vec bb,Vec xx)
 12: {
 13:   Mat_SeqBAIJ       *a     = (Mat_SeqBAIJ*)A->data;
 14:   Mat_SeqBSTRM      *bstrm = (Mat_SeqBSTRM*)A->spptr;
 15:   PetscErrorCode    ierr;
 16:   PetscInt          i,j,n=a->mbs,*ai=a->i,*aj=a->j, *diag=a->diag,idx,jdx;
 17:   PetscScalar       *x,s1,s2,s3,s4,x1,x2,x3,x4;
 18:   PetscScalar       *v1, *v2, *v3, *v4;
 19:   const PetscScalar *b;
 20:   PetscInt          slen;

 23:   VecGetArray(bb,(PetscScalar**)&b);
 24:   VecGetArray(xx,&x);
 25:   slen = 4*(ai[n]-ai[0]+diag[0]-diag[n]);

 27:   v1 = bstrm->as;
 28:   v2 = v1 + slen;
 29:   v3 = v2 + slen;
 30:   v4 = v3 + slen;

 32:   /* forward solve the lower triangular */
 33:   x[0] = b[0];
 34:   x[1] = b[1];
 35:   x[2] = b[2];
 36:   x[3] = b[3];

 38:   for (i=1; i<n; i++) {
 39:     idx = 4*i;
 40:     s1  = b[idx];
 41:     s2  = b[idx+1];
 42:     s3  = b[idx+2];
 43:     s4  = b[idx+3];
 44:     for (j=ai[i]; j<ai[i+1]; j++) {
 45:       jdx = 4*aj[j];
 46:       x1  = x[jdx];x2 = x[1+jdx];x3 = x[2+jdx];x4 = x[3+jdx];
 47:       s1 -= v1[0]*x1 + v1[1]*x2 + v1[2]*x3  + v1[3]*x4;
 48:       s2 -= v2[0]*x1 + v2[1]*x2 + v2[2]*x3  + v2[3]*x4;
 49:       s3 -= v3[0]*x1 + v3[1]*x2 + v3[2]*x3  + v3[3]*x4;
 50:       s4 -= v4[0]*x1 + v4[1]*x2 + v4[2]*x3  + v4[3]*x4;
 51:       v1 += 4; v2 += 4; v3 += 4; v4 += 4;
 52:     }
 53:     x[idx]   = s1;
 54:     x[idx+1] = s2;
 55:     x[idx+2] = s3;
 56:     x[idx+3] = s4;
 57:   }

 59:   /* backward solve the upper triangular */
 60:   for (i=n-1; i>=0; i--) {
 61:     idx = 4*i;
 62:     s1  = x[idx];
 63:     s2  = x[idx+1];
 64:     s3  = x[idx+2];
 65:     s4  = x[idx+3];
 66:     for (j=diag[i+1]+1; j<diag[i]; j++) {
 67:       jdx = 4*aj[j];
 68:       x1  = x[jdx];x2 = x[1+jdx];x3 = x[2+jdx];x4 = x[3+jdx];
 69:       s1 -= v1[0]*x1 + v1[1]*x2 + v1[2]*x3  + v1[3]*x4;
 70:       s2 -= v2[0]*x1 + v2[1]*x2 + v2[2]*x3  + v2[3]*x4;
 71:       s3 -= v3[0]*x1 + v3[1]*x2 + v3[2]*x3  + v3[3]*x4;
 72:       s4 -= v4[0]*x1 + v4[1]*x2 + v4[2]*x3  + v4[3]*x4;
 73:       v1 += 4; v2 += 4; v3 += 4; v4 += 4;
 74:     }
 75:     x[idx]   =  v1[0]*s1 + v1[1]*s2 + v1[2]*s3  + v1[3]*s4;
 76:     x[idx+1] =  v2[0]*s1 + v2[1]*s2 + v2[2]*s3  + v2[3]*s4;
 77:     x[idx+2] =  v3[0]*s1 + v3[1]*s2 + v3[2]*s3  + v3[3]*s4;
 78:     x[idx+3] =  v4[0]*s1 + v4[1]*s2 + v4[2]*s3  + v4[3]*s4;
 79:     v1      += 4; v2 += 4; v3 += 4; v4 += 4;
 80:   }

 82:   VecRestoreArray(bb,(PetscScalar**)&b);
 83:   VecRestoreArray(xx,&x);
 84:   PetscLogFlops(2.0*16*(a->nz) - 4.0*A->cmap->n);
 85:   return(0);
 86: }

 88: /*=========================================================*/
 91: PetscErrorCode MatSolve_SeqBSTRM_5(Mat A,Vec bb,Vec xx)
 92: {
 93:   Mat_SeqBAIJ       *a     = (Mat_SeqBAIJ*)A->data;
 94:   Mat_SeqBSTRM      *bstrm = (Mat_SeqBSTRM*)A->spptr;
 95:   PetscErrorCode    ierr;
 96:   PetscInt          i,j,n=a->mbs,*ai=a->i,*aj=a->j,*diag = a->diag,idx,jdx;
 97:   PetscScalar       *x,s1,s2,s3,s4,s5,x1,x2,x3,x4,x5;
 98:   PetscScalar       *v1, *v2, *v3, *v4, *v5;
 99:   const PetscScalar *b;
100:   PetscInt          slen;

103:   VecGetArray(bb,(PetscScalar**)&b);
104:   VecGetArray(xx,&x);

106:   slen = 5*(ai[n]-ai[0]+diag[0]-diag[n]);
107:   v1   = bstrm->as;
108:   v2   = v1 + slen;
109:   v3   = v2 + slen;
110:   v4   = v3 + slen;
111:   v5   = v4 + slen;


114:   /* forward solve the lower triangular */
115:   x[0] = b[0];
116:   x[1] = b[1];
117:   x[2] = b[2];
118:   x[3] = b[3];
119:   x[4] = b[4];

121:   for (i=1; i<n; i++) {
122:     idx = 5*i;
123:     s1  = b[idx];
124:     s2  = b[idx+1];
125:     s3  = b[idx+2];
126:     s4  = b[idx+3];
127:     s5  = b[idx+4];
128:     for (j=ai[i]; j<ai[i+1]; j++) {
129:       jdx = 5*aj[j];
130:       x1  = x[jdx];x2 = x[1+jdx];x3 = x[2+jdx];x4 = x[3+jdx]; x5 = x[4+jdx];
131:       s1 -= v1[0]*x1 + v1[1]*x2 + v1[2]*x3 + v1[3]*x4 + v1[4]*x5;
132:       s2 -= v2[0]*x1 + v2[1]*x2 + v2[2]*x3 + v2[3]*x4 + v2[4]*x5;
133:       s3 -= v3[0]*x1 + v3[1]*x2 + v3[2]*x3 + v3[3]*x4 + v3[4]*x5;
134:       s4 -= v4[0]*x1 + v4[1]*x2 + v4[2]*x3 + v4[3]*x4 + v4[4]*x5;
135:       s5 -= v5[0]*x1 + v5[1]*x2 + v5[2]*x3 + v5[3]*x4 + v5[4]*x5;
136:       v1 += 5; v2 += 5; v3 += 5; v4 += 5; v5 += 5;
137:     }
138:     x[idx]   = s1;
139:     x[idx+1] = s2;
140:     x[idx+2] = s3;
141:     x[idx+3] = s4;
142:     x[idx+4] = s5;
143:   }

145:   /* backward solve the upper triangular */
146:   for (i=n-1; i>=0; i--) {
147:     idx = 5*i;
148:     s1  = x[idx];
149:     s2  = x[idx+1];
150:     s3  = x[idx+2];
151:     s4  = x[idx+3];
152:     s5  = x[idx+4];
153:     for (j=diag[i+1]+1; j<diag[i]; j++) {
154:       jdx = 5*aj[j];
155:       x1  = x[jdx];x2 = x[1+jdx];x3 = x[2+jdx];x4 = x[3+jdx]; x5 = x[4+jdx];
156:       s1 -= v1[0]*x1 + v1[1]*x2 + v1[2]*x3 + v1[3]*x4 + v1[4]*x5;
157:       s2 -= v2[0]*x1 + v2[1]*x2 + v2[2]*x3 + v2[3]*x4 + v2[4]*x5;
158:       s3 -= v3[0]*x1 + v3[1]*x2 + v3[2]*x3 + v3[3]*x4 + v3[4]*x5;
159:       s4 -= v4[0]*x1 + v4[1]*x2 + v4[2]*x3 + v4[3]*x4 + v4[4]*x5;
160:       s5 -= v5[0]*x1 + v5[1]*x2 + v5[2]*x3 + v5[3]*x4 + v5[4]*x5;
161:       v1 += 5; v2 += 5; v3 += 5; v4 += 5; v5 += 5;
162:     }
163:     x[idx]   = v1[0]*s1 + v1[1]*s2 + v1[2]*s3 + v1[3]*s4 + v1[4]*s5;
164:     x[idx+1] = v2[0]*s1 + v2[1]*s2 + v2[2]*s3 + v2[3]*s4 + v2[4]*s5;
165:     x[idx+2] = v3[0]*s1 + v3[1]*s2 + v3[2]*s3 + v3[3]*s4 + v3[4]*s5;
166:     x[idx+3] = v4[0]*s1 + v4[1]*s2 + v4[2]*s3 + v4[3]*s4 + v4[4]*s5;
167:     x[idx+4] = v5[0]*s1 + v5[1]*s2 + v5[2]*s3 + v5[3]*s4 + v5[4]*s5;
168:     v1      += 5; v2 += 5; v3 += 5; v4 += 5; v5 += 5;
169:   }

171:   VecRestoreArray(bb,(PetscScalar**)&b);
172:   VecRestoreArray(xx,&x);
173:   PetscLogFlops(2.0*25*(a->nz) - 5.0*A->cmap->n);
174:   return(0);
175: }

177: /*=========================================================*/
180: PetscErrorCode MatFactorGetSolverPackage_bstrm(Mat A,const MatSolverPackage *type)
181: {
183:   *type = MATSOLVERBSTRM;
184:   return(0);
185: }

187: /*=========================================================*/
190: PetscErrorCode MatLUFactorNumeric_bstrm(Mat F,Mat A,const MatFactorInfo *info)
191: {
192:   /* Mat_SeqBSTRM     *bstrm = (Mat_SeqBSTRM*) F->spptr; */
193:   PetscInt       bs = A->rmap->bs;
195:   Mat_SeqBSTRM   *bstrm;

198:   /*(*bstrm ->MatLUFactorNumeric)(F,A,info); */
199:   switch (bs) {
200:   case 4:
201:     MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering(F,A,info);
202:     break;
203:   case 5:
204:     MatLUFactorNumeric_SeqBAIJ_5_NaturalOrdering(F,A,info);
205:     /* MatLUFactorNumeric_SeqBAIJ_5(F,A,info); */
206:     break;
207:   default:
208:     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"not supported for block size %D",bs);
209:   }

211:   PetscNewLog(F,&bstrm);
212:   F->spptr = (void*) bstrm;
213:   MatSeqBSTRM_convert_bstrm(F);
214: /*.........................................................
215:   F->ops->solve          = MatSolve_SeqBSTRM_5;
216:   .........................................................*/
217:   return(0);
218: }
219: /*=========================================================*/
222: PetscErrorCode MatILUFactorSymbolic_bstrm(Mat B,Mat A,IS r,IS c,const MatFactorInfo *info)
223: {
224:   PetscInt ierr;

227:   (MatILUFactorSymbolic_SeqBAIJ)(B,A,r,c,info);
228:   B->ops->lufactornumeric = MatLUFactorNumeric_bstrm;
229:   return(0);
230: }
231: /*=========================================================*/
234: PetscErrorCode MatLUFactorSymbolic_bstrm(Mat B,Mat A,IS r,IS c,const MatFactorInfo *info)
235: {
236:   PetscInt ierr;

239:   /* (*bstrm ->MatLUFactorSymbolic)(B,A,r,c,info); */
240:   (MatLUFactorSymbolic_SeqBAIJ)(B,A,r,c,info);
241:   B->ops->lufactornumeric = MatLUFactorNumeric_bstrm;
242:   return(0);
243: }
244: /*=========================================================*/
247: PETSC_EXTERN PetscErrorCode MatGetFactor_seqbaij_bstrm(Mat A,MatFactorType ftype,Mat *B)
248: {
249:   PetscInt       n = A->rmap->n;
250:   Mat_SeqBSTRM   *bstrm;

254:   if (A->cmap->N != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"matrix must be square");
255:   MatCreate(PetscObjectComm((PetscObject)A),B);
256:   MatSetSizes(*B,n,n,n,n);
257:   MatSetType(*B,((PetscObject)A)->type_name);
258:   /* MatSeqBAIJSetPreallocation(*B,bs,0,NULL); */

260:   (*B)->ops->ilufactorsymbolic = MatILUFactorSymbolic_bstrm;
261:   (*B)->ops->lufactorsymbolic  = MatLUFactorSymbolic_bstrm;
262:   (*B)->ops->lufactornumeric   = MatLUFactorNumeric_bstrm;
263:   (*B)->ops->destroy           = MatDestroy_SeqBSTRM;
264:   (*B)->factortype             = ftype;
265:   (*B)->assembled              = PETSC_TRUE;  /* required by -ksp_view */
266:   (*B)->preallocated           = PETSC_TRUE;

268:   PetscNewLog(*B,&bstrm);
269:   (*B)->spptr = (void*) bstrm;
270:   PetscObjectComposeFunction((PetscObject)*B,"MatFactorGetSolverPackage_C",MatFactorGetSolverPackage_bstrm);
271:   return(0);
272: }