Actual source code: sbstrmfact.c

petsc-3.7.3 2016-08-01
Report Typos and Errors
  1: #define PETSCMAT_DLL

  3: #include <../src/mat/impls/sbaij/seq/sbaij.h>
  4: #include <../src/mat/impls/sbaij/seq/sbstream/sbstream.h>

  6: extern PetscErrorCode MatDestroy_SeqSBSTRM(Mat A);


 11: PetscErrorCode MatSolve_SeqSBSTRM_4_inplace(Mat A,Vec bb,Vec xx)
 12: {
 13:   Mat_SeqSBAIJ   *a   =(Mat_SeqSBAIJ*)A->data;
 14:   IS             isrow=a->row;
 15:   PetscInt       mbs  =a->mbs,*ai=a->i,*aj=a->j;
 17:   const PetscInt *r;
 18:   PetscInt       nz,*vj,k,idx;
 19:   PetscScalar    *x,*b,x0,x1,x2,x3,*t,*tp;

 21:   Mat_SeqSBSTRM   *sbstrm = (Mat_SeqSBSTRM*)A->spptr;
 22:   MatScalar       *as     =sbstrm->as,*diag;
 23:   PetscScalar     tp0, tp1, tp2, tp3;
 24:   const MatScalar *v0, *v1, *v2, *v3;
 25:   PetscInt        slen;

 28:   slen = 4*(ai[mbs]-ai[0]);
 29:   v0   = as + 16*ai[0];
 30:   v1   = v0 + slen;
 31:   v2   = v1 + slen;
 32:   v3   = v2 + slen;

 34:   VecGetArray(bb,&b);
 35:   VecGetArray(xx,&x);
 36:   t    = a->solve_work;
 37:   ISGetIndices(isrow,&r);

 39:   /* solve U^T * D * y = b by forward substitution */
 40:   tp = t;
 41:   for (k=0; k<mbs; k++) { /* t <- perm(b) */
 42:     idx   = 4*r[k];
 43:     tp[0] = b[idx];
 44:     tp[1] = b[idx+1];
 45:     tp[2] = b[idx+2];
 46:     tp[3] = b[idx+3];
 47:     tp   += 4;
 48:   }

 50:   for (k=0; k<mbs; k++) {
 51:     vj = aj + ai[k];
 52:     tp = t + k*4;
 53:     x0 = tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3];
 54:     nz = ai[k+1] - ai[k];

 56:     tp = t + (*vj)*4;
 57:     while (nz--) {
 58:       tp[0]   += v0[0]*x0 + v1[0]*x1 + v2[0]*x2 + v3[0]*x3;
 59:       tp[1]   += v0[1]*x0 + v1[1]*x1 + v2[1]*x2 + v3[1]*x3;
 60:       tp[2]   += v0[2]*x0 + v1[2]*x1 + v2[2]*x2 + v3[2]*x3;
 61:       tp[3]   += v0[3]*x0 + v1[3]*x1 + v2[3]*x2 + v3[3]*x3;
 62:       vj++; tp = t + (*vj)*4;

 64:       v0 += 4; v1 += 4; v2 += 4; v3 += 4;
 65:     }

 67:     /* xk = inv(Dk)*(Dk*xk) */
 68:     diag  = as+k*16;          /* ptr to inv(Dk) */
 69:     tp    = t + k*4;
 70:     tp[0] = diag[0]*x0 + diag[4]*x1 + diag[8]*x2 + diag[12]*x3;
 71:     tp[1] = diag[1]*x0 + diag[5]*x1 + diag[9]*x2 + diag[13]*x3;
 72:     tp[2] = diag[2]*x0 + diag[6]*x1 + diag[10]*x2+ diag[14]*x3;
 73:     tp[3] = diag[3]*x0 + diag[7]*x1 + diag[11]*x2+ diag[15]*x3;
 74:   }

 76:   /* solve U*x = y by back substitution */
 77:   for (k=mbs-1; k>=0; k--) {
 78:     vj = aj + ai[k+1];
 79:     tp = t + k*4;
 80:     x0 = tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; /* xk */
 81:     nz = ai[k+1] - ai[k];

 83:     while (nz--) {
 84:       /* xk += U(k,* */
 85:       v0 -= 4; v1 -= 4; v2 -= 4; v3 -= 4;

 87:       vj--; tp = t + (*vj)*4;

 89:       tp0 = tp[0]; tp1 = tp[1]; tp2 = tp[2]; tp3 = tp[3];
 90:       x0 += v0[3]*tp3 + v0[2]*tp2 + v0[1]*tp1 + v0[0]*tp0;
 91:       x1 += v1[3]*tp3 + v1[2]*tp2 + v1[1]*tp1 + v1[0]*tp0;
 92:       x2 += v2[3]*tp3 + v2[2]*tp2 + v2[1]*tp1 + v2[0]*tp0;
 93:       x3 += v3[3]*tp3 + v3[2]*tp2 + v3[1]*tp1 + v3[0]*tp0;
 94:     }
 95:     tp    = t + k*4;
 96:     tp[0] = x0; tp[1]=x1; tp[2]=x2; tp[3]=x3;

 98:     idx      = 4*r[k];
 99:     x[idx]   = x0;
100:     x[idx+1] = x1;
101:     x[idx+2] = x2;
102:     x[idx+3] = x3;
103:   }

105:   ISRestoreIndices(isrow,&r);
106:   VecRestoreArray(bb,&b);
107:   VecRestoreArray(xx,&x);
108:   PetscLogFlops(4.0*a->bs2*a->nz - (A->rmap->bs+2.0*a->bs2)*mbs);
109:   return(0);
110: }

114: PetscErrorCode MatForwardSolve_SeqSBSTRM_4_NaturalOrdering(PetscInt *ai,PetscInt *aj,MatScalar *aa,PetscInt mbs,PetscScalar *x)
115: {
116:   MatScalar   *diag;
117:   PetscScalar *xp,x0,x1,x2,x3;
118:   PetscInt    nz,*vj,k;

120:   const MatScalar *v0, *v1, *v2, *v3;
121:   PetscInt        slen;

124:   slen = 4*(ai[mbs]-ai[0]);
125:   v0   = aa + 16*ai[0];
126:   v1   = v0 + slen;
127:   v2   = v1 + slen;
128:   v3   = v2 + slen;

130:   for (k=0; k<mbs; k++) {
131:     xp = x + k*4;

133:     x0=xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; /* Dk*xk = k-th block of x */

135:     nz = ai[k+1] - ai[k];
136:     vj = aj + ai[k];
137:     xp = x + (*vj)*4;
138:     while (nz--) {
139:       /*  += U(k,^T*(Dk*xk) */
140:       xp[0]   += v0[0]*x0 + v1[0]*x1 + v2[0]*x2 + v3[0]*x3;
141:       xp[1]   += v0[1]*x0 + v1[1]*x1 + v2[1]*x2 + v3[1]*x3;
142:       xp[2]   += v0[2]*x0 + v1[2]*x1 + v2[2]*x2 + v3[2]*x3;
143:       xp[3]   += v0[3]*x0 + v1[3]*x1 + v2[3]*x2 + v3[3]*x3;
144:       vj++; xp = x + (*vj)*4;

146:       v0 += 4; v1 += 4; v2 += 4; v3 += 4;
147:     }
148:     /* xk = inv(Dk)*(Dk*xk) */
149:     diag  = aa+k*16;         /* ptr to inv(Dk) */
150:     xp    = x + k*4;
151:     xp[0] = diag[0]*x0 + diag[4]*x1 + diag[8]*x2 + diag[12]*x3;
152:     xp[1] = diag[1]*x0 + diag[5]*x1 + diag[9]*x2 + diag[13]*x3;
153:     xp[2] = diag[2]*x0 + diag[6]*x1 + diag[10]*x2+ diag[14]*x3;
154:     xp[3] = diag[3]*x0 + diag[7]*x1 + diag[11]*x2+ diag[15]*x3;
155:   }
156:   return(0);
157: }

161: PetscErrorCode MatBackwardSolve_SeqSBSTRM_4_NaturalOrdering(PetscInt *ai,PetscInt *aj,MatScalar *aa,PetscInt mbs,PetscScalar *x)
162: {
163:   PetscScalar *xp,x0,x1,x2,x3;
164:   PetscInt    nz,*vj,k;

166:   PetscScalar     xp0, xp1, xp2, xp3;
167:   const MatScalar *v0, *v1, *v2, *v3;
168:   PetscInt        slen;

171:   slen = 4*(ai[mbs]-ai[0]);
172:   v0   = aa + 16*ai[0]+4*(ai[mbs]-ai[0]);
173:   v1   = v0 + slen;
174:   v2   = v1 + slen;
175:   v3   = v2 + slen;

177:   for (k=mbs-1; k>=0; k--) {
178:     xp = x + k*4;
179:     x0=xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; /* xk */
180:     nz = ai[k+1] - ai[k];
181:     vj = aj + ai[k+1];
182:     while (nz--) {
183:       /* xk += U(k,* */
184:       v0 -= 4; v1 -= 4; v2 -= 4; v3 -=4;

186:       vj--; xp = x + (*vj)*4;

188:       xp0 = xp[0]; xp1 = xp[1]; xp2 = xp[2]; xp3 = xp[3];
189:       x0 += v0[3]*xp3 + v0[2]*xp2 + v0[1]*xp1 + v0[0]*xp0;
190:       x1 += v1[3]*xp3 + v1[2]*xp2 + v1[1]*xp1 + v1[0]*xp0;
191:       x2 += v2[3]*xp3 + v2[2]*xp2 + v2[1]*xp1 + v2[0]*xp0;
192:       x3 += v3[3]*xp3 + v3[2]*xp2 + v3[1]*xp1 + v3[0]*xp0;
193:     }
194:     xp = x + k*4;

196:     xp[0] = x0; xp[1] = x1; xp[2] = x2; xp[3] = x3;
197:   }
198:   return(0);
199: }

203: PetscErrorCode MatSolve_SeqSBSTRM_4_NaturalOrdering_inplace(Mat A,Vec bb,Vec xx)
204: {
205:   Mat_SeqSBAIJ   *a =(Mat_SeqSBAIJ*)A->data;
206:   PetscInt       mbs=a->mbs,*ai=a->i,*aj=a->j;
207:   PetscScalar    *x,*b;

210:   Mat_SeqSBSTRM *sbstrm = (Mat_SeqSBSTRM*)A->spptr;
211:   MatScalar     *as     =sbstrm->as;

214: #if 0
215:   MatSolve_SeqSBSTRM_4_inplace(A, bb, xx);
216: #endif
217:   VecGetArray(bb,&b);
218:   VecGetArray(xx,&x);
219:   /* solve U^T * D * y = b by forward substitution */
220:   PetscMemcpy(x,b,4*mbs*sizeof(PetscScalar)); /* x <- b */
221:   MatForwardSolve_SeqSBSTRM_4_NaturalOrdering(ai,aj,as,mbs,x);
222:   /* solve U*x = y by back substitution */
223:   MatBackwardSolve_SeqSBSTRM_4_NaturalOrdering(ai,aj,as,mbs,x);
224:   VecRestoreArray(bb,&b);
225:   VecRestoreArray(xx,&x);
226:   PetscLogFlops(4.0*a->bs2*a->nz - (A->rmap->bs+2.0*a->bs2)*mbs);
227:   return(0);
228: }

232: PetscErrorCode MatForwardSolve_SeqSBSTRM_4_NaturalOrdering_inplace(Mat A,Vec bb,Vec xx)
233: {
234:   Mat_SeqSBAIJ   *a =(Mat_SeqSBAIJ*)A->data;
235:   PetscInt       mbs=a->mbs,*ai=a->i,*aj=a->j;
236:   PetscScalar    *x,*b;

239:   Mat_SeqSBSTRM *sbstrm = (Mat_SeqSBSTRM*)A->spptr;
240:   MatScalar     *as     =sbstrm->as;

243:   VecGetArray(bb,&b);
244:   VecGetArray(xx,&x);
245:   PetscMemcpy(x,b,4*mbs*sizeof(PetscScalar)); /* x <- b */
246:   MatForwardSolve_SeqSBSTRM_4_NaturalOrdering(ai,aj,as,mbs,x);
247:   VecRestoreArray(bb,&b);
248:   VecRestoreArray(xx,&x);
249:   PetscLogFlops(2.0*a->bs2*a->nz - A->rmap->bs*mbs);
250:   return(0);
251: }

255: PetscErrorCode MatBackwardSolve_SeqSBSTRM_4_NaturalOrdering_inplace(Mat A,Vec bb,Vec xx)
256: {
257:   Mat_SeqSBAIJ   *a =(Mat_SeqSBAIJ*)A->data;
258:   PetscInt       mbs=a->mbs,*ai=a->i,*aj=a->j;
259:   PetscScalar    *x,*b;

262:   Mat_SeqSBSTRM *sbstrm = (Mat_SeqSBSTRM*)A->spptr;
263:   MatScalar     *as     =sbstrm->as;

266:   VecGetArray(bb,&b);
267:   VecGetArray(xx,&x);
268:   PetscMemcpy(x,b,4*mbs*sizeof(PetscScalar));
269:   MatBackwardSolve_SeqSBSTRM_4_NaturalOrdering(ai,aj,as,mbs,x);
270:   VecRestoreArray(bb,&b);
271:   VecRestoreArray(xx,&x);
272:   PetscLogFlops(2.0*a->bs2*(a->nz-mbs));
273:   return(0);
274: }

278: PetscErrorCode MatSolve_SeqSBSTRM_5_inplace(Mat A,Vec bb,Vec xx)
279: {
280:   Mat_SeqSBAIJ   *a   =(Mat_SeqSBAIJ*)A->data;
281:   IS             isrow=a->row;
282:   PetscInt       mbs  =a->mbs,*ai=a->i,*aj=a->j;
284:   const PetscInt *r;
285:   PetscInt       nz,*vj,k,idx;
286:   PetscScalar    *x,*b,x0,x1,x2,x3,x4,*t,*tp;

288:   Mat_SeqSBSTRM   *sbstrm = (Mat_SeqSBSTRM*)A->spptr;
289:   MatScalar       *as     =sbstrm->as,*diag;
290:   PetscScalar     tp0, tp1, tp2, tp3, tp4;
291:   const MatScalar *v0, *v1, *v2, *v3, *v4;
292:   PetscInt        slen;

295:   VecGetArray(bb,&b);
296:   VecGetArray(xx,&x);
297:   t    = a->solve_work;
298:   ISGetIndices(isrow,&r);


301:   slen = 5*(ai[mbs]-ai[0]);
302:   v0   = as + 25*ai[0];
303:   v1   = v0 + slen;
304:   v2   = v1 + slen;
305:   v3   = v2 + slen;
306:   v4   = v3 + slen;

308:   /* solve U^T * D * y = b by forward substitution */
309:   tp = t;
310:   for (k=0; k<mbs; k++) { /* t <- perm(b) */
311:     idx   = 5*r[k];
312:     tp[0] = b[idx];
313:     tp[1] = b[idx+1];
314:     tp[2] = b[idx+2];
315:     tp[3] = b[idx+3];
316:     tp[4] = b[idx+4];
317:     tp   += 5;
318:   }

320:   for (k=0; k<mbs; k++) {
321:     vj = aj + ai[k];
322:     tp = t + k*5;
323:     x0 =tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; x4=tp[4];
324:     nz = ai[k+1] - ai[k];

326:     tp = t + (*vj)*5;
327:     while (nz--) {
328:       tp[0]   += v0[0]*x0 + v1[0]*x1 + v2[0]*x2 + v3[0]*x3 + v4[0]*x4;
329:       tp[1]   += v0[1]*x0 + v1[1]*x1 + v2[1]*x2 + v3[1]*x3 + v4[1]*x4;
330:       tp[2]   += v0[2]*x0 + v1[2]*x1 + v2[2]*x2 + v3[2]*x3 + v4[2]*x4;
331:       tp[3]   += v0[3]*x0 + v1[3]*x1 + v2[3]*x2 + v3[3]*x3 + v4[3]*x4;
332:       tp[4]   += v0[4]*x0 + v1[4]*x1 + v2[4]*x2 + v3[4]*x3 + v4[4]*x4;
333:       vj++; tp = t + (*vj)*5;

335:       v0 += 5; v1 += 5; v2 += 5; v3 +=5; v4 += 5;
336:     }

338:     /* xk = inv(Dk)*(Dk*xk) */
339:     diag  = as+k*25;          /* ptr to inv(Dk) */
340:     tp    = t + k*5;
341:     tp[0] = diag[0]*x0 + diag[5]*x1 + diag[10]*x2 + diag[15]*x3 + diag[20]*x4;
342:     tp[1] = diag[1]*x0 + diag[6]*x1 + diag[11]*x2 + diag[16]*x3 + diag[21]*x4;
343:     tp[2] = diag[2]*x0 + diag[7]*x1 + diag[12]*x2 + diag[17]*x3 + diag[22]*x4;
344:     tp[3] = diag[3]*x0 + diag[8]*x1 + diag[13]*x2 + diag[18]*x3 + diag[23]*x4;
345:     tp[4] = diag[4]*x0 + diag[9]*x1 + diag[14]*x2 + diag[19]*x3 + diag[24]*x4;
346:   }

348:   /* solve U*x = y by back substitution */
349:   for (k=mbs-1; k>=0; k--) {
350:     vj = aj + ai[k+1];
351:     tp = t + k*5;
352:     x0 =tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; x4=tp[4]; /* xk */
353:     nz = ai[k+1] - ai[k];

355:     while (nz--) {
356:       /* xk += U(k,* */
357:       v0 -= 5; v1 -= 5; v2 -= 5; v3 -=5; v4 -= 5;

359:       vj--; tp = t + (*vj)*5;
360:       tp0 = tp[0]; tp1 = tp[1]; tp2 = tp[2]; tp3 = tp[3]; tp4 = tp[4];
361:       x0 += v0[4]*tp4 + v0[3]*tp3 + v0[2]*tp2 + v0[1]*tp1 + v0[0]*tp0;
362:       x1 += v1[4]*tp4 + v1[3]*tp3 + v1[2]*tp2 + v1[1]*tp1 + v1[0]*tp0;
363:       x2 += v2[4]*tp4 + v2[3]*tp3 + v2[2]*tp2 + v2[1]*tp1 + v2[0]*tp0;
364:       x3 += v3[4]*tp4 + v3[3]*tp3 + v3[2]*tp2 + v3[1]*tp1 + v3[0]*tp0;
365:       x4 += v4[4]*tp4 + v4[3]*tp3 + v4[2]*tp2 + v4[1]*tp1 + v4[0]*tp0;
366:     }
367:     tp    = t + k*5;
368:     tp[0] = x0; tp[1]=x1; tp[2]=x2; tp[3]=x3; tp[4]=x4;
369:     idx   = 5*r[k];

371:     x[idx]   = x0;
372:     x[idx+1] = x1;
373:     x[idx+2] = x2;
374:     x[idx+3] = x3;
375:     x[idx+4] = x4;
376:   }

378:   ISRestoreIndices(isrow,&r);
379:   VecRestoreArray(bb,&b);
380:   VecRestoreArray(xx,&x);
381:   PetscLogFlops(4.0*a->bs2*a->nz - (A->rmap->bs+2.0*a->bs2)*mbs);
382:   return(0);
383: }

387: PetscErrorCode MatForwardSolve_SeqSBSTRM_5_NaturalOrdering(PetscInt *ai,PetscInt *aj,MatScalar *aa,PetscInt mbs,PetscScalar *x)
388: {
389:   MatScalar   *diag;
390:   PetscScalar *xp,x0,x1,x2,x3,x4;
391:   PetscInt    nz,*vj,k;

393:   const MatScalar *v0, *v1, *v2, *v3, *v4;
394:   PetscInt        slen;

397:   slen = 5*(ai[mbs]-ai[0]);
398:   v0   = aa + 25*ai[0];
399:   v1   = v0 + slen;
400:   v2   = v1 + slen;
401:   v3   = v2 + slen;
402:   v4   = v3 + slen;

404:   for (k=0; k<mbs; k++) {
405:     xp = x + k*5;
406:     x0 = xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; x4=xp[4]; /* Dk*xk = k-th block of x */
407:     nz = ai[k+1] - ai[k];
408:     vj = aj + ai[k];
409:     xp = x + (*vj)*5;
410:     while (nz--) {
411:       /*  += U(k,^T*(Dk*xk) */
412:       xp[0] += v0[0]*x0 + v1[0]*x1 + v2[0]*x2 + v3[0]*x3 + v4[0]*x4;
413:       xp[1] += v0[1]*x0 + v1[1]*x1 + v2[1]*x2 + v3[1]*x3 + v4[1]*x4;
414:       xp[2] += v0[2]*x0 + v1[2]*x1 + v2[2]*x2 + v3[2]*x3 + v4[2]*x4;
415:       xp[3] += v0[3]*x0 + v1[3]*x1 + v2[3]*x2 + v3[3]*x3 + v4[3]*x4;
416:       xp[4] += v0[4]*x0 + v1[4]*x1 + v2[4]*x2 + v3[4]*x3 + v4[4]*x4;

418:       vj++; xp = x + (*vj)*5;

420:       v0 += 5; v1 += 5; v2 += 5; v3 +=5; v4 += 5;
421:     }
422:     /* xk = inv(Dk)*(Dk*xk) */
423:     diag  = aa+k*25;         /* ptr to inv(Dk) */
424:     xp    = x + k*5;
425:     xp[0] = diag[0]*x0 + diag[5]*x1 + diag[10]*x2 + diag[15]*x3 + diag[20]*x4;
426:     xp[1] = diag[1]*x0 + diag[6]*x1 + diag[11]*x2 + diag[16]*x3 + diag[21]*x4;
427:     xp[2] = diag[2]*x0 + diag[7]*x1 + diag[12]*x2 + diag[17]*x3 + diag[22]*x4;
428:     xp[3] = diag[3]*x0 + diag[8]*x1 + diag[13]*x2 + diag[18]*x3 + diag[23]*x4;
429:     xp[4] = diag[4]*x0 + diag[9]*x1 + diag[14]*x2 + diag[19]*x3 + diag[24]*x4;
430:   }
431:   return(0);
432: }

436: PetscErrorCode MatBackwardSolve_SeqSBSTRM_5_NaturalOrdering(PetscInt *ai,PetscInt *aj,MatScalar *aa,PetscInt mbs,PetscScalar *x)
437: {
438:   PetscScalar *xp,x0,x1,x2,x3,x4;
439:   PetscInt    nz,*vj,k;

441:   PetscScalar     xp0, xp1, xp2, xp3, xp4;
442:   const MatScalar *v0, *v1, *v2, *v3, *v4;
443:   PetscInt        slen;

446:   slen = 5*(ai[mbs]-ai[0]);
447:   v0   = aa + 25*ai[0]+5*(ai[mbs]-ai[0]);
448:   v1   = v0 + slen;
449:   v2   = v1 + slen;
450:   v3   = v2 + slen;
451:   v4   = v3 + slen;

453:   for (k=mbs-1; k>=0; k--) {

455:     xp = x + k*5;
456:     x0 = xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; x4=xp[4]; /* xk */
457:     nz = ai[k+1] - ai[k];

459:     vj = aj + ai[k+1];

461:     while (nz--) {
462:       /* xk += U(k,* */
463:       v0 -= 5; v1 -= 5; v2 -= 5; v3 -=5; v4 -= 5;

465:       vj--; xp = x + (*vj)*5;

467:       xp4 = xp[4]; xp3 = xp[3]; xp2 = xp[2]; xp1 = xp[1]; xp0 = xp[0];
468:       x0 += v0[4]*xp4 + v0[3]*xp3 + v0[2]*xp2 + v0[1]*xp1 + v0[0]*xp0;
469:       x1 += v1[4]*xp4 + v1[3]*xp3 + v1[2]*xp2 + v1[1]*xp1 + v1[0]*xp0;
470:       x2 += v2[4]*xp4 + v2[3]*xp3 + v2[2]*xp2 + v2[1]*xp1 + v2[0]*xp0;
471:       x3 += v3[4]*xp4 + v3[3]*xp3 + v3[2]*xp2 + v3[1]*xp1 + v3[0]*xp0;
472:       x4 += v4[4]*xp4 + v4[3]*xp3 + v4[2]*xp2 + v4[1]*xp1 + v4[0]*xp0;
473:     }
474:     xp = x + k*5;

476:     xp[0]=x0; xp[1]=x1; xp[2]=x2; xp[3]=x3; xp[4]=x4;
477:   }
478:   return(0);
479: }

483: PetscErrorCode MatSolve_SeqSBSTRM_5_NaturalOrdering_inplace(Mat A,Vec bb,Vec xx)
484: {
485:   Mat_SeqSBAIJ   *a =(Mat_SeqSBAIJ*)A->data;
486:   PetscInt       mbs=a->mbs,*ai=a->i,*aj=a->j;
487:   PetscScalar    *x,*b;

490:   Mat_SeqSBSTRM *sbstrm = (Mat_SeqSBSTRM*)A->spptr;
491:   MatScalar     *as     =sbstrm->as;

494:   VecGetArray(bb,&b);
495:   VecGetArray(xx,&x);

497:   /* solve U^T * D * y = b by forward substitution */
498:   PetscMemcpy(x,b,5*mbs*sizeof(PetscScalar)); /* x <- b */
499:   MatForwardSolve_SeqSBSTRM_5_NaturalOrdering(ai,aj,as,mbs,x);

501:   /* solve U*x = y by back substitution */
502:   MatBackwardSolve_SeqSBSTRM_5_NaturalOrdering(ai,aj,as,mbs,x);

504:   VecRestoreArray(bb,&b);
505:   VecRestoreArray(xx,&x);
506:   PetscLogFlops(4.0*a->bs2*a->nz - (A->rmap->bs+2.0*a->bs2)*mbs);
507:   return(0);
508: }

512: PetscErrorCode MatForwardSolve_SeqSBSTRM_5_NaturalOrdering_inplace(Mat A,Vec bb,Vec xx)
513: {
514:   Mat_SeqSBAIJ   *a =(Mat_SeqSBAIJ*)A->data;
515:   PetscInt       mbs=a->mbs,*ai=a->i,*aj=a->j;
516:   PetscScalar    *x,*b;

519:   Mat_SeqSBSTRM *sbstrm = (Mat_SeqSBSTRM*)A->spptr;
520:   MatScalar     *as     =sbstrm->as;

523:   VecGetArray(bb,&b);
524:   VecGetArray(xx,&x);
525:   PetscMemcpy(x,b,5*mbs*sizeof(PetscScalar)); /* x <- b */
526:   MatForwardSolve_SeqSBSTRM_5_NaturalOrdering(ai,aj,as,mbs,x);
527:   VecRestoreArray(bb,&b);
528:   VecRestoreArray(xx,&x);
529:   PetscLogFlops(2.0*a->bs2*a->nz - A->rmap->bs*mbs);
530:   return(0);
531: }

535: PetscErrorCode MatBackwardSolve_SeqSBSTRM_5_NaturalOrdering_inplace(Mat A,Vec bb,Vec xx)
536: {
537:   Mat_SeqSBAIJ   *a =(Mat_SeqSBAIJ*)A->data;
538:   PetscInt       mbs=a->mbs,*ai=a->i,*aj=a->j;
539:   PetscScalar    *x,*b;
541:   Mat_SeqSBSTRM  *sbstrm = (Mat_SeqSBSTRM*)A->spptr;
542:   MatScalar      *as     =sbstrm->as;

545:   VecGetArray(bb,&b);
546:   VecGetArray(xx,&x);
547:   PetscMemcpy(x,b,5*mbs*sizeof(PetscScalar));
548:   MatBackwardSolve_SeqSBSTRM_5_NaturalOrdering(ai,aj,as,mbs,x);
549:   VecRestoreArray(bb,&b);
550:   VecRestoreArray(xx,&x);
551:   PetscLogFlops(2.0*a->bs2*(a->nz-mbs));
552:   return(0);
553: }


558: PetscErrorCode SeqSBSTRM_convertFact_sbstrm(Mat F)
559: {
560:   Mat_SeqSBAIJ   *a      = (Mat_SeqSBAIJ*) F->data;
561:   Mat_SeqSBSTRM  *sbstrm = (Mat_SeqSBSTRM*) F->spptr;
562:   PetscInt       m       = a->mbs, bs = F->rmap->bs;
563:   PetscInt       *ai     = a->i;
564:   PetscInt       i,j,ib,jb;
565:   MatScalar      *aa = a->a, *aau, *asu;
567:   PetscInt       bs2, rbs,  cbs, blen, slen;
568:   PetscScalar    **asp;

571:   sbstrm->rbs = bs;
572:   sbstrm->cbs = bs;

574:   rbs  = cbs = bs;
575:   bs2  = rbs*cbs;
576:   blen = ai[m]-ai[0];
577:   slen = blen*cbs;

579:   if (sbstrm->as) {
580:     PetscFree(sbstrm->as);
581:   }
582:   PetscMalloc1(bs2*ai[m], &sbstrm->as);
583:   PetscMalloc1(rbs, &asp);

585:   asu = sbstrm->as;
586:   for (i=0; i<m*bs2; i++) asu[i] = aa[i];

588:   asu = sbstrm->as + ai[0]*bs2;
589:   aau = aa         + ai[0]*bs2;

591:   for (i=0; i<rbs; i++) asp[i] = asu + i*slen;

593:   for (j=0;j<blen;j++) {
594:     for (jb=0; jb<cbs; jb++) {
595:       for (ib=0; ib<rbs; ib++) asp[ib][j*cbs+jb] = aau[j*bs2+jb*rbs+ib];
596:     }
597:   }

599:   switch (bs) {
600:   case 4:
601:     F->ops->forwardsolve  = MatForwardSolve_SeqSBSTRM_4_NaturalOrdering_inplace;
602:     F->ops->backwardsolve = MatBackwardSolve_SeqSBSTRM_4_NaturalOrdering_inplace;
603:     F->ops->solve         = MatSolve_SeqSBSTRM_4_NaturalOrdering_inplace;
604:     break;
605:   case 5:
606:     F->ops->forwardsolve  = MatForwardSolve_SeqSBSTRM_5_NaturalOrdering_inplace;
607:     F->ops->backwardsolve = MatBackwardSolve_SeqSBSTRM_5_NaturalOrdering_inplace;
608:     F->ops->solve         = MatSolve_SeqSBSTRM_5_NaturalOrdering_inplace;
609:     break;
610:   default:
611:     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"not supported for block size %D",bs);
612:   }
613: #if 0
614: #endif

616:   PetscFree(asp);
617:   return(0);
618: }

620: /*=========================================================*/

624: PetscErrorCode MatFactorGetSolverPackage_seqsbaij_sbstrm(Mat A,const MatSolverPackage *type)
625: {
627:   *type = MATSOLVERSBSTRM;
628:   return(0);
629: }

633: PetscErrorCode MatCholeskyFactorNumeric_sbstrm(Mat F,Mat A,const MatFactorInfo *info)
634: {
635:   PetscInt       bs = A->rmap->bs;

639:   switch (bs) {
640:   case 4:
641:     MatCholeskyFactorNumeric_SeqSBAIJ_4_NaturalOrdering(F,A,info);
642:     break;
643:   case 5:
644:     MatCholeskyFactorNumeric_SeqSBAIJ_5_NaturalOrdering(F,A,info);
645:     break;
646:   default:
647:     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"not supported for block size %D",bs);
648:   }

650:   SeqSBSTRM_convertFact_sbstrm(F);
651:   return(0);
652: }
653: /*=========================================================*/
656: PetscErrorCode MatICCFactorSymbolic_sbstrm(Mat B,Mat A,IS perm,const MatFactorInfo *info)
657: {
658:   PetscInt ierr;

661:   (MatICCFactorSymbolic_SeqSBAIJ)(B,A,perm,info);

663:   B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_sbstrm;
664:   return(0);
665: }
666: /*=========================================================*/
669: PetscErrorCode MatCholeskyFactorSymbolic_sbstrm(Mat B,Mat A,IS perm,const MatFactorInfo *info)
670: {
671:   PetscInt ierr;

674:   (MatCholeskyFactorSymbolic_SeqSBAIJ)(B,A,perm,info);

676:   B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_sbstrm;
677:   return(0);
678: }
679: /*=========================================================*/

683: PETSC_INTERN PetscErrorCode MatGetFactor_seqsbaij_sbstrm(Mat A,MatFactorType ftype,Mat *F)
684: {
685:   Mat            B;
686:   PetscInt       bs = A->rmap->bs;
687:   Mat_SeqSBSTRM  *sbstrm;

691:   if (A->cmap->N != A->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n);
692:   MatCreate(PetscObjectComm((PetscObject)A),&B);
693:   MatSetSizes(B,A->rmap->n,A->cmap->n,PETSC_DETERMINE,PETSC_DETERMINE);
694:   MatSetType(B,((PetscObject)A)->type_name);
695:   MatSeqSBAIJSetPreallocation_SeqSBAIJ(B,bs,MAT_SKIP_ALLOCATION,NULL);


698:   B->ops->iccfactorsymbolic      = MatICCFactorSymbolic_sbstrm;
699:   B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_sbstrm;
700:   B->ops->choleskyfactornumeric  = MatCholeskyFactorNumeric_sbstrm;

702:   B->ops->destroy = MatDestroy_SeqSBSTRM;
703:   B->factortype   = ftype;
704:   B->assembled    = PETSC_TRUE;                  /* required by -ksp_view */
705:   B->preallocated = PETSC_TRUE;

707:   PetscNewLog(B,&sbstrm);

709:   PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverPackage_C",MatFactorGetSolverPackage_seqsbaij_sbstrm);

711:   PetscFree(B->solvertype);
712:   PetscStrallocpy(MATSOLVERSBSTRM,&B->solvertype);

714:   B->spptr = sbstrm;
715:   *F       = B;
716:   return(0);
717: }