Actual source code: bstream.c

petsc-3.3-p7 2013-05-11
  1: #define PETSCMAT_DLL

 3:  #include ../src/mat/impls/baij/seq/baij.h
 4:  #include ../src/mat/impls/baij/seq/bstream/bstream.h

  8: PetscErrorCode MatDestroy_SeqBSTRM(Mat A)
  9: {
 10:   PetscErrorCode  ierr;
 11:   Mat_SeqBSTRM    *bstrm = (Mat_SeqBSTRM *) A->spptr;

 13:   if(bstrm) {
 14:      PetscFree(bstrm->as);
 15:   }
 16:   PetscObjectChangeTypeName( (PetscObject)A, MATSEQBAIJ);
 17:   MatDestroy_SeqBAIJ(A);
 18:   return(0);
 19: }
 20: /*=========================================================*/
 21: PetscErrorCode MatDuplicate_SeqBSTRM(Mat A, MatDuplicateOption op, Mat *M)
 22: {
 24:   SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot duplicate STRM matrices yet");
 25:   return(0);
 26: }
 27: /*=========================================================*/
 28: extern PetscErrorCode MatSolve_SeqBSTRM_4(Mat A,Vec bb,Vec xx);
 29: extern PetscErrorCode MatSolve_SeqBSTRM_5(Mat A,Vec bb,Vec xx);


 32: /*=========================================================*/
 35: PetscErrorCode MatSeqBSTRM_convert_bstrm(Mat A)
 36: {
 37:   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ *) A->data;
 38:   Mat_SeqBSTRM   *bstrm = (Mat_SeqBSTRM*) A->spptr;
 39:   PetscInt       m = a->mbs, bs = A->rmap->bs;
 40:   PetscInt       *ai = a->i, *diag = a->diag;
 41:   PetscInt       i,j,ib,jb;
 42:   MatScalar      *aa = a->a;
 44:   PetscInt  bs2, rbs,  cbs, blen, slen;
 45:   PetscScalar **asp;

 48:   bstrm->rbs = bs;
 49:   bstrm->cbs = bs;


 52:   rbs = cbs = bs;
 53:   bs2 = rbs*cbs;
 54:   blen = ai[m]-ai[0]+diag[0] - diag[m];
 55:   slen = blen*cbs;

 57:   PetscFree(bstrm->as);
 58:   PetscMalloc(bs2*blen*sizeof(MatScalar), &bstrm->as);

 60:   PetscMalloc(rbs*sizeof(MatScalar *), &asp);
 61: 
 62:   for(i=0;i<rbs;i++) asp[i] = bstrm->as + i*slen;

 64:   for(j=0;j<blen;j++) {
 65:      for (jb=0; jb<cbs; jb++){
 66:      for (ib=0; ib<rbs; ib++){
 67:          asp[ib][j*cbs+jb] = aa[j*bs2+jb*rbs+ib];
 68:      }}
 69:   }

 71:   PetscFree(asp);
 72:   switch (bs){
 73:     case 4:
 74:        A->ops->solve   = MatSolve_SeqBSTRM_4;
 75:        break;
 76:     case 5:
 77:        A->ops->solve   = MatSolve_SeqBSTRM_5;
 78:        break;
 79:     default:
 80:       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"not supported for block size %D",bs);
 81:   }
 82:   return(0);
 83: }

 85: /*=========================================================*/
 86: extern PetscErrorCode MatSeqBSTRM_create_bstrm(Mat);

 90: PetscErrorCode MatAssemblyEnd_SeqBSTRM(Mat A, MatAssemblyType mode)
 91: {

 95:   if (mode == MAT_FLUSH_ASSEMBLY) return(0);
 96:   MatAssemblyEnd_SeqBAIJ(A, mode);

 98:   MatSeqBSTRM_create_bstrm(A);
 99:   return(0);
100: }
101: /*=========================================================*/
102: EXTERN_C_BEGIN
105: PetscErrorCode  MatConvert_SeqBAIJ_SeqBSTRM(Mat A,const MatType type,MatReuse reuse,Mat *newmat)
106: {
108:   Mat            B = *newmat;
109:   Mat_SeqBSTRM   *bstrm;

112:   if (reuse == MAT_INITIAL_MATRIX) {
113:     MatDuplicate(A,MAT_COPY_VALUES,&B);
114:   }
115: 

117:   PetscNewLog(B,Mat_SeqBSTRM,&bstrm);
118:   B->spptr = (void *) bstrm;

120:   /* Set function pointers for methods that we inherit from BAIJ but override. */
121:   B->ops->duplicate        = MatDuplicate_SeqBSTRM;
122:   B->ops->assemblyend      = MatAssemblyEnd_SeqBSTRM;
123:   B->ops->destroy          = MatDestroy_SeqBSTRM;

125:   /* If A has already been assembled, compute the permutation. */
126:   if (A->assembled) {
127:       MatSeqBSTRM_create_bstrm(B);
128:   }
129:   PetscObjectChangeTypeName((PetscObject)B,MATSEQBSTRM);
130:   *newmat = B;
131:   return(0);
132: }
133: EXTERN_C_END

135: /*=========================================================*/
138: PetscErrorCode MatCreateSeqBSTRM(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
139: {
142:     MatCreate(comm,A);
143:     MatSetSizes(*A,m,n,m,n);
144:     MatSetType(*A,MATSEQBSTRM);
145:     MatSeqBAIJSetPreallocation_SeqBAIJ(*A,bs,nz,(PetscInt*)nnz);
146:     (*A)->rmap->bs = bs;
147:   return(0);
148: }
149: /*=========================================================*/
150: EXTERN_C_BEGIN
153: PetscErrorCode MatCreate_SeqBSTRM(Mat A)
154: {

158:   MatSetType(A,MATSEQBAIJ);
159:   MatConvert_SeqBAIJ_SeqBSTRM(A,MATSEQBSTRM,MAT_REUSE_MATRIX,&A);

161:   PetscObjectComposeFunctionDynamic((PetscObject)A,"MatConvert_seqbaij_seqbstrm_C",
162:                                      "MatConvert_SeqBAIJ_SeqBSTRM",
163:                                       MatConvert_SeqBAIJ_SeqBSTRM);
164:   return(0);
165: }
166: EXTERN_C_END
167: /*=========================================================*/

169: /*=========================================================*/
172: PetscErrorCode MatSOR_SeqBSTRM_4(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
173: {
174:   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
175:   PetscScalar        *x,x1,x2,x3,x4,s1,s2,s3,s4;
176:   const MatScalar    *idiag,*mdiag;
177:   const PetscScalar  *b;
178:   PetscErrorCode     ierr;
179:   PetscInt           m = a->mbs,i,i2,nz,idx;
180:   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;

182:   Mat_SeqBSTRM      *bstrm = (Mat_SeqBSTRM *)A->spptr;
183:   MatScalar         *v1,*v2,*v3,*v4, *v10,*v20,*v30,*v40;
184:   PetscInt slen;

187:   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
188:   its = its*lits;
189:   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
190:   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
191:   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
192:   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
193:   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");

195:   if (!a->idiagvalid){MatInvertBlockDiagonal(A,PETSC_NULL);}

197:   diag  = a->diag;
198:   idiag = a->idiag;
199:   VecGetArray(xx,&x);
200:   VecGetArray(bb,(PetscScalar**)&b);

202:   slen = 4*(ai[m]-ai[0]);
203:   v10 = bstrm->as;
204:   v20 = v10 + slen;
205:   v30 = v20 + slen;
206:   v40 = v30 + slen;

208:   if (flag & SOR_ZERO_INITIAL_GUESS) {
209:     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
210:       x[0] = b[0]*idiag[0] + b[1]*idiag[4] + b[2]*idiag[8]  + b[3]*idiag[12];
211:       x[1] = b[0]*idiag[1] + b[1]*idiag[5] + b[2]*idiag[9]  + b[3]*idiag[13];
212:       x[2] = b[0]*idiag[2] + b[1]*idiag[6] + b[2]*idiag[10] + b[3]*idiag[14];
213:       x[3] = b[0]*idiag[3] + b[1]*idiag[7] + b[2]*idiag[11] + b[3]*idiag[15];
214:       i2     = 4;
215:       idiag += 16;
216:       for (i=1; i<m; i++) {
217:         v1     = v10 + 4*ai[i];
218:         v2     = v20 + 4*ai[i];
219:         v3     = v30 + 4*ai[i];
220:         v4     = v40 + 4*ai[i];
221:         vi    = aj + ai[i];
222:         nz    = diag[i] - ai[i];
223:         s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3];
224:         while (nz--) {
225:           idx  = 4*(*vi++);
226:           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx];
227:           s1  -= v1[0]*x1 + v1[1]*x2 + v1[2]*x3 + v1[3]*x4;
228:           s2  -= v2[0]*x1 + v2[1]*x2 + v2[2]*x3 + v2[3]*x4;
229:           s3  -= v3[0]*x1 + v3[1]*x2 + v3[2]*x3 + v3[3]*x4;
230:           s4  -= v4[0]*x1 + v4[1]*x2 + v4[2]*x3 + v4[3]*x4;
231:           v1 += 4; v2 += 4; v3 += 4; v4 += 4;
232:         }
233:         x[i2]   = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3  + idiag[12]*s4;
234:         x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3  + idiag[13]*s4;
235:         x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4;
236:         x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4;
237:         idiag   += 16;
238:         i2      += 4;
239:       }
240:       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
241:       PetscLogFlops(16.0*(a->nz));
242:     }
243:     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
244:         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
245:       i2    = 0;
246:       mdiag = a->idiag+16*a->mbs;
247:       for (i=0; i<m; i++) {
248:         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3];
249:         x[i2]   = mdiag[0]*x1 + mdiag[4]*x2 + mdiag[8]*x3  + mdiag[12]*x4;
250:         x[i2+1] = mdiag[1]*x1 + mdiag[5]*x2 + mdiag[9]*x3  + mdiag[13]*x4;
251:         x[i2+2] = mdiag[2]*x1 + mdiag[6]*x2 + mdiag[10]*x3 + mdiag[14]*x4;
252:         x[i2+3] = mdiag[3]*x1 + mdiag[7]*x2 + mdiag[11]*x3 + mdiag[15]*x4;
253:         mdiag  += 16;
254:         i2     += 4;
255:       }
256:       PetscLogFlops(28.0*m);
257:     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
258:       PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));
259:     }
260:     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
261:       idiag   = a->idiag+16*a->mbs - 16;
262:       i2      = 4*m - 4;
263:       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3];
264:       x[i2]   = idiag[0]*x1 + idiag[4]*x2 + idiag[8]*x3  + idiag[12]*x4;
265:       x[i2+1] = idiag[1]*x1 + idiag[5]*x2 + idiag[9]*x3  + idiag[13]*x4;
266:       x[i2+2] = idiag[2]*x1 + idiag[6]*x2 + idiag[10]*x3 + idiag[14]*x4;
267:       x[i2+3] = idiag[3]*x1 + idiag[7]*x2 + idiag[11]*x3 + idiag[15]*x4;
268:       idiag -= 16;
269:       i2    -= 4;
270:       for (i=m-2; i>=0; i--) {
271:         v1    = v10 + 4*(ai[i+1]-1);
272:         v2    = v20 + 4*(ai[i+1]-1);
273:         v3    = v30 + 4*(ai[i+1]-1);
274:         v4    = v40 + 4*(ai[i+1]-1);
275:         vi    = aj + ai[i+1] - 1;
276:         nz    = ai[i+1] - diag[i] - 1;
277:         s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3];
278:         while (nz--) {
279:           idx  = 4*(*vi--);
280:           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx];
281:           s1  -= v1[3]*x4 + v1[2]*x3 + v1[1]*x2 + v1[0]*x1;
282:           s2  -= v2[3]*x4 + v2[2]*x3 + v2[1]*x2 + v2[0]*x1;
283:           s3  -= v3[3]*x4 + v3[2]*x3 + v3[1]*x2 + v3[0]*x1;
284:           s4  -= v4[3]*x4 + v4[2]*x3 + v4[1]*x2 + v4[0]*x1;
285:           v1 -= 4; v2 -= 4; v3 -= 4; v4 -= 4;
286:         }
287:         x[i2]   = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3  + idiag[12]*s4;
288:         x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3  + idiag[13]*s4;
289:         x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4;
290:         x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4;
291:         idiag   -= 16;
292:         i2      -= 4;
293:       }
294:       PetscLogFlops(16.0*(a->nz));
295:     }
296:   } else {
297:     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
298:   }
299:   VecRestoreArray(xx,&x);
300:   VecRestoreArray(bb,(PetscScalar**)&b);
301:   return(0);
302: }
303: /*=========================================================*/
304: /*=========================================================*/
307: PetscErrorCode MatSOR_SeqBSTRM_5(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
308: {
309:   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
310:   PetscScalar        *x,x1,x2,x3,x4,x5,s1,s2,s3,s4,s5;
311:   const MatScalar    *idiag,*mdiag;
312:   const PetscScalar  *b;
313:   PetscErrorCode     ierr;
314:   PetscInt           m = a->mbs,i,i2,nz,idx;
315:   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;

317:   Mat_SeqBSTRM      *bstrm = (Mat_SeqBSTRM *)A->spptr;
318:   MatScalar         *v1, *v2, *v3, *v4, *v5, *v10, *v20, *v30, *v40, *v50;
319:   PetscInt slen;

322:   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
323:   its = its*lits;
324:   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
325:   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
326:   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
327:   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
328:   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");

330:   if (!a->idiagvalid){MatInvertBlockDiagonal(A,PETSC_NULL);}

332:   diag  = a->diag;
333:   idiag = a->idiag;
334:   VecGetArray(xx,&x);
335:   VecGetArray(bb,(PetscScalar**)&b);

337:   slen = 5*(ai[m]-ai[0]);
338:   v10 = bstrm->as;
339:   v20 = v10 + slen;
340:   v30 = v20 + slen;
341:   v40 = v30 + slen;
342:   v50 = v40 + slen;

344:   if (flag & SOR_ZERO_INITIAL_GUESS) {
345:     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
346:       x[0] = b[0]*idiag[0] + b[1]*idiag[5] + b[2]*idiag[10] + b[3]*idiag[15] + b[4]*idiag[20];
347:       x[1] = b[0]*idiag[1] + b[1]*idiag[6] + b[2]*idiag[11] + b[3]*idiag[16] + b[4]*idiag[21];
348:       x[2] = b[0]*idiag[2] + b[1]*idiag[7] + b[2]*idiag[12] + b[3]*idiag[17] + b[4]*idiag[22];
349:       x[3] = b[0]*idiag[3] + b[1]*idiag[8] + b[2]*idiag[13] + b[3]*idiag[18] + b[4]*idiag[23];
350:       x[4] = b[0]*idiag[4] + b[1]*idiag[9] + b[2]*idiag[14] + b[3]*idiag[19] + b[4]*idiag[24];
351:       i2     = 5;
352:       idiag += 25;
353:       for (i=1; i<m; i++) {
354:         v1     = v10 + 5*ai[i];
355:         v2     = v20 + 5*ai[i];
356:         v3     = v30 + 5*ai[i];
357:         v4     = v40 + 5*ai[i];
358:         v5     = v50 + 5*ai[i];
359:         vi    = aj + ai[i];
360:         nz    = diag[i] - ai[i];
361:         s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4];
362:         while (nz--) {
363:           idx  = 5*(*vi++);
364:           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx];
365:           s1  -= v1[0]*x1 + v1[1]*x2 + v1[2]*x3 + v1[3]*x4 + v1[4]*x5;
366:           s2  -= v2[0]*x1 + v2[1]*x2 + v2[2]*x3 + v2[3]*x4 + v2[4]*x5;
367:           s3  -= v3[0]*x1 + v3[1]*x2 + v3[2]*x3 + v3[3]*x4 + v3[4]*x5;
368:           s4  -= v4[0]*x1 + v4[1]*x2 + v4[2]*x3 + v4[3]*x4 + v4[4]*x5;
369:           s5  -= v5[0]*x1 + v5[1]*x2 + v5[2]*x3 + v5[3]*x4 + v5[4]*x5;
370:           v1 += 5; v2 += 5; v3 += 5; v4 += 5; v5 += 5;
371:         }
372:         x[i2]   = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5;
373:         x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5;
374:         x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5;
375:         x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5;
376:         x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5;
377:         idiag   += 25;
378:         i2      += 5;
379:       }
380:       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
381:       PetscLogFlops(25.0*(a->nz));
382:     }
383:     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
384:         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
385:       i2    = 0;
386:       mdiag = a->idiag+25*a->mbs;
387:       for (i=0; i<m; i++) {
388:         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4];
389:         x[i2]   = mdiag[0]*x1 + mdiag[5]*x2 + mdiag[10]*x3 + mdiag[15]*x4 + mdiag[20]*x5;
390:         x[i2+1] = mdiag[1]*x1 + mdiag[6]*x2 + mdiag[11]*x3 + mdiag[16]*x4 + mdiag[21]*x5;
391:         x[i2+2] = mdiag[2]*x1 + mdiag[7]*x2 + mdiag[12]*x3 + mdiag[17]*x4 + mdiag[22]*x5;
392:         x[i2+3] = mdiag[3]*x1 + mdiag[8]*x2 + mdiag[13]*x3 + mdiag[18]*x4 + mdiag[23]*x5;
393:         x[i2+4] = mdiag[4]*x1 + mdiag[9]*x2 + mdiag[14]*x3 + mdiag[19]*x4 + mdiag[24]*x5;
394:         mdiag  += 25;
395:         i2     += 5;
396:       }
397:       PetscLogFlops(45.0*m);
398:     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
399:       PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));
400:     }
401:     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
402:       idiag   = a->idiag+25*a->mbs - 25;
403:       i2      = 5*m - 5;
404:       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4];
405:       x[i2]   = idiag[0]*x1 + idiag[5]*x2 + idiag[10]*x3 + idiag[15]*x4 + idiag[20]*x5;
406:       x[i2+1] = idiag[1]*x1 + idiag[6]*x2 + idiag[11]*x3 + idiag[16]*x4 + idiag[21]*x5;
407:       x[i2+2] = idiag[2]*x1 + idiag[7]*x2 + idiag[12]*x3 + idiag[17]*x4 + idiag[22]*x5;
408:       x[i2+3] = idiag[3]*x1 + idiag[8]*x2 + idiag[13]*x3 + idiag[18]*x4 + idiag[23]*x5;
409:       x[i2+4] = idiag[4]*x1 + idiag[9]*x2 + idiag[14]*x3 + idiag[19]*x4 + idiag[24]*x5;
410:       idiag -= 25;
411:       i2    -= 5;
412:       for (i=m-2; i>=0; i--) {
413:         v1    = v10 + 5*(ai[i+1]-1);
414:         v2    = v20 + 5*(ai[i+1]-1);
415:         v3    = v30 + 5*(ai[i+1]-1);
416:         v4    = v40 + 5*(ai[i+1]-1);
417:         v5    = v50 + 5*(ai[i+1]-1);
418:         vi    = aj + ai[i+1] - 1;
419:         nz    = ai[i+1] - diag[i] - 1;
420:         s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4];
421:         while (nz--) {
422:           idx  = 5*(*vi--);
423:           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx];
424:           s1  -= v1[4]*x5 + v1[3]*x4 + v1[2]*x3 + v1[1]*x2 + v1[0]*x1;
425:           s2  -= v2[4]*x5 + v2[3]*x4 + v2[2]*x3 + v2[1]*x2 + v2[0]*x1;
426:           s3  -= v3[4]*x5 + v3[3]*x4 + v3[2]*x3 + v3[1]*x2 + v3[0]*x1;
427:           s4  -= v4[4]*x5 + v4[3]*x4 + v4[2]*x3 + v4[1]*x2 + v4[0]*x1;
428:           s5  -= v5[4]*x5 + v5[3]*x4 + v5[2]*x3 + v5[1]*x2 + v5[0]*x1;
429:           v1 -= 5; v2 -= 5; v3 -= 5; v4 -= 5; v5 -= 5;
430:         }
431:         x[i2]   = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5;
432:         x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5;
433:         x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5;
434:         x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5;
435:         x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5;
436:         idiag   -= 25;
437:         i2      -= 5;
438:       }
439:       PetscLogFlops(25.0*(a->nz));
440:     }
441:   } else {
442:     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
443:   }
444:   VecRestoreArray(xx,&x);
445:   VecRestoreArray(bb,(PetscScalar**)&b);
446:   return(0);
447: }
448: /*=========================================================*/
449: /*=========================================================*/
452: PetscErrorCode MatMult_SeqBSTRM_4(Mat A,Vec xx,Vec zz)
453: {
454:   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
455:   Mat_SeqBSTRM      *bstrm = (Mat_SeqBSTRM *)A->spptr;
456:   PetscScalar       *z = 0,sum1,sum2,sum3,sum4,x1,x2,x3,x4,*zarray;
457:   const PetscScalar *x,*xb;
458:   const MatScalar   *v1, *v2, *v3, *v4;
459:   PetscErrorCode    ierr;
460:   PetscInt          mbs,i,*idx,*ii,j,n,*ridx=PETSC_NULL,nonzerorow=0;
461:   PetscBool         usecprow=a->compressedrow.use;
462:   PetscInt slen;

465:   VecGetArray(xx,(PetscScalar**)&x);
466:   VecGetArray(zz,&zarray);

468:   idx = a->j;

470:   if (usecprow){
471:     mbs  = a->compressedrow.nrows;
472:     ii   = a->compressedrow.i;
473:     ridx = a->compressedrow.rindex;
474:   } else {
475:     mbs = a->mbs;
476:     ii  = a->i;
477:     z   = zarray;
478:   }
479: 
480:   slen = 4*(ii[mbs]-ii[0]);
481:   v1 = bstrm->as;
482:   v2 = v1 + slen;
483:   v3 = v2 + slen;
484:   v4 = v3 + slen;

486:   for (i=0; i<mbs; i++) {
487:     n  = ii[1] - ii[0]; ii++;
488:     sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0;
489:     nonzerorow += (n>0);
490:     for (j=0; j<n; j++) {
491:       xb = x + 4*(*idx++);
492:       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
493:       sum1 += v1[0]*x1 + v1[1]*x2 + v1[2]*x3  + v1[3]*x4;
494:       sum2 += v2[0]*x1 + v2[1]*x2 + v2[2]*x3  + v2[3]*x4;
495:       sum3 += v3[0]*x1 + v3[1]*x2 + v3[2]*x3  + v3[3]*x4;
496:       sum4 += v4[0]*x1 + v4[1]*x2 + v4[2]*x3  + v4[3]*x4;
497:       v1 += 4; v2 += 4; v3 += 4; v4 += 4;
498:     }
499:     if (usecprow) z = zarray + 4*ridx[i];
500:     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4;
501:     if (!usecprow) z += 4;
502:   }
503:   VecRestoreArray(xx,(PetscScalar**)&x);
504:   VecRestoreArray(zz,&zarray);
505:   PetscLogFlops(32*a->nz - 4*nonzerorow);
506:   return(0);
507: }
508: /*=========================================================*/
511: PetscErrorCode MatMult_SeqBSTRM_5(Mat A,Vec xx,Vec zz)
512: {
513:   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
514:   Mat_SeqBSTRM      *bstrm = (Mat_SeqBSTRM *)A->spptr;
515:   PetscScalar       *z = 0,sum1,sum2,sum3,sum4,sum5,x1,x2,x3,x4,x5,*zarray;
516:   const PetscScalar *x,*xb;
517:   const MatScalar   *v1, *v2, *v3, *v4, *v5;
518:   PetscErrorCode    ierr;
519:   PetscInt          mbs,i,*idx,*ii,j,n,*ridx=PETSC_NULL,nonzerorow=0;
520:   PetscBool         usecprow=a->compressedrow.use;
521:   PetscInt slen;

524:   VecGetArray(xx,(PetscScalar**)&x);
525:   VecGetArray(zz,&zarray);

527:   idx = a->j;

529:   if (usecprow){
530:     mbs  = a->compressedrow.nrows;
531:     ii   = a->compressedrow.i;
532:     ridx = a->compressedrow.rindex;
533:   } else {
534:     mbs = a->mbs;
535:     ii  = a->i;
536:     z   = zarray;
537:   }
538: 
539:   slen = 5*(ii[mbs]-ii[0]);
540:   v1 = bstrm->as;
541:   v2 = v1 + slen;
542:   v3 = v2 + slen;
543:   v4 = v3 + slen;
544:   v5 = v4 + slen;

546:   for (i=0; i<mbs; i++) {
547:     n  = ii[1] - ii[0]; ii++;
548:     sum1 = sum2 = sum3 = sum4 = sum5 = 0.0;
549:     nonzerorow += (n>0);
550:     PetscPrefetchBlock(idx+n,n,0,PETSC_PREFETCH_HINT_NTA); /* Indices for the next row (assumes same size as this one) */
551:     PetscPrefetchBlock(v1+5*n,5*n,0,PETSC_PREFETCH_HINT_NTA); /* Entries for the next row */
552:     PetscPrefetchBlock(v2+5*n,5*n,0,PETSC_PREFETCH_HINT_NTA); /* Entries for the next row */
553:     PetscPrefetchBlock(v3+5*n,5*n,0,PETSC_PREFETCH_HINT_NTA); /* Entries for the next row */
554:     PetscPrefetchBlock(v4+5*n,5*n,0,PETSC_PREFETCH_HINT_NTA); /* Entries for the next row */
555:     PetscPrefetchBlock(v5+5*n,5*n,0,PETSC_PREFETCH_HINT_NTA); /* Entries for the next row */


558:     for (j=0; j<n; j++) {
559:       xb = x + 5*(*idx++);
560:       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4];
561:       sum1 += v1[0]*x1 + v1[1]*x2 + v1[2]*x3 + v1[3]*x4 + v1[4]*x5;
562:       sum2 += v2[0]*x1 + v2[1]*x2 + v2[2]*x3 + v2[3]*x4 + v2[4]*x5;
563:       sum3 += v3[0]*x1 + v3[1]*x2 + v3[2]*x3 + v3[3]*x4 + v3[4]*x5;
564:       sum4 += v4[0]*x1 + v4[1]*x2 + v4[2]*x3 + v4[3]*x4 + v4[4]*x5;
565:       sum5 += v5[0]*x1 + v5[1]*x2 + v5[2]*x3 + v5[3]*x4 + v5[4]*x5;
566:       v1 += 5; v2 += 5; v3 += 5; v4 += 5; v5 += 5;
567:     }
568:     if (usecprow) z = zarray + 5*ridx[i];
569:     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5;
570:     if (!usecprow) z += 5;
571:   }
572:   VecRestoreArray(xx,(PetscScalar**)&x);
573:   VecRestoreArray(zz,&zarray);
574:   PetscLogFlops(50.0*a->nz - 5.0*nonzerorow);
575:   return(0);
576: }
577: /*=========================================================*/

579: /*=========================================================*/
582: PetscErrorCode MatMultTranspose_SeqBSTRM_4(Mat A,Vec xx,Vec zz)
583: {
584:   Mat_SeqBAIJ      *a = (Mat_SeqBAIJ*)A->data;
585:   Mat_SeqBSTRM     *sbstrm = (Mat_SeqBSTRM *)A->spptr;
586:   PetscScalar       zero = 0.0;
587:   PetscScalar       x1,x2,x3,x4;
588:   PetscScalar       *x, *xb, *z;
589:   MatScalar         *v1, *v2, *v3, *v4;
590:   PetscErrorCode    ierr;
591:   PetscInt       mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j;
592:   PetscInt       nonzerorow=0;
593:   PetscInt slen;

596:   VecSet(zz,zero);
597:   VecGetArray(xx,&x);
598:   VecGetArray(zz,&z);

600:   slen = 4*(ai[mbs]-ai[0]);
601:   v1 = sbstrm->as;
602:   v2 = v1 + slen;
603:   v3 = v2 + slen;
604:   v4 = v3 + slen;
605:   xb = x;

607:   for (i=0; i<mbs; i++) {
608:     n  = ai[i+1] - ai[i];
609:     x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];  xb += 4;
610:     nonzerorow += (n>0);
611:     ib = aj + ai[i];
612: 
613:     for (j=0; j<n; j++) {
614:       cval       = ib[j]*4;
615:       z[cval]     += v1[0]*x1 + v2[0]*x2 + v3[0]*x3 + v4[0]*x4;
616:       z[cval+1]   += v1[1]*x1 + v2[1]*x2 + v3[1]*x3 + v4[1]*x4;
617:       z[cval+2]   += v1[2]*x1 + v2[2]*x2 + v3[2]*x3 + v4[2]*x4;
618:       z[cval+3]   += v1[3]*x1 + v2[3]*x2 + v3[3]*x3 + v4[3]*x4;
619:       v1 += 4; v2 += 4; v3 += 4; v4 += 4;
620:     }
621: 
622:   }
623:   VecRestoreArray(xx,&x);
624:   VecRestoreArray(zz,&z);
625:   PetscLogFlops(32*a->nz - 4*nonzerorow);
626:   return(0);
627: }
628: /*=========================================================*/
631: PetscErrorCode MatMultTranspose_SeqBSTRM_5(Mat A,Vec xx,Vec zz)
632: {
633:   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
634:   Mat_SeqBSTRM      *sbstrm = (Mat_SeqBSTRM *)A->spptr;
635:   PetscScalar       zero = 0.0;
636:   PetscScalar       *z = 0;
637:   PetscScalar       *x,*xb;
638:   const MatScalar   *v1, *v2, *v3, *v4, *v5;
639:   PetscScalar       x1, x2, x3, x4, x5;
640:   PetscErrorCode    ierr;
641:   PetscInt       mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j;
642:   PetscInt       nonzerorow=0;
643:   PetscInt slen;

646:   VecSet(zz,zero);
647:   VecGetArray(xx,&x);
648:   VecGetArray(zz,&z);

650:   slen = 5*(ai[mbs]-ai[0]);

652:   v1 = sbstrm->as;
653:   v2 = v1 + slen;
654:   v3 = v2 + slen;
655:   v4 = v3 + slen;
656:   v5 = v4 + slen;

658:   xb = x;

660:   for (i=0; i<mbs; i++) {
661:     n  = ai[i+1] - ai[i];
662:     nonzerorow += (n>0);

664:     x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; xb += 5;
665:     ib = aj + ai[i];

667:     PetscPrefetchBlock(ib+n,n,0,PETSC_PREFETCH_HINT_NTA); /* Indices for the next row (assumes same size as this one) */
668:     PetscPrefetchBlock(v1+5*n,5*n,0,PETSC_PREFETCH_HINT_NTA); /* Entries for the next row */
669:     PetscPrefetchBlock(v2+5*n,5*n,0,PETSC_PREFETCH_HINT_NTA); /* Entries for the next row */
670:     PetscPrefetchBlock(v3+5*n,5*n,0,PETSC_PREFETCH_HINT_NTA); /* Entries for the next row */
671:     PetscPrefetchBlock(v4+5*n,5*n,0,PETSC_PREFETCH_HINT_NTA); /* Entries for the next row */
672:     PetscPrefetchBlock(v5+5*n,5*n,0,PETSC_PREFETCH_HINT_NTA); /* Entries for the next row */

674: 
675:     for (j=0; j<n; j++) {
676:       cval       = ib[j]*5;
677:       z[cval]   += v1[0]*x1 + v2[0]*x2 + v3[0]*x3 + v4[0]*x4 + v5[0]*x5;
678:       z[cval+1] += v1[1]*x1 + v2[1]*x2 + v3[1]*x3 + v4[1]*x4 + v5[1]*x5;
679:       z[cval+2] += v1[2]*x1 + v2[2]*x2 + v3[2]*x3 + v4[2]*x4 + v5[2]*x5;
680:       z[cval+3] += v1[3]*x1 + v2[3]*x2 + v3[3]*x3 + v4[3]*x4 + v5[3]*x5;
681:       z[cval+4] += v1[4]*x1 + v2[4]*x2 + v3[4]*x3 + v4[4]*x4 + v5[4]*x5;
682:       v1 += 5; v2 += 5; v3 += 5; v4 += 5; v5 += 5;
683:     }
684:   }
685:   VecRestoreArray(xx,&x);
686:   VecRestoreArray(zz,&z);
687:   PetscLogFlops(50.0*a->nz - 5.0*nonzerorow);
688:   return(0);
689: }
690: /*=========================================================*/
693: PetscErrorCode MatMultAdd_SeqBSTRM_4(Mat A,Vec xx,Vec yy,Vec zz)
694: {
695:   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ*)A->data;
696:   Mat_SeqBSTRM    *bstrm = (Mat_SeqBSTRM *)A->spptr;
697:   PetscScalar    *x,*y = 0,*z = 0,*xb,sum1,sum2,sum3,sum4,x1,x2,x3,x4,*yarray,*zarray;
698:   MatScalar      *v1, *v2, *v3, *v4;
700:   PetscInt       mbs=a->mbs,i,*idx,*ii,j,n,*ridx=PETSC_NULL;
701:   PetscBool      usecprow=a->compressedrow.use;
702:   PetscInt slen;

705:   VecGetArray(xx,&x);
706:   VecGetArray(yy,&yarray);
707:   if (zz != yy) {
708:     VecGetArray(zz,&zarray);
709:   } else {
710:     zarray = yarray;
711:   }

713:   idx   = a->j;
714:   if (usecprow){
715:     if (zz != yy){
716:       PetscMemcpy(zarray,yarray,4*mbs*sizeof(PetscScalar));
717:     }
718:     mbs  = a->compressedrow.nrows;
719:     ii   = a->compressedrow.i;
720:     ridx = a->compressedrow.rindex;
721:   } else {
722:     ii  = a->i;
723:     y   = yarray;
724:     z   = zarray;
725:   }

727:   slen = 4*(ii[mbs]-ii[0]);
728:   v1 = bstrm->as;
729:   v2 = v1 + slen;
730:   v3 = v2 + slen;
731:   v4 = v3 + slen;

733:   for (i=0; i<mbs; i++) {
734:     n  = ii[1] - ii[0]; ii++;
735:     if (usecprow){
736:       z = zarray + 4*ridx[i];
737:       y = yarray + 4*ridx[i];
738:     }
739:     sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3];
740:     for (j=0; j<n; j++) {
741:       xb = x + 4*(*idx++);
742:       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
743:       sum1 += v1[0]*x1 + v1[1]*x2 + v1[2]*x3  + v1[3]*x4;
744:       sum2 += v2[0]*x1 + v2[1]*x2 + v2[2]*x3  + v2[3]*x4;
745:       sum3 += v3[0]*x1 + v3[1]*x2 + v3[2]*x3  + v3[3]*x4;
746:       sum4 += v4[0]*x1 + v4[1]*x2 + v4[2]*x3  + v4[3]*x4;
747:       v1 += 4; v2 += 4; v3 += 4; v4 += 4;
748:     }
749:     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4;
750:     if (!usecprow){
751:       z += 4; y += 4;
752:     }
753:   }
754:   VecRestoreArray(xx,&x);
755:   VecRestoreArray(yy,&yarray);
756:   if (zz != yy) {
757:     VecRestoreArray(zz,&zarray);
758:   }
759:   PetscLogFlops(32.0*a->nz);
760:   return(0);
761: }

763: /*=========================================================*/
766: PetscErrorCode MatMultAdd_SeqBSTRM_5(Mat A,Vec xx,Vec yy,Vec zz)
767: {
768:   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
769:   Mat_SeqBSTRM   *bstrm = (Mat_SeqBSTRM *)A->spptr;
770:   PetscScalar    *x,*y = 0,*z = 0,*xb,sum1,sum2,sum3,sum4,sum5,x1,x2,x3,x4,x5;
771:   PetscScalar    *yarray,*zarray;
772:   MatScalar      *v1,*v2,*v3,*v4,*v5;
774:   PetscInt       mbs=a->mbs,i,*idx,*ii,j,n,*ridx=PETSC_NULL;
775:   PetscBool      usecprow=a->compressedrow.use;
776:   PetscInt slen;

779:   VecGetArray(xx,&x);
780:   VecGetArray(yy,&yarray);
781:   if (zz != yy) {
782:     VecGetArray(zz,&zarray);
783:   } else {
784:     zarray = yarray;
785:   }


788:   idx = a->j;
789:   if (usecprow){
790:     if (zz != yy){
791:       PetscMemcpy(zarray,yarray,5*mbs*sizeof(PetscScalar));
792:     }
793:     mbs  = a->compressedrow.nrows;
794:     ii   = a->compressedrow.i;
795:     ridx = a->compressedrow.rindex;
796:   } else {
797:     ii  = a->i;
798:     y   = yarray;
799:     z   = zarray;
800:   }

802:   slen = 5*(ii[mbs]-ii[0]);
803:   v1 = bstrm->as;
804:   v2 = v1 + slen;
805:   v3 = v2 + slen;
806:   v4 = v3 + slen;
807:   v5 = v4 + slen;


810:   for (i=0; i<mbs; i++) {
811:     n  = ii[1] - ii[0]; ii++;
812:     if (usecprow){
813:       z = zarray + 5*ridx[i];
814:       y = yarray + 5*ridx[i];
815:     }
816:     sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3]; sum5 = y[4];
817:     PetscPrefetchBlock(idx+n,n,0,PETSC_PREFETCH_HINT_NTA); /* Indices for the next row (assumes same size as this one) */
818:     PetscPrefetchBlock(v1+5*n,5*n,0,PETSC_PREFETCH_HINT_NTA); /* Entries for the next row */
819:     PetscPrefetchBlock(v2+5*n,5*n,0,PETSC_PREFETCH_HINT_NTA); /* Entries for the next row */
820:     PetscPrefetchBlock(v3+5*n,5*n,0,PETSC_PREFETCH_HINT_NTA); /* Entries for the next row */
821:     PetscPrefetchBlock(v4+5*n,5*n,0,PETSC_PREFETCH_HINT_NTA); /* Entries for the next row */
822:     PetscPrefetchBlock(v5+5*n,5*n,0,PETSC_PREFETCH_HINT_NTA); /* Entries for the next row */


825:     for (j=0; j<n; j++) {
826:       xb = x + 5*(*idx++);
827:       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4];
828:       sum1 += v1[0]*x1 + v1[1]*x2 + v1[2]*x3 + v1[3]*x4 + v1[4]*x5;
829:       sum2 += v2[0]*x1 + v2[1]*x2 + v2[2]*x3 + v2[3]*x4 + v2[4]*x5;
830:       sum3 += v3[0]*x1 + v3[1]*x2 + v3[2]*x3 + v3[3]*x4 + v3[4]*x5;
831:       sum4 += v4[0]*x1 + v4[1]*x2 + v4[2]*x3 + v4[3]*x4 + v4[4]*x5;
832:       sum5 += v5[0]*x1 + v5[1]*x2 + v5[2]*x3 + v5[3]*x4 + v5[4]*x5;
833:       v1 += 5; v2 += 5; v3 += 5; v4 += 5; v5 += 5;
834:     }
835:     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5;
836:     if (!usecprow){
837:       z += 5; y += 5;
838:     }
839:   }
840:   VecRestoreArray(xx,&x);
841:   VecRestoreArray(yy,&yarray);
842:   if (zz != yy) {
843:     VecRestoreArray(zz,&zarray);
844:   }
845:   PetscLogFlops(50.0*a->nz);
846:   return(0);
847: }

849: /*=========================================================*/
852: PetscErrorCode MatSeqBSTRM_create_bstrm(Mat A)
853: {
854:   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ *) A->data;
855:   Mat_SeqBSTRM   *bstrm = (Mat_SeqBSTRM*) A->spptr;
856:   PetscInt       MROW = a->mbs, bs = A->rmap->bs;
857:   PetscInt       *ai = a->i;
858:   PetscInt       i,j,k;
859:   MatScalar      *aa = a->a;
861:   PetscInt  bs2, rbs,  cbs, blen, slen;
862:   PetscScalar **asp;

865:   bstrm->rbs = bstrm->cbs = bs;

867:   rbs = cbs = bs;
868:   bs2 = rbs*cbs;
869:   blen = ai[MROW]-ai[0];
870:   slen = blen*cbs;

872:   PetscMalloc(bs2*blen*sizeof(PetscScalar), &bstrm->as);

874:   PetscMalloc(rbs*sizeof(PetscScalar *), &asp);
875: 
876:   for(i=0;i<rbs;i++) asp[i] = bstrm->as + i*slen;
877: 
878:   for (k=0; k<blen; k++) {
879:     for (j=0; j<cbs; j++)
880:     for (i=0; i<rbs; i++)
881:         asp[i][k*cbs+j] = aa[k*bs2+j*rbs+i];
882:   }

884:   PetscFree(asp);

886:   switch (bs){
887:     case 4:
888:        A->ops->mult          = MatMult_SeqBSTRM_4;
889:        A->ops->multadd       = MatMultAdd_SeqBSTRM_4;
890:        A->ops->multtranspose = MatMultTranspose_SeqBSTRM_4;
891:        A->ops->sor           = MatSOR_SeqBSTRM_4;
892:        break;
893:     case 5:
894:        A->ops->mult          = MatMult_SeqBSTRM_5;
895:        A->ops->multadd       = MatMultAdd_SeqBSTRM_5;
896:        A->ops->multtranspose = MatMultTranspose_SeqBSTRM_5;
897:        A->ops->sor           = MatSOR_SeqBSTRM_5;
898:        break;
899:     default:
900:       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"not supported for block size %D",bs);
901:   }
902:   return(0);
903: }
904: /*=========================================================*/
905: /*=========================================================*/
906: EXTERN_C_BEGIN
909: PetscErrorCode MatSeqBSTRMTransform(Mat A)
910: {
912:     MatSeqBSTRM_convert_bstrm(A);
913:   return(0);
914: }
915: EXTERN_C_END