Actual source code: bstream.c

petsc-3.7.3 2016-08-01
Report Typos and Errors
  1: #define PETSCMAT_DLL

  3: #include <../src/mat/impls/baij/seq/baij.h>
  4: #include <../src/mat/impls/baij/seq/bstream/bstream.h>

  8: PetscErrorCode MatDestroy_SeqBSTRM(Mat A)
  9: {
 11:   Mat_SeqBSTRM   *bstrm = (Mat_SeqBSTRM*) A->spptr;

 13:   if (bstrm) {
 14:     PetscFree(bstrm->as);
 15:   }
 16:   PetscObjectChangeTypeName((PetscObject)A, MATSEQBAIJ);
 17:   MatDestroy_SeqBAIJ(A);
 18:   return(0);
 19: }
 20: /*=========================================================*/
 21: PetscErrorCode MatDuplicate_SeqBSTRM(Mat A, MatDuplicateOption op, Mat *M)
 22: {
 24:   SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot duplicate STRM matrices yet");
 25:   return(0);
 26: }
 27: /*=========================================================*/
 28: extern PetscErrorCode MatSolve_SeqBSTRM_4(Mat A,Vec bb,Vec xx);
 29: extern PetscErrorCode MatSolve_SeqBSTRM_5(Mat A,Vec bb,Vec xx);


 32: /*=========================================================*/
 35: PetscErrorCode MatSeqBSTRM_convert_bstrm(Mat A)
 36: {
 37:   Mat_SeqBAIJ    *a     = (Mat_SeqBAIJ*) A->data;
 38:   Mat_SeqBSTRM   *bstrm = (Mat_SeqBSTRM*) A->spptr;
 39:   PetscInt       m      = a->mbs, bs = A->rmap->bs;
 40:   PetscInt       *ai    = a->i, *diag = a->diag;
 41:   PetscInt       i,j,ib,jb;
 42:   MatScalar      *aa = a->a;
 44:   PetscInt       bs2, rbs,  cbs, blen, slen;
 45:   PetscScalar    **asp;

 48:   bstrm->rbs = bs;
 49:   bstrm->cbs = bs;


 52:   rbs  = cbs = bs;
 53:   bs2  = rbs*cbs;
 54:   blen = ai[m]-ai[0]+diag[0] - diag[m];
 55:   slen = blen*cbs;

 57:   PetscFree(bstrm->as);
 58:   PetscMalloc1(bs2*blen, &bstrm->as);

 60:   PetscMalloc1(rbs, &asp);

 62:   for (i=0; i<rbs; i++) asp[i] = bstrm->as + i*slen;

 64:   for (j=0;j<blen;j++) {
 65:     for (jb=0; jb<cbs; jb++) {
 66:       for (ib=0; ib<rbs; ib++) asp[ib][j*cbs+jb] = aa[j*bs2+jb*rbs+ib];
 67:     }
 68:   }

 70:   PetscFree(asp);
 71:   switch (bs) {
 72:   case 4:
 73:     A->ops->solve = MatSolve_SeqBSTRM_4;
 74:     break;
 75:   case 5:
 76:     A->ops->solve = MatSolve_SeqBSTRM_5;
 77:     break;
 78:   default:
 79:     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"not supported for block size %D",bs);
 80:   }
 81:   return(0);
 82: }

 84: /*=========================================================*/
 85: extern PetscErrorCode MatSeqBSTRM_create_bstrm(Mat);

 89: PetscErrorCode MatAssemblyEnd_SeqBSTRM(Mat A, MatAssemblyType mode)
 90: {

 94:   if (mode == MAT_FLUSH_ASSEMBLY) return(0);
 95:   MatAssemblyEnd_SeqBAIJ(A, mode);

 97:   MatSeqBSTRM_create_bstrm(A);
 98:   return(0);
 99: }
100: /*=========================================================*/
103: PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBSTRM(Mat A,MatType type,MatReuse reuse,Mat *newmat)
104: {
106:   Mat            B = *newmat;
107:   Mat_SeqBSTRM   *bstrm;

110:   if (reuse == MAT_INITIAL_MATRIX) {
111:     MatDuplicate(A,MAT_COPY_VALUES,&B);
112:   }


115:   PetscNewLog(B,&bstrm);
116:   B->spptr = (void*) bstrm;

118:   /* Set function pointers for methods that we inherit from BAIJ but override. */
119:   B->ops->duplicate   = MatDuplicate_SeqBSTRM;
120:   B->ops->assemblyend = MatAssemblyEnd_SeqBSTRM;
121:   B->ops->destroy     = MatDestroy_SeqBSTRM;

123:   /* If A has already been assembled, compute the permutation. */
124:   if (A->assembled) {
125:     MatSeqBSTRM_create_bstrm(B);
126:   }
127:   PetscObjectChangeTypeName((PetscObject)B,MATSEQBSTRM);
128:   *newmat = B;
129:   return(0);
130: }

132: /*=========================================================*/
135: PetscErrorCode MatCreateSeqBSTRM(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
136: {

140:   MatCreate(comm,A);
141:   MatSetSizes(*A,m,n,m,n);
142:   MatSetType(*A,MATSEQBSTRM);
143:   MatSeqBAIJSetPreallocation_SeqBAIJ(*A,bs,nz,(PetscInt*)nnz);
144:   (*A)->rmap->bs = bs;
145:   return(0);
146: }
147: /*=========================================================*/

151: PETSC_EXTERN PetscErrorCode MatCreate_SeqBSTRM(Mat A)
152: {

156:   MatSetType(A,MATSEQBAIJ);
157:   MatConvert_SeqBAIJ_SeqBSTRM(A,MATSEQBSTRM,MAT_INPLACE_MATRIX,&A);

159:   PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqbstrm_C",MatConvert_SeqBAIJ_SeqBSTRM);
160:   return(0);
161: }

163: /*=========================================================*/

165: /*=========================================================*/
168: PetscErrorCode MatSOR_SeqBSTRM_4(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
169: {
170:   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
171:   PetscScalar       *x,x1,x2,x3,x4,s1,s2,s3,s4;
172:   const MatScalar   *idiag,*mdiag;
173:   const PetscScalar *b;
174:   PetscErrorCode    ierr;
175:   PetscInt          m = a->mbs,i,i2,nz,idx;
176:   const PetscInt    *diag,*ai = a->i,*aj = a->j,*vi;

178:   Mat_SeqBSTRM *bstrm = (Mat_SeqBSTRM*)A->spptr;
179:   MatScalar    *v1,*v2,*v3,*v4, *v10,*v20,*v30,*v40;
180:   PetscInt     slen;

183:   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
184:   its = its*lits;
185:   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
186:   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
187:   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
188:   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
189:   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");

191:   if (!a->idiagvalid) {MatInvertBlockDiagonal(A,NULL);}

193:   diag  = a->diag;
194:   idiag = a->idiag;
195:   VecGetArray(xx,&x);
196:   VecGetArray(bb,(PetscScalar**)&b);

198:   slen = 4*(ai[m]-ai[0]);
199:   v10  = bstrm->as;
200:   v20  = v10 + slen;
201:   v30  = v20 + slen;
202:   v40  = v30 + slen;

204:   if (flag & SOR_ZERO_INITIAL_GUESS) {
205:     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
206:       x[0]   = b[0]*idiag[0] + b[1]*idiag[4] + b[2]*idiag[8]  + b[3]*idiag[12];
207:       x[1]   = b[0]*idiag[1] + b[1]*idiag[5] + b[2]*idiag[9]  + b[3]*idiag[13];
208:       x[2]   = b[0]*idiag[2] + b[1]*idiag[6] + b[2]*idiag[10] + b[3]*idiag[14];
209:       x[3]   = b[0]*idiag[3] + b[1]*idiag[7] + b[2]*idiag[11] + b[3]*idiag[15];
210:       i2     = 4;
211:       idiag += 16;
212:       for (i=1; i<m; i++) {
213:         v1 = v10 + 4*ai[i];
214:         v2 = v20 + 4*ai[i];
215:         v3 = v30 + 4*ai[i];
216:         v4 = v40 + 4*ai[i];
217:         vi = aj + ai[i];
218:         nz = diag[i] - ai[i];
219:         s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3];
220:         while (nz--) {
221:           idx = 4*(*vi++);
222:           x1  = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx];
223:           s1 -= v1[0]*x1 + v1[1]*x2 + v1[2]*x3 + v1[3]*x4;
224:           s2 -= v2[0]*x1 + v2[1]*x2 + v2[2]*x3 + v2[3]*x4;
225:           s3 -= v3[0]*x1 + v3[1]*x2 + v3[2]*x3 + v3[3]*x4;
226:           s4 -= v4[0]*x1 + v4[1]*x2 + v4[2]*x3 + v4[3]*x4;
227:           v1 += 4; v2 += 4; v3 += 4; v4 += 4;
228:         }
229:         x[i2]   = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3  + idiag[12]*s4;
230:         x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3  + idiag[13]*s4;
231:         x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4;
232:         x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4;
233:         idiag  += 16;
234:         i2     += 4;
235:       }
236:       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
237:       PetscLogFlops(16.0*(a->nz));
238:     }
239:     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
240:         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
241:       i2    = 0;
242:       mdiag = a->idiag+16*a->mbs;
243:       for (i=0; i<m; i++) {
244:         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3];
245:         x[i2]   = mdiag[0]*x1 + mdiag[4]*x2 + mdiag[8]*x3  + mdiag[12]*x4;
246:         x[i2+1] = mdiag[1]*x1 + mdiag[5]*x2 + mdiag[9]*x3  + mdiag[13]*x4;
247:         x[i2+2] = mdiag[2]*x1 + mdiag[6]*x2 + mdiag[10]*x3 + mdiag[14]*x4;
248:         x[i2+3] = mdiag[3]*x1 + mdiag[7]*x2 + mdiag[11]*x3 + mdiag[15]*x4;
249:         mdiag  += 16;
250:         i2     += 4;
251:       }
252:       PetscLogFlops(28.0*m);
253:     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
254:       PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));
255:     }
256:     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
257:       idiag   = a->idiag+16*a->mbs - 16;
258:       i2      = 4*m - 4;
259:       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3];
260:       x[i2]   = idiag[0]*x1 + idiag[4]*x2 + idiag[8]*x3  + idiag[12]*x4;
261:       x[i2+1] = idiag[1]*x1 + idiag[5]*x2 + idiag[9]*x3  + idiag[13]*x4;
262:       x[i2+2] = idiag[2]*x1 + idiag[6]*x2 + idiag[10]*x3 + idiag[14]*x4;
263:       x[i2+3] = idiag[3]*x1 + idiag[7]*x2 + idiag[11]*x3 + idiag[15]*x4;
264:       idiag  -= 16;
265:       i2     -= 4;
266:       for (i=m-2; i>=0; i--) {
267:         v1 = v10 + 4*(ai[i+1]-1);
268:         v2 = v20 + 4*(ai[i+1]-1);
269:         v3 = v30 + 4*(ai[i+1]-1);
270:         v4 = v40 + 4*(ai[i+1]-1);
271:         vi = aj + ai[i+1] - 1;
272:         nz = ai[i+1] - diag[i] - 1;
273:         s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3];
274:         while (nz--) {
275:           idx = 4*(*vi--);
276:           x1  = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx];
277:           s1 -= v1[3]*x4 + v1[2]*x3 + v1[1]*x2 + v1[0]*x1;
278:           s2 -= v2[3]*x4 + v2[2]*x3 + v2[1]*x2 + v2[0]*x1;
279:           s3 -= v3[3]*x4 + v3[2]*x3 + v3[1]*x2 + v3[0]*x1;
280:           s4 -= v4[3]*x4 + v4[2]*x3 + v4[1]*x2 + v4[0]*x1;
281:           v1 -= 4; v2 -= 4; v3 -= 4; v4 -= 4;
282:         }
283:         x[i2]   = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3  + idiag[12]*s4;
284:         x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3  + idiag[13]*s4;
285:         x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4;
286:         x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4;
287:         idiag  -= 16;
288:         i2     -= 4;
289:       }
290:       PetscLogFlops(16.0*(a->nz));
291:     }
292:   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
293:   VecRestoreArray(xx,&x);
294:   VecRestoreArray(bb,(PetscScalar**)&b);
295:   return(0);
296: }
297: /*=========================================================*/
298: /*=========================================================*/
301: PetscErrorCode MatSOR_SeqBSTRM_5(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
302: {
303:   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
304:   PetscScalar       *x,x1,x2,x3,x4,x5,s1,s2,s3,s4,s5;
305:   const MatScalar   *idiag,*mdiag;
306:   const PetscScalar *b;
307:   PetscErrorCode    ierr;
308:   PetscInt          m = a->mbs,i,i2,nz,idx;
309:   const PetscInt    *diag,*ai = a->i,*aj = a->j,*vi;

311:   Mat_SeqBSTRM *bstrm = (Mat_SeqBSTRM*)A->spptr;
312:   MatScalar    *v1, *v2, *v3, *v4, *v5, *v10, *v20, *v30, *v40, *v50;
313:   PetscInt     slen;

316:   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
317:   its = its*lits;
318:   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
319:   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
320:   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
321:   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
322:   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");

324:   if (!a->idiagvalid) {MatInvertBlockDiagonal(A,NULL);}

326:   diag  = a->diag;
327:   idiag = a->idiag;
328:   VecGetArray(xx,&x);
329:   VecGetArray(bb,(PetscScalar**)&b);

331:   slen = 5*(ai[m]-ai[0]);
332:   v10  = bstrm->as;
333:   v20  = v10 + slen;
334:   v30  = v20 + slen;
335:   v40  = v30 + slen;
336:   v50  = v40 + slen;

338:   if (flag & SOR_ZERO_INITIAL_GUESS) {
339:     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
340:       x[0]   = b[0]*idiag[0] + b[1]*idiag[5] + b[2]*idiag[10] + b[3]*idiag[15] + b[4]*idiag[20];
341:       x[1]   = b[0]*idiag[1] + b[1]*idiag[6] + b[2]*idiag[11] + b[3]*idiag[16] + b[4]*idiag[21];
342:       x[2]   = b[0]*idiag[2] + b[1]*idiag[7] + b[2]*idiag[12] + b[3]*idiag[17] + b[4]*idiag[22];
343:       x[3]   = b[0]*idiag[3] + b[1]*idiag[8] + b[2]*idiag[13] + b[3]*idiag[18] + b[4]*idiag[23];
344:       x[4]   = b[0]*idiag[4] + b[1]*idiag[9] + b[2]*idiag[14] + b[3]*idiag[19] + b[4]*idiag[24];
345:       i2     = 5;
346:       idiag += 25;
347:       for (i=1; i<m; i++) {
348:         v1 = v10 + 5*ai[i];
349:         v2 = v20 + 5*ai[i];
350:         v3 = v30 + 5*ai[i];
351:         v4 = v40 + 5*ai[i];
352:         v5 = v50 + 5*ai[i];
353:         vi = aj + ai[i];
354:         nz = diag[i] - ai[i];
355:         s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4];
356:         while (nz--) {
357:           idx = 5*(*vi++);
358:           x1  = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx];
359:           s1 -= v1[0]*x1 + v1[1]*x2 + v1[2]*x3 + v1[3]*x4 + v1[4]*x5;
360:           s2 -= v2[0]*x1 + v2[1]*x2 + v2[2]*x3 + v2[3]*x4 + v2[4]*x5;
361:           s3 -= v3[0]*x1 + v3[1]*x2 + v3[2]*x3 + v3[3]*x4 + v3[4]*x5;
362:           s4 -= v4[0]*x1 + v4[1]*x2 + v4[2]*x3 + v4[3]*x4 + v4[4]*x5;
363:           s5 -= v5[0]*x1 + v5[1]*x2 + v5[2]*x3 + v5[3]*x4 + v5[4]*x5;
364:           v1 += 5; v2 += 5; v3 += 5; v4 += 5; v5 += 5;
365:         }
366:         x[i2]   = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5;
367:         x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5;
368:         x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5;
369:         x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5;
370:         x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5;
371:         idiag  += 25;
372:         i2     += 5;
373:       }
374:       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
375:       PetscLogFlops(25.0*(a->nz));
376:     }
377:     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
378:         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
379:       i2    = 0;
380:       mdiag = a->idiag+25*a->mbs;
381:       for (i=0; i<m; i++) {
382:         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4];
383:         x[i2]   = mdiag[0]*x1 + mdiag[5]*x2 + mdiag[10]*x3 + mdiag[15]*x4 + mdiag[20]*x5;
384:         x[i2+1] = mdiag[1]*x1 + mdiag[6]*x2 + mdiag[11]*x3 + mdiag[16]*x4 + mdiag[21]*x5;
385:         x[i2+2] = mdiag[2]*x1 + mdiag[7]*x2 + mdiag[12]*x3 + mdiag[17]*x4 + mdiag[22]*x5;
386:         x[i2+3] = mdiag[3]*x1 + mdiag[8]*x2 + mdiag[13]*x3 + mdiag[18]*x4 + mdiag[23]*x5;
387:         x[i2+4] = mdiag[4]*x1 + mdiag[9]*x2 + mdiag[14]*x3 + mdiag[19]*x4 + mdiag[24]*x5;
388:         mdiag  += 25;
389:         i2     += 5;
390:       }
391:       PetscLogFlops(45.0*m);
392:     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
393:       PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));
394:     }
395:     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
396:       idiag   = a->idiag+25*a->mbs - 25;
397:       i2      = 5*m - 5;
398:       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4];
399:       x[i2]   = idiag[0]*x1 + idiag[5]*x2 + idiag[10]*x3 + idiag[15]*x4 + idiag[20]*x5;
400:       x[i2+1] = idiag[1]*x1 + idiag[6]*x2 + idiag[11]*x3 + idiag[16]*x4 + idiag[21]*x5;
401:       x[i2+2] = idiag[2]*x1 + idiag[7]*x2 + idiag[12]*x3 + idiag[17]*x4 + idiag[22]*x5;
402:       x[i2+3] = idiag[3]*x1 + idiag[8]*x2 + idiag[13]*x3 + idiag[18]*x4 + idiag[23]*x5;
403:       x[i2+4] = idiag[4]*x1 + idiag[9]*x2 + idiag[14]*x3 + idiag[19]*x4 + idiag[24]*x5;
404:       idiag  -= 25;
405:       i2     -= 5;
406:       for (i=m-2; i>=0; i--) {
407:         v1 = v10 + 5*(ai[i+1]-1);
408:         v2 = v20 + 5*(ai[i+1]-1);
409:         v3 = v30 + 5*(ai[i+1]-1);
410:         v4 = v40 + 5*(ai[i+1]-1);
411:         v5 = v50 + 5*(ai[i+1]-1);
412:         vi = aj + ai[i+1] - 1;
413:         nz = ai[i+1] - diag[i] - 1;
414:         s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4];
415:         while (nz--) {
416:           idx = 5*(*vi--);
417:           x1  = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx];
418:           s1 -= v1[4]*x5 + v1[3]*x4 + v1[2]*x3 + v1[1]*x2 + v1[0]*x1;
419:           s2 -= v2[4]*x5 + v2[3]*x4 + v2[2]*x3 + v2[1]*x2 + v2[0]*x1;
420:           s3 -= v3[4]*x5 + v3[3]*x4 + v3[2]*x3 + v3[1]*x2 + v3[0]*x1;
421:           s4 -= v4[4]*x5 + v4[3]*x4 + v4[2]*x3 + v4[1]*x2 + v4[0]*x1;
422:           s5 -= v5[4]*x5 + v5[3]*x4 + v5[2]*x3 + v5[1]*x2 + v5[0]*x1;
423:           v1 -= 5; v2 -= 5; v3 -= 5; v4 -= 5; v5 -= 5;
424:         }
425:         x[i2]   = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5;
426:         x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5;
427:         x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5;
428:         x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5;
429:         x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5;
430:         idiag  -= 25;
431:         i2     -= 5;
432:       }
433:       PetscLogFlops(25.0*(a->nz));
434:     }
435:   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
436:   VecRestoreArray(xx,&x);
437:   VecRestoreArray(bb,(PetscScalar**)&b);
438:   return(0);
439: }
440: /*=========================================================*/
441: /*=========================================================*/
444: PetscErrorCode MatMult_SeqBSTRM_4(Mat A,Vec xx,Vec zz)
445: {
446:   Mat_SeqBAIJ       *a     = (Mat_SeqBAIJ*)A->data;
447:   Mat_SeqBSTRM      *bstrm = (Mat_SeqBSTRM*)A->spptr;
448:   PetscScalar       *z     = 0,sum1,sum2,sum3,sum4,x1,x2,x3,x4,*zarray;
449:   const PetscScalar *x,*xb;
450:   const MatScalar   *v1, *v2, *v3, *v4;
451:   PetscErrorCode    ierr;
452:   PetscInt          mbs,i,*idx,*ii,j,n,*ridx=NULL,nonzerorow=0;
453:   PetscBool         usecprow=a->compressedrow.use;
454:   PetscInt          slen;

457:   VecGetArray(xx,(PetscScalar**)&x);
458:   VecGetArray(zz,&zarray);

460:   idx = a->j;

462:   if (usecprow) {
463:     mbs  = a->compressedrow.nrows;
464:     ii   = a->compressedrow.i;
465:     ridx = a->compressedrow.rindex;
466:   } else {
467:     mbs = a->mbs;
468:     ii  = a->i;
469:     z   = zarray;
470:   }

472:   slen = 4*(ii[mbs]-ii[0]);
473:   v1   = bstrm->as;
474:   v2   = v1 + slen;
475:   v3   = v2 + slen;
476:   v4   = v3 + slen;

478:   for (i=0; i<mbs; i++) {
479:     n           = ii[1] - ii[0]; ii++;
480:     sum1        = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0;
481:     nonzerorow += (n>0);
482:     for (j=0; j<n; j++) {
483:       xb    = x + 4*(*idx++);
484:       x1    = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
485:       sum1 += v1[0]*x1 + v1[1]*x2 + v1[2]*x3  + v1[3]*x4;
486:       sum2 += v2[0]*x1 + v2[1]*x2 + v2[2]*x3  + v2[3]*x4;
487:       sum3 += v3[0]*x1 + v3[1]*x2 + v3[2]*x3  + v3[3]*x4;
488:       sum4 += v4[0]*x1 + v4[1]*x2 + v4[2]*x3  + v4[3]*x4;
489:       v1   += 4; v2 += 4; v3 += 4; v4 += 4;
490:     }
491:     if (usecprow) z = zarray + 4*ridx[i];
492:     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4;
493:     if (!usecprow) z += 4;
494:   }
495:   VecRestoreArray(xx,(PetscScalar**)&x);
496:   VecRestoreArray(zz,&zarray);
497:   PetscLogFlops(32*a->nz - 4*nonzerorow);
498:   return(0);
499: }
500: /*=========================================================*/
503: PetscErrorCode MatMult_SeqBSTRM_5(Mat A,Vec xx,Vec zz)
504: {
505:   Mat_SeqBAIJ       *a     = (Mat_SeqBAIJ*)A->data;
506:   Mat_SeqBSTRM      *bstrm = (Mat_SeqBSTRM*)A->spptr;
507:   PetscScalar       *z     = 0,sum1,sum2,sum3,sum4,sum5,x1,x2,x3,x4,x5,*zarray;
508:   const PetscScalar *x,*xb;
509:   const MatScalar   *v1, *v2, *v3, *v4, *v5;
510:   PetscErrorCode    ierr;
511:   PetscInt          mbs,i,*idx,*ii,j,n,*ridx=NULL,nonzerorow=0;
512:   PetscBool         usecprow=a->compressedrow.use;
513:   PetscInt          slen;

516:   VecGetArray(xx,(PetscScalar**)&x);
517:   VecGetArray(zz,&zarray);

519:   idx = a->j;

521:   if (usecprow) {
522:     mbs  = a->compressedrow.nrows;
523:     ii   = a->compressedrow.i;
524:     ridx = a->compressedrow.rindex;
525:   } else {
526:     mbs = a->mbs;
527:     ii  = a->i;
528:     z   = zarray;
529:   }

531:   slen = 5*(ii[mbs]-ii[0]);
532:   v1   = bstrm->as;
533:   v2   = v1 + slen;
534:   v3   = v2 + slen;
535:   v4   = v3 + slen;
536:   v5   = v4 + slen;

538:   for (i=0; i<mbs; i++) {
539:     n           = ii[1] - ii[0]; ii++;
540:     sum1        = sum2 = sum3 = sum4 = sum5 = 0.0;
541:     nonzerorow += (n>0);
542:     PetscPrefetchBlock(idx+n,n,0,PETSC_PREFETCH_HINT_NTA); /* Indices for the next row (assumes same size as this one) */
543:     PetscPrefetchBlock(v1+5*n,5*n,0,PETSC_PREFETCH_HINT_NTA); /* Entries for the next row */
544:     PetscPrefetchBlock(v2+5*n,5*n,0,PETSC_PREFETCH_HINT_NTA); /* Entries for the next row */
545:     PetscPrefetchBlock(v3+5*n,5*n,0,PETSC_PREFETCH_HINT_NTA); /* Entries for the next row */
546:     PetscPrefetchBlock(v4+5*n,5*n,0,PETSC_PREFETCH_HINT_NTA); /* Entries for the next row */
547:     PetscPrefetchBlock(v5+5*n,5*n,0,PETSC_PREFETCH_HINT_NTA); /* Entries for the next row */


550:     for (j=0; j<n; j++) {
551:       xb    = x + 5*(*idx++);
552:       x1    = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4];
553:       sum1 += v1[0]*x1 + v1[1]*x2 + v1[2]*x3 + v1[3]*x4 + v1[4]*x5;
554:       sum2 += v2[0]*x1 + v2[1]*x2 + v2[2]*x3 + v2[3]*x4 + v2[4]*x5;
555:       sum3 += v3[0]*x1 + v3[1]*x2 + v3[2]*x3 + v3[3]*x4 + v3[4]*x5;
556:       sum4 += v4[0]*x1 + v4[1]*x2 + v4[2]*x3 + v4[3]*x4 + v4[4]*x5;
557:       sum5 += v5[0]*x1 + v5[1]*x2 + v5[2]*x3 + v5[3]*x4 + v5[4]*x5;
558:       v1   += 5; v2 += 5; v3 += 5; v4 += 5; v5 += 5;
559:     }
560:     if (usecprow) z = zarray + 5*ridx[i];
561:     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5;
562:     if (!usecprow) z += 5;
563:   }
564:   VecRestoreArray(xx,(PetscScalar**)&x);
565:   VecRestoreArray(zz,&zarray);
566:   PetscLogFlops(50.0*a->nz - 5.0*nonzerorow);
567:   return(0);
568: }
569: /*=========================================================*/

571: /*=========================================================*/
574: PetscErrorCode MatMultTranspose_SeqBSTRM_4(Mat A,Vec xx,Vec zz)
575: {
576:   Mat_SeqBAIJ    *a      = (Mat_SeqBAIJ*)A->data;
577:   Mat_SeqBSTRM   *sbstrm = (Mat_SeqBSTRM*)A->spptr;
578:   PetscScalar    zero    = 0.0;
579:   PetscScalar    x1,x2,x3,x4;
580:   PetscScalar    *x, *xb, *z;
581:   MatScalar      *v1, *v2, *v3, *v4;
583:   PetscInt       mbs       =a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j;
584:   PetscInt       nonzerorow=0;
585:   PetscInt       slen;

588:   VecSet(zz,zero);
589:   VecGetArray(xx,&x);
590:   VecGetArray(zz,&z);

592:   slen = 4*(ai[mbs]-ai[0]);
593:   v1   = sbstrm->as;
594:   v2   = v1 + slen;
595:   v3   = v2 + slen;
596:   v4   = v3 + slen;
597:   xb   = x;

599:   for (i=0; i<mbs; i++) {
600:     n           = ai[i+1] - ai[i];
601:     x1          = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];  xb += 4;
602:     nonzerorow += (n>0);
603:     ib          = aj + ai[i];

605:     for (j=0; j<n; j++) {
606:       cval       = ib[j]*4;
607:       z[cval]   += v1[0]*x1 + v2[0]*x2 + v3[0]*x3 + v4[0]*x4;
608:       z[cval+1] += v1[1]*x1 + v2[1]*x2 + v3[1]*x3 + v4[1]*x4;
609:       z[cval+2] += v1[2]*x1 + v2[2]*x2 + v3[2]*x3 + v4[2]*x4;
610:       z[cval+3] += v1[3]*x1 + v2[3]*x2 + v3[3]*x3 + v4[3]*x4;
611:       v1        += 4; v2 += 4; v3 += 4; v4 += 4;
612:     }

614:   }
615:   VecRestoreArray(xx,&x);
616:   VecRestoreArray(zz,&z);
617:   PetscLogFlops(32*a->nz - 4*nonzerorow);
618:   return(0);
619: }
620: /*=========================================================*/
623: PetscErrorCode MatMultTranspose_SeqBSTRM_5(Mat A,Vec xx,Vec zz)
624: {
625:   Mat_SeqBAIJ     *a      = (Mat_SeqBAIJ*)A->data;
626:   Mat_SeqBSTRM    *sbstrm = (Mat_SeqBSTRM*)A->spptr;
627:   PetscScalar     zero    = 0.0;
628:   PetscScalar     *z      = 0;
629:   PetscScalar     *x,*xb;
630:   const MatScalar *v1, *v2, *v3, *v4, *v5;
631:   PetscScalar     x1, x2, x3, x4, x5;
632:   PetscErrorCode  ierr;
633:   PetscInt        mbs       =a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j;
634:   PetscInt        nonzerorow=0;
635:   PetscInt        slen;

638:   VecSet(zz,zero);
639:   VecGetArray(xx,&x);
640:   VecGetArray(zz,&z);

642:   slen = 5*(ai[mbs]-ai[0]);

644:   v1 = sbstrm->as;
645:   v2 = v1 + slen;
646:   v3 = v2 + slen;
647:   v4 = v3 + slen;
648:   v5 = v4 + slen;

650:   xb = x;

652:   for (i=0; i<mbs; i++) {
653:     n           = ai[i+1] - ai[i];
654:     nonzerorow += (n>0);

656:     x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; xb += 5;
657:     ib = aj + ai[i];

659:     PetscPrefetchBlock(ib+n,n,0,PETSC_PREFETCH_HINT_NTA); /* Indices for the next row (assumes same size as this one) */
660:     PetscPrefetchBlock(v1+5*n,5*n,0,PETSC_PREFETCH_HINT_NTA); /* Entries for the next row */
661:     PetscPrefetchBlock(v2+5*n,5*n,0,PETSC_PREFETCH_HINT_NTA); /* Entries for the next row */
662:     PetscPrefetchBlock(v3+5*n,5*n,0,PETSC_PREFETCH_HINT_NTA); /* Entries for the next row */
663:     PetscPrefetchBlock(v4+5*n,5*n,0,PETSC_PREFETCH_HINT_NTA); /* Entries for the next row */
664:     PetscPrefetchBlock(v5+5*n,5*n,0,PETSC_PREFETCH_HINT_NTA); /* Entries for the next row */


667:     for (j=0; j<n; j++) {
668:       cval       = ib[j]*5;
669:       z[cval]   += v1[0]*x1 + v2[0]*x2 + v3[0]*x3 + v4[0]*x4 + v5[0]*x5;
670:       z[cval+1] += v1[1]*x1 + v2[1]*x2 + v3[1]*x3 + v4[1]*x4 + v5[1]*x5;
671:       z[cval+2] += v1[2]*x1 + v2[2]*x2 + v3[2]*x3 + v4[2]*x4 + v5[2]*x5;
672:       z[cval+3] += v1[3]*x1 + v2[3]*x2 + v3[3]*x3 + v4[3]*x4 + v5[3]*x5;
673:       z[cval+4] += v1[4]*x1 + v2[4]*x2 + v3[4]*x3 + v4[4]*x4 + v5[4]*x5;
674:       v1        += 5; v2 += 5; v3 += 5; v4 += 5; v5 += 5;
675:     }
676:   }
677:   VecRestoreArray(xx,&x);
678:   VecRestoreArray(zz,&z);
679:   PetscLogFlops(50.0*a->nz - 5.0*nonzerorow);
680:   return(0);
681: }
682: /*=========================================================*/
685: PetscErrorCode MatMultAdd_SeqBSTRM_4(Mat A,Vec xx,Vec yy,Vec zz)
686: {
687:   Mat_SeqBAIJ    *a     = (Mat_SeqBAIJ*)A->data;
688:   Mat_SeqBSTRM   *bstrm = (Mat_SeqBSTRM*)A->spptr;
689:   PetscScalar    *x,*y = 0,*z = 0,*xb,sum1,sum2,sum3,sum4,x1,x2,x3,x4,*yarray,*zarray;
690:   MatScalar      *v1, *v2, *v3, *v4;
692:   PetscInt       mbs     =a->mbs,i,*idx,*ii,j,n,*ridx=NULL;
693:   PetscBool      usecprow=a->compressedrow.use;
694:   PetscInt       slen;

697:   VecGetArray(xx,&x);
698:   VecGetArray(yy,&yarray);
699:   if (zz != yy) {
700:     VecGetArray(zz,&zarray);
701:   } else {
702:     zarray = yarray;
703:   }

705:   idx = a->j;
706:   if (usecprow) {
707:     if (zz != yy) {
708:       PetscMemcpy(zarray,yarray,4*mbs*sizeof(PetscScalar));
709:     }
710:     mbs  = a->compressedrow.nrows;
711:     ii   = a->compressedrow.i;
712:     ridx = a->compressedrow.rindex;
713:   } else {
714:     ii = a->i;
715:     y  = yarray;
716:     z  = zarray;
717:   }

719:   slen = 4*(ii[mbs]-ii[0]);
720:   v1   = bstrm->as;
721:   v2   = v1 + slen;
722:   v3   = v2 + slen;
723:   v4   = v3 + slen;

725:   for (i=0; i<mbs; i++) {
726:     n = ii[1] - ii[0]; ii++;
727:     if (usecprow) {
728:       z = zarray + 4*ridx[i];
729:       y = yarray + 4*ridx[i];
730:     }
731:     sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3];
732:     for (j=0; j<n; j++) {
733:       xb    = x + 4*(*idx++);
734:       x1    = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
735:       sum1 += v1[0]*x1 + v1[1]*x2 + v1[2]*x3  + v1[3]*x4;
736:       sum2 += v2[0]*x1 + v2[1]*x2 + v2[2]*x3  + v2[3]*x4;
737:       sum3 += v3[0]*x1 + v3[1]*x2 + v3[2]*x3  + v3[3]*x4;
738:       sum4 += v4[0]*x1 + v4[1]*x2 + v4[2]*x3  + v4[3]*x4;
739:       v1   += 4; v2 += 4; v3 += 4; v4 += 4;
740:     }
741:     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4;
742:     if (!usecprow) {
743:       z += 4; y += 4;
744:     }
745:   }
746:   VecRestoreArray(xx,&x);
747:   VecRestoreArray(yy,&yarray);
748:   if (zz != yy) {
749:     VecRestoreArray(zz,&zarray);
750:   }
751:   PetscLogFlops(32.0*a->nz);
752:   return(0);
753: }

755: /*=========================================================*/
758: PetscErrorCode MatMultAdd_SeqBSTRM_5(Mat A,Vec xx,Vec yy,Vec zz)
759: {
760:   Mat_SeqBAIJ    *a     = (Mat_SeqBAIJ*)A->data;
761:   Mat_SeqBSTRM   *bstrm = (Mat_SeqBSTRM*)A->spptr;
762:   PetscScalar    *x,*y = 0,*z = 0,*xb,sum1,sum2,sum3,sum4,sum5,x1,x2,x3,x4,x5;
763:   PetscScalar    *yarray,*zarray;
764:   MatScalar      *v1,*v2,*v3,*v4,*v5;
766:   PetscInt       mbs     =a->mbs,i,*idx,*ii,j,n,*ridx=NULL;
767:   PetscBool      usecprow=a->compressedrow.use;
768:   PetscInt       slen;

771:   VecGetArray(xx,&x);
772:   VecGetArray(yy,&yarray);
773:   if (zz != yy) {
774:     VecGetArray(zz,&zarray);
775:   } else {
776:     zarray = yarray;
777:   }


780:   idx = a->j;
781:   if (usecprow) {
782:     if (zz != yy) {
783:       PetscMemcpy(zarray,yarray,5*mbs*sizeof(PetscScalar));
784:     }
785:     mbs  = a->compressedrow.nrows;
786:     ii   = a->compressedrow.i;
787:     ridx = a->compressedrow.rindex;
788:   } else {
789:     ii = a->i;
790:     y  = yarray;
791:     z  = zarray;
792:   }

794:   slen = 5*(ii[mbs]-ii[0]);
795:   v1   = bstrm->as;
796:   v2   = v1 + slen;
797:   v3   = v2 + slen;
798:   v4   = v3 + slen;
799:   v5   = v4 + slen;


802:   for (i=0; i<mbs; i++) {
803:     n = ii[1] - ii[0]; ii++;
804:     if (usecprow) {
805:       z = zarray + 5*ridx[i];
806:       y = yarray + 5*ridx[i];
807:     }
808:     sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3]; sum5 = y[4];
809:     PetscPrefetchBlock(idx+n,n,0,PETSC_PREFETCH_HINT_NTA); /* Indices for the next row (assumes same size as this one) */
810:     PetscPrefetchBlock(v1+5*n,5*n,0,PETSC_PREFETCH_HINT_NTA); /* Entries for the next row */
811:     PetscPrefetchBlock(v2+5*n,5*n,0,PETSC_PREFETCH_HINT_NTA); /* Entries for the next row */
812:     PetscPrefetchBlock(v3+5*n,5*n,0,PETSC_PREFETCH_HINT_NTA); /* Entries for the next row */
813:     PetscPrefetchBlock(v4+5*n,5*n,0,PETSC_PREFETCH_HINT_NTA); /* Entries for the next row */
814:     PetscPrefetchBlock(v5+5*n,5*n,0,PETSC_PREFETCH_HINT_NTA); /* Entries for the next row */


817:     for (j=0; j<n; j++) {
818:       xb    = x + 5*(*idx++);
819:       x1    = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4];
820:       sum1 += v1[0]*x1 + v1[1]*x2 + v1[2]*x3 + v1[3]*x4 + v1[4]*x5;
821:       sum2 += v2[0]*x1 + v2[1]*x2 + v2[2]*x3 + v2[3]*x4 + v2[4]*x5;
822:       sum3 += v3[0]*x1 + v3[1]*x2 + v3[2]*x3 + v3[3]*x4 + v3[4]*x5;
823:       sum4 += v4[0]*x1 + v4[1]*x2 + v4[2]*x3 + v4[3]*x4 + v4[4]*x5;
824:       sum5 += v5[0]*x1 + v5[1]*x2 + v5[2]*x3 + v5[3]*x4 + v5[4]*x5;
825:       v1   += 5; v2 += 5; v3 += 5; v4 += 5; v5 += 5;
826:     }
827:     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5;
828:     if (!usecprow) {
829:       z += 5; y += 5;
830:     }
831:   }
832:   VecRestoreArray(xx,&x);
833:   VecRestoreArray(yy,&yarray);
834:   if (zz != yy) {
835:     VecRestoreArray(zz,&zarray);
836:   }
837:   PetscLogFlops(50.0*a->nz);
838:   return(0);
839: }

841: /*=========================================================*/
844: PetscErrorCode MatSeqBSTRM_create_bstrm(Mat A)
845: {
846:   Mat_SeqBAIJ    *a     = (Mat_SeqBAIJ*) A->data;
847:   Mat_SeqBSTRM   *bstrm = (Mat_SeqBSTRM*) A->spptr;
848:   PetscInt       MROW   = a->mbs, bs = A->rmap->bs;
849:   PetscInt       *ai    = a->i;
850:   PetscInt       i,j,k;
851:   MatScalar      *aa = a->a;
853:   PetscInt       bs2, rbs,  cbs, blen, slen;
854:   PetscScalar    **asp;

857:   bstrm->rbs = bstrm->cbs = bs;

859:   rbs  = cbs = bs;
860:   bs2  = rbs*cbs;
861:   blen = ai[MROW]-ai[0];
862:   slen = blen*cbs;

864:   PetscMalloc1(bs2*blen, &bstrm->as);

866:   PetscMalloc1(rbs, &asp);

868:   for (i=0; i<rbs; i++) asp[i] = bstrm->as + i*slen;

870:   for (k=0; k<blen; k++) {
871:     for (j=0; j<cbs; j++) {
872:       for (i=0; i<rbs; i++) asp[i][k*cbs+j] = aa[k*bs2+j*rbs+i];
873:     }
874:   }

876:   PetscFree(asp);

878:   switch (bs) {
879:   case 4:
880:     A->ops->mult          = MatMult_SeqBSTRM_4;
881:     A->ops->multadd       = MatMultAdd_SeqBSTRM_4;
882:     A->ops->multtranspose = MatMultTranspose_SeqBSTRM_4;
883:     A->ops->sor           = MatSOR_SeqBSTRM_4;
884:     break;
885:   case 5:
886:     A->ops->mult          = MatMult_SeqBSTRM_5;
887:     A->ops->multadd       = MatMultAdd_SeqBSTRM_5;
888:     A->ops->multtranspose = MatMultTranspose_SeqBSTRM_5;
889:     A->ops->sor           = MatSOR_SeqBSTRM_5;
890:     break;
891:   default:
892:     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"not supported for block size %D",bs);
893:   }
894:   return(0);
895: }
896: /*=========================================================*/
897: /*=========================================================*/

901: PetscErrorCode MatSeqBSTRMTransform(Mat A)
902: {
904:   MatSeqBSTRM_convert_bstrm(A);
905:   return(0);
906: }