Actual source code: sbstream.c
petsc-3.7.3 2016-08-01
1: #define PETSCMAT_DLL
3: #include <../src/mat/impls/sbaij/seq/sbaij.h>
4: #include <../src/mat/impls/sbaij/seq/sbstream/sbstream.h>
6: extern PetscErrorCode MatAssemblyEnd_SeqSBAIJ(Mat,MatAssemblyType);
11: PetscErrorCode MatDestroy_SeqSBSTRM(Mat A)
12: {
14: Mat_SeqSBSTRM *sbstrm = (Mat_SeqSBSTRM*) A->spptr;
16: if (sbstrm) {
17: PetscFree3(sbstrm->as, sbstrm->asi, sbstrm->asj);
18: }
19: PetscObjectChangeTypeName((PetscObject)A, MATSEQSBAIJ);
20: MatDestroy_SeqSBAIJ(A);
21: return(0);
22: }
23: /*=========================================================*/
24: PetscErrorCode MatDuplicate_SeqSBSTRM(Mat A, MatDuplicateOption op, Mat *M)
25: {
27: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot duplicate STRM matrices yet");
28: return(0);
29: }
30: /*=========================================================*/
34: PetscErrorCode SeqSBSTRM_convert_sbstrm(Mat A)
35: {
36: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*) A->data;
37: Mat_SeqSBSTRM *sbstrm = (Mat_SeqSBSTRM*) A->spptr;
38: PetscInt m = a->mbs, bs = A->rmap->bs;
39: PetscInt *ai = a->i;
40: PetscInt i,j,ib,jb;
41: MatScalar *aa = a->a;
43: PetscInt bs2, rbs, cbs, blen, slen;
44: PetscScalar **asp;
47: sbstrm->rbs = bs;
48: sbstrm->cbs = bs;
51: rbs = cbs = bs;
52: bs2 = rbs*cbs;
53: blen = ai[m]-ai[0];
54: slen = blen*cbs;
56: PetscFree(sbstrm->as);
57: PetscMalloc1(bs2*blen, &sbstrm->as);
59: PetscMalloc1(rbs, &asp);
61: for (i=0; i<rbs; i++) asp[i] = sbstrm->as + i*slen;
63: for (j=0;j<blen;j++) {
64: for (jb=0; jb<cbs; jb++) {
65: for (ib=0; ib<rbs; ib++) asp[ib][j*cbs+jb] = aa[j*bs2+jb*rbs+ib];
66: }
67: }
69: PetscFree(asp);
70: /*
71: switch (bs) {
72: case 4:
73: A->ops->solve = MatSolve_SeqSBSTRM_4;
74: break;
75: case 5:
76: A->ops->solve = MatSolve_SeqSBSTRM_5;
77: break;
78: default:
79: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"not supported for block size %D",bs);
80: }
81: */
82: return(0);
83: }
84: /*=========================================================*/
85: extern PetscErrorCode SeqSBSTRM_create_sbstrm(Mat);
86: /*=========================================================*/
89: PetscErrorCode MatAssemblyEnd_SeqSBSTRM(Mat A, MatAssemblyType mode)
90: {
94: MatAssemblyEnd_SeqSBAIJ(A, mode);
95: if (mode == MAT_FLUSH_ASSEMBLY) return(0);
97: SeqSBSTRM_create_sbstrm(A);
98: return(0);
99: }
100: /*=========================================================*/
103: PETSC_INTERN PetscErrorCode MatConvert_SeqSBAIJ_SeqSBSTRM(Mat A,MatType type,MatReuse reuse,Mat *newmat)
104: {
107: Mat B = *newmat;
108: Mat_SeqSBSTRM *sbstrm;
109: /* PetscInt bs = A->rmap->bs; */
112: if (reuse == MAT_INITIAL_MATRIX) {
113: MatDuplicate(A,MAT_COPY_VALUES,&B);
114: }
117: PetscNewLog(B,&sbstrm);
118: B->spptr = (void*) sbstrm;
120: /* Set function pointers for methods that we inherit from BAIJ but override. */
121: B->ops->duplicate = MatDuplicate_SeqSBSTRM;
122: B->ops->assemblyend = MatAssemblyEnd_SeqSBSTRM;
123: B->ops->destroy = MatDestroy_SeqSBSTRM;
124: /*B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_sbstrm;
125: B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_sbstrm; */
127: /* If A has already been assembled, compute the permutation. */
128: if (A->assembled) {
129: SeqSBSTRM_create_sbstrm(B);
130: }
131: PetscObjectChangeTypeName((PetscObject)B,MATSEQSBSTRM);
132: *newmat = B;
133: return(0);
134: }
136: /*=========================================================*/
139: PetscErrorCode MatCreateSeqSBSTRM(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
140: {
144: MatCreate(comm,A);
145: MatSetSizes(*A,m,n,m,n);
146: MatSetType(*A,MATSEQSBSTRM);
147: MatSeqSBAIJSetPreallocation_SeqSBAIJ(*A,bs,nz,(PetscInt*)nnz);
148: (*A)->rmap->bs = bs;
149: return(0);
150: }
151: /*=========================================================*/
155: PETSC_EXTERN PetscErrorCode MatCreate_SeqSBSTRM(Mat A)
156: {
160: MatSetType(A,MATSEQSBAIJ);
161: MatConvert_SeqSBAIJ_SeqSBSTRM(A,MATSEQSBSTRM,MAT_INPLACE_MATRIX,&A);
163: PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqisbaij_seqsbstrm_C",MatConvert_SeqSBAIJ_SeqSBSTRM);
164: return(0);
165: }
167: /*=========================================================*/
168: /*=========================================================*/
171: PetscErrorCode MatMult_SeqSBSTRM_4(Mat A,Vec xx,Vec zz)
172: {
173: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
174: Mat_SeqSBSTRM *sbstrm = (Mat_SeqSBSTRM*)A->spptr;
175: PetscScalar zero = 0.0;
176: PetscScalar sum1,sum2,sum3,sum4,x1,x2,x3,x4, xr1,xr2,xr3,xr4;
177: PetscScalar *x, *xb, *z;
178: MatScalar *v1, *v2, *v3, *v4;
180: PetscInt mbs =a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
181: PetscInt nonzerorow=0;
182: PetscInt slen;
185: VecSet(zz,zero);
186: VecGetArray(xx,&x);
187: VecGetArray(zz,&z);
189: slen = 4*(ai[mbs]-ai[0]);
190: v1 = sbstrm->as;
191: v2 = v1 + slen;
192: v3 = v2 + slen;
193: v4 = v3 + slen;
195: xb = x;
197: for (i=0; i<mbs; i++) {
198: n = ai[i+1] - ai[i];
199: x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
200: sum1 = z[4*i]; sum2 = z[4*i+1]; sum3 = z[4*i+2]; sum4 = z[4*i+3];
201: nonzerorow += (n>0);
202: jmin = 0;
203: ib = aj + ai[i];
204: if (*ib == i) { /* (diag of A)*x */
205: sum1 += v1[0]*x1 + v1[1]*x2 + v1[2]*x3 + v1[3]*x4;
206: sum2 += v1[1]*x1 + v2[1]*x2 + v2[2]*x3 + v2[3]*x4;
207: sum3 += v1[2]*x1 + v2[2]*x2 + v3[2]*x3 + v3[3]*x4;
208: sum4 += v1[3]*x1 + v2[3]*x2 + v3[3]*x3 + v4[3]*x4;
209: v1 += 4; v2 += 4; v3 += 4; v4 += 4;
210: jmin++;
211: }
213: for (j=jmin; j<n; j++) {
214: cval = ib[j]*4;
215: z[cval] += v1[0]*x1 + v2[0]*x2 + v3[0]*x3 + v4[0]*x4;
216: z[cval+1] += v1[1]*x1 + v2[1]*x2 + v3[1]*x3 + v4[1]*x4;
217: z[cval+2] += v1[2]*x1 + v2[2]*x2 + v3[2]*x3 + v4[2]*x4;
218: z[cval+3] += v1[3]*x1 + v2[3]*x2 + v3[3]*x3 + v4[3]*x4;
220: xr1 = x[cval]; xr2 = x[cval+1]; xr3 = x[cval+2]; xr4 = x[cval+3];
221: sum1 += v1[0]*xr1 + v1[1]*xr2 + v1[2]*xr3 + v1[3]*xr4;
222: sum2 += v2[0]*xr1 + v2[1]*xr2 + v2[2]*xr3 + v2[3]*xr4;
223: sum3 += v3[0]*xr1 + v3[1]*xr2 + v3[2]*xr3 + v3[3]*xr4;
224: sum4 += v4[0]*xr1 + v4[1]*xr2 + v4[2]*xr3 + v4[3]*xr4;
225: v1 += 4; v2 += 4; v3 += 4; v4 += 4;
226: }
227: z[4*i] = sum1;
228: z[4*i+1] = sum2;
229: z[4*i+2] = sum3;
230: z[4*i+3] = sum4;
231: xb +=4;
233: }
234: VecRestoreArray(xx,&x);
235: VecRestoreArray(zz,&z);
236: PetscLogFlops(32.0*(a->nz*2.0 - nonzerorow) - nonzerorow);
237: return(0);
238: }
239: /*=========================================================*/
242: PetscErrorCode MatMult_SeqSBSTRM_5(Mat A,Vec xx,Vec zz)
243: {
244: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
245: Mat_SeqSBSTRM *sbstrm = (Mat_SeqSBSTRM*)A->spptr;
246: PetscScalar zero = 0.0;
247: PetscScalar *z = 0;
248: PetscScalar *x,*xb;
249: const MatScalar *v1, *v2, *v3, *v4, *v5;
250: PetscScalar x1, x2, x3, x4, x5;
251: PetscScalar xr1, xr2, xr3, xr4, xr5;
252: PetscScalar sum1, sum2, sum3, sum4, sum5;
253: PetscErrorCode ierr;
254: PetscInt mbs =a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
255: PetscInt nonzerorow=0;
256: PetscInt slen;
259: VecSet(zz,zero);
260: VecGetArray(xx,&x);
261: VecGetArray(zz,&z);
263: slen = 5*(ai[mbs]-ai[0]);
265: v1 = sbstrm->as;
266: v2 = v1 + slen;
267: v3 = v2 + slen;
268: v4 = v3 + slen;
269: v5 = v4 + slen;
271: xb = x;
273: for (i=0; i<mbs; i++) {
274: n = ai[i+1] - ai[i];
276: nonzerorow += (n>0);
278: x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4];
280: sum1 = z[5*i];
281: sum2 = z[5*i+1];
282: sum3 = z[5*i+2];
283: sum4 = z[5*i+3];
284: sum5 = z[5*i+4];
285: jmin = 0;
286: ib = aj + ai[i];
287: if (*ib == i) { /* (diag of A)*x */
288: sum1 += v1[0]*x1 + v1[1]*x2 + v1[2]*x3 + v1[3]*x4 + v1[4]*x5;
289: sum2 += v1[1]*x1 + v2[1]*x2 + v2[2]*x3 + v2[3]*x4 + v2[4]*x5;
290: sum3 += v1[2]*x1 + v2[2]*x2 + v3[2]*x3 + v3[3]*x4 + v3[4]*x5;
291: sum4 += v1[3]*x1 + v2[3]*x2 + v3[3]*x3 + v4[3]*x4 + v4[4]*x5;
292: sum5 += v1[4]*x1 + v2[4]*x2 + v3[4]*x3 + v4[4]*x4 + v5[4]*x5;
294: v1 += 5; v2 += 5; v3 += 5; v4 += 5; v5 += 5;
295: jmin++;
296: }
298: PetscPrefetchBlock(ib+jmin+n,n,0,PETSC_PREFETCH_HINT_NTA); /* Indices for the next row (assumes same size as this one) */
299: PetscPrefetchBlock(v1+5*n,5*n,0,PETSC_PREFETCH_HINT_NTA); /* Entries for the next row */
300: PetscPrefetchBlock(v2+5*n,5*n,0,PETSC_PREFETCH_HINT_NTA); /* Entries for the next row */
301: PetscPrefetchBlock(v3+5*n,5*n,0,PETSC_PREFETCH_HINT_NTA); /* Entries for the next row */
302: PetscPrefetchBlock(v4+5*n,5*n,0,PETSC_PREFETCH_HINT_NTA); /* Entries for the next row */
303: PetscPrefetchBlock(v5+5*n,5*n,0,PETSC_PREFETCH_HINT_NTA); /* Entries for the next row */
305: for (j=jmin; j<n; j++) {
306: cval = ib[j]*5;
307: z[cval] += v1[0]*x1 + v2[0]*x2 + v3[0]*x3 + v4[0]*x4 + v5[0]*x5;
308: z[cval+1] += v1[1]*x1 + v2[1]*x2 + v3[1]*x3 + v4[1]*x4 + v5[1]*x5;
309: z[cval+2] += v1[2]*x1 + v2[2]*x2 + v3[2]*x3 + v4[2]*x4 + v5[2]*x5;
310: z[cval+3] += v1[3]*x1 + v2[3]*x2 + v3[3]*x3 + v4[3]*x4 + v5[3]*x5;
311: z[cval+4] += v1[4]*x1 + v2[4]*x2 + v3[4]*x3 + v4[4]*x4 + v5[4]*x5;
313: xr1 = x[cval]; xr2 = x[cval+1]; xr3 = x[cval+2]; xr4 = x[cval+3]; xr5 = x[cval+4];
315: sum1 += v1[0]*xr1 + v1[1]*xr2 + v1[2]*xr3 + v1[3]*xr4 + v1[4]*xr5;
316: sum2 += v2[0]*xr1 + v2[1]*xr2 + v2[2]*xr3 + v2[3]*xr4 + v2[4]*xr5;
317: sum3 += v3[0]*xr1 + v3[1]*xr2 + v3[2]*xr3 + v3[3]*xr4 + v3[4]*xr5;
318: sum4 += v4[0]*xr1 + v4[1]*xr2 + v4[2]*xr3 + v4[3]*xr4 + v4[4]*xr5;
319: sum5 += v5[0]*xr1 + v5[1]*xr2 + v5[2]*xr3 + v5[3]*xr4 + v5[4]*xr5;
321: v1 += 5; v2 += 5; v3 += 5; v4 += 5; v5 += 5;
322: }
323: z[5*i] = sum1; z[5*i+1] = sum2; z[5*i+2] = sum3; z[5*i+3] = sum4; z[5*i+4] = sum5;
324: xb += 5;
325: }
326: VecRestoreArray(xx,&x);
327: VecRestoreArray(zz,&z);
328: PetscLogFlops(50.0*(a->nz*2.0 - nonzerorow) - nonzerorow);
329: return(0);
330: }
331: /*=========================================================*/
334: PetscErrorCode MatMultAdd_SeqSBSTRM_4(Mat A,Vec xx,Vec yy,Vec zz)
335: {
337: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
338: Mat_SeqSBSTRM *sbstrm = (Mat_SeqSBSTRM*)A->spptr;
339: PetscScalar sum1,sum2,sum3,sum4,x1,x2,x3,x4, xr1,xr2,xr3,xr4;
340: PetscScalar *x,*z, *xb;
341: MatScalar *v1, *v2, *v3, *v4;
343: PetscInt mbs =a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
344: PetscInt nonzerorow=0;
345: PetscInt slen;
348: VecCopy(yy,zz);
349: VecGetArray(xx,&x);
350: VecGetArray(zz,&z);
352: slen = 4*(ai[mbs]-ai[0]);
353: v1 = sbstrm->as;
354: v2 = v1 + slen;
355: v3 = v2 + slen;
356: v4 = v3 + slen;
358: xb = x;
360: for (i=0; i<mbs; i++) {
361: n = ai[i+1] - ai[i];
362: x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; xb += 4;
364: sum1 = z[4*i];
365: sum2 = z[4*i + 1];
366: sum3 = z[4*i + 2];
367: sum4 = z[4*i + 3];
369: nonzerorow += (n>0);
371: jmin = 0;
372: ib = aj + ai[i];
373: if (*ib == i) { /* (diag of A)*x */
374: sum1 += v1[0]*x1 + v1[1]*x2 + v1[2]*x3 + v1[3]*x4;
375: sum2 += v1[1]*x1 + v2[1]*x2 + v2[2]*x3 + v2[3]*x4;
376: sum3 += v1[2]*x1 + v2[2]*x2 + v3[2]*x3 + v3[3]*x4;
377: sum4 += v1[3]*x1 + v2[3]*x2 + v3[3]*x3 + v4[3]*x4;
379: v1 += 4; v2 += 4; v3 += 4; v4 += 4;
380: jmin++;
381: }
383: for (j=jmin; j<n; j++) {
384: cval = ib[j]*4;
385: z[cval] += v1[0]*x1 + v2[0]*x2 + v3[0]*x3 + v4[0]*x4;
386: z[cval+1] += v1[1]*x1 + v2[1]*x2 + v3[1]*x3 + v4[1]*x4;
387: z[cval+2] += v1[2]*x1 + v2[2]*x2 + v3[2]*x3 + v4[2]*x4;
388: z[cval+3] += v1[3]*x1 + v2[3]*x2 + v3[3]*x3 + v4[3]*x4;
390: xr1 = x[cval]; xr2 = x[cval+1]; xr3 = x[cval+2]; xr4 = x[cval+3];
392: sum1 += v1[0]*xr1 + v1[1]*xr2 + v1[2]*xr3 + v1[3]*xr4;
393: sum2 += v2[0]*xr1 + v2[1]*xr2 + v2[2]*xr3 + v2[3]*xr4;
394: sum3 += v3[0]*xr1 + v3[1]*xr2 + v3[2]*xr3 + v3[3]*xr4;
395: sum4 += v4[0]*xr1 + v4[1]*xr2 + v4[2]*xr3 + v4[3]*xr4;
397: v1 += 4; v2 += 4; v3 += 4; v4 += 4;
398: }
399: z[4*i] = sum1;
400: z[4*i+1] = sum2;
401: z[4*i+2] = sum3;
402: z[4*i+3] = sum4;
404: }
405: VecRestoreArray(xx,&x);
406: VecRestoreArray(zz,&z);
407: PetscLogFlops(32.0*(a->nz*2.0 - nonzerorow));
408: return(0);
409: }
411: /*=========================================================*/
414: PetscErrorCode MatMultAdd_SeqSBSTRM_5(Mat A,Vec xx,Vec yy,Vec zz)
415: {
416: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
417: Mat_SeqSBSTRM *sbstrm = (Mat_SeqSBSTRM*)A->spptr;
418: PetscScalar *x,*xb, *z;
419: MatScalar *v1, *v2, *v3, *v4, *v5;
420: PetscScalar x1, x2, x3, x4, x5;
421: PetscScalar xr1, xr2, xr3, xr4, xr5;
422: PetscScalar sum1, sum2, sum3, sum4, sum5;
424: PetscInt mbs =a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
425: PetscInt nonzerorow=0;
426: PetscInt slen;
429: VecCopy(yy,zz);
430: VecGetArray(xx,&x);
431: VecGetArray(zz,&z);
434: slen = 5*(ai[mbs]-ai[0]);
435: v1 = sbstrm->as;
436: v2 = v1 + slen;
437: v3 = v2 + slen;
438: v4 = v3 + slen;
439: v5 = v4 + slen;
441: xb = x;
443: for (i=0; i<mbs; i++) {
444: n = ai[i+1] - ai[i];
445: x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; xb += 5;
447: sum1 = z[5*i];
448: sum2 = z[5*i+1];
449: sum3 = z[5*i+2];
450: sum4 = z[5*i+3];
451: sum5 = z[5*i+4];
453: nonzerorow += (n>0);
455: jmin = 0;
456: ib = aj + ai[i];
457: if (*ib == i) { /* (diag of A)*x, only upper triangular part is used */
458: sum1 += v1[0]*x1 + v1[1]*x2 + v1[2]*x3 + v1[3]*x4 + v1[4]*x5;
459: sum2 += v1[1]*x1 + v2[1]*x2 + v2[2]*x3 + v2[3]*x4 + v2[4]*x5;
460: sum3 += v1[2]*x1 + v2[2]*x2 + v3[2]*x3 + v3[3]*x4 + v3[4]*x5;
461: sum4 += v1[3]*x1 + v2[3]*x2 + v3[3]*x3 + v4[3]*x4 + v4[4]*x5;
462: sum5 += v1[4]*x1 + v2[4]*x2 + v3[4]*x3 + v4[4]*x4 + v5[4]*x5;
464: v1 += 5; v2 += 5; v3 += 5; v4 += 5; v5 += 5;
465: jmin++;
466: }
468: for (j=jmin; j<n; j++) {
469: cval = ib[j]*5;
470: z[cval] += v1[0]*x1 + v2[0]*x2 + v3[0]*x3 + v4[0]*x4 + v5[0]*x5;
471: z[cval+1] += v1[1]*x1 + v2[1]*x2 + v3[1]*x3 + v4[1]*x4 + v5[1]*x5;
472: z[cval+2] += v1[2]*x1 + v2[2]*x2 + v3[2]*x3 + v4[2]*x4 + v5[2]*x5;
473: z[cval+3] += v1[3]*x1 + v2[3]*x2 + v3[3]*x3 + v4[3]*x4 + v5[3]*x5;
474: z[cval+4] += v1[4]*x1 + v2[4]*x2 + v3[4]*x3 + v4[4]*x4 + v5[4]*x5;
476: xr1 = x[cval]; xr2 = x[cval+1]; xr3 = x[cval+2]; xr4 = x[cval+3]; xr5 = x[cval+4];
478: sum1 += v1[0]*xr1 + v1[1]*xr2 + v1[2]*xr3 + v1[3]*xr4 + v1[4]*xr5;
479: sum2 += v2[0]*xr1 + v2[1]*xr2 + v2[2]*xr3 + v2[3]*xr4 + v2[4]*xr5;
480: sum3 += v3[0]*xr1 + v3[1]*xr2 + v3[2]*xr3 + v3[3]*xr4 + v3[4]*xr5;
481: sum4 += v4[0]*xr1 + v4[1]*xr2 + v4[2]*xr3 + v4[3]*xr4 + v4[4]*xr5;
482: sum5 += v5[0]*xr1 + v5[1]*xr2 + v5[2]*xr3 + v5[3]*xr4 + v5[4]*xr5;
484: v1 += 5; v2 += 5; v3 += 5; v4 += 5; v5 += 5;
485: }
486: z[5*i] = sum1;
487: z[5*i+1] = sum2;
488: z[5*i+2] = sum3;
489: z[5*i+3] = sum4;
490: z[5*i+4] = sum5;
491: }
492: VecRestoreArray(xx,&x);
493: VecRestoreArray(zz,&z);
494: PetscLogFlops(50.0*(a->nz*2.0 - nonzerorow));
495: return(0);
496: }
498: /*=========================================================*/
501: PetscErrorCode SeqSBSTRM_create_sbstrm(Mat A)
502: {
503: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*) A->data;
504: Mat_SeqSBSTRM *sbstrm = (Mat_SeqSBSTRM*) A->spptr;
505: PetscInt MROW = a->mbs, bs = A->rmap->bs;
506: PetscInt *ai = a->i;
507: PetscInt i,j,k;
508: MatScalar *aa = a->a;
510: PetscInt bs2, rbs, cbs, blen, slen;
511: PetscScalar **asp;
514: sbstrm->rbs = sbstrm->cbs = bs;
516: rbs = cbs = bs;
517: bs2 = rbs*cbs;
518: blen = ai[MROW]-ai[0];
519: slen = blen*cbs;
521: PetscMalloc1(bs2*blen, &sbstrm->as);
523: PetscMalloc1(rbs, &asp);
525: for (i=0; i<rbs; i++) asp[i] = sbstrm->as + i*slen;
527: for (k=0; k<blen; k++) {
528: for (j=0; j<cbs; j++) {
529: for (i=0; i<rbs; i++) asp[i][k*cbs+j] = aa[k*bs2+j*rbs+i];
530: }
531: }
533: /* PetscFree(a->a); */
535: switch (bs) {
536: case 4:
537: A->ops->mult = MatMult_SeqSBSTRM_4;
538: A->ops->multadd = MatMultAdd_SeqSBSTRM_4;
539: /** A->ops->sor = MatSOR_SeqSBSTRM_4; **/
540: break;
541: case 5:
542: A->ops->mult = MatMult_SeqSBSTRM_5;
543: A->ops->multadd = MatMultAdd_SeqSBSTRM_5;
544: /** A->ops->sor = MatSOR_SeqSBSTRM_5; **/
545: break;
546: default:
547: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"not supported for block size %D",bs);
548: }
549: return(0);
550: }
551: /*=========================================================*/
555: PetscErrorCode MatSeqSBSTRMTransform(Mat A)
556: {
558: SeqSBSTRM_convert_sbstrm(A);
559: return(0);
560: }