Actual source code: sbstream.c
petsc-3.3-p7 2013-05-11
1: #define PETSCMAT_DLL
3: #include ../src/mat/impls/sbaij/seq/sbaij.h
4: #include ../src/mat/impls/sbaij/seq/sbstream/sbstream.h
6: #if 0
7: extern PetscErrorCode MatFactorGetSolverPackage_seqsbaij_sbstrm(Mat, const MatSolverPackage *);
8: extern PetscErrorCode MatICCFactorSymbolic_sbstrm(Mat,Mat, IS, const MatFactorInfo *);
9: extern PetscErrorCode MatCholeskyFactorSymbolic_sbstrm(Mat, Mat, IS, const MatFactorInfo *);
10: extern PetscErrorCode MatCholeskyFactorNumeric_sbstrm (Mat, Mat, const MatFactorInfo *);
11: #endif
13: extern PetscErrorCode MatAssemblyEnd_SeqSBAIJ(Mat,MatAssemblyType);
18: PetscErrorCode MatDestroy_SeqSBSTRM(Mat A)
19: {
21: Mat_SeqSBSTRM *sbstrm = (Mat_SeqSBSTRM *) A->spptr;
23: if (sbstrm) {
24: PetscFree3(sbstrm->as, sbstrm->asi, sbstrm->asj);
25: }
26: PetscObjectChangeTypeName( (PetscObject)A, MATSEQSBAIJ);
27: MatDestroy_SeqSBAIJ(A);
28: return(0);
29: }
30: /*=========================================================*/
31: PetscErrorCode MatDuplicate_SeqSBSTRM(Mat A, MatDuplicateOption op, Mat *M)
32: {
34: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot duplicate STRM matrices yet");
35: return(0);
36: }
37: /*=========================================================*/
41: PetscErrorCode SeqSBSTRM_convert_sbstrm(Mat A)
42: {
43: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *) A->data;
44: Mat_SeqSBSTRM *sbstrm = (Mat_SeqSBSTRM*) A->spptr;
45: PetscInt m = a->mbs, bs = A->rmap->bs;
46: PetscInt *ai = a->i;
47: PetscInt i,j,ib,jb;
48: MatScalar *aa = a->a;
50: PetscInt bs2, rbs, cbs, blen, slen;
51: PetscScalar **asp;
54: sbstrm->rbs = bs;
55: sbstrm->cbs = bs;
58: rbs = cbs = bs;
59: bs2 = rbs*cbs;
60: blen = ai[m]-ai[0];
61: slen = blen*cbs;
63: PetscFree(sbstrm->as);
64: PetscMalloc(bs2*blen*sizeof(MatScalar), &sbstrm->as);
66: PetscMalloc(rbs*sizeof(MatScalar *), &asp);
67:
68: for(i=0;i<rbs;i++) asp[i] = sbstrm->as + i*slen;
70: for(j=0;j<blen;j++) {
71: for (jb=0; jb<cbs; jb++){
72: for (ib=0; ib<rbs; ib++){
73: asp[ib][j*cbs+jb] = aa[j*bs2+jb*rbs+ib];
74: }}
75: }
77: PetscFree(asp);
78: /*
79: switch (bs){
80: case 4:
81: A->ops->solve = MatSolve_SeqSBSTRM_4;
82: break;
83: case 5:
84: A->ops->solve = MatSolve_SeqSBSTRM_5;
85: break;
86: default:
87: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"not supported for block size %D",bs);
88: }
89: */
90: return(0);
91: }
92: /*=========================================================*/
93: extern PetscErrorCode SeqSBSTRM_create_sbstrm(Mat);
94: /*=========================================================*/
97: PetscErrorCode MatAssemblyEnd_SeqSBSTRM(Mat A, MatAssemblyType mode)
98: {
102: MatAssemblyEnd_SeqSBAIJ(A, mode);
103: if (mode == MAT_FLUSH_ASSEMBLY) return(0);
105: SeqSBSTRM_create_sbstrm(A);
106: return(0);
107: }
108: /*=========================================================*/
109: EXTERN_C_BEGIN
112: PetscErrorCode MatConvert_SeqSBAIJ_SeqSBSTRM(Mat A,const MatType type,MatReuse reuse,Mat *newmat)
113: {
116: Mat B = *newmat;
117: Mat_SeqSBSTRM *sbstrm;
118: /* PetscInt bs = A->rmap->bs; */
121: if (reuse == MAT_INITIAL_MATRIX) {
122: MatDuplicate(A,MAT_COPY_VALUES,&B);
123: }
124:
126: PetscNewLog(B,Mat_SeqSBSTRM,&sbstrm);
127: B->spptr = (void *) sbstrm;
129: /* Set function pointers for methods that we inherit from BAIJ but override. */
130: B->ops->duplicate = MatDuplicate_SeqSBSTRM;
131: B->ops->assemblyend = MatAssemblyEnd_SeqSBSTRM;
132: B->ops->destroy = MatDestroy_SeqSBSTRM;
133: /*B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_sbstrm;
134: B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_sbstrm; */
136: /* If A has already been assembled, compute the permutation. */
137: if (A->assembled) {
138: SeqSBSTRM_create_sbstrm(B);
139: }
140: PetscObjectChangeTypeName((PetscObject)B,MATSEQSBSTRM);
141: *newmat = B;
142: return(0);
143: }
144: EXTERN_C_END
146: /*=========================================================*/
149: PetscErrorCode MatCreateSeqSBSTRM(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
150: {
153: MatCreate(comm,A);
154: MatSetSizes(*A,m,n,m,n);
155: MatSetType(*A,MATSEQSBSTRM);
156: MatSeqSBAIJSetPreallocation_SeqSBAIJ(*A,bs,nz,(PetscInt*)nnz);
157: (*A)->rmap->bs = bs;
158: return(0);
159: }
160: /*=========================================================*/
161: EXTERN_C_BEGIN
164: PetscErrorCode MatCreate_SeqSBSTRM(Mat A)
165: {
169: MatSetType(A,MATSEQSBAIJ);
170: MatConvert_SeqSBAIJ_SeqSBSTRM(A,MATSEQSBSTRM,MAT_REUSE_MATRIX,&A);
172: PetscObjectComposeFunctionDynamic((PetscObject)A,"MatConvert_seqisbaij_seqsbstrm_C",
173: "MatConvert_SeqSBAIJ_SeqSBSTRM",
174: MatConvert_SeqSBAIJ_SeqSBSTRM);
175: return(0);
176: }
177: EXTERN_C_END
178: /*=========================================================*/
179: /*=========================================================*/
182: PetscErrorCode MatMult_SeqSBSTRM_4(Mat A,Vec xx,Vec zz)
183: {
184: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
185: Mat_SeqSBSTRM *sbstrm = (Mat_SeqSBSTRM *)A->spptr;
186: PetscScalar zero = 0.0;
187: PetscScalar sum1,sum2,sum3,sum4,x1,x2,x3,x4, xr1,xr2,xr3,xr4;
188: PetscScalar *x, *xb, *z;
189: MatScalar *v1, *v2, *v3, *v4;
190: PetscErrorCode ierr;
191: PetscInt mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
192: PetscInt nonzerorow=0;
193: PetscInt slen;
196: VecSet(zz,zero);
197: VecGetArray(xx,&x);
198: VecGetArray(zz,&z);
200: slen = 4*(ai[mbs]-ai[0]);
201: v1 = sbstrm->as;
202: v2 = v1 + slen;
203: v3 = v2 + slen;
204: v4 = v3 + slen;
206: xb = x;
208: for (i=0; i<mbs; i++) {
209: n = ai[i+1] - ai[i];
210: x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
211: sum1 = z[4*i]; sum2 = z[4*i+1]; sum3 = z[4*i+2]; sum4 = z[4*i+3];
212: nonzerorow += (n>0);
213: jmin = 0;
214: ib = aj + ai[i];
215: if (*ib == i){ /* (diag of A)*x */
216: sum1 += v1[0]*x1 + v1[1]*x2 + v1[2]*x3 + v1[3]*x4;
217: sum2 += v1[1]*x1 + v2[1]*x2 + v2[2]*x3 + v2[3]*x4;
218: sum3 += v1[2]*x1 + v2[2]*x2 + v3[2]*x3 + v3[3]*x4;
219: sum4 += v1[3]*x1 + v2[3]*x2 + v3[3]*x3 + v4[3]*x4;
220: v1 += 4; v2 += 4; v3 += 4; v4 += 4;
221: jmin++;
222: }
223:
224: for (j=jmin; j<n; j++) {
225: cval = ib[j]*4;
226: z[cval] += v1[0]*x1 + v2[0]*x2 + v3[0]*x3 + v4[0]*x4;
227: z[cval+1] += v1[1]*x1 + v2[1]*x2 + v3[1]*x3 + v4[1]*x4;
228: z[cval+2] += v1[2]*x1 + v2[2]*x2 + v3[2]*x3 + v4[2]*x4;
229: z[cval+3] += v1[3]*x1 + v2[3]*x2 + v3[3]*x3 + v4[3]*x4;
231: xr1 = x[cval]; xr2 = x[cval+1]; xr3 = x[cval+2]; xr4 = x[cval+3];
232: sum1 += v1[0]*xr1 + v1[1]*xr2 + v1[2]*xr3 + v1[3]*xr4;
233: sum2 += v2[0]*xr1 + v2[1]*xr2 + v2[2]*xr3 + v2[3]*xr4;
234: sum3 += v3[0]*xr1 + v3[1]*xr2 + v3[2]*xr3 + v3[3]*xr4;
235: sum4 += v4[0]*xr1 + v4[1]*xr2 + v4[2]*xr3 + v4[3]*xr4;
236: v1 += 4; v2 += 4; v3 += 4; v4 += 4;
237: }
238: z[4*i] = sum1;
239: z[4*i+1] = sum2;
240: z[4*i+2] = sum3;
241: z[4*i+3] = sum4;
242: xb +=4;
243:
244: }
245: VecRestoreArray(xx,&x);
246: VecRestoreArray(zz,&z);
247: PetscLogFlops(32.0*(a->nz*2.0 - nonzerorow) - nonzerorow);
248: return(0);
249: }
250: /*=========================================================*/
253: PetscErrorCode MatMult_SeqSBSTRM_5(Mat A,Vec xx,Vec zz)
254: {
255: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
256: Mat_SeqSBSTRM *sbstrm = (Mat_SeqSBSTRM *)A->spptr;
257: PetscScalar zero = 0.0;
258: PetscScalar *z = 0;
259: PetscScalar *x,*xb;
260: const MatScalar *v1, *v2, *v3, *v4, *v5;
261: PetscScalar x1, x2, x3, x4, x5;
262: PetscScalar xr1, xr2, xr3, xr4, xr5;
263: PetscScalar sum1, sum2, sum3, sum4, sum5;
264: PetscErrorCode ierr;
265: PetscInt mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
266: PetscInt nonzerorow=0;
267: PetscInt slen;
271: VecSet(zz,zero);
272: VecGetArray(xx,&x);
273: VecGetArray(zz,&z);
275: slen = 5*(ai[mbs]-ai[0]);
277: v1 = sbstrm->as;
278: v2 = v1 + slen;
279: v3 = v2 + slen;
280: v4 = v3 + slen;
281: v5 = v4 + slen;
283: xb = x;
285: for (i=0; i<mbs; i++) {
286: n = ai[i+1] - ai[i];
287: nonzerorow += (n>0);
289: x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4];
290: sum1 = z[5*i];
291: sum2 = z[5*i+1];
292: sum3 = z[5*i+2];
293: sum4 = z[5*i+3];
294: sum5 = z[5*i+4];
295: jmin = 0;
296: ib = aj + ai[i];
297: if (*ib == i){ /* (diag of A)*x */
298: sum1 += v1[0]*x1 + v1[1]*x2 + v1[2]*x3 + v1[3]*x4 + v1[4]*x5;
299: sum2 += v1[1]*x1 + v2[1]*x2 + v2[2]*x3 + v2[3]*x4 + v2[4]*x5;
300: sum3 += v1[2]*x1 + v2[2]*x2 + v3[2]*x3 + v3[3]*x4 + v3[4]*x5;
301: sum4 += v1[3]*x1 + v2[3]*x2 + v3[3]*x3 + v4[3]*x4 + v4[4]*x5;
302: sum5 += v1[4]*x1 + v2[4]*x2 + v3[4]*x3 + v4[4]*x4 + v5[4]*x5;
303: v1 += 5; v2 += 5; v3 += 5; v4 += 5; v5 += 5;
304: jmin++;
305: }
306:
307: PetscPrefetchBlock(ib+jmin+n,n,0,PETSC_PREFETCH_HINT_NTA); /* Indices for the next row (assumes same size as this one) */
308: PetscPrefetchBlock(v1+5*n,5*n,0,PETSC_PREFETCH_HINT_NTA); /* Entries for the next row */
309: PetscPrefetchBlock(v2+5*n,5*n,0,PETSC_PREFETCH_HINT_NTA); /* Entries for the next row */
310: PetscPrefetchBlock(v3+5*n,5*n,0,PETSC_PREFETCH_HINT_NTA); /* Entries for the next row */
311: PetscPrefetchBlock(v4+5*n,5*n,0,PETSC_PREFETCH_HINT_NTA); /* Entries for the next row */
312: PetscPrefetchBlock(v5+5*n,5*n,0,PETSC_PREFETCH_HINT_NTA); /* Entries for the next row */
314: for (j=jmin; j<n; j++) {
315: cval = ib[j]*5;
316: z[cval] += v1[0]*x1 + v2[0]*x2 + v3[0]*x3 + v4[0]*x4 + v5[0]*x5;
317: z[cval+1] += v1[1]*x1 + v2[1]*x2 + v3[1]*x3 + v4[1]*x4 + v5[1]*x5;
318: z[cval+2] += v1[2]*x1 + v2[2]*x2 + v3[2]*x3 + v4[2]*x4 + v5[2]*x5;
319: z[cval+3] += v1[3]*x1 + v2[3]*x2 + v3[3]*x3 + v4[3]*x4 + v5[3]*x5;
320: z[cval+4] += v1[4]*x1 + v2[4]*x2 + v3[4]*x3 + v4[4]*x4 + v5[4]*x5;
322: xr1 = x[cval]; xr2 = x[cval+1]; xr3 = x[cval+2]; xr4 = x[cval+3]; xr5 = x[cval+4];
323: sum1 += v1[0]*xr1 + v1[1]*xr2 + v1[2]*xr3 + v1[3]*xr4 + v1[4]*xr5;
324: sum2 += v2[0]*xr1 + v2[1]*xr2 + v2[2]*xr3 + v2[3]*xr4 + v2[4]*xr5;
325: sum3 += v3[0]*xr1 + v3[1]*xr2 + v3[2]*xr3 + v3[3]*xr4 + v3[4]*xr5;
326: sum4 += v4[0]*xr1 + v4[1]*xr2 + v4[2]*xr3 + v4[3]*xr4 + v4[4]*xr5;
327: sum5 += v5[0]*xr1 + v5[1]*xr2 + v5[2]*xr3 + v5[3]*xr4 + v5[4]*xr5;
328: v1 += 5; v2 += 5; v3 += 5; v4 += 5; v5 += 5;
329: }
330: z[5*i] = sum1; z[5*i+1] = sum2; z[5*i+2] = sum3; z[5*i+3] = sum4; z[5*i+4] = sum5;
331: xb += 5;
332: }
333: VecRestoreArray(xx,&x);
334: VecRestoreArray(zz,&z);
335: PetscLogFlops(50.0*(a->nz*2.0 - nonzerorow) - nonzerorow);
336: return(0);
337: }
338: /*=========================================================*/
341: PetscErrorCode MatMultAdd_SeqSBSTRM_4(Mat A,Vec xx,Vec yy,Vec zz)
342: {
344: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
345: Mat_SeqSBSTRM *sbstrm = (Mat_SeqSBSTRM *)A->spptr;
346: PetscScalar sum1,sum2,sum3,sum4,x1,x2,x3,x4, xr1,xr2,xr3,xr4;
347: PetscScalar *x,*z, *xb;
348: MatScalar *v1, *v2, *v3, *v4;
349: PetscErrorCode ierr;
350: PetscInt mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
351: PetscInt nonzerorow=0;
352: PetscInt slen;
355: VecCopy(yy,zz);
356: VecGetArray(xx,&x);
357: VecGetArray(zz,&z);
359: slen = 4*(ai[mbs]-ai[0]);
360: v1 = sbstrm->as;
361: v2 = v1 + slen;
362: v3 = v2 + slen;
363: v4 = v3 + slen;
365: xb = x;
367: for (i=0; i<mbs; i++) {
368: n = ai[i+1] - ai[i];
369: x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; xb += 4;
370: sum1 = z[4*i];
371: sum2 = z[4*i + 1];
372: sum3 = z[4*i + 2];
373: sum4 = z[4*i + 3];
374: nonzerorow += (n>0);
375: jmin = 0;
376: ib = aj + ai[i];
377: if (*ib == i){ /* (diag of A)*x */
378: sum1 += v1[0]*x1 + v1[1]*x2 + v1[2]*x3 + v1[3]*x4;
379: sum2 += v1[1]*x1 + v2[1]*x2 + v2[2]*x3 + v2[3]*x4;
380: sum3 += v1[2]*x1 + v2[2]*x2 + v3[2]*x3 + v3[3]*x4;
381: sum4 += v1[3]*x1 + v2[3]*x2 + v3[3]*x3 + v4[3]*x4;
382: v1 += 4; v2 += 4; v3 += 4; v4 += 4;
383: jmin++;
384: }
385:
386: for (j=jmin; j<n; j++) {
387: cval = ib[j]*4;
388: z[cval] += v1[0]*x1 + v2[0]*x2 + v3[0]*x3 + v4[0]*x4;
389: z[cval+1] += v1[1]*x1 + v2[1]*x2 + v3[1]*x3 + v4[1]*x4;
390: z[cval+2] += v1[2]*x1 + v2[2]*x2 + v3[2]*x3 + v4[2]*x4;
391: z[cval+3] += v1[3]*x1 + v2[3]*x2 + v3[3]*x3 + v4[3]*x4;
393: xr1 = x[cval]; xr2 = x[cval+1]; xr3 = x[cval+2]; xr4 = x[cval+3];
394: sum1 += v1[0]*xr1 + v1[1]*xr2 + v1[2]*xr3 + v1[3]*xr4;
395: sum2 += v2[0]*xr1 + v2[1]*xr2 + v2[2]*xr3 + v2[3]*xr4;
396: sum3 += v3[0]*xr1 + v3[1]*xr2 + v3[2]*xr3 + v3[3]*xr4;
397: sum4 += v4[0]*xr1 + v4[1]*xr2 + v4[2]*xr3 + v4[3]*xr4;
398: v1 += 4; v2 += 4; v3 += 4; v4 += 4;
399: }
400: z[4*i] = sum1;
401: z[4*i+1] = sum2;
402: z[4*i+2] = sum3;
403: z[4*i+3] = sum4;
404:
405: }
406: VecRestoreArray(xx,&x);
407: VecRestoreArray(zz,&z);
408: PetscLogFlops(32.0*(a->nz*2.0 - nonzerorow));
409: return(0);
410: }
412: /*=========================================================*/
415: PetscErrorCode MatMultAdd_SeqSBSTRM_5(Mat A,Vec xx,Vec yy,Vec zz)
416: {
417: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
418: Mat_SeqSBSTRM *sbstrm = (Mat_SeqSBSTRM *)A->spptr;
419: PetscScalar *x,*xb, *z;
420: MatScalar *v1, *v2, *v3, *v4, *v5;
421: PetscScalar x1, x2, x3, x4, x5;
422: PetscScalar xr1, xr2, xr3, xr4, xr5;
423: PetscScalar sum1, sum2, sum3, sum4, sum5;
424: PetscErrorCode ierr;
425: PetscInt mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
426: PetscInt nonzerorow=0;
427: PetscInt slen;
430: VecCopy(yy,zz);
431: VecGetArray(xx,&x);
432: VecGetArray(zz,&z);
435: slen = 5*(ai[mbs]-ai[0]);
436: v1 = sbstrm->as;
437: v2 = v1 + slen;
438: v3 = v2 + slen;
439: v4 = v3 + slen;
440: v5 = v4 + slen;
442: xb = x;
444: for (i=0; i<mbs; i++) {
445: n = ai[i+1] - ai[i];
446: x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; xb += 5;
447: sum1 = z[5*i];
448: sum2 = z[5*i+1];
449: sum3 = z[5*i+2];
450: sum4 = z[5*i+3];
451: sum5 = z[5*i+4];
452: nonzerorow += (n>0);
453: jmin = 0;
454: ib = aj + ai[i];
455: if (*ib == i){ /* (diag of A)*x, only upper triangular part is used */
456: sum1 += v1[0]*x1 + v1[1]*x2 + v1[2]*x3 + v1[3]*x4 + v1[4]*x5;
457: sum2 += v1[1]*x1 + v2[1]*x2 + v2[2]*x3 + v2[3]*x4 + v2[4]*x5;
458: sum3 += v1[2]*x1 + v2[2]*x2 + v3[2]*x3 + v3[3]*x4 + v3[4]*x5;
459: sum4 += v1[3]*x1 + v2[3]*x2 + v3[3]*x3 + v4[3]*x4 + v4[4]*x5;
460: sum5 += v1[4]*x1 + v2[4]*x2 + v3[4]*x3 + v4[4]*x4 + v5[4]*x5;
461: v1 += 5; v2 += 5; v3 += 5; v4 += 5; v5 += 5;
462: jmin++;
463: }
464:
465: for (j=jmin; j<n; j++) {
466: cval = ib[j]*5;
467: z[cval] += v1[0]*x1 + v2[0]*x2 + v3[0]*x3 + v4[0]*x4 + v5[0]*x5;
468: z[cval+1] += v1[1]*x1 + v2[1]*x2 + v3[1]*x3 + v4[1]*x4 + v5[1]*x5;
469: z[cval+2] += v1[2]*x1 + v2[2]*x2 + v3[2]*x3 + v4[2]*x4 + v5[2]*x5;
470: z[cval+3] += v1[3]*x1 + v2[3]*x2 + v3[3]*x3 + v4[3]*x4 + v5[3]*x5;
471: z[cval+4] += v1[4]*x1 + v2[4]*x2 + v3[4]*x3 + v4[4]*x4 + v5[4]*x5;
473: xr1 = x[cval]; xr2 = x[cval+1]; xr3 = x[cval+2]; xr4 = x[cval+3]; xr5 = x[cval+4];
474: sum1 += v1[0]*xr1 + v1[1]*xr2 + v1[2]*xr3 + v1[3]*xr4 + v1[4]*xr5;
475: sum2 += v2[0]*xr1 + v2[1]*xr2 + v2[2]*xr3 + v2[3]*xr4 + v2[4]*xr5;
476: sum3 += v3[0]*xr1 + v3[1]*xr2 + v3[2]*xr3 + v3[3]*xr4 + v3[4]*xr5;
477: sum4 += v4[0]*xr1 + v4[1]*xr2 + v4[2]*xr3 + v4[3]*xr4 + v4[4]*xr5;
478: sum5 += v5[0]*xr1 + v5[1]*xr2 + v5[2]*xr3 + v5[3]*xr4 + v5[4]*xr5;
479: v1 += 5; v2 += 5; v3 += 5; v4 += 5; v5 += 5;
480: }
481: z[5*i] = sum1;
482: z[5*i+1] = sum2;
483: z[5*i+2] = sum3;
484: z[5*i+3] = sum4;
485: z[5*i+4] = sum5;
486: }
487: VecRestoreArray(xx,&x);
488: VecRestoreArray(zz,&z);
489: PetscLogFlops(50.0*(a->nz*2.0 - nonzerorow));
490: return(0);
491: }
493: /*=========================================================*/
496: PetscErrorCode SeqSBSTRM_create_sbstrm(Mat A)
497: {
498: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *) A->data;
499: Mat_SeqSBSTRM *sbstrm = (Mat_SeqSBSTRM*) A->spptr;
500: PetscInt MROW = a->mbs, bs = A->rmap->bs;
501: PetscInt *ai = a->i;
502: PetscInt i,j,k;
503: MatScalar *aa = a->a;
505: PetscInt bs2, rbs, cbs, blen, slen;
506: PetscScalar **asp;
509: sbstrm->rbs = sbstrm->cbs = bs;
511: rbs = cbs = bs;
512: bs2 = rbs*cbs;
513: blen = ai[MROW]-ai[0];
514: slen = blen*cbs;
516: PetscMalloc(bs2*blen*sizeof(PetscScalar), &sbstrm->as);
518: PetscMalloc(rbs*sizeof(PetscScalar *), &asp);
519:
520: for(i=0;i<rbs;i++) asp[i] = sbstrm->as + i*slen;
521:
522: for (k=0; k<blen; k++) {
523: for (j=0; j<cbs; j++)
524: for (i=0; i<rbs; i++)
525: asp[i][k*cbs+j] = aa[k*bs2+j*rbs+i];
526: }
528: /* PetscFree(a->a); */
530: switch (bs){
531: case 4:
532: A->ops->mult = MatMult_SeqSBSTRM_4;
533: A->ops->multadd = MatMultAdd_SeqSBSTRM_4;
534: /** A->ops->sor = MatSOR_SeqSBSTRM_4; **/
535: break;
536: case 5:
537: A->ops->mult = MatMult_SeqSBSTRM_5;
538: A->ops->multadd = MatMultAdd_SeqSBSTRM_5;
539: /** A->ops->sor = MatSOR_SeqSBSTRM_5; **/
540: break;
541: default:
542: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"not supported for block size %D",bs);
543: }
544: return(0);
545: }
546: /*=========================================================*/
547: EXTERN_C_BEGIN
550: PetscErrorCode MatSeqSBSTRMTransform(Mat A)
551: {
553: SeqSBSTRM_convert_sbstrm(A);
554: return(0);
555: }
556: EXTERN_C_END