Actual source code: sbaij2.c
petsc-3.6.4 2016-04-12
2: #include <../src/mat/impls/baij/seq/baij.h>
3: #include <petsc/private/kernels/blockinvert.h>
4: #include <petscbt.h>
5: #include <../src/mat/impls/sbaij/seq/sbaij.h>
6: #include <petscblaslapack.h>
10: PetscErrorCode MatIncreaseOverlap_SeqSBAIJ(Mat A,PetscInt is_max,IS is[],PetscInt ov)
11: {
12: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
14: PetscInt brow,i,j,k,l,mbs,n,*nidx,isz,bcol,bcol_max,start,end,*ai,*aj,bs,*nidx2;
15: const PetscInt *idx;
16: PetscBT table_out,table_in;
19: if (ov < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative overlap specified");
20: mbs = a->mbs;
21: ai = a->i;
22: aj = a->j;
23: bs = A->rmap->bs;
24: PetscBTCreate(mbs,&table_out);
25: PetscMalloc1(mbs+1,&nidx);
26: PetscMalloc1(A->rmap->N+1,&nidx2);
27: PetscBTCreate(mbs,&table_in);
29: for (i=0; i<is_max; i++) { /* for each is */
30: isz = 0;
31: PetscBTMemzero(mbs,table_out);
33: /* Extract the indices, assume there can be duplicate entries */
34: ISGetIndices(is[i],&idx);
35: ISGetLocalSize(is[i],&n);
37: /* Enter these into the temp arrays i.e mark table_out[brow], enter brow into new index */
38: bcol_max = 0;
39: for (j=0; j<n; ++j) {
40: brow = idx[j]/bs; /* convert the indices into block indices */
41: if (brow >= mbs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"index greater than mat-dim");
42: if (!PetscBTLookupSet(table_out,brow)) {
43: nidx[isz++] = brow;
44: if (bcol_max < brow) bcol_max = brow;
45: }
46: }
47: ISRestoreIndices(is[i],&idx);
48: ISDestroy(&is[i]);
50: k = 0;
51: for (j=0; j<ov; j++) { /* for each overlap */
52: /* set table_in for lookup - only mark entries that are added onto nidx in (j-1)-th overlap */
53: PetscBTMemzero(mbs,table_in);
54: for (l=k; l<isz; l++) { PetscBTSet(table_in,nidx[l]); }
56: n = isz; /* length of the updated is[i] */
57: for (brow=0; brow<mbs; brow++) {
58: start = ai[brow]; end = ai[brow+1];
59: if (PetscBTLookup(table_in,brow)) { /* brow is on nidx - row search: collect all bcol in this brow */
60: for (l = start; l<end; l++) {
61: bcol = aj[l];
62: if (!PetscBTLookupSet(table_out,bcol)) {
63: nidx[isz++] = bcol;
64: if (bcol_max < bcol) bcol_max = bcol;
65: }
66: }
67: k++;
68: if (k >= n) break; /* for (brow=0; brow<mbs; brow++) */
69: } else { /* brow is not on nidx - col serach: add brow onto nidx if there is a bcol in nidx */
70: for (l = start; l<end; l++) {
71: bcol = aj[l];
72: if (bcol > bcol_max) break;
73: if (PetscBTLookup(table_in,bcol)) {
74: if (!PetscBTLookupSet(table_out,brow)) nidx[isz++] = brow;
75: break; /* for l = start; l<end ; l++) */
76: }
77: }
78: }
79: }
80: } /* for each overlap */
82: /* expand the Index Set */
83: for (j=0; j<isz; j++) {
84: for (k=0; k<bs; k++) nidx2[j*bs+k] = nidx[j]*bs+k;
85: }
86: ISCreateGeneral(PETSC_COMM_SELF,isz*bs,nidx2,PETSC_COPY_VALUES,is+i);
87: }
88: PetscBTDestroy(&table_out);
89: PetscFree(nidx);
90: PetscFree(nidx2);
91: PetscBTDestroy(&table_in);
92: return(0);
93: }
97: PetscErrorCode MatGetSubMatrix_SeqSBAIJ_Private(Mat A,IS isrow,MatReuse scall,Mat *B)
98: {
99: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data,*c;
100: PetscErrorCode ierr;
101: PetscInt *smap,i,k,kstart,kend,oldcols = a->mbs,*lens;
102: PetscInt row,mat_i,*mat_j,tcol,*mat_ilen;
103: PetscInt nrows,*ssmap,bs=A->rmap->bs,bs2=a->bs2;
104: const PetscInt *irow,*aj = a->j,*ai = a->i;
105: MatScalar *mat_a;
106: Mat C;
107: PetscBool flag,sorted;
110: ISSorted(isrow,&sorted);
111: if (!sorted) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"IS is not sorted");
113: ISGetIndices(isrow,&irow);
114: ISGetSize(isrow,&nrows);
116: PetscMalloc1(oldcols,&smap);
117: PetscMemzero(smap,oldcols*sizeof(PetscInt));
118: ssmap = smap;
119: PetscMalloc1(1+nrows,&lens);
120: for (i=0; i<nrows; i++) smap[irow[i]] = i+1; /* nrows = ncols */
121: /* determine lens of each row */
122: for (i=0; i<nrows; i++) {
123: kstart = ai[irow[i]];
124: kend = kstart + a->ilen[irow[i]];
125: lens[i] = 0;
126: for (k=kstart; k<kend; k++) {
127: if (ssmap[aj[k]]) lens[i]++;
128: }
129: }
130: /* Create and fill new matrix */
131: if (scall == MAT_REUSE_MATRIX) {
132: c = (Mat_SeqSBAIJ*)((*B)->data);
134: if (c->mbs!=nrows || (*B)->rmap->bs!=bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Submatrix wrong size");
135: PetscMemcmp(c->ilen,lens,c->mbs *sizeof(PetscInt),&flag);
136: if (!flag) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. wrong no of nonzeros");
137: PetscMemzero(c->ilen,c->mbs*sizeof(PetscInt));
138: C = *B;
139: } else {
140: MatCreate(PetscObjectComm((PetscObject)A),&C);
141: MatSetSizes(C,nrows*bs,nrows*bs,PETSC_DETERMINE,PETSC_DETERMINE);
142: MatSetType(C,((PetscObject)A)->type_name);
143: MatSeqSBAIJSetPreallocation_SeqSBAIJ(C,bs,0,lens);
144: }
145: c = (Mat_SeqSBAIJ*)(C->data);
146: for (i=0; i<nrows; i++) {
147: row = irow[i];
148: kstart = ai[row];
149: kend = kstart + a->ilen[row];
150: mat_i = c->i[i];
151: mat_j = c->j + mat_i;
152: mat_a = c->a + mat_i*bs2;
153: mat_ilen = c->ilen + i;
154: for (k=kstart; k<kend; k++) {
155: if ((tcol=ssmap[a->j[k]])) {
156: *mat_j++ = tcol - 1;
157: PetscMemcpy(mat_a,a->a+k*bs2,bs2*sizeof(MatScalar));
158: mat_a += bs2;
159: (*mat_ilen)++;
160: }
161: }
162: }
164: /* Free work space */
165: PetscFree(smap);
166: PetscFree(lens);
167: MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);
168: MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);
170: ISRestoreIndices(isrow,&irow);
171: *B = C;
172: return(0);
173: }
177: PetscErrorCode MatGetSubMatrix_SeqSBAIJ(Mat A,IS isrow,IS iscol,MatReuse scall,Mat *B)
178: {
179: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
180: IS is1;
182: PetscInt *vary,*iary,nrows,i,bs=A->rmap->bs,count;
183: const PetscInt *irow;
186: if (isrow != iscol) {
187: PetscBool isequal;
188: ISEqual(isrow,iscol,&isequal);
189: if (!isequal)
190: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"For symmetric format, iscol must equal isrow");
191: }
193: ISGetIndices(isrow,&irow);
194: ISGetSize(isrow,&nrows);
196: /* Verify if the indices corespond to each element in a block
197: and form the IS with compressed IS */
198: PetscMalloc2(a->mbs,&vary,a->mbs,&iary);
199: PetscMemzero(vary,a->mbs*sizeof(PetscInt));
200: for (i=0; i<nrows; i++) vary[irow[i]/bs]++;
202: count = 0;
203: for (i=0; i<a->mbs; i++) {
204: if (vary[i]!=0 && vary[i]!=bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Index set does not match blocks");
205: if (vary[i]==bs) iary[count++] = i;
206: }
207: ISRestoreIndices(isrow,&irow);
208: ISCreateGeneral(PETSC_COMM_SELF,count,iary,PETSC_COPY_VALUES,&is1);
209: PetscFree2(vary,iary);
211: MatGetSubMatrix_SeqSBAIJ_Private(A,is1,scall,B);
212: ISDestroy(&is1);
213: return(0);
214: }
218: PetscErrorCode MatGetSubMatrices_SeqSBAIJ(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[])
219: {
221: PetscInt i;
224: if (scall == MAT_INITIAL_MATRIX) {
225: PetscMalloc1(n+1,B);
226: }
228: for (i=0; i<n; i++) {
229: MatGetSubMatrix_SeqSBAIJ(A,irow[i],icol[i],scall,&(*B)[i]);
230: }
231: return(0);
232: }
234: /* -------------------------------------------------------*/
235: /* Should check that shapes of vectors and matrices match */
236: /* -------------------------------------------------------*/
240: PetscErrorCode MatMult_SeqSBAIJ_2(Mat A,Vec xx,Vec zz)
241: {
242: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
243: PetscScalar *z,x1,x2,zero=0.0;
244: const PetscScalar *x,*xb;
245: const MatScalar *v;
246: PetscErrorCode ierr;
247: PetscInt mbs = a->mbs,i,n,cval,j,jmin;
248: const PetscInt *aj=a->j,*ai=a->i,*ib;
249: PetscInt nonzerorow=0;
252: VecSet(zz,zero);
253: VecGetArrayRead(xx,&x);
254: VecGetArray(zz,&z);
256: v = a->a;
257: xb = x;
259: for (i=0; i<mbs; i++) {
260: n = ai[1] - ai[0]; /* length of i_th block row of A */
261: x1 = xb[0]; x2 = xb[1];
262: ib = aj + *ai;
263: jmin = 0;
264: nonzerorow += (n>0);
265: if (*ib == i) { /* (diag of A)*x */
266: z[2*i] += v[0]*x1 + v[2]*x2;
267: z[2*i+1] += v[2]*x1 + v[3]*x2;
268: v += 4; jmin++;
269: }
270: PetscPrefetchBlock(ib+jmin+n,n,0,PETSC_PREFETCH_HINT_NTA); /* Indices for the next row (assumes same size as this one) */
271: PetscPrefetchBlock(v+4*n,4*n,0,PETSC_PREFETCH_HINT_NTA); /* Entries for the next row */
272: for (j=jmin; j<n; j++) {
273: /* (strict lower triangular part of A)*x */
274: cval = ib[j]*2;
275: z[cval] += v[0]*x1 + v[1]*x2;
276: z[cval+1] += v[2]*x1 + v[3]*x2;
277: /* (strict upper triangular part of A)*x */
278: z[2*i] += v[0]*x[cval] + v[2]*x[cval+1];
279: z[2*i+1] += v[1]*x[cval] + v[3]*x[cval+1];
280: v += 4;
281: }
282: xb +=2; ai++;
283: }
285: VecRestoreArrayRead(xx,&x);
286: VecRestoreArray(zz,&z);
287: PetscLogFlops(8.0*(a->nz*2.0 - nonzerorow) - nonzerorow);
288: return(0);
289: }
293: PetscErrorCode MatMult_SeqSBAIJ_3(Mat A,Vec xx,Vec zz)
294: {
295: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
296: PetscScalar *z,x1,x2,x3,zero=0.0;
297: const PetscScalar *x,*xb;
298: const MatScalar *v;
299: PetscErrorCode ierr;
300: PetscInt mbs = a->mbs,i,n,cval,j,jmin;
301: const PetscInt *aj = a->j,*ai = a->i,*ib;
302: PetscInt nonzerorow=0;
305: VecSet(zz,zero);
306: VecGetArrayRead(xx,&x);
307: VecGetArray(zz,&z);
309: v = a->a;
310: xb = x;
312: for (i=0; i<mbs; i++) {
313: n = ai[1] - ai[0]; /* length of i_th block row of A */
314: x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
315: ib = aj + *ai;
316: jmin = 0;
317: nonzerorow += (n>0);
318: if (*ib == i) { /* (diag of A)*x */
319: z[3*i] += v[0]*x1 + v[3]*x2 + v[6]*x3;
320: z[3*i+1] += v[3]*x1 + v[4]*x2 + v[7]*x3;
321: z[3*i+2] += v[6]*x1 + v[7]*x2 + v[8]*x3;
322: v += 9; jmin++;
323: }
324: PetscPrefetchBlock(ib+jmin+n,n,0,PETSC_PREFETCH_HINT_NTA); /* Indices for the next row (assumes same size as this one) */
325: PetscPrefetchBlock(v+9*n,9*n,0,PETSC_PREFETCH_HINT_NTA); /* Entries for the next row */
326: for (j=jmin; j<n; j++) {
327: /* (strict lower triangular part of A)*x */
328: cval = ib[j]*3;
329: z[cval] += v[0]*x1 + v[1]*x2 + v[2]*x3;
330: z[cval+1] += v[3]*x1 + v[4]*x2 + v[5]*x3;
331: z[cval+2] += v[6]*x1 + v[7]*x2 + v[8]*x3;
332: /* (strict upper triangular part of A)*x */
333: z[3*i] += v[0]*x[cval] + v[3]*x[cval+1]+ v[6]*x[cval+2];
334: z[3*i+1] += v[1]*x[cval] + v[4]*x[cval+1]+ v[7]*x[cval+2];
335: z[3*i+2] += v[2]*x[cval] + v[5]*x[cval+1]+ v[8]*x[cval+2];
336: v += 9;
337: }
338: xb +=3; ai++;
339: }
341: VecRestoreArrayRead(xx,&x);
342: VecRestoreArray(zz,&z);
343: PetscLogFlops(18.0*(a->nz*2.0 - nonzerorow) - nonzerorow);
344: return(0);
345: }
349: PetscErrorCode MatMult_SeqSBAIJ_4(Mat A,Vec xx,Vec zz)
350: {
351: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
352: PetscScalar *z,x1,x2,x3,x4,zero=0.0;
353: const PetscScalar *x,*xb;
354: const MatScalar *v;
355: PetscErrorCode ierr;
356: PetscInt mbs = a->mbs,i,n,cval,j,jmin;
357: const PetscInt *aj = a->j,*ai = a->i,*ib;
358: PetscInt nonzerorow = 0;
361: VecSet(zz,zero);
362: VecGetArrayRead(xx,&x);
363: VecGetArray(zz,&z);
365: v = a->a;
366: xb = x;
368: for (i=0; i<mbs; i++) {
369: n = ai[1] - ai[0]; /* length of i_th block row of A */
370: x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
371: ib = aj + *ai;
372: jmin = 0;
373: nonzerorow += (n>0);
374: if (*ib == i) { /* (diag of A)*x */
375: z[4*i] += v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4;
376: z[4*i+1] += v[4]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4;
377: z[4*i+2] += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[14]*x4;
378: z[4*i+3] += v[12]*x1+ v[13]*x2+ v[14]*x3 + v[15]*x4;
379: v += 16; jmin++;
380: }
381: PetscPrefetchBlock(ib+jmin+n,n,0,PETSC_PREFETCH_HINT_NTA); /* Indices for the next row (assumes same size as this one) */
382: PetscPrefetchBlock(v+16*n,16*n,0,PETSC_PREFETCH_HINT_NTA); /* Entries for the next row */
383: for (j=jmin; j<n; j++) {
384: /* (strict lower triangular part of A)*x */
385: cval = ib[j]*4;
386: z[cval] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4;
387: z[cval+1] += v[4]*x1 + v[5]*x2 + v[6]*x3 + v[7]*x4;
388: z[cval+2] += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[11]*x4;
389: z[cval+3] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4;
390: /* (strict upper triangular part of A)*x */
391: z[4*i] += v[0]*x[cval] + v[4]*x[cval+1]+ v[8]*x[cval+2] + v[12]*x[cval+3];
392: z[4*i+1] += v[1]*x[cval] + v[5]*x[cval+1]+ v[9]*x[cval+2] + v[13]*x[cval+3];
393: z[4*i+2] += v[2]*x[cval] + v[6]*x[cval+1]+ v[10]*x[cval+2]+ v[14]*x[cval+3];
394: z[4*i+3] += v[3]*x[cval] + v[7]*x[cval+1]+ v[11]*x[cval+2]+ v[15]*x[cval+3];
395: v += 16;
396: }
397: xb +=4; ai++;
398: }
400: VecRestoreArrayRead(xx,&x);
401: VecRestoreArray(zz,&z);
402: PetscLogFlops(32.0*(a->nz*2.0 - nonzerorow) - nonzerorow);
403: return(0);
404: }
408: PetscErrorCode MatMult_SeqSBAIJ_5(Mat A,Vec xx,Vec zz)
409: {
410: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
411: PetscScalar *z,x1,x2,x3,x4,x5,zero=0.0;
412: const PetscScalar *x,*xb;
413: const MatScalar *v;
414: PetscErrorCode ierr;
415: PetscInt mbs = a->mbs,i,n,cval,j,jmin;
416: const PetscInt *aj = a->j,*ai = a->i,*ib;
417: PetscInt nonzerorow=0;
420: VecSet(zz,zero);
421: VecGetArrayRead(xx,&x);
422: VecGetArray(zz,&z);
424: v = a->a;
425: xb = x;
427: for (i=0; i<mbs; i++) {
428: n = ai[1] - ai[0]; /* length of i_th block row of A */
429: x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4];
430: ib = aj + *ai;
431: jmin = 0;
432: nonzerorow += (n>0);
433: if (*ib == i) { /* (diag of A)*x */
434: z[5*i] += v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4+ v[20]*x5;
435: z[5*i+1] += v[5]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4+ v[21]*x5;
436: z[5*i+2] += v[10]*x1 +v[11]*x2 + v[12]*x3 + v[17]*x4+ v[22]*x5;
437: z[5*i+3] += v[15]*x1 +v[16]*x2 + v[17]*x3 + v[18]*x4+ v[23]*x5;
438: z[5*i+4] += v[20]*x1 +v[21]*x2 + v[22]*x3 + v[23]*x4+ v[24]*x5;
439: v += 25; jmin++;
440: }
441: PetscPrefetchBlock(ib+jmin+n,n,0,PETSC_PREFETCH_HINT_NTA); /* Indices for the next row (assumes same size as this one) */
442: PetscPrefetchBlock(v+25*n,25*n,0,PETSC_PREFETCH_HINT_NTA); /* Entries for the next row */
443: for (j=jmin; j<n; j++) {
444: /* (strict lower triangular part of A)*x */
445: cval = ib[j]*5;
446: z[cval] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4 + v[4]*x5;
447: z[cval+1] += v[5]*x1 + v[6]*x2 + v[7]*x3 + v[8]*x4 + v[9]*x5;
448: z[cval+2] += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4+ v[14]*x5;
449: z[cval+3] += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4+ v[19]*x5;
450: z[cval+4] += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4+ v[24]*x5;
451: /* (strict upper triangular part of A)*x */
452: z[5*i] +=v[0]*x[cval]+v[5]*x[cval+1]+v[10]*x[cval+2]+v[15]*x[cval+3]+v[20]*x[cval+4];
453: z[5*i+1] +=v[1]*x[cval]+v[6]*x[cval+1]+v[11]*x[cval+2]+v[16]*x[cval+3]+v[21]*x[cval+4];
454: z[5*i+2] +=v[2]*x[cval]+v[7]*x[cval+1]+v[12]*x[cval+2]+v[17]*x[cval+3]+v[22]*x[cval+4];
455: z[5*i+3] +=v[3]*x[cval]+v[8]*x[cval+1]+v[13]*x[cval+2]+v[18]*x[cval+3]+v[23]*x[cval+4];
456: z[5*i+4] +=v[4]*x[cval]+v[9]*x[cval+1]+v[14]*x[cval+2]+v[19]*x[cval+3]+v[24]*x[cval+4];
457: v += 25;
458: }
459: xb +=5; ai++;
460: }
462: VecRestoreArrayRead(xx,&x);
463: VecRestoreArray(zz,&z);
464: PetscLogFlops(50.0*(a->nz*2.0 - nonzerorow) - nonzerorow);
465: return(0);
466: }
471: PetscErrorCode MatMult_SeqSBAIJ_6(Mat A,Vec xx,Vec zz)
472: {
473: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
474: PetscScalar *z,x1,x2,x3,x4,x5,x6,zero=0.0;
475: const PetscScalar *x,*xb;
476: const MatScalar *v;
477: PetscErrorCode ierr;
478: PetscInt mbs = a->mbs,i,n,cval,j,jmin;
479: const PetscInt *aj=a->j,*ai=a->i,*ib;
480: PetscInt nonzerorow=0;
483: VecSet(zz,zero);
484: VecGetArrayRead(xx,&x);
485: VecGetArray(zz,&z);
487: v = a->a;
488: xb = x;
490: for (i=0; i<mbs; i++) {
491: n = ai[1] - ai[0]; /* length of i_th block row of A */
492: x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4]; x6=xb[5];
493: ib = aj + *ai;
494: jmin = 0;
495: nonzerorow += (n>0);
496: if (*ib == i) { /* (diag of A)*x */
497: z[6*i] += v[0]*x1 + v[6]*x2 + v[12]*x3 + v[18]*x4+ v[24]*x5 + v[30]*x6;
498: z[6*i+1] += v[6]*x1 + v[7]*x2 + v[13]*x3 + v[19]*x4+ v[25]*x5 + v[31]*x6;
499: z[6*i+2] += v[12]*x1 +v[13]*x2 + v[14]*x3 + v[20]*x4+ v[26]*x5 + v[32]*x6;
500: z[6*i+3] += v[18]*x1 +v[19]*x2 + v[20]*x3 + v[21]*x4+ v[27]*x5 + v[33]*x6;
501: z[6*i+4] += v[24]*x1 +v[25]*x2 + v[26]*x3 + v[27]*x4+ v[28]*x5 + v[34]*x6;
502: z[6*i+5] += v[30]*x1 +v[31]*x2 + v[32]*x3 + v[33]*x4+ v[34]*x5 + v[35]*x6;
503: v += 36; jmin++;
504: }
505: PetscPrefetchBlock(ib+jmin+n,n,0,PETSC_PREFETCH_HINT_NTA); /* Indices for the next row (assumes same size as this one) */
506: PetscPrefetchBlock(v+36*n,36*n,0,PETSC_PREFETCH_HINT_NTA); /* Entries for the next row */
507: for (j=jmin; j<n; j++) {
508: /* (strict lower triangular part of A)*x */
509: cval = ib[j]*6;
510: z[cval] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4+ v[4]*x5 + v[5]*x6;
511: z[cval+1] += v[6]*x1 + v[7]*x2 + v[8]*x3 + v[9]*x4+ v[10]*x5 + v[11]*x6;
512: z[cval+2] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4+ v[16]*x5 + v[17]*x6;
513: z[cval+3] += v[18]*x1 + v[19]*x2 + v[20]*x3 + v[21]*x4+ v[22]*x5 + v[23]*x6;
514: z[cval+4] += v[24]*x1 + v[25]*x2 + v[26]*x3 + v[27]*x4+ v[28]*x5 + v[29]*x6;
515: z[cval+5] += v[30]*x1 + v[31]*x2 + v[32]*x3 + v[33]*x4+ v[34]*x5 + v[35]*x6;
516: /* (strict upper triangular part of A)*x */
517: z[6*i] +=v[0]*x[cval]+v[6]*x[cval+1]+v[12]*x[cval+2]+v[18]*x[cval+3]+v[24]*x[cval+4]+v[30]*x[cval+5];
518: z[6*i+1] +=v[1]*x[cval]+v[7]*x[cval+1]+v[13]*x[cval+2]+v[19]*x[cval+3]+v[25]*x[cval+4]+v[31]*x[cval+5];
519: z[6*i+2] +=v[2]*x[cval]+v[8]*x[cval+1]+v[14]*x[cval+2]+v[20]*x[cval+3]+v[26]*x[cval+4]+v[32]*x[cval+5];
520: z[6*i+3] +=v[3]*x[cval]+v[9]*x[cval+1]+v[15]*x[cval+2]+v[21]*x[cval+3]+v[27]*x[cval+4]+v[33]*x[cval+5];
521: z[6*i+4] +=v[4]*x[cval]+v[10]*x[cval+1]+v[16]*x[cval+2]+v[22]*x[cval+3]+v[28]*x[cval+4]+v[34]*x[cval+5];
522: z[6*i+5] +=v[5]*x[cval]+v[11]*x[cval+1]+v[17]*x[cval+2]+v[23]*x[cval+3]+v[29]*x[cval+4]+v[35]*x[cval+5];
523: v += 36;
524: }
525: xb +=6; ai++;
526: }
528: VecRestoreArrayRead(xx,&x);
529: VecRestoreArray(zz,&z);
530: PetscLogFlops(72.0*(a->nz*2.0 - nonzerorow) - nonzerorow);
531: return(0);
532: }
535: PetscErrorCode MatMult_SeqSBAIJ_7(Mat A,Vec xx,Vec zz)
536: {
537: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
538: PetscScalar *z,x1,x2,x3,x4,x5,x6,x7,zero=0.0;
539: const PetscScalar *x,*xb;
540: const MatScalar *v;
541: PetscErrorCode ierr;
542: PetscInt mbs = a->mbs,i,n,cval,j,jmin;
543: const PetscInt *aj=a->j,*ai=a->i,*ib;
544: PetscInt nonzerorow=0;
547: VecSet(zz,zero);
548: VecGetArrayRead(xx,&x);
549: VecGetArray(zz,&z);
551: v = a->a;
552: xb = x;
554: for (i=0; i<mbs; i++) {
555: n = ai[1] - ai[0]; /* length of i_th block row of A */
556: x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4]; x6=xb[5]; x7=xb[6];
557: ib = aj + *ai;
558: jmin = 0;
559: nonzerorow += (n>0);
560: if (*ib == i) { /* (diag of A)*x */
561: z[7*i] += v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4+ v[28]*x5 + v[35]*x6+ v[42]*x7;
562: z[7*i+1] += v[7]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4+ v[29]*x5 + v[36]*x6+ v[43]*x7;
563: z[7*i+2] += v[14]*x1+ v[15]*x2 +v[16]*x3 + v[23]*x4+ v[30]*x5 + v[37]*x6+ v[44]*x7;
564: z[7*i+3] += v[21]*x1+ v[22]*x2 +v[23]*x3 + v[24]*x4+ v[31]*x5 + v[38]*x6+ v[45]*x7;
565: z[7*i+4] += v[28]*x1+ v[29]*x2 +v[30]*x3 + v[31]*x4+ v[32]*x5 + v[39]*x6+ v[46]*x7;
566: z[7*i+5] += v[35]*x1+ v[36]*x2 +v[37]*x3 + v[38]*x4+ v[39]*x5 + v[40]*x6+ v[47]*x7;
567: z[7*i+6] += v[42]*x1+ v[43]*x2 +v[44]*x3 + v[45]*x4+ v[46]*x5 + v[47]*x6+ v[48]*x7;
568: v += 49; jmin++;
569: }
570: PetscPrefetchBlock(ib+jmin+n,n,0,PETSC_PREFETCH_HINT_NTA); /* Indices for the next row (assumes same size as this one) */
571: PetscPrefetchBlock(v+49*n,49*n,0,PETSC_PREFETCH_HINT_NTA); /* Entries for the next row */
572: for (j=jmin; j<n; j++) {
573: /* (strict lower triangular part of A)*x */
574: cval = ib[j]*7;
575: z[cval] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4+ v[4]*x5 + v[5]*x6+ v[6]*x7;
576: z[cval+1] += v[7]*x1 + v[8]*x2 + v[9]*x3 + v[10]*x4+ v[11]*x5 + v[12]*x6+ v[13]*x7;
577: z[cval+2] += v[14]*x1 + v[15]*x2 + v[16]*x3 + v[17]*x4+ v[18]*x5 + v[19]*x6+ v[20]*x7;
578: z[cval+3] += v[21]*x1 + v[22]*x2 + v[23]*x3 + v[24]*x4+ v[25]*x5 + v[26]*x6+ v[27]*x7;
579: z[cval+4] += v[28]*x1 + v[29]*x2 + v[30]*x3 + v[31]*x4+ v[32]*x5 + v[33]*x6+ v[34]*x7;
580: z[cval+5] += v[35]*x1 + v[36]*x2 + v[37]*x3 + v[38]*x4+ v[39]*x5 + v[40]*x6+ v[41]*x7;
581: z[cval+6] += v[42]*x1 + v[43]*x2 + v[44]*x3 + v[45]*x4+ v[46]*x5 + v[47]*x6+ v[48]*x7;
582: /* (strict upper triangular part of A)*x */
583: z[7*i] +=v[0]*x[cval]+v[7]*x[cval+1]+v[14]*x[cval+2]+v[21]*x[cval+3]+v[28]*x[cval+4]+v[35]*x[cval+5]+v[42]*x[cval+6];
584: z[7*i+1]+=v[1]*x[cval]+v[8]*x[cval+1]+v[15]*x[cval+2]+v[22]*x[cval+3]+v[29]*x[cval+4]+v[36]*x[cval+5]+v[43]*x[cval+6];
585: z[7*i+2]+=v[2]*x[cval]+v[9]*x[cval+1]+v[16]*x[cval+2]+v[23]*x[cval+3]+v[30]*x[cval+4]+v[37]*x[cval+5]+v[44]*x[cval+6];
586: z[7*i+3]+=v[3]*x[cval]+v[10]*x[cval+1]+v[17]*x[cval+2]+v[24]*x[cval+3]+v[31]*x[cval+4]+v[38]*x[cval+5]+v[45]*x[cval+6];
587: z[7*i+4]+=v[4]*x[cval]+v[11]*x[cval+1]+v[18]*x[cval+2]+v[25]*x[cval+3]+v[32]*x[cval+4]+v[39]*x[cval+5]+v[46]*x[cval+6];
588: z[7*i+5]+=v[5]*x[cval]+v[12]*x[cval+1]+v[19]*x[cval+2]+v[26]*x[cval+3]+v[33]*x[cval+4]+v[40]*x[cval+5]+v[47]*x[cval+6];
589: z[7*i+6]+=v[6]*x[cval]+v[13]*x[cval+1]+v[20]*x[cval+2]+v[27]*x[cval+3]+v[34]*x[cval+4]+v[41]*x[cval+5]+v[48]*x[cval+6];
590: v += 49;
591: }
592: xb +=7; ai++;
593: }
594: VecRestoreArrayRead(xx,&x);
595: VecRestoreArray(zz,&z);
596: PetscLogFlops(98.0*(a->nz*2.0 - nonzerorow) - nonzerorow);
597: return(0);
598: }
600: /*
601: This will not work with MatScalar == float because it calls the BLAS
602: */
605: PetscErrorCode MatMult_SeqSBAIJ_N(Mat A,Vec xx,Vec zz)
606: {
607: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
608: PetscScalar *z,*z_ptr,*zb,*work,*workt,zero=0.0;
609: const PetscScalar *x,*x_ptr,*xb;
610: const MatScalar *v;
611: PetscErrorCode ierr;
612: PetscInt mbs =a->mbs,i,bs=A->rmap->bs,j,n,bs2=a->bs2,ncols,k;
613: const PetscInt *idx,*aj,*ii;
614: PetscInt nonzerorow=0;
617: VecSet(zz,zero);
618: VecGetArrayRead(xx,&x);x_ptr = x;
619: VecGetArray(zz,&z); z_ptr=z;
621: aj = a->j;
622: v = a->a;
623: ii = a->i;
625: if (!a->mult_work) {
626: PetscMalloc1(A->rmap->N+1,&a->mult_work);
627: }
628: work = a->mult_work;
630: for (i=0; i<mbs; i++) {
631: n = ii[1] - ii[0]; ncols = n*bs;
632: workt = work; idx=aj+ii[0];
633: nonzerorow += (n>0);
635: /* upper triangular part */
636: for (j=0; j<n; j++) {
637: xb = x_ptr + bs*(*idx++);
638: for (k=0; k<bs; k++) workt[k] = xb[k];
639: workt += bs;
640: }
641: /* z(i*bs:(i+1)*bs-1) += A(i,:)*x */
642: PetscKernel_w_gets_w_plus_Ar_times_v(bs,ncols,work,v,z);
644: /* strict lower triangular part */
645: idx = aj+ii[0];
646: if (*idx == i) {
647: ncols -= bs; v += bs2; idx++; n--;
648: }
650: if (ncols > 0) {
651: workt = work;
652: PetscMemzero(workt,ncols*sizeof(PetscScalar));
653: PetscKernel_w_gets_w_plus_trans_Ar_times_v(bs,ncols,x,v,workt);
654: for (j=0; j<n; j++) {
655: zb = z_ptr + bs*(*idx++);
656: for (k=0; k<bs; k++) zb[k] += workt[k];
657: workt += bs;
658: }
659: }
660: x += bs; v += n*bs2; z += bs; ii++;
661: }
663: VecRestoreArrayRead(xx,&x);
664: VecRestoreArray(zz,&z);
665: PetscLogFlops(2.0*(a->nz*2.0 - nonzerorow)*bs2 - nonzerorow);
666: return(0);
667: }
671: PetscErrorCode MatMultAdd_SeqSBAIJ_1(Mat A,Vec xx,Vec yy,Vec zz)
672: {
673: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
674: PetscScalar *z,x1;
675: const PetscScalar *x,*xb;
676: const MatScalar *v;
677: PetscErrorCode ierr;
678: PetscInt mbs =a->mbs,i,n,cval,j,jmin;
679: const PetscInt *aj=a->j,*ai=a->i,*ib;
680: PetscInt nonzerorow=0;
683: VecCopy(yy,zz);
684: VecGetArrayRead(xx,&x);
685: VecGetArray(zz,&z);
686: v = a->a;
687: xb = x;
689: for (i=0; i<mbs; i++) {
690: n = ai[1] - ai[0]; /* length of i_th row of A */
691: x1 = xb[0];
692: ib = aj + *ai;
693: jmin = 0;
694: nonzerorow += (n>0);
695: if (*ib == i) { /* (diag of A)*x */
696: z[i] += *v++ * x[*ib++]; jmin++;
697: }
698: for (j=jmin; j<n; j++) {
699: cval = *ib;
700: z[cval] += *v * x1; /* (strict lower triangular part of A)*x */
701: z[i] += *v++ * x[*ib++]; /* (strict upper triangular part of A)*x */
702: }
703: xb++; ai++;
704: }
706: VecRestoreArrayRead(xx,&x);
707: VecRestoreArray(zz,&z);
709: PetscLogFlops(2.0*(a->nz*2.0 - nonzerorow));
710: return(0);
711: }
715: PetscErrorCode MatMultAdd_SeqSBAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
716: {
717: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
718: PetscScalar *z,x1,x2;
719: const PetscScalar *x,*xb;
720: const MatScalar *v;
721: PetscErrorCode ierr;
722: PetscInt mbs =a->mbs,i,n,cval,j,jmin;
723: const PetscInt *aj=a->j,*ai=a->i,*ib;
724: PetscInt nonzerorow=0;
727: VecCopy(yy,zz);
728: VecGetArrayRead(xx,&x);
729: VecGetArray(zz,&z);
731: v = a->a;
732: xb = x;
734: for (i=0; i<mbs; i++) {
735: n = ai[1] - ai[0]; /* length of i_th block row of A */
736: x1 = xb[0]; x2 = xb[1];
737: ib = aj + *ai;
738: jmin = 0;
739: nonzerorow += (n>0);
740: if (*ib == i) { /* (diag of A)*x */
741: z[2*i] += v[0]*x1 + v[2]*x2;
742: z[2*i+1] += v[2]*x1 + v[3]*x2;
743: v += 4; jmin++;
744: }
745: PetscPrefetchBlock(ib+jmin+n,n,0,PETSC_PREFETCH_HINT_NTA); /* Indices for the next row (assumes same size as this one) */
746: PetscPrefetchBlock(v+4*n,4*n,0,PETSC_PREFETCH_HINT_NTA); /* Entries for the next row */
747: for (j=jmin; j<n; j++) {
748: /* (strict lower triangular part of A)*x */
749: cval = ib[j]*2;
750: z[cval] += v[0]*x1 + v[1]*x2;
751: z[cval+1] += v[2]*x1 + v[3]*x2;
752: /* (strict upper triangular part of A)*x */
753: z[2*i] += v[0]*x[cval] + v[2]*x[cval+1];
754: z[2*i+1] += v[1]*x[cval] + v[3]*x[cval+1];
755: v += 4;
756: }
757: xb +=2; ai++;
758: }
759: VecRestoreArrayRead(xx,&x);
760: VecRestoreArray(zz,&z);
762: PetscLogFlops(4.0*(a->nz*2.0 - nonzerorow));
763: return(0);
764: }
768: PetscErrorCode MatMultAdd_SeqSBAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
769: {
770: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
771: PetscScalar *z,x1,x2,x3;
772: const PetscScalar *x,*xb;
773: const MatScalar *v;
774: PetscErrorCode ierr;
775: PetscInt mbs = a->mbs,i,n,cval,j,jmin;
776: const PetscInt *aj=a->j,*ai=a->i,*ib;
777: PetscInt nonzerorow=0;
780: VecCopy(yy,zz);
781: VecGetArrayRead(xx,&x);
782: VecGetArray(zz,&z);
784: v = a->a;
785: xb = x;
787: for (i=0; i<mbs; i++) {
788: n = ai[1] - ai[0]; /* length of i_th block row of A */
789: x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
790: ib = aj + *ai;
791: jmin = 0;
792: nonzerorow += (n>0);
793: if (*ib == i) { /* (diag of A)*x */
794: z[3*i] += v[0]*x1 + v[3]*x2 + v[6]*x3;
795: z[3*i+1] += v[3]*x1 + v[4]*x2 + v[7]*x3;
796: z[3*i+2] += v[6]*x1 + v[7]*x2 + v[8]*x3;
797: v += 9; jmin++;
798: }
799: PetscPrefetchBlock(ib+jmin+n,n,0,PETSC_PREFETCH_HINT_NTA); /* Indices for the next row (assumes same size as this one) */
800: PetscPrefetchBlock(v+9*n,9*n,0,PETSC_PREFETCH_HINT_NTA); /* Entries for the next row */
801: for (j=jmin; j<n; j++) {
802: /* (strict lower triangular part of A)*x */
803: cval = ib[j]*3;
804: z[cval] += v[0]*x1 + v[1]*x2 + v[2]*x3;
805: z[cval+1] += v[3]*x1 + v[4]*x2 + v[5]*x3;
806: z[cval+2] += v[6]*x1 + v[7]*x2 + v[8]*x3;
807: /* (strict upper triangular part of A)*x */
808: z[3*i] += v[0]*x[cval] + v[3]*x[cval+1]+ v[6]*x[cval+2];
809: z[3*i+1] += v[1]*x[cval] + v[4]*x[cval+1]+ v[7]*x[cval+2];
810: z[3*i+2] += v[2]*x[cval] + v[5]*x[cval+1]+ v[8]*x[cval+2];
811: v += 9;
812: }
813: xb +=3; ai++;
814: }
816: VecRestoreArrayRead(xx,&x);
817: VecRestoreArray(zz,&z);
819: PetscLogFlops(18.0*(a->nz*2.0 - nonzerorow));
820: return(0);
821: }
825: PetscErrorCode MatMultAdd_SeqSBAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
826: {
827: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
828: PetscScalar *z,x1,x2,x3,x4;
829: const PetscScalar *x,*xb;
830: const MatScalar *v;
831: PetscErrorCode ierr;
832: PetscInt mbs = a->mbs,i,n,cval,j,jmin;
833: const PetscInt *aj=a->j,*ai=a->i,*ib;
834: PetscInt nonzerorow=0;
837: VecCopy(yy,zz);
838: VecGetArrayRead(xx,&x);
839: VecGetArray(zz,&z);
841: v = a->a;
842: xb = x;
844: for (i=0; i<mbs; i++) {
845: n = ai[1] - ai[0]; /* length of i_th block row of A */
846: x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
847: ib = aj + *ai;
848: jmin = 0;
849: nonzerorow += (n>0);
850: if (*ib == i) { /* (diag of A)*x */
851: z[4*i] += v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4;
852: z[4*i+1] += v[4]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4;
853: z[4*i+2] += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[14]*x4;
854: z[4*i+3] += v[12]*x1+ v[13]*x2+ v[14]*x3 + v[15]*x4;
855: v += 16; jmin++;
856: }
857: PetscPrefetchBlock(ib+jmin+n,n,0,PETSC_PREFETCH_HINT_NTA); /* Indices for the next row (assumes same size as this one) */
858: PetscPrefetchBlock(v+16*n,16*n,0,PETSC_PREFETCH_HINT_NTA); /* Entries for the next row */
859: for (j=jmin; j<n; j++) {
860: /* (strict lower triangular part of A)*x */
861: cval = ib[j]*4;
862: z[cval] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4;
863: z[cval+1] += v[4]*x1 + v[5]*x2 + v[6]*x3 + v[7]*x4;
864: z[cval+2] += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[11]*x4;
865: z[cval+3] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4;
866: /* (strict upper triangular part of A)*x */
867: z[4*i] += v[0]*x[cval] + v[4]*x[cval+1]+ v[8]*x[cval+2] + v[12]*x[cval+3];
868: z[4*i+1] += v[1]*x[cval] + v[5]*x[cval+1]+ v[9]*x[cval+2] + v[13]*x[cval+3];
869: z[4*i+2] += v[2]*x[cval] + v[6]*x[cval+1]+ v[10]*x[cval+2]+ v[14]*x[cval+3];
870: z[4*i+3] += v[3]*x[cval] + v[7]*x[cval+1]+ v[11]*x[cval+2]+ v[15]*x[cval+3];
871: v += 16;
872: }
873: xb +=4; ai++;
874: }
876: VecRestoreArrayRead(xx,&x);
877: VecRestoreArray(zz,&z);
879: PetscLogFlops(32.0*(a->nz*2.0 - nonzerorow));
880: return(0);
881: }
885: PetscErrorCode MatMultAdd_SeqSBAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
886: {
887: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
888: PetscScalar *z,x1,x2,x3,x4,x5;
889: const PetscScalar *x,*xb;
890: const MatScalar *v;
891: PetscErrorCode ierr;
892: PetscInt mbs = a->mbs,i,n,cval,j,jmin;
893: const PetscInt *aj=a->j,*ai=a->i,*ib;
894: PetscInt nonzerorow=0;
897: VecCopy(yy,zz);
898: VecGetArrayRead(xx,&x);
899: VecGetArray(zz,&z);
901: v = a->a;
902: xb = x;
904: for (i=0; i<mbs; i++) {
905: n = ai[1] - ai[0]; /* length of i_th block row of A */
906: x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4];
907: ib = aj + *ai;
908: jmin = 0;
909: nonzerorow += (n>0);
910: if (*ib == i) { /* (diag of A)*x */
911: z[5*i] += v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4+ v[20]*x5;
912: z[5*i+1] += v[5]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4+ v[21]*x5;
913: z[5*i+2] += v[10]*x1 +v[11]*x2 + v[12]*x3 + v[17]*x4+ v[22]*x5;
914: z[5*i+3] += v[15]*x1 +v[16]*x2 + v[17]*x3 + v[18]*x4+ v[23]*x5;
915: z[5*i+4] += v[20]*x1 +v[21]*x2 + v[22]*x3 + v[23]*x4+ v[24]*x5;
916: v += 25; jmin++;
917: }
918: PetscPrefetchBlock(ib+jmin+n,n,0,PETSC_PREFETCH_HINT_NTA); /* Indices for the next row (assumes same size as this one) */
919: PetscPrefetchBlock(v+25*n,25*n,0,PETSC_PREFETCH_HINT_NTA); /* Entries for the next row */
920: for (j=jmin; j<n; j++) {
921: /* (strict lower triangular part of A)*x */
922: cval = ib[j]*5;
923: z[cval] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4 + v[4]*x5;
924: z[cval+1] += v[5]*x1 + v[6]*x2 + v[7]*x3 + v[8]*x4 + v[9]*x5;
925: z[cval+2] += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4+ v[14]*x5;
926: z[cval+3] += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4+ v[19]*x5;
927: z[cval+4] += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4+ v[24]*x5;
928: /* (strict upper triangular part of A)*x */
929: z[5*i] +=v[0]*x[cval]+v[5]*x[cval+1]+v[10]*x[cval+2]+v[15]*x[cval+3]+v[20]*x[cval+4];
930: z[5*i+1] +=v[1]*x[cval]+v[6]*x[cval+1]+v[11]*x[cval+2]+v[16]*x[cval+3]+v[21]*x[cval+4];
931: z[5*i+2] +=v[2]*x[cval]+v[7]*x[cval+1]+v[12]*x[cval+2]+v[17]*x[cval+3]+v[22]*x[cval+4];
932: z[5*i+3] +=v[3]*x[cval]+v[8]*x[cval+1]+v[13]*x[cval+2]+v[18]*x[cval+3]+v[23]*x[cval+4];
933: z[5*i+4] +=v[4]*x[cval]+v[9]*x[cval+1]+v[14]*x[cval+2]+v[19]*x[cval+3]+v[24]*x[cval+4];
934: v += 25;
935: }
936: xb +=5; ai++;
937: }
939: VecRestoreArrayRead(xx,&x);
940: VecRestoreArray(zz,&z);
942: PetscLogFlops(50.0*(a->nz*2.0 - nonzerorow));
943: return(0);
944: }
947: PetscErrorCode MatMultAdd_SeqSBAIJ_6(Mat A,Vec xx,Vec yy,Vec zz)
948: {
949: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
950: PetscScalar *z,x1,x2,x3,x4,x5,x6;
951: const PetscScalar *x,*xb;
952: const MatScalar *v;
953: PetscErrorCode ierr;
954: PetscInt mbs = a->mbs,i,n,cval,j,jmin;
955: const PetscInt *aj=a->j,*ai=a->i,*ib;
956: PetscInt nonzerorow=0;
959: VecCopy(yy,zz);
960: VecGetArrayRead(xx,&x);
961: VecGetArray(zz,&z);
963: v = a->a;
964: xb = x;
966: for (i=0; i<mbs; i++) {
967: n = ai[1] - ai[0]; /* length of i_th block row of A */
968: x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4]; x6=xb[5];
969: ib = aj + *ai;
970: jmin = 0;
971: nonzerorow += (n>0);
972: if (*ib == i) { /* (diag of A)*x */
973: z[6*i] += v[0]*x1 + v[6]*x2 + v[12]*x3 + v[18]*x4+ v[24]*x5 + v[30]*x6;
974: z[6*i+1] += v[6]*x1 + v[7]*x2 + v[13]*x3 + v[19]*x4+ v[25]*x5 + v[31]*x6;
975: z[6*i+2] += v[12]*x1 +v[13]*x2 + v[14]*x3 + v[20]*x4+ v[26]*x5 + v[32]*x6;
976: z[6*i+3] += v[18]*x1 +v[19]*x2 + v[20]*x3 + v[21]*x4+ v[27]*x5 + v[33]*x6;
977: z[6*i+4] += v[24]*x1 +v[25]*x2 + v[26]*x3 + v[27]*x4+ v[28]*x5 + v[34]*x6;
978: z[6*i+5] += v[30]*x1 +v[31]*x2 + v[32]*x3 + v[33]*x4+ v[34]*x5 + v[35]*x6;
979: v += 36; jmin++;
980: }
981: PetscPrefetchBlock(ib+jmin+n,n,0,PETSC_PREFETCH_HINT_NTA); /* Indices for the next row (assumes same size as this one) */
982: PetscPrefetchBlock(v+36*n,36*n,0,PETSC_PREFETCH_HINT_NTA); /* Entries for the next row */
983: for (j=jmin; j<n; j++) {
984: /* (strict lower triangular part of A)*x */
985: cval = ib[j]*6;
986: z[cval] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4+ v[4]*x5 + v[5]*x6;
987: z[cval+1] += v[6]*x1 + v[7]*x2 + v[8]*x3 + v[9]*x4+ v[10]*x5 + v[11]*x6;
988: z[cval+2] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4+ v[16]*x5 + v[17]*x6;
989: z[cval+3] += v[18]*x1 + v[19]*x2 + v[20]*x3 + v[21]*x4+ v[22]*x5 + v[23]*x6;
990: z[cval+4] += v[24]*x1 + v[25]*x2 + v[26]*x3 + v[27]*x4+ v[28]*x5 + v[29]*x6;
991: z[cval+5] += v[30]*x1 + v[31]*x2 + v[32]*x3 + v[33]*x4+ v[34]*x5 + v[35]*x6;
992: /* (strict upper triangular part of A)*x */
993: z[6*i] +=v[0]*x[cval]+v[6]*x[cval+1]+v[12]*x[cval+2]+v[18]*x[cval+3]+v[24]*x[cval+4]+v[30]*x[cval+5];
994: z[6*i+1] +=v[1]*x[cval]+v[7]*x[cval+1]+v[13]*x[cval+2]+v[19]*x[cval+3]+v[25]*x[cval+4]+v[31]*x[cval+5];
995: z[6*i+2] +=v[2]*x[cval]+v[8]*x[cval+1]+v[14]*x[cval+2]+v[20]*x[cval+3]+v[26]*x[cval+4]+v[32]*x[cval+5];
996: z[6*i+3] +=v[3]*x[cval]+v[9]*x[cval+1]+v[15]*x[cval+2]+v[21]*x[cval+3]+v[27]*x[cval+4]+v[33]*x[cval+5];
997: z[6*i+4] +=v[4]*x[cval]+v[10]*x[cval+1]+v[16]*x[cval+2]+v[22]*x[cval+3]+v[28]*x[cval+4]+v[34]*x[cval+5];
998: z[6*i+5] +=v[5]*x[cval]+v[11]*x[cval+1]+v[17]*x[cval+2]+v[23]*x[cval+3]+v[29]*x[cval+4]+v[35]*x[cval+5];
999: v += 36;
1000: }
1001: xb +=6; ai++;
1002: }
1004: VecRestoreArrayRead(xx,&x);
1005: VecRestoreArray(zz,&z);
1007: PetscLogFlops(72.0*(a->nz*2.0 - nonzerorow));
1008: return(0);
1009: }
1013: PetscErrorCode MatMultAdd_SeqSBAIJ_7(Mat A,Vec xx,Vec yy,Vec zz)
1014: {
1015: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
1016: PetscScalar *z,x1,x2,x3,x4,x5,x6,x7;
1017: const PetscScalar *x,*xb;
1018: const MatScalar *v;
1019: PetscErrorCode ierr;
1020: PetscInt mbs = a->mbs,i,n,cval,j,jmin;
1021: const PetscInt *aj=a->j,*ai=a->i,*ib;
1022: PetscInt nonzerorow=0;
1025: VecCopy(yy,zz);
1026: VecGetArrayRead(xx,&x);
1027: VecGetArray(zz,&z);
1029: v = a->a;
1030: xb = x;
1032: for (i=0; i<mbs; i++) {
1033: n = ai[1] - ai[0]; /* length of i_th block row of A */
1034: x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4]; x6=xb[5]; x7=xb[6];
1035: ib = aj + *ai;
1036: jmin = 0;
1037: nonzerorow += (n>0);
1038: if (*ib == i) { /* (diag of A)*x */
1039: z[7*i] += v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4+ v[28]*x5 + v[35]*x6+ v[42]*x7;
1040: z[7*i+1] += v[7]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4+ v[29]*x5 + v[36]*x6+ v[43]*x7;
1041: z[7*i+2] += v[14]*x1+ v[15]*x2 +v[16]*x3 + v[23]*x4+ v[30]*x5 + v[37]*x6+ v[44]*x7;
1042: z[7*i+3] += v[21]*x1+ v[22]*x2 +v[23]*x3 + v[24]*x4+ v[31]*x5 + v[38]*x6+ v[45]*x7;
1043: z[7*i+4] += v[28]*x1+ v[29]*x2 +v[30]*x3 + v[31]*x4+ v[32]*x5 + v[39]*x6+ v[46]*x7;
1044: z[7*i+5] += v[35]*x1+ v[36]*x2 +v[37]*x3 + v[38]*x4+ v[39]*x5 + v[40]*x6+ v[47]*x7;
1045: z[7*i+6] += v[42]*x1+ v[43]*x2 +v[44]*x3 + v[45]*x4+ v[46]*x5 + v[47]*x6+ v[48]*x7;
1046: v += 49; jmin++;
1047: }
1048: PetscPrefetchBlock(ib+jmin+n,n,0,PETSC_PREFETCH_HINT_NTA); /* Indices for the next row (assumes same size as this one) */
1049: PetscPrefetchBlock(v+49*n,49*n,0,PETSC_PREFETCH_HINT_NTA); /* Entries for the next row */
1050: for (j=jmin; j<n; j++) {
1051: /* (strict lower triangular part of A)*x */
1052: cval = ib[j]*7;
1053: z[cval] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4+ v[4]*x5 + v[5]*x6+ v[6]*x7;
1054: z[cval+1] += v[7]*x1 + v[8]*x2 + v[9]*x3 + v[10]*x4+ v[11]*x5 + v[12]*x6+ v[13]*x7;
1055: z[cval+2] += v[14]*x1 + v[15]*x2 + v[16]*x3 + v[17]*x4+ v[18]*x5 + v[19]*x6+ v[20]*x7;
1056: z[cval+3] += v[21]*x1 + v[22]*x2 + v[23]*x3 + v[24]*x4+ v[25]*x5 + v[26]*x6+ v[27]*x7;
1057: z[cval+4] += v[28]*x1 + v[29]*x2 + v[30]*x3 + v[31]*x4+ v[32]*x5 + v[33]*x6+ v[34]*x7;
1058: z[cval+5] += v[35]*x1 + v[36]*x2 + v[37]*x3 + v[38]*x4+ v[39]*x5 + v[40]*x6+ v[41]*x7;
1059: z[cval+6] += v[42]*x1 + v[43]*x2 + v[44]*x3 + v[45]*x4+ v[46]*x5 + v[47]*x6+ v[48]*x7;
1060: /* (strict upper triangular part of A)*x */
1061: z[7*i] +=v[0]*x[cval]+v[7]*x[cval+1]+v[14]*x[cval+2]+v[21]*x[cval+3]+v[28]*x[cval+4]+v[35]*x[cval+5]+v[42]*x[cval+6];
1062: z[7*i+1]+=v[1]*x[cval]+v[8]*x[cval+1]+v[15]*x[cval+2]+v[22]*x[cval+3]+v[29]*x[cval+4]+v[36]*x[cval+5]+v[43]*x[cval+6];
1063: z[7*i+2]+=v[2]*x[cval]+v[9]*x[cval+1]+v[16]*x[cval+2]+v[23]*x[cval+3]+v[30]*x[cval+4]+v[37]*x[cval+5]+v[44]*x[cval+6];
1064: z[7*i+3]+=v[3]*x[cval]+v[10]*x[cval+1]+v[17]*x[cval+2]+v[24]*x[cval+3]+v[31]*x[cval+4]+v[38]*x[cval+5]+v[45]*x[cval+6];
1065: z[7*i+4]+=v[4]*x[cval]+v[11]*x[cval+1]+v[18]*x[cval+2]+v[25]*x[cval+3]+v[32]*x[cval+4]+v[39]*x[cval+5]+v[46]*x[cval+6];
1066: z[7*i+5]+=v[5]*x[cval]+v[12]*x[cval+1]+v[19]*x[cval+2]+v[26]*x[cval+3]+v[33]*x[cval+4]+v[40]*x[cval+5]+v[47]*x[cval+6];
1067: z[7*i+6]+=v[6]*x[cval]+v[13]*x[cval+1]+v[20]*x[cval+2]+v[27]*x[cval+3]+v[34]*x[cval+4]+v[41]*x[cval+5]+v[48]*x[cval+6];
1068: v += 49;
1069: }
1070: xb +=7; ai++;
1071: }
1073: VecRestoreArrayRead(xx,&x);
1074: VecRestoreArray(zz,&z);
1076: PetscLogFlops(98.0*(a->nz*2.0 - nonzerorow));
1077: return(0);
1078: }
1082: PetscErrorCode MatMultAdd_SeqSBAIJ_N(Mat A,Vec xx,Vec yy,Vec zz)
1083: {
1084: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
1085: PetscScalar *z,*z_ptr=0,*zb,*work,*workt;
1086: const PetscScalar *x,*x_ptr,*xb;
1087: const MatScalar *v;
1088: PetscErrorCode ierr;
1089: PetscInt mbs = a->mbs,i,bs=A->rmap->bs,j,n,bs2=a->bs2,ncols,k;
1090: const PetscInt *idx,*aj,*ii;
1091: PetscInt nonzerorow=0;
1094: VecCopy(yy,zz);
1095: VecGetArrayRead(xx,&x); x_ptr=x;
1096: VecGetArray(zz,&z); z_ptr=z;
1098: aj = a->j;
1099: v = a->a;
1100: ii = a->i;
1102: if (!a->mult_work) {
1103: PetscMalloc1(A->rmap->n+1,&a->mult_work);
1104: }
1105: work = a->mult_work;
1108: for (i=0; i<mbs; i++) {
1109: n = ii[1] - ii[0]; ncols = n*bs;
1110: workt = work; idx=aj+ii[0];
1111: nonzerorow += (n>0);
1113: /* upper triangular part */
1114: for (j=0; j<n; j++) {
1115: xb = x_ptr + bs*(*idx++);
1116: for (k=0; k<bs; k++) workt[k] = xb[k];
1117: workt += bs;
1118: }
1119: /* z(i*bs:(i+1)*bs-1) += A(i,:)*x */
1120: PetscKernel_w_gets_w_plus_Ar_times_v(bs,ncols,work,v,z);
1122: /* strict lower triangular part */
1123: idx = aj+ii[0];
1124: if (*idx == i) {
1125: ncols -= bs; v += bs2; idx++; n--;
1126: }
1127: if (ncols > 0) {
1128: workt = work;
1129: PetscMemzero(workt,ncols*sizeof(PetscScalar));
1130: PetscKernel_w_gets_w_plus_trans_Ar_times_v(bs,ncols,x,v,workt);
1131: for (j=0; j<n; j++) {
1132: zb = z_ptr + bs*(*idx++);
1133: for (k=0; k<bs; k++) zb[k] += workt[k];
1134: workt += bs;
1135: }
1136: }
1138: x += bs; v += n*bs2; z += bs; ii++;
1139: }
1141: VecRestoreArrayRead(xx,&x);
1142: VecRestoreArray(zz,&z);
1144: PetscLogFlops(2.0*(a->nz*2.0 - nonzerorow));
1145: return(0);
1146: }
1150: PetscErrorCode MatScale_SeqSBAIJ(Mat inA,PetscScalar alpha)
1151: {
1152: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)inA->data;
1153: PetscScalar oalpha = alpha;
1155: PetscBLASInt one = 1,totalnz;
1158: PetscBLASIntCast(a->bs2*a->nz,&totalnz);
1159: PetscStackCallBLAS("BLASscal",BLASscal_(&totalnz,&oalpha,a->a,&one));
1160: PetscLogFlops(totalnz);
1161: return(0);
1162: }
1166: PetscErrorCode MatNorm_SeqSBAIJ(Mat A,NormType type,PetscReal *norm)
1167: {
1168: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
1169: const MatScalar *v = a->a;
1170: PetscReal sum_diag = 0.0, sum_off = 0.0, *sum;
1171: PetscInt i,j,k,bs = A->rmap->bs,bs2=a->bs2,k1,mbs=a->mbs,jmin,jmax,nexti,ik,*jl,*il;
1172: PetscErrorCode ierr;
1173: const PetscInt *aj=a->j,*col;
1176: if (type == NORM_FROBENIUS) {
1177: for (k=0; k<mbs; k++) {
1178: jmin = a->i[k]; jmax = a->i[k+1];
1179: col = aj + jmin;
1180: if (*col == k) { /* diagonal block */
1181: for (i=0; i<bs2; i++) {
1182: sum_diag += PetscRealPart(PetscConj(*v)*(*v)); v++;
1183: }
1184: jmin++;
1185: }
1186: for (j=jmin; j<jmax; j++) { /* off-diagonal blocks */
1187: for (i=0; i<bs2; i++) {
1188: sum_off += PetscRealPart(PetscConj(*v)*(*v)); v++;
1189: }
1190: }
1191: }
1192: *norm = PetscSqrtReal(sum_diag + 2*sum_off);
1193: } else if (type == NORM_INFINITY || type == NORM_1) { /* maximum row/column sum */
1194: PetscMalloc3(bs,&sum,mbs,&il,mbs,&jl);
1195: for (i=0; i<mbs; i++) jl[i] = mbs;
1196: il[0] = 0;
1198: *norm = 0.0;
1199: for (k=0; k<mbs; k++) { /* k_th block row */
1200: for (j=0; j<bs; j++) sum[j]=0.0;
1201: /*-- col sum --*/
1202: i = jl[k]; /* first |A(i,k)| to be added */
1203: /* jl[k]=i: first nozero element in row i for submatrix A(1:k,k:n) (active window)
1204: at step k */
1205: while (i<mbs) {
1206: nexti = jl[i]; /* next block row to be added */
1207: ik = il[i]; /* block index of A(i,k) in the array a */
1208: for (j=0; j<bs; j++) {
1209: v = a->a + ik*bs2 + j*bs;
1210: for (k1=0; k1<bs; k1++) {
1211: sum[j] += PetscAbsScalar(*v); v++;
1212: }
1213: }
1214: /* update il, jl */
1215: jmin = ik + 1; /* block index of array a: points to the next nonzero of A in row i */
1216: jmax = a->i[i+1];
1217: if (jmin < jmax) {
1218: il[i] = jmin;
1219: j = a->j[jmin];
1220: jl[i] = jl[j]; jl[j]=i;
1221: }
1222: i = nexti;
1223: }
1224: /*-- row sum --*/
1225: jmin = a->i[k]; jmax = a->i[k+1];
1226: for (i=jmin; i<jmax; i++) {
1227: for (j=0; j<bs; j++) {
1228: v = a->a + i*bs2 + j;
1229: for (k1=0; k1<bs; k1++) {
1230: sum[j] += PetscAbsScalar(*v); v += bs;
1231: }
1232: }
1233: }
1234: /* add k_th block row to il, jl */
1235: col = aj+jmin;
1236: if (*col == k) jmin++;
1237: if (jmin < jmax) {
1238: il[k] = jmin;
1239: j = a->j[jmin]; jl[k] = jl[j]; jl[j] = k;
1240: }
1241: for (j=0; j<bs; j++) {
1242: if (sum[j] > *norm) *norm = sum[j];
1243: }
1244: }
1245: PetscFree3(sum,il,jl);
1246: } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for this norm yet");
1247: return(0);
1248: }
1252: PetscErrorCode MatEqual_SeqSBAIJ(Mat A,Mat B,PetscBool * flg)
1253: {
1254: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data,*b = (Mat_SeqSBAIJ*)B->data;
1258: /* If the matrix/block dimensions are not equal, or no of nonzeros or shift */
1259: if ((A->rmap->N != B->rmap->N) || (A->cmap->n != B->cmap->n) || (A->rmap->bs != B->rmap->bs)|| (a->nz != b->nz)) {
1260: *flg = PETSC_FALSE;
1261: return(0);
1262: }
1264: /* if the a->i are the same */
1265: PetscMemcmp(a->i,b->i,(a->mbs+1)*sizeof(PetscInt),flg);
1266: if (!*flg) return(0);
1268: /* if a->j are the same */
1269: PetscMemcmp(a->j,b->j,(a->nz)*sizeof(PetscInt),flg);
1270: if (!*flg) return(0);
1272: /* if a->a are the same */
1273: PetscMemcmp(a->a,b->a,(a->nz)*(A->rmap->bs)*(A->rmap->bs)*sizeof(PetscScalar),flg);
1274: return(0);
1275: }
1279: PetscErrorCode MatGetDiagonal_SeqSBAIJ(Mat A,Vec v)
1280: {
1281: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
1282: PetscErrorCode ierr;
1283: PetscInt i,j,k,row,bs,ambs,bs2;
1284: const PetscInt *ai,*aj;
1285: PetscScalar *x,zero = 0.0;
1286: const MatScalar *aa,*aa_j;
1289: bs = A->rmap->bs;
1290: if (A->factortype && bs>1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix with bs>1");
1292: aa = a->a;
1293: ambs = a->mbs;
1295: if (A->factortype == MAT_FACTOR_CHOLESKY || A->factortype == MAT_FACTOR_ICC) {
1296: PetscInt *diag=a->diag;
1297: aa = a->a;
1298: ambs = a->mbs;
1299: VecGetArray(v,&x);
1300: for (i=0; i<ambs; i++) x[i] = 1.0/aa[diag[i]];
1301: VecRestoreArray(v,&x);
1302: return(0);
1303: }
1305: ai = a->i;
1306: aj = a->j;
1307: bs2 = a->bs2;
1308: VecSet(v,zero);
1309: VecGetArray(v,&x);
1310: for (i=0; i<ambs; i++) {
1311: j=ai[i];
1312: if (aj[j] == i) { /* if this is a diagonal element */
1313: row = i*bs;
1314: aa_j = aa + j*bs2;
1315: for (k=0; k<bs2; k+=(bs+1),row++) x[row] = aa_j[k];
1316: }
1317: }
1318: VecRestoreArray(v,&x);
1319: return(0);
1320: }
1324: PetscErrorCode MatDiagonalScale_SeqSBAIJ(Mat A,Vec ll,Vec rr)
1325: {
1326: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
1327: PetscScalar x;
1328: const PetscScalar *l,*li,*ri;
1329: MatScalar *aa,*v;
1330: PetscErrorCode ierr;
1331: PetscInt i,j,k,lm,M,m,*ai,*aj,mbs,tmp,bs,bs2;
1332: PetscBool flg;
1335: if (ll != rr) {
1336: VecEqual(ll,rr,&flg);
1337: if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be same\n");
1338: }
1339: if (!ll) return(0);
1340: ai = a->i;
1341: aj = a->j;
1342: aa = a->a;
1343: m = A->rmap->N;
1344: bs = A->rmap->bs;
1345: mbs = a->mbs;
1346: bs2 = a->bs2;
1348: VecGetArrayRead(ll,&l);
1349: VecGetLocalSize(ll,&lm);
1350: if (lm != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left scaling vector wrong length");
1351: for (i=0; i<mbs; i++) { /* for each block row */
1352: M = ai[i+1] - ai[i];
1353: li = l + i*bs;
1354: v = aa + bs2*ai[i];
1355: for (j=0; j<M; j++) { /* for each block */
1356: ri = l + bs*aj[ai[i]+j];
1357: for (k=0; k<bs; k++) {
1358: x = ri[k];
1359: for (tmp=0; tmp<bs; tmp++) (*v++) *= li[tmp]*x;
1360: }
1361: }
1362: }
1363: VecRestoreArrayRead(ll,&l);
1364: PetscLogFlops(2.0*a->nz);
1365: return(0);
1366: }
1370: PetscErrorCode MatGetInfo_SeqSBAIJ(Mat A,MatInfoType flag,MatInfo *info)
1371: {
1372: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
1375: info->block_size = a->bs2;
1376: info->nz_allocated = a->bs2*a->maxnz; /*num. of nonzeros in upper triangular part */
1377: info->nz_used = a->bs2*a->nz; /*num. of nonzeros in upper triangular part */
1378: info->nz_unneeded = (double)(info->nz_allocated - info->nz_used);
1379: info->assemblies = A->num_ass;
1380: info->mallocs = A->info.mallocs;
1381: info->memory = ((PetscObject)A)->mem;
1382: if (A->factortype) {
1383: info->fill_ratio_given = A->info.fill_ratio_given;
1384: info->fill_ratio_needed = A->info.fill_ratio_needed;
1385: info->factor_mallocs = A->info.factor_mallocs;
1386: } else {
1387: info->fill_ratio_given = 0;
1388: info->fill_ratio_needed = 0;
1389: info->factor_mallocs = 0;
1390: }
1391: return(0);
1392: }
1397: PetscErrorCode MatZeroEntries_SeqSBAIJ(Mat A)
1398: {
1399: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
1403: PetscMemzero(a->a,a->bs2*a->i[a->mbs]*sizeof(MatScalar));
1404: return(0);
1405: }
1409: /*
1410: This code does not work since it only checks the upper triangular part of
1411: the matrix. Hence it is not listed in the function table.
1412: */
1413: PetscErrorCode MatGetRowMaxAbs_SeqSBAIJ(Mat A,Vec v,PetscInt idx[])
1414: {
1415: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
1416: PetscErrorCode ierr;
1417: PetscInt i,j,n,row,col,bs,mbs;
1418: const PetscInt *ai,*aj;
1419: PetscReal atmp;
1420: const MatScalar *aa;
1421: PetscScalar *x;
1422: PetscInt ncols,brow,bcol,krow,kcol;
1425: if (idx) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Send email to petsc-maint@mcs.anl.gov");
1426: if (A->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
1427: bs = A->rmap->bs;
1428: aa = a->a;
1429: ai = a->i;
1430: aj = a->j;
1431: mbs = a->mbs;
1433: VecSet(v,0.0);
1434: VecGetArray(v,&x);
1435: VecGetLocalSize(v,&n);
1436: if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
1437: for (i=0; i<mbs; i++) {
1438: ncols = ai[1] - ai[0]; ai++;
1439: brow = bs*i;
1440: for (j=0; j<ncols; j++) {
1441: bcol = bs*(*aj);
1442: for (kcol=0; kcol<bs; kcol++) {
1443: col = bcol + kcol; /* col index */
1444: for (krow=0; krow<bs; krow++) {
1445: atmp = PetscAbsScalar(*aa); aa++;
1446: row = brow + krow; /* row index */
1447: if (PetscRealPart(x[row]) < atmp) x[row] = atmp;
1448: if (*aj > i && PetscRealPart(x[col]) < atmp) x[col] = atmp;
1449: }
1450: }
1451: aj++;
1452: }
1453: }
1454: VecRestoreArray(v,&x);
1455: return(0);
1456: }