Actual source code: sbaij2.c
petsc-3.3-p7 2013-05-11
2: #include <../src/mat/impls/baij/seq/baij.h>
3: #include <../src/mat/blockinvert.h>
4: #include <petscbt.h>
5: #include <../src/mat/impls/sbaij/seq/sbaij.h>
6: #include <petscblaslapack.h>
10: PetscErrorCode MatIncreaseOverlap_SeqSBAIJ(Mat A,PetscInt is_max,IS is[],PetscInt ov)
11: {
12: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
14: PetscInt brow,i,j,k,l,mbs,n,*nidx,isz,bcol,bcol_max,start,end,*ai,*aj,bs,*nidx2;
15: const PetscInt *idx;
16: PetscBT table_out,table_in;
19: if (ov < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative overlap specified");
20: mbs = a->mbs;
21: ai = a->i;
22: aj = a->j;
23: bs = A->rmap->bs;
24: PetscBTCreate(mbs,&table_out);
25: PetscMalloc((mbs+1)*sizeof(PetscInt),&nidx);
26: PetscMalloc((A->rmap->N+1)*sizeof(PetscInt),&nidx2);
27: PetscBTCreate(mbs,&table_in);
29: for (i=0; i<is_max; i++) { /* for each is */
30: isz = 0;
31: PetscBTMemzero(mbs,table_out);
32:
33: /* Extract the indices, assume there can be duplicate entries */
34: ISGetIndices(is[i],&idx);
35: ISGetLocalSize(is[i],&n);
37: /* Enter these into the temp arrays i.e mark table_out[brow], enter brow into new index */
38: bcol_max = 0;
39: for (j=0; j<n ; ++j){
40: brow = idx[j]/bs; /* convert the indices into block indices */
41: if (brow >= mbs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"index greater than mat-dim");
42: if(!PetscBTLookupSet(table_out,brow)) {
43: nidx[isz++] = brow;
44: if (bcol_max < brow) bcol_max = brow;
45: }
46: }
47: ISRestoreIndices(is[i],&idx);
48: ISDestroy(&is[i]);
49:
50: k = 0;
51: for (j=0; j<ov; j++){ /* for each overlap */
52: /* set table_in for lookup - only mark entries that are added onto nidx in (j-1)-th overlap */
53: PetscBTMemzero(mbs,table_in);
54: for (l=k; l<isz; l++) { PetscBTSet(table_in,nidx[l]); }
56: n = isz; /* length of the updated is[i] */
57: for (brow=0; brow<mbs; brow++){
58: start = ai[brow]; end = ai[brow+1];
59: if (PetscBTLookup(table_in,brow)){ /* brow is on nidx - row search: collect all bcol in this brow */
60: for (l = start; l<end ; l++){
61: bcol = aj[l];
62: if (!PetscBTLookupSet(table_out,bcol)) {
63: nidx[isz++] = bcol;
64: if (bcol_max < bcol) bcol_max = bcol;
65: }
66: }
67: k++;
68: if (k >= n) break; /* for (brow=0; brow<mbs; brow++) */
69: } else { /* brow is not on nidx - col serach: add brow onto nidx if there is a bcol in nidx */
70: for (l = start; l<end ; l++){
71: bcol = aj[l];
72: if (bcol > bcol_max) break;
73: if (PetscBTLookup(table_in,bcol)){
74: if (!PetscBTLookupSet(table_out,brow)) {nidx[isz++] = brow;}
75: break; /* for l = start; l<end ; l++) */
76: }
77: }
78: }
79: }
80: } /* for each overlap */
82: /* expand the Index Set */
83: for (j=0; j<isz; j++) {
84: for (k=0; k<bs; k++)
85: nidx2[j*bs+k] = nidx[j]*bs+k;
86: }
87: ISCreateGeneral(PETSC_COMM_SELF,isz*bs,nidx2,PETSC_COPY_VALUES,is+i);
88: }
89: PetscBTDestroy(&table_out);
90: PetscFree(nidx);
91: PetscFree(nidx2);
92: PetscBTDestroy(&table_in);
93: return(0);
94: }
98: PetscErrorCode MatGetSubMatrix_SeqSBAIJ_Private(Mat A,IS isrow,IS iscol,MatReuse scall,Mat *B)
99: {
100: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data,*c;
102: PetscInt *smap,i,k,kstart,kend,oldcols = a->mbs,*lens;
103: PetscInt row,mat_i,*mat_j,tcol,*mat_ilen;
104: PetscInt nrows,*ssmap,bs=A->rmap->bs,bs2=a->bs2;
105: const PetscInt *irow,*aj = a->j,*ai = a->i;
106: MatScalar *mat_a;
107: Mat C;
108: PetscBool flag,sorted;
111: if (isrow != iscol) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"For symmetric format, iscol must equal isro");
112: ISSorted(iscol,&sorted);
113: if (!sorted) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"IS is not sorted");
115: ISGetIndices(isrow,&irow);
116: ISGetSize(isrow,&nrows);
117:
118: PetscMalloc(oldcols*sizeof(PetscInt),&smap);
119: PetscMemzero(smap,oldcols*sizeof(PetscInt));
120: ssmap = smap;
121: PetscMalloc((1+nrows)*sizeof(PetscInt),&lens);
122: for (i=0; i<nrows; i++) smap[irow[i]] = i+1; /* nrows = ncols */
123: /* determine lens of each row */
124: for (i=0; i<nrows; i++) {
125: kstart = ai[irow[i]];
126: kend = kstart + a->ilen[irow[i]];
127: lens[i] = 0;
128: for (k=kstart; k<kend; k++) {
129: if (ssmap[aj[k]]) {
130: lens[i]++;
131: }
132: }
133: }
134: /* Create and fill new matrix */
135: if (scall == MAT_REUSE_MATRIX) {
136: c = (Mat_SeqSBAIJ *)((*B)->data);
138: if (c->mbs!=nrows || (*B)->rmap->bs!=bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Submatrix wrong size");
139: PetscMemcmp(c->ilen,lens,c->mbs *sizeof(PetscInt),&flag);
140: if (!flag) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. wrong no of nonzeros");
141: PetscMemzero(c->ilen,c->mbs*sizeof(PetscInt));
142: C = *B;
143: } else {
144: MatCreate(((PetscObject)A)->comm,&C);
145: MatSetSizes(C,nrows*bs,nrows*bs,PETSC_DETERMINE,PETSC_DETERMINE);
146: MatSetType(C,((PetscObject)A)->type_name);
147: MatSeqSBAIJSetPreallocation_SeqSBAIJ(C,bs,0,lens);
148: }
149: c = (Mat_SeqSBAIJ *)(C->data);
150: for (i=0; i<nrows; i++) {
151: row = irow[i];
152: kstart = ai[row];
153: kend = kstart + a->ilen[row];
154: mat_i = c->i[i];
155: mat_j = c->j + mat_i;
156: mat_a = c->a + mat_i*bs2;
157: mat_ilen = c->ilen + i;
158: for (k=kstart; k<kend; k++) {
159: if ((tcol=ssmap[a->j[k]])) {
160: *mat_j++ = tcol - 1;
161: PetscMemcpy(mat_a,a->a+k*bs2,bs2*sizeof(MatScalar));
162: mat_a += bs2;
163: (*mat_ilen)++;
164: }
165: }
166: }
167:
168: /* Free work space */
169: PetscFree(smap);
170: PetscFree(lens);
171: MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);
172: MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);
173:
174: ISRestoreIndices(isrow,&irow);
175: *B = C;
176: return(0);
177: }
181: PetscErrorCode MatGetSubMatrix_SeqSBAIJ(Mat A,IS isrow,IS iscol,MatReuse scall,Mat *B)
182: {
183: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
184: IS is1;
186: PetscInt *vary,*iary,nrows,i,bs=A->rmap->bs,count;
187: const PetscInt *irow;
190: if (isrow != iscol) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"For symmetric format, iscol must equal isro");
191:
192: ISGetIndices(isrow,&irow);
193: ISGetSize(isrow,&nrows);
194:
195: /* Verify if the indices corespond to each element in a block
196: and form the IS with compressed IS */
197: PetscMalloc2(a->mbs,PetscInt,&vary,a->mbs,PetscInt,&iary);
198: PetscMemzero(vary,a->mbs*sizeof(PetscInt));
199: for (i=0; i<nrows; i++) vary[irow[i]/bs]++;
200:
201: count = 0;
202: for (i=0; i<a->mbs; i++) {
203: if (vary[i]!=0 && vary[i]!=bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Index set does not match blocks");
204: if (vary[i]==bs) iary[count++] = i;
205: }
206: ISRestoreIndices(isrow,&irow);
207: ISCreateGeneral(PETSC_COMM_SELF,count,iary,PETSC_COPY_VALUES,&is1);
208: PetscFree2(vary,iary);
210: MatGetSubMatrix_SeqSBAIJ_Private(A,is1,is1,scall,B);
211: ISDestroy(&is1);
212: return(0);
213: }
217: PetscErrorCode MatGetSubMatrices_SeqSBAIJ(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[])
218: {
220: PetscInt i;
223: if (scall == MAT_INITIAL_MATRIX) {
224: PetscMalloc((n+1)*sizeof(Mat),B);
225: }
227: for (i=0; i<n; i++) {
228: MatGetSubMatrix_SeqSBAIJ(A,irow[i],icol[i],scall,&(*B)[i]);
229: }
230: return(0);
231: }
233: /* -------------------------------------------------------*/
234: /* Should check that shapes of vectors and matrices match */
235: /* -------------------------------------------------------*/
239: PetscErrorCode MatMult_SeqSBAIJ_2(Mat A,Vec xx,Vec zz)
240: {
241: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
242: PetscScalar *x,*z,*xb,x1,x2,zero=0.0;
243: MatScalar *v;
245: PetscInt mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
246: PetscInt nonzerorow=0;
249: VecSet(zz,zero);
250: VecGetArray(xx,&x);
251: VecGetArray(zz,&z);
252:
253: v = a->a;
254: xb = x;
256: for (i=0; i<mbs; i++) {
257: n = ai[1] - ai[0]; /* length of i_th block row of A */
258: x1 = xb[0]; x2 = xb[1];
259: ib = aj + *ai;
260: jmin = 0;
261: nonzerorow += (n>0);
262: if (*ib == i){ /* (diag of A)*x */
263: z[2*i] += v[0]*x1 + v[2]*x2;
264: z[2*i+1] += v[2]*x1 + v[3]*x2;
265: v += 4; jmin++;
266: }
267: PetscPrefetchBlock(ib+jmin+n,n,0,PETSC_PREFETCH_HINT_NTA); /* Indices for the next row (assumes same size as this one) */
268: PetscPrefetchBlock(v+4*n,4*n,0,PETSC_PREFETCH_HINT_NTA); /* Entries for the next row */
269: for (j=jmin; j<n; j++) {
270: /* (strict lower triangular part of A)*x */
271: cval = ib[j]*2;
272: z[cval] += v[0]*x1 + v[1]*x2;
273: z[cval+1] += v[2]*x1 + v[3]*x2;
274: /* (strict upper triangular part of A)*x */
275: z[2*i] += v[0]*x[cval] + v[2]*x[cval+1];
276: z[2*i+1] += v[1]*x[cval] + v[3]*x[cval+1];
277: v += 4;
278: }
279: xb +=2; ai++;
280: }
282: VecRestoreArray(xx,&x);
283: VecRestoreArray(zz,&z);
284: PetscLogFlops(8.0*(a->nz*2.0 - nonzerorow) - nonzerorow);
285: return(0);
286: }
290: PetscErrorCode MatMult_SeqSBAIJ_3(Mat A,Vec xx,Vec zz)
291: {
292: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
293: PetscScalar *x,*z,*xb,x1,x2,x3,zero=0.0;
294: MatScalar *v;
296: PetscInt mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
297: PetscInt nonzerorow=0;
300: VecSet(zz,zero);
301: VecGetArray(xx,&x);
302: VecGetArray(zz,&z);
303:
304: v = a->a;
305: xb = x;
307: for (i=0; i<mbs; i++) {
308: n = ai[1] - ai[0]; /* length of i_th block row of A */
309: x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
310: ib = aj + *ai;
311: jmin = 0;
312: nonzerorow += (n>0);
313: if (*ib == i){ /* (diag of A)*x */
314: z[3*i] += v[0]*x1 + v[3]*x2 + v[6]*x3;
315: z[3*i+1] += v[3]*x1 + v[4]*x2 + v[7]*x3;
316: z[3*i+2] += v[6]*x1 + v[7]*x2 + v[8]*x3;
317: v += 9; jmin++;
318: }
319: PetscPrefetchBlock(ib+jmin+n,n,0,PETSC_PREFETCH_HINT_NTA); /* Indices for the next row (assumes same size as this one) */
320: PetscPrefetchBlock(v+9*n,9*n,0,PETSC_PREFETCH_HINT_NTA); /* Entries for the next row */
321: for (j=jmin; j<n; j++) {
322: /* (strict lower triangular part of A)*x */
323: cval = ib[j]*3;
324: z[cval] += v[0]*x1 + v[1]*x2 + v[2]*x3;
325: z[cval+1] += v[3]*x1 + v[4]*x2 + v[5]*x3;
326: z[cval+2] += v[6]*x1 + v[7]*x2 + v[8]*x3;
327: /* (strict upper triangular part of A)*x */
328: z[3*i] += v[0]*x[cval] + v[3]*x[cval+1]+ v[6]*x[cval+2];
329: z[3*i+1] += v[1]*x[cval] + v[4]*x[cval+1]+ v[7]*x[cval+2];
330: z[3*i+2] += v[2]*x[cval] + v[5]*x[cval+1]+ v[8]*x[cval+2];
331: v += 9;
332: }
333: xb +=3; ai++;
334: }
336: VecRestoreArray(xx,&x);
337: VecRestoreArray(zz,&z);
338: PetscLogFlops(18.0*(a->nz*2.0 - nonzerorow) - nonzerorow);
339: return(0);
340: }
344: PetscErrorCode MatMult_SeqSBAIJ_4(Mat A,Vec xx,Vec zz)
345: {
346: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
347: PetscScalar *x,*z,*xb,x1,x2,x3,x4,zero=0.0;
348: MatScalar *v;
350: PetscInt mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
351: PetscInt nonzerorow=0;
354: VecSet(zz,zero);
355: VecGetArray(xx,&x);
356: VecGetArray(zz,&z);
357:
358: v = a->a;
359: xb = x;
361: for (i=0; i<mbs; i++) {
362: n = ai[1] - ai[0]; /* length of i_th block row of A */
363: x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
364: ib = aj + *ai;
365: jmin = 0;
366: nonzerorow += (n>0);
367: if (*ib == i){ /* (diag of A)*x */
368: z[4*i] += v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4;
369: z[4*i+1] += v[4]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4;
370: z[4*i+2] += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[14]*x4;
371: z[4*i+3] += v[12]*x1+ v[13]*x2+ v[14]*x3 + v[15]*x4;
372: v += 16; jmin++;
373: }
374: PetscPrefetchBlock(ib+jmin+n,n,0,PETSC_PREFETCH_HINT_NTA); /* Indices for the next row (assumes same size as this one) */
375: PetscPrefetchBlock(v+16*n,16*n,0,PETSC_PREFETCH_HINT_NTA); /* Entries for the next row */
376: for (j=jmin; j<n; j++) {
377: /* (strict lower triangular part of A)*x */
378: cval = ib[j]*4;
379: z[cval] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4;
380: z[cval+1] += v[4]*x1 + v[5]*x2 + v[6]*x3 + v[7]*x4;
381: z[cval+2] += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[11]*x4;
382: z[cval+3] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4;
383: /* (strict upper triangular part of A)*x */
384: z[4*i] += v[0]*x[cval] + v[4]*x[cval+1]+ v[8]*x[cval+2] + v[12]*x[cval+3];
385: z[4*i+1] += v[1]*x[cval] + v[5]*x[cval+1]+ v[9]*x[cval+2] + v[13]*x[cval+3];
386: z[4*i+2] += v[2]*x[cval] + v[6]*x[cval+1]+ v[10]*x[cval+2]+ v[14]*x[cval+3];
387: z[4*i+3] += v[3]*x[cval] + v[7]*x[cval+1]+ v[11]*x[cval+2]+ v[15]*x[cval+3];
388: v += 16;
389: }
390: xb +=4; ai++;
391: }
393: VecRestoreArray(xx,&x);
394: VecRestoreArray(zz,&z);
395: PetscLogFlops(32.0*(a->nz*2.0 - nonzerorow) - nonzerorow);
396: return(0);
397: }
401: PetscErrorCode MatMult_SeqSBAIJ_5(Mat A,Vec xx,Vec zz)
402: {
403: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
404: PetscScalar *x,*z,*xb,x1,x2,x3,x4,x5,zero=0.0;
405: MatScalar *v;
407: PetscInt mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
408: PetscInt nonzerorow=0;
411: VecSet(zz,zero);
412: VecGetArray(xx,&x);
413: VecGetArray(zz,&z);
414:
415: v = a->a;
416: xb = x;
418: for (i=0; i<mbs; i++) {
419: n = ai[1] - ai[0]; /* length of i_th block row of A */
420: x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4];
421: ib = aj + *ai;
422: jmin = 0;
423: nonzerorow += (n>0);
424: if (*ib == i){ /* (diag of A)*x */
425: z[5*i] += v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4+ v[20]*x5;
426: z[5*i+1] += v[5]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4+ v[21]*x5;
427: z[5*i+2] += v[10]*x1 +v[11]*x2 + v[12]*x3 + v[17]*x4+ v[22]*x5;
428: z[5*i+3] += v[15]*x1 +v[16]*x2 + v[17]*x3 + v[18]*x4+ v[23]*x5;
429: z[5*i+4] += v[20]*x1 +v[21]*x2 + v[22]*x3 + v[23]*x4+ v[24]*x5;
430: v += 25; jmin++;
431: }
432: PetscPrefetchBlock(ib+jmin+n,n,0,PETSC_PREFETCH_HINT_NTA); /* Indices for the next row (assumes same size as this one) */
433: PetscPrefetchBlock(v+25*n,25*n,0,PETSC_PREFETCH_HINT_NTA); /* Entries for the next row */
434: for (j=jmin; j<n; j++) {
435: /* (strict lower triangular part of A)*x */
436: cval = ib[j]*5;
437: z[cval] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4 + v[4]*x5;
438: z[cval+1] += v[5]*x1 + v[6]*x2 + v[7]*x3 + v[8]*x4 + v[9]*x5;
439: z[cval+2] += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4+ v[14]*x5;
440: z[cval+3] += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4+ v[19]*x5;
441: z[cval+4] += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4+ v[24]*x5;
442: /* (strict upper triangular part of A)*x */
443: z[5*i] +=v[0]*x[cval]+v[5]*x[cval+1]+v[10]*x[cval+2]+v[15]*x[cval+3]+v[20]*x[cval+4];
444: z[5*i+1] +=v[1]*x[cval]+v[6]*x[cval+1]+v[11]*x[cval+2]+v[16]*x[cval+3]+v[21]*x[cval+4];
445: z[5*i+2] +=v[2]*x[cval]+v[7]*x[cval+1]+v[12]*x[cval+2]+v[17]*x[cval+3]+v[22]*x[cval+4];
446: z[5*i+3] +=v[3]*x[cval]+v[8]*x[cval+1]+v[13]*x[cval+2]+v[18]*x[cval+3]+v[23]*x[cval+4];
447: z[5*i+4] +=v[4]*x[cval]+v[9]*x[cval+1]+v[14]*x[cval+2]+v[19]*x[cval+3]+v[24]*x[cval+4];
448: v += 25;
449: }
450: xb +=5; ai++;
451: }
453: VecRestoreArray(xx,&x);
454: VecRestoreArray(zz,&z);
455: PetscLogFlops(50.0*(a->nz*2.0 - nonzerorow) - nonzerorow);
456: return(0);
457: }
462: PetscErrorCode MatMult_SeqSBAIJ_6(Mat A,Vec xx,Vec zz)
463: {
464: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
465: PetscScalar *x,*z,*xb,x1,x2,x3,x4,x5,x6,zero=0.0;
466: MatScalar *v;
468: PetscInt mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
469: PetscInt nonzerorow=0;
472: VecSet(zz,zero);
473: VecGetArray(xx,&x);
474: VecGetArray(zz,&z);
475:
476: v = a->a;
477: xb = x;
479: for (i=0; i<mbs; i++) {
480: n = ai[1] - ai[0]; /* length of i_th block row of A */
481: x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4]; x6=xb[5];
482: ib = aj + *ai;
483: jmin = 0;
484: nonzerorow += (n>0);
485: if (*ib == i){ /* (diag of A)*x */
486: z[6*i] += v[0]*x1 + v[6]*x2 + v[12]*x3 + v[18]*x4+ v[24]*x5 + v[30]*x6;
487: z[6*i+1] += v[6]*x1 + v[7]*x2 + v[13]*x3 + v[19]*x4+ v[25]*x5 + v[31]*x6;
488: z[6*i+2] += v[12]*x1 +v[13]*x2 + v[14]*x3 + v[20]*x4+ v[26]*x5 + v[32]*x6;
489: z[6*i+3] += v[18]*x1 +v[19]*x2 + v[20]*x3 + v[21]*x4+ v[27]*x5 + v[33]*x6;
490: z[6*i+4] += v[24]*x1 +v[25]*x2 + v[26]*x3 + v[27]*x4+ v[28]*x5 + v[34]*x6;
491: z[6*i+5] += v[30]*x1 +v[31]*x2 + v[32]*x3 + v[33]*x4+ v[34]*x5 + v[35]*x6;
492: v += 36; jmin++;
493: }
494: PetscPrefetchBlock(ib+jmin+n,n,0,PETSC_PREFETCH_HINT_NTA); /* Indices for the next row (assumes same size as this one) */
495: PetscPrefetchBlock(v+36*n,36*n,0,PETSC_PREFETCH_HINT_NTA); /* Entries for the next row */
496: for (j=jmin; j<n; j++) {
497: /* (strict lower triangular part of A)*x */
498: cval = ib[j]*6;
499: z[cval] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4+ v[4]*x5 + v[5]*x6;
500: z[cval+1] += v[6]*x1 + v[7]*x2 + v[8]*x3 + v[9]*x4+ v[10]*x5 + v[11]*x6;
501: z[cval+2] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4+ v[16]*x5 + v[17]*x6;
502: z[cval+3] += v[18]*x1 + v[19]*x2 + v[20]*x3 + v[21]*x4+ v[22]*x5 + v[23]*x6;
503: z[cval+4] += v[24]*x1 + v[25]*x2 + v[26]*x3 + v[27]*x4+ v[28]*x5 + v[29]*x6;
504: z[cval+5] += v[30]*x1 + v[31]*x2 + v[32]*x3 + v[33]*x4+ v[34]*x5 + v[35]*x6;
505: /* (strict upper triangular part of A)*x */
506: z[6*i] +=v[0]*x[cval]+v[6]*x[cval+1]+v[12]*x[cval+2]+v[18]*x[cval+3]+v[24]*x[cval+4]+v[30]*x[cval+5];
507: z[6*i+1] +=v[1]*x[cval]+v[7]*x[cval+1]+v[13]*x[cval+2]+v[19]*x[cval+3]+v[25]*x[cval+4]+v[31]*x[cval+5];
508: z[6*i+2] +=v[2]*x[cval]+v[8]*x[cval+1]+v[14]*x[cval+2]+v[20]*x[cval+3]+v[26]*x[cval+4]+v[32]*x[cval+5];
509: z[6*i+3] +=v[3]*x[cval]+v[9]*x[cval+1]+v[15]*x[cval+2]+v[21]*x[cval+3]+v[27]*x[cval+4]+v[33]*x[cval+5];
510: z[6*i+4] +=v[4]*x[cval]+v[10]*x[cval+1]+v[16]*x[cval+2]+v[22]*x[cval+3]+v[28]*x[cval+4]+v[34]*x[cval+5];
511: z[6*i+5] +=v[5]*x[cval]+v[11]*x[cval+1]+v[17]*x[cval+2]+v[23]*x[cval+3]+v[29]*x[cval+4]+v[35]*x[cval+5];
512: v += 36;
513: }
514: xb +=6; ai++;
515: }
517: VecRestoreArray(xx,&x);
518: VecRestoreArray(zz,&z);
519: PetscLogFlops(72.0*(a->nz*2.0 - nonzerorow) - nonzerorow);
520: return(0);
521: }
524: PetscErrorCode MatMult_SeqSBAIJ_7(Mat A,Vec xx,Vec zz)
525: {
526: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
527: PetscScalar *x,*z,*xb,x1,x2,x3,x4,x5,x6,x7,zero=0.0;
528: MatScalar *v;
530: PetscInt mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
531: PetscInt nonzerorow=0;
534: VecSet(zz,zero);
535: VecGetArray(xx,&x);
536: VecGetArray(zz,&z);
537:
538: v = a->a;
539: xb = x;
541: for (i=0; i<mbs; i++) {
542: n = ai[1] - ai[0]; /* length of i_th block row of A */
543: x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4]; x6=xb[5]; x7=xb[6];
544: ib = aj + *ai;
545: jmin = 0;
546: nonzerorow += (n>0);
547: if (*ib == i){ /* (diag of A)*x */
548: z[7*i] += v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4+ v[28]*x5 + v[35]*x6+ v[42]*x7;
549: z[7*i+1] += v[7]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4+ v[29]*x5 + v[36]*x6+ v[43]*x7;
550: z[7*i+2] += v[14]*x1+ v[15]*x2 +v[16]*x3 + v[23]*x4+ v[30]*x5 + v[37]*x6+ v[44]*x7;
551: z[7*i+3] += v[21]*x1+ v[22]*x2 +v[23]*x3 + v[24]*x4+ v[31]*x5 + v[38]*x6+ v[45]*x7;
552: z[7*i+4] += v[28]*x1+ v[29]*x2 +v[30]*x3 + v[31]*x4+ v[32]*x5 + v[39]*x6+ v[46]*x7;
553: z[7*i+5] += v[35]*x1+ v[36]*x2 +v[37]*x3 + v[38]*x4+ v[39]*x5 + v[40]*x6+ v[47]*x7;
554: z[7*i+6] += v[42]*x1+ v[43]*x2 +v[44]*x3 + v[45]*x4+ v[46]*x5 + v[47]*x6+ v[48]*x7;
555: v += 49; jmin++;
556: }
557: PetscPrefetchBlock(ib+jmin+n,n,0,PETSC_PREFETCH_HINT_NTA); /* Indices for the next row (assumes same size as this one) */
558: PetscPrefetchBlock(v+49*n,49*n,0,PETSC_PREFETCH_HINT_NTA); /* Entries for the next row */
559: for (j=jmin; j<n; j++) {
560: /* (strict lower triangular part of A)*x */
561: cval = ib[j]*7;
562: z[cval] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4+ v[4]*x5 + v[5]*x6+ v[6]*x7;
563: z[cval+1] += v[7]*x1 + v[8]*x2 + v[9]*x3 + v[10]*x4+ v[11]*x5 + v[12]*x6+ v[13]*x7;
564: z[cval+2] += v[14]*x1 + v[15]*x2 + v[16]*x3 + v[17]*x4+ v[18]*x5 + v[19]*x6+ v[20]*x7;
565: z[cval+3] += v[21]*x1 + v[22]*x2 + v[23]*x3 + v[24]*x4+ v[25]*x5 + v[26]*x6+ v[27]*x7;
566: z[cval+4] += v[28]*x1 + v[29]*x2 + v[30]*x3 + v[31]*x4+ v[32]*x5 + v[33]*x6+ v[34]*x7;
567: z[cval+5] += v[35]*x1 + v[36]*x2 + v[37]*x3 + v[38]*x4+ v[39]*x5 + v[40]*x6+ v[41]*x7;
568: z[cval+6] += v[42]*x1 + v[43]*x2 + v[44]*x3 + v[45]*x4+ v[46]*x5 + v[47]*x6+ v[48]*x7;
569: /* (strict upper triangular part of A)*x */
570: z[7*i] +=v[0]*x[cval]+v[7]*x[cval+1]+v[14]*x[cval+2]+v[21]*x[cval+3]+v[28]*x[cval+4]+v[35]*x[cval+5]+v[42]*x[cval+6];
571: z[7*i+1]+=v[1]*x[cval]+v[8]*x[cval+1]+v[15]*x[cval+2]+v[22]*x[cval+3]+v[29]*x[cval+4]+v[36]*x[cval+5]+v[43]*x[cval+6];
572: z[7*i+2]+=v[2]*x[cval]+v[9]*x[cval+1]+v[16]*x[cval+2]+v[23]*x[cval+3]+v[30]*x[cval+4]+v[37]*x[cval+5]+v[44]*x[cval+6];
573: z[7*i+3]+=v[3]*x[cval]+v[10]*x[cval+1]+v[17]*x[cval+2]+v[24]*x[cval+3]+v[31]*x[cval+4]+v[38]*x[cval+5]+v[45]*x[cval+6];
574: z[7*i+4]+=v[4]*x[cval]+v[11]*x[cval+1]+v[18]*x[cval+2]+v[25]*x[cval+3]+v[32]*x[cval+4]+v[39]*x[cval+5]+v[46]*x[cval+6];
575: z[7*i+5]+=v[5]*x[cval]+v[12]*x[cval+1]+v[19]*x[cval+2]+v[26]*x[cval+3]+v[33]*x[cval+4]+v[40]*x[cval+5]+v[47]*x[cval+6];
576: z[7*i+6]+=v[6]*x[cval]+v[13]*x[cval+1]+v[20]*x[cval+2]+v[27]*x[cval+3]+v[34]*x[cval+4]+v[41]*x[cval+5]+v[48]*x[cval+6];
577: v += 49;
578: }
579: xb +=7; ai++;
580: }
581: VecRestoreArray(xx,&x);
582: VecRestoreArray(zz,&z);
583: PetscLogFlops(98.0*(a->nz*2.0 - nonzerorow) - nonzerorow);
584: return(0);
585: }
587: /*
588: This will not work with MatScalar == float because it calls the BLAS
589: */
592: PetscErrorCode MatMult_SeqSBAIJ_N(Mat A,Vec xx,Vec zz)
593: {
594: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
595: PetscScalar *x,*x_ptr,*z,*z_ptr,*xb,*zb,*work,*workt,zero=0.0;
596: MatScalar *v;
598: PetscInt mbs=a->mbs,i,*idx,*aj,*ii,bs=A->rmap->bs,j,n,bs2=a->bs2,ncols,k;
599: PetscInt nonzerorow=0;
602: VecSet(zz,zero);
603: VecGetArray(xx,&x); x_ptr=x;
604: VecGetArray(zz,&z); z_ptr=z;
606: aj = a->j;
607: v = a->a;
608: ii = a->i;
610: if (!a->mult_work) {
611: PetscMalloc((A->rmap->N+1)*sizeof(PetscScalar),&a->mult_work);
612: }
613: work = a->mult_work;
614:
615: for (i=0; i<mbs; i++) {
616: n = ii[1] - ii[0]; ncols = n*bs;
617: workt = work; idx=aj+ii[0];
618: nonzerorow += (n>0);
620: /* upper triangular part */
621: for (j=0; j<n; j++) {
622: xb = x_ptr + bs*(*idx++);
623: for (k=0; k<bs; k++) workt[k] = xb[k];
624: workt += bs;
625: }
626: /* z(i*bs:(i+1)*bs-1) += A(i,:)*x */
627: PetscKernel_w_gets_w_plus_Ar_times_v(bs,ncols,work,v,z);
628:
629: /* strict lower triangular part */
630: idx = aj+ii[0];
631: if (*idx == i){
632: ncols -= bs; v += bs2; idx++; n--;
633: }
634:
635: if (ncols > 0){
636: workt = work;
637: PetscMemzero(workt,ncols*sizeof(PetscScalar));
638: PetscKernel_w_gets_w_plus_trans_Ar_times_v(bs,ncols,x,v,workt);
639: for (j=0; j<n; j++) {
640: zb = z_ptr + bs*(*idx++);
641: for (k=0; k<bs; k++) zb[k] += workt[k] ;
642: workt += bs;
643: }
644: }
645: x += bs; v += n*bs2; z += bs; ii++;
646: }
647:
648: VecRestoreArray(xx,&x);
649: VecRestoreArray(zz,&z);
650: PetscLogFlops(2.0*(a->nz*2.0 - nonzerorow)*bs2 - nonzerorow);
651: return(0);
652: }
656: PetscErrorCode MatMultAdd_SeqSBAIJ_1(Mat A,Vec xx,Vec yy,Vec zz)
657: {
658: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
659: PetscScalar *x,*z,*xb,x1;
660: MatScalar *v;
662: PetscInt mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
663: PetscInt nonzerorow=0;
666: VecCopy(yy,zz);
667: VecGetArray(xx,&x);
668: VecGetArray(zz,&z);
669: v = a->a;
670: xb = x;
672: for (i=0; i<mbs; i++) {
673: n = ai[1] - ai[0]; /* length of i_th row of A */
674: x1 = xb[0];
675: ib = aj + *ai;
676: jmin = 0;
677: nonzerorow += (n>0);
678: if (*ib == i) { /* (diag of A)*x */
679: z[i] += *v++ * x[*ib++]; jmin++;
680: }
681: for (j=jmin; j<n; j++) {
682: cval = *ib;
683: z[cval] += *v * x1; /* (strict lower triangular part of A)*x */
684: z[i] += *v++ * x[*ib++]; /* (strict upper triangular part of A)*x */
685: }
686: xb++; ai++;
687: }
689: VecRestoreArray(xx,&x);
690: VecRestoreArray(zz,&z);
691:
692: PetscLogFlops(2.0*(a->nz*2.0 - nonzerorow));
693: return(0);
694: }
698: PetscErrorCode MatMultAdd_SeqSBAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
699: {
700: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
701: PetscScalar *x,*z,*xb,x1,x2;
702: MatScalar *v;
704: PetscInt mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
705: PetscInt nonzerorow=0;
708: VecCopy(yy,zz);
709: VecGetArray(xx,&x);
710: VecGetArray(zz,&z);
712: v = a->a;
713: xb = x;
715: for (i=0; i<mbs; i++) {
716: n = ai[1] - ai[0]; /* length of i_th block row of A */
717: x1 = xb[0]; x2 = xb[1];
718: ib = aj + *ai;
719: jmin = 0;
720: nonzerorow += (n>0);
721: if (*ib == i){ /* (diag of A)*x */
722: z[2*i] += v[0]*x1 + v[2]*x2;
723: z[2*i+1] += v[2]*x1 + v[3]*x2;
724: v += 4; jmin++;
725: }
726: PetscPrefetchBlock(ib+jmin+n,n,0,PETSC_PREFETCH_HINT_NTA); /* Indices for the next row (assumes same size as this one) */
727: PetscPrefetchBlock(v+4*n,4*n,0,PETSC_PREFETCH_HINT_NTA); /* Entries for the next row */
728: for (j=jmin; j<n; j++) {
729: /* (strict lower triangular part of A)*x */
730: cval = ib[j]*2;
731: z[cval] += v[0]*x1 + v[1]*x2;
732: z[cval+1] += v[2]*x1 + v[3]*x2;
733: /* (strict upper triangular part of A)*x */
734: z[2*i] += v[0]*x[cval] + v[2]*x[cval+1];
735: z[2*i+1] += v[1]*x[cval] + v[3]*x[cval+1];
736: v += 4;
737: }
738: xb +=2; ai++;
739: }
740: VecRestoreArray(xx,&x);
741: VecRestoreArray(zz,&z);
743: PetscLogFlops(4.0*(a->nz*2.0 - nonzerorow));
744: return(0);
745: }
749: PetscErrorCode MatMultAdd_SeqSBAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
750: {
751: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
752: PetscScalar *x,*z,*xb,x1,x2,x3;
753: MatScalar *v;
755: PetscInt mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
756: PetscInt nonzerorow=0;
759: VecCopy(yy,zz);
760: VecGetArray(xx,&x);
761: VecGetArray(zz,&z);
763: v = a->a;
764: xb = x;
766: for (i=0; i<mbs; i++) {
767: n = ai[1] - ai[0]; /* length of i_th block row of A */
768: x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
769: ib = aj + *ai;
770: jmin = 0;
771: nonzerorow += (n>0);
772: if (*ib == i){ /* (diag of A)*x */
773: z[3*i] += v[0]*x1 + v[3]*x2 + v[6]*x3;
774: z[3*i+1] += v[3]*x1 + v[4]*x2 + v[7]*x3;
775: z[3*i+2] += v[6]*x1 + v[7]*x2 + v[8]*x3;
776: v += 9; jmin++;
777: }
778: PetscPrefetchBlock(ib+jmin+n,n,0,PETSC_PREFETCH_HINT_NTA); /* Indices for the next row (assumes same size as this one) */
779: PetscPrefetchBlock(v+9*n,9*n,0,PETSC_PREFETCH_HINT_NTA); /* Entries for the next row */
780: for (j=jmin; j<n; j++) {
781: /* (strict lower triangular part of A)*x */
782: cval = ib[j]*3;
783: z[cval] += v[0]*x1 + v[1]*x2 + v[2]*x3;
784: z[cval+1] += v[3]*x1 + v[4]*x2 + v[5]*x3;
785: z[cval+2] += v[6]*x1 + v[7]*x2 + v[8]*x3;
786: /* (strict upper triangular part of A)*x */
787: z[3*i] += v[0]*x[cval] + v[3]*x[cval+1]+ v[6]*x[cval+2];
788: z[3*i+1] += v[1]*x[cval] + v[4]*x[cval+1]+ v[7]*x[cval+2];
789: z[3*i+2] += v[2]*x[cval] + v[5]*x[cval+1]+ v[8]*x[cval+2];
790: v += 9;
791: }
792: xb +=3; ai++;
793: }
795: VecRestoreArray(xx,&x);
796: VecRestoreArray(zz,&z);
798: PetscLogFlops(18.0*(a->nz*2.0 - nonzerorow));
799: return(0);
800: }
804: PetscErrorCode MatMultAdd_SeqSBAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
805: {
806: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
807: PetscScalar *x,*z,*xb,x1,x2,x3,x4;
808: MatScalar *v;
810: PetscInt mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
811: PetscInt nonzerorow=0;
814: VecCopy(yy,zz);
815: VecGetArray(xx,&x);
816: VecGetArray(zz,&z);
818: v = a->a;
819: xb = x;
821: for (i=0; i<mbs; i++) {
822: n = ai[1] - ai[0]; /* length of i_th block row of A */
823: x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
824: ib = aj + *ai;
825: jmin = 0;
826: nonzerorow += (n>0);
827: if (*ib == i){ /* (diag of A)*x */
828: z[4*i] += v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4;
829: z[4*i+1] += v[4]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4;
830: z[4*i+2] += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[14]*x4;
831: z[4*i+3] += v[12]*x1+ v[13]*x2+ v[14]*x3 + v[15]*x4;
832: v += 16; jmin++;
833: }
834: PetscPrefetchBlock(ib+jmin+n,n,0,PETSC_PREFETCH_HINT_NTA); /* Indices for the next row (assumes same size as this one) */
835: PetscPrefetchBlock(v+16*n,16*n,0,PETSC_PREFETCH_HINT_NTA); /* Entries for the next row */
836: for (j=jmin; j<n; j++) {
837: /* (strict lower triangular part of A)*x */
838: cval = ib[j]*4;
839: z[cval] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4;
840: z[cval+1] += v[4]*x1 + v[5]*x2 + v[6]*x3 + v[7]*x4;
841: z[cval+2] += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[11]*x4;
842: z[cval+3] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4;
843: /* (strict upper triangular part of A)*x */
844: z[4*i] += v[0]*x[cval] + v[4]*x[cval+1]+ v[8]*x[cval+2] + v[12]*x[cval+3];
845: z[4*i+1] += v[1]*x[cval] + v[5]*x[cval+1]+ v[9]*x[cval+2] + v[13]*x[cval+3];
846: z[4*i+2] += v[2]*x[cval] + v[6]*x[cval+1]+ v[10]*x[cval+2]+ v[14]*x[cval+3];
847: z[4*i+3] += v[3]*x[cval] + v[7]*x[cval+1]+ v[11]*x[cval+2]+ v[15]*x[cval+3];
848: v += 16;
849: }
850: xb +=4; ai++;
851: }
853: VecRestoreArray(xx,&x);
854: VecRestoreArray(zz,&z);
856: PetscLogFlops(32.0*(a->nz*2.0 - nonzerorow));
857: return(0);
858: }
862: PetscErrorCode MatMultAdd_SeqSBAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
863: {
864: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
865: PetscScalar *x,*z,*xb,x1,x2,x3,x4,x5;
866: MatScalar *v;
868: PetscInt mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
869: PetscInt nonzerorow=0;
872: VecCopy(yy,zz);
873: VecGetArray(xx,&x);
874: VecGetArray(zz,&z);
876: v = a->a;
877: xb = x;
879: for (i=0; i<mbs; i++) {
880: n = ai[1] - ai[0]; /* length of i_th block row of A */
881: x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4];
882: ib = aj + *ai;
883: jmin = 0;
884: nonzerorow += (n>0);
885: if (*ib == i){ /* (diag of A)*x */
886: z[5*i] += v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4+ v[20]*x5;
887: z[5*i+1] += v[5]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4+ v[21]*x5;
888: z[5*i+2] += v[10]*x1 +v[11]*x2 + v[12]*x3 + v[17]*x4+ v[22]*x5;
889: z[5*i+3] += v[15]*x1 +v[16]*x2 + v[17]*x3 + v[18]*x4+ v[23]*x5;
890: z[5*i+4] += v[20]*x1 +v[21]*x2 + v[22]*x3 + v[23]*x4+ v[24]*x5;
891: v += 25; jmin++;
892: }
893: PetscPrefetchBlock(ib+jmin+n,n,0,PETSC_PREFETCH_HINT_NTA); /* Indices for the next row (assumes same size as this one) */
894: PetscPrefetchBlock(v+25*n,25*n,0,PETSC_PREFETCH_HINT_NTA); /* Entries for the next row */
895: for (j=jmin; j<n; j++) {
896: /* (strict lower triangular part of A)*x */
897: cval = ib[j]*5;
898: z[cval] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4 + v[4]*x5;
899: z[cval+1] += v[5]*x1 + v[6]*x2 + v[7]*x3 + v[8]*x4 + v[9]*x5;
900: z[cval+2] += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4+ v[14]*x5;
901: z[cval+3] += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4+ v[19]*x5;
902: z[cval+4] += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4+ v[24]*x5;
903: /* (strict upper triangular part of A)*x */
904: z[5*i] +=v[0]*x[cval]+v[5]*x[cval+1]+v[10]*x[cval+2]+v[15]*x[cval+3]+v[20]*x[cval+4];
905: z[5*i+1] +=v[1]*x[cval]+v[6]*x[cval+1]+v[11]*x[cval+2]+v[16]*x[cval+3]+v[21]*x[cval+4];
906: z[5*i+2] +=v[2]*x[cval]+v[7]*x[cval+1]+v[12]*x[cval+2]+v[17]*x[cval+3]+v[22]*x[cval+4];
907: z[5*i+3] +=v[3]*x[cval]+v[8]*x[cval+1]+v[13]*x[cval+2]+v[18]*x[cval+3]+v[23]*x[cval+4];
908: z[5*i+4] +=v[4]*x[cval]+v[9]*x[cval+1]+v[14]*x[cval+2]+v[19]*x[cval+3]+v[24]*x[cval+4];
909: v += 25;
910: }
911: xb +=5; ai++;
912: }
914: VecRestoreArray(xx,&x);
915: VecRestoreArray(zz,&z);
917: PetscLogFlops(50.0*(a->nz*2.0 - nonzerorow));
918: return(0);
919: }
922: PetscErrorCode MatMultAdd_SeqSBAIJ_6(Mat A,Vec xx,Vec yy,Vec zz)
923: {
924: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
925: PetscScalar *x,*z,*xb,x1,x2,x3,x4,x5,x6;
926: MatScalar *v;
928: PetscInt mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
929: PetscInt nonzerorow=0;
932: VecCopy(yy,zz);
933: VecGetArray(xx,&x);
934: VecGetArray(zz,&z);
936: v = a->a;
937: xb = x;
939: for (i=0; i<mbs; i++) {
940: n = ai[1] - ai[0]; /* length of i_th block row of A */
941: x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4]; x6=xb[5];
942: ib = aj + *ai;
943: jmin = 0;
944: nonzerorow += (n>0);
945: if (*ib == i){ /* (diag of A)*x */
946: z[6*i] += v[0]*x1 + v[6]*x2 + v[12]*x3 + v[18]*x4+ v[24]*x5 + v[30]*x6;
947: z[6*i+1] += v[6]*x1 + v[7]*x2 + v[13]*x3 + v[19]*x4+ v[25]*x5 + v[31]*x6;
948: z[6*i+2] += v[12]*x1 +v[13]*x2 + v[14]*x3 + v[20]*x4+ v[26]*x5 + v[32]*x6;
949: z[6*i+3] += v[18]*x1 +v[19]*x2 + v[20]*x3 + v[21]*x4+ v[27]*x5 + v[33]*x6;
950: z[6*i+4] += v[24]*x1 +v[25]*x2 + v[26]*x3 + v[27]*x4+ v[28]*x5 + v[34]*x6;
951: z[6*i+5] += v[30]*x1 +v[31]*x2 + v[32]*x3 + v[33]*x4+ v[34]*x5 + v[35]*x6;
952: v += 36; jmin++;
953: }
954: PetscPrefetchBlock(ib+jmin+n,n,0,PETSC_PREFETCH_HINT_NTA); /* Indices for the next row (assumes same size as this one) */
955: PetscPrefetchBlock(v+36*n,36*n,0,PETSC_PREFETCH_HINT_NTA); /* Entries for the next row */
956: for (j=jmin; j<n; j++) {
957: /* (strict lower triangular part of A)*x */
958: cval = ib[j]*6;
959: z[cval] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4+ v[4]*x5 + v[5]*x6;
960: z[cval+1] += v[6]*x1 + v[7]*x2 + v[8]*x3 + v[9]*x4+ v[10]*x5 + v[11]*x6;
961: z[cval+2] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4+ v[16]*x5 + v[17]*x6;
962: z[cval+3] += v[18]*x1 + v[19]*x2 + v[20]*x3 + v[21]*x4+ v[22]*x5 + v[23]*x6;
963: z[cval+4] += v[24]*x1 + v[25]*x2 + v[26]*x3 + v[27]*x4+ v[28]*x5 + v[29]*x6;
964: z[cval+5] += v[30]*x1 + v[31]*x2 + v[32]*x3 + v[33]*x4+ v[34]*x5 + v[35]*x6;
965: /* (strict upper triangular part of A)*x */
966: z[6*i] +=v[0]*x[cval]+v[6]*x[cval+1]+v[12]*x[cval+2]+v[18]*x[cval+3]+v[24]*x[cval+4]+v[30]*x[cval+5];
967: z[6*i+1] +=v[1]*x[cval]+v[7]*x[cval+1]+v[13]*x[cval+2]+v[19]*x[cval+3]+v[25]*x[cval+4]+v[31]*x[cval+5];
968: z[6*i+2] +=v[2]*x[cval]+v[8]*x[cval+1]+v[14]*x[cval+2]+v[20]*x[cval+3]+v[26]*x[cval+4]+v[32]*x[cval+5];
969: z[6*i+3] +=v[3]*x[cval]+v[9]*x[cval+1]+v[15]*x[cval+2]+v[21]*x[cval+3]+v[27]*x[cval+4]+v[33]*x[cval+5];
970: z[6*i+4] +=v[4]*x[cval]+v[10]*x[cval+1]+v[16]*x[cval+2]+v[22]*x[cval+3]+v[28]*x[cval+4]+v[34]*x[cval+5];
971: z[6*i+5] +=v[5]*x[cval]+v[11]*x[cval+1]+v[17]*x[cval+2]+v[23]*x[cval+3]+v[29]*x[cval+4]+v[35]*x[cval+5];
972: v += 36;
973: }
974: xb +=6; ai++;
975: }
977: VecRestoreArray(xx,&x);
978: VecRestoreArray(zz,&z);
980: PetscLogFlops(72.0*(a->nz*2.0 - nonzerorow));
981: return(0);
982: }
986: PetscErrorCode MatMultAdd_SeqSBAIJ_7(Mat A,Vec xx,Vec yy,Vec zz)
987: {
988: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
989: PetscScalar *x,*z,*xb,x1,x2,x3,x4,x5,x6,x7;
990: MatScalar *v;
992: PetscInt mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
993: PetscInt nonzerorow=0;
996: VecCopy(yy,zz);
997: VecGetArray(xx,&x);
998: VecGetArray(zz,&z);
1000: v = a->a;
1001: xb = x;
1003: for (i=0; i<mbs; i++) {
1004: n = ai[1] - ai[0]; /* length of i_th block row of A */
1005: x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4]; x6=xb[5]; x7=xb[6];
1006: ib = aj + *ai;
1007: jmin = 0;
1008: nonzerorow += (n>0);
1009: if (*ib == i){ /* (diag of A)*x */
1010: z[7*i] += v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4+ v[28]*x5 + v[35]*x6+ v[42]*x7;
1011: z[7*i+1] += v[7]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4+ v[29]*x5 + v[36]*x6+ v[43]*x7;
1012: z[7*i+2] += v[14]*x1+ v[15]*x2 +v[16]*x3 + v[23]*x4+ v[30]*x5 + v[37]*x6+ v[44]*x7;
1013: z[7*i+3] += v[21]*x1+ v[22]*x2 +v[23]*x3 + v[24]*x4+ v[31]*x5 + v[38]*x6+ v[45]*x7;
1014: z[7*i+4] += v[28]*x1+ v[29]*x2 +v[30]*x3 + v[31]*x4+ v[32]*x5 + v[39]*x6+ v[46]*x7;
1015: z[7*i+5] += v[35]*x1+ v[36]*x2 +v[37]*x3 + v[38]*x4+ v[39]*x5 + v[40]*x6+ v[47]*x7;
1016: z[7*i+6] += v[42]*x1+ v[43]*x2 +v[44]*x3 + v[45]*x4+ v[46]*x5 + v[47]*x6+ v[48]*x7;
1017: v += 49; jmin++;
1018: }
1019: PetscPrefetchBlock(ib+jmin+n,n,0,PETSC_PREFETCH_HINT_NTA); /* Indices for the next row (assumes same size as this one) */
1020: PetscPrefetchBlock(v+49*n,49*n,0,PETSC_PREFETCH_HINT_NTA); /* Entries for the next row */
1021: for (j=jmin; j<n; j++) {
1022: /* (strict lower triangular part of A)*x */
1023: cval = ib[j]*7;
1024: z[cval] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4+ v[4]*x5 + v[5]*x6+ v[6]*x7;
1025: z[cval+1] += v[7]*x1 + v[8]*x2 + v[9]*x3 + v[10]*x4+ v[11]*x5 + v[12]*x6+ v[13]*x7;
1026: z[cval+2] += v[14]*x1 + v[15]*x2 + v[16]*x3 + v[17]*x4+ v[18]*x5 + v[19]*x6+ v[20]*x7;
1027: z[cval+3] += v[21]*x1 + v[22]*x2 + v[23]*x3 + v[24]*x4+ v[25]*x5 + v[26]*x6+ v[27]*x7;
1028: z[cval+4] += v[28]*x1 + v[29]*x2 + v[30]*x3 + v[31]*x4+ v[32]*x5 + v[33]*x6+ v[34]*x7;
1029: z[cval+5] += v[35]*x1 + v[36]*x2 + v[37]*x3 + v[38]*x4+ v[39]*x5 + v[40]*x6+ v[41]*x7;
1030: z[cval+6] += v[42]*x1 + v[43]*x2 + v[44]*x3 + v[45]*x4+ v[46]*x5 + v[47]*x6+ v[48]*x7;
1031: /* (strict upper triangular part of A)*x */
1032: z[7*i] +=v[0]*x[cval]+v[7]*x[cval+1]+v[14]*x[cval+2]+v[21]*x[cval+3]+v[28]*x[cval+4]+v[35]*x[cval+5]+v[42]*x[cval+6];
1033: z[7*i+1]+=v[1]*x[cval]+v[8]*x[cval+1]+v[15]*x[cval+2]+v[22]*x[cval+3]+v[29]*x[cval+4]+v[36]*x[cval+5]+v[43]*x[cval+6];
1034: z[7*i+2]+=v[2]*x[cval]+v[9]*x[cval+1]+v[16]*x[cval+2]+v[23]*x[cval+3]+v[30]*x[cval+4]+v[37]*x[cval+5]+v[44]*x[cval+6];
1035: z[7*i+3]+=v[3]*x[cval]+v[10]*x[cval+1]+v[17]*x[cval+2]+v[24]*x[cval+3]+v[31]*x[cval+4]+v[38]*x[cval+5]+v[45]*x[cval+6];
1036: z[7*i+4]+=v[4]*x[cval]+v[11]*x[cval+1]+v[18]*x[cval+2]+v[25]*x[cval+3]+v[32]*x[cval+4]+v[39]*x[cval+5]+v[46]*x[cval+6];
1037: z[7*i+5]+=v[5]*x[cval]+v[12]*x[cval+1]+v[19]*x[cval+2]+v[26]*x[cval+3]+v[33]*x[cval+4]+v[40]*x[cval+5]+v[47]*x[cval+6];
1038: z[7*i+6]+=v[6]*x[cval]+v[13]*x[cval+1]+v[20]*x[cval+2]+v[27]*x[cval+3]+v[34]*x[cval+4]+v[41]*x[cval+5]+v[48]*x[cval+6];
1039: v += 49;
1040: }
1041: xb +=7; ai++;
1042: }
1044: VecRestoreArray(xx,&x);
1045: VecRestoreArray(zz,&z);
1047: PetscLogFlops(98.0*(a->nz*2.0 - nonzerorow));
1048: return(0);
1049: }
1053: PetscErrorCode MatMultAdd_SeqSBAIJ_N(Mat A,Vec xx,Vec yy,Vec zz)
1054: {
1055: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
1056: PetscScalar *x,*x_ptr,*z,*z_ptr=0,*xb,*zb,*work,*workt;
1057: MatScalar *v;
1059: PetscInt mbs=a->mbs,i,*idx,*aj,*ii,bs=A->rmap->bs,j,n,bs2=a->bs2,ncols,k;
1060: PetscInt nonzerorow=0;
1063: VecCopy(yy,zz);
1064: VecGetArray(xx,&x); x_ptr=x;
1065: VecGetArray(zz,&z); z_ptr=z;
1067: aj = a->j;
1068: v = a->a;
1069: ii = a->i;
1071: if (!a->mult_work) {
1072: PetscMalloc((A->rmap->n+1)*sizeof(PetscScalar),&a->mult_work);
1073: }
1074: work = a->mult_work;
1075:
1076:
1077: for (i=0; i<mbs; i++) {
1078: n = ii[1] - ii[0]; ncols = n*bs;
1079: workt = work; idx=aj+ii[0];
1080: nonzerorow += (n>0);
1082: /* upper triangular part */
1083: for (j=0; j<n; j++) {
1084: xb = x_ptr + bs*(*idx++);
1085: for (k=0; k<bs; k++) workt[k] = xb[k];
1086: workt += bs;
1087: }
1088: /* z(i*bs:(i+1)*bs-1) += A(i,:)*x */
1089: PetscKernel_w_gets_w_plus_Ar_times_v(bs,ncols,work,v,z);
1091: /* strict lower triangular part */
1092: idx = aj+ii[0];
1093: if (*idx == i){
1094: ncols -= bs; v += bs2; idx++; n--;
1095: }
1096: if (ncols > 0){
1097: workt = work;
1098: PetscMemzero(workt,ncols*sizeof(PetscScalar));
1099: PetscKernel_w_gets_w_plus_trans_Ar_times_v(bs,ncols,x,v,workt);
1100: for (j=0; j<n; j++) {
1101: zb = z_ptr + bs*(*idx++);
1102: for (k=0; k<bs; k++) zb[k] += workt[k] ;
1103: workt += bs;
1104: }
1105: }
1107: x += bs; v += n*bs2; z += bs; ii++;
1108: }
1110: VecRestoreArray(xx,&x);
1111: VecRestoreArray(zz,&z);
1113: PetscLogFlops(2.0*(a->nz*2.0 - nonzerorow));
1114: return(0);
1115: }
1119: PetscErrorCode MatScale_SeqSBAIJ(Mat inA,PetscScalar alpha)
1120: {
1121: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)inA->data;
1122: PetscScalar oalpha = alpha;
1124: PetscBLASInt one = 1,totalnz = PetscBLASIntCast(a->bs2*a->nz);
1127: BLASscal_(&totalnz,&oalpha,a->a,&one);
1128: PetscLogFlops(totalnz);
1129: return(0);
1130: }
1134: PetscErrorCode MatNorm_SeqSBAIJ(Mat A,NormType type,PetscReal *norm)
1135: {
1136: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
1137: MatScalar *v = a->a;
1138: PetscReal sum_diag = 0.0, sum_off = 0.0, *sum;
1139: PetscInt i,j,k,bs = A->rmap->bs,bs2=a->bs2,k1,mbs=a->mbs,*aj=a->j;
1141: PetscInt *jl,*il,jmin,jmax,nexti,ik,*col;
1142:
1144: if (type == NORM_FROBENIUS) {
1145: for (k=0; k<mbs; k++){
1146: jmin = a->i[k]; jmax = a->i[k+1];
1147: col = aj + jmin;
1148: if (*col == k){ /* diagonal block */
1149: for (i=0; i<bs2; i++){
1150: #if defined(PETSC_USE_COMPLEX)
1151: sum_diag += PetscRealPart(PetscConj(*v)*(*v)); v++;
1152: #else
1153: sum_diag += (*v)*(*v); v++;
1154: #endif
1155: }
1156: jmin++;
1157: }
1158: for (j=jmin; j<jmax; j++){ /* off-diagonal blocks */
1159: for (i=0; i<bs2; i++){
1160: #if defined(PETSC_USE_COMPLEX)
1161: sum_off += PetscRealPart(PetscConj(*v)*(*v)); v++;
1162: #else
1163: sum_off += (*v)*(*v); v++;
1164: #endif
1165: }
1166: }
1167: }
1168: *norm = PetscSqrtReal(sum_diag + 2*sum_off);
1169: } else if (type == NORM_INFINITY || type == NORM_1) { /* maximum row/column sum */
1170: PetscMalloc3(bs,PetscReal,&sum,mbs,PetscInt,&il,mbs,PetscInt,&jl);
1171: for (i=0; i<mbs; i++) jl[i] = mbs;
1172: il[0] = 0;
1174: *norm = 0.0;
1175: for (k=0; k<mbs; k++) { /* k_th block row */
1176: for (j=0; j<bs; j++) sum[j]=0.0;
1177: /*-- col sum --*/
1178: i = jl[k]; /* first |A(i,k)| to be added */
1179: /* jl[k]=i: first nozero element in row i for submatrix A(1:k,k:n) (active window)
1180: at step k */
1181: while (i<mbs){
1182: nexti = jl[i]; /* next block row to be added */
1183: ik = il[i]; /* block index of A(i,k) in the array a */
1184: for (j=0; j<bs; j++){
1185: v = a->a + ik*bs2 + j*bs;
1186: for (k1=0; k1<bs; k1++) {
1187: sum[j] += PetscAbsScalar(*v); v++;
1188: }
1189: }
1190: /* update il, jl */
1191: jmin = ik + 1; /* block index of array a: points to the next nonzero of A in row i */
1192: jmax = a->i[i+1];
1193: if (jmin < jmax){
1194: il[i] = jmin;
1195: j = a->j[jmin];
1196: jl[i] = jl[j]; jl[j]=i;
1197: }
1198: i = nexti;
1199: }
1200: /*-- row sum --*/
1201: jmin = a->i[k]; jmax = a->i[k+1];
1202: for (i=jmin; i<jmax; i++) {
1203: for (j=0; j<bs; j++){
1204: v = a->a + i*bs2 + j;
1205: for (k1=0; k1<bs; k1++){
1206: sum[j] += PetscAbsScalar(*v); v += bs;
1207: }
1208: }
1209: }
1210: /* add k_th block row to il, jl */
1211: col = aj+jmin;
1212: if (*col == k) jmin++;
1213: if (jmin < jmax){
1214: il[k] = jmin;
1215: j = a->j[jmin]; jl[k] = jl[j]; jl[j] = k;
1216: }
1217: for (j=0; j<bs; j++){
1218: if (sum[j] > *norm) *norm = sum[j];
1219: }
1220: }
1221: PetscFree3(sum,il,jl);
1222: } else {
1223: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for this norm yet");
1224: }
1225: return(0);
1226: }
1230: PetscErrorCode MatEqual_SeqSBAIJ(Mat A,Mat B,PetscBool * flg)
1231: {
1232: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data,*b = (Mat_SeqSBAIJ *)B->data;
1237: /* If the matrix/block dimensions are not equal, or no of nonzeros or shift */
1238: if ((A->rmap->N != B->rmap->N) || (A->cmap->n != B->cmap->n) || (A->rmap->bs != B->rmap->bs)|| (a->nz != b->nz)) {
1239: *flg = PETSC_FALSE;
1240: return(0);
1241: }
1242:
1243: /* if the a->i are the same */
1244: PetscMemcmp(a->i,b->i,(a->mbs+1)*sizeof(PetscInt),flg);
1245: if (!*flg) {
1246: return(0);
1247: }
1248:
1249: /* if a->j are the same */
1250: PetscMemcmp(a->j,b->j,(a->nz)*sizeof(PetscInt),flg);
1251: if (!*flg) {
1252: return(0);
1253: }
1254: /* if a->a are the same */
1255: PetscMemcmp(a->a,b->a,(a->nz)*(A->rmap->bs)*(A->rmap->bs)*sizeof(PetscScalar),flg);
1256: return(0);
1257: }
1261: PetscErrorCode MatGetDiagonal_SeqSBAIJ(Mat A,Vec v)
1262: {
1263: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
1265: PetscInt i,j,k,row,bs,*ai,*aj,ambs,bs2;
1266: PetscScalar *x,zero = 0.0;
1267: MatScalar *aa,*aa_j;
1270: bs = A->rmap->bs;
1271: if (A->factortype && bs>1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix with bs>1");
1272:
1273: aa = a->a;
1274: ambs = a->mbs;
1276: if (A->factortype == MAT_FACTOR_CHOLESKY || A->factortype == MAT_FACTOR_ICC){
1277: PetscInt *diag=a->diag;
1278: aa = a->a;
1279: ambs = a->mbs;
1280: VecGetArray(v,&x);
1281: for (i=0; i<ambs; i++) x[i] = 1.0/aa[diag[i]];
1282: VecRestoreArray(v,&x);
1283: return(0);
1284: }
1285:
1286: ai = a->i;
1287: aj = a->j;
1288: bs2 = a->bs2;
1289: VecSet(v,zero);
1290: VecGetArray(v,&x);
1291: for (i=0; i<ambs; i++) {
1292: j=ai[i];
1293: if (aj[j] == i) { /* if this is a diagonal element */
1294: row = i*bs;
1295: aa_j = aa + j*bs2;
1296: for (k=0; k<bs2; k+=(bs+1),row++) x[row] = aa_j[k];
1297: }
1298: }
1299: VecRestoreArray(v,&x);
1300: return(0);
1301: }
1305: PetscErrorCode MatDiagonalScale_SeqSBAIJ(Mat A,Vec ll,Vec rr)
1306: {
1307: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
1308: PetscScalar *l,x,*li,*ri;
1309: MatScalar *aa,*v;
1311: PetscInt i,j,k,lm,M,m,*ai,*aj,mbs,tmp,bs,bs2;
1312: PetscBool flg;
1315: if (ll != rr){
1316: VecEqual(ll,rr,&flg);
1317: if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be same\n");
1318: }
1319: if (!ll) return(0);
1320: ai = a->i;
1321: aj = a->j;
1322: aa = a->a;
1323: m = A->rmap->N;
1324: bs = A->rmap->bs;
1325: mbs = a->mbs;
1326: bs2 = a->bs2;
1328: VecGetArray(ll,&l);
1329: VecGetLocalSize(ll,&lm);
1330: if (lm != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left scaling vector wrong length");
1331: for (i=0; i<mbs; i++) { /* for each block row */
1332: M = ai[i+1] - ai[i];
1333: li = l + i*bs;
1334: v = aa + bs2*ai[i];
1335: for (j=0; j<M; j++) { /* for each block */
1336: ri = l + bs*aj[ai[i]+j];
1337: for (k=0; k<bs; k++) {
1338: x = ri[k];
1339: for (tmp=0; tmp<bs; tmp++) (*v++) *= li[tmp]*x;
1340: }
1341: }
1342: }
1343: VecRestoreArray(ll,&l);
1344: PetscLogFlops(2.0*a->nz);
1345: return(0);
1346: }
1350: PetscErrorCode MatGetInfo_SeqSBAIJ(Mat A,MatInfoType flag,MatInfo *info)
1351: {
1352: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
1355: info->block_size = a->bs2;
1356: info->nz_allocated = a->bs2*a->maxnz; /*num. of nonzeros in upper triangular part */
1357: info->nz_used = a->bs2*a->nz; /*num. of nonzeros in upper triangular part */
1358: info->nz_unneeded = (double)(info->nz_allocated - info->nz_used);
1359: info->assemblies = A->num_ass;
1360: info->mallocs = A->info.mallocs;
1361: info->memory = ((PetscObject)A)->mem;
1362: if (A->factortype) {
1363: info->fill_ratio_given = A->info.fill_ratio_given;
1364: info->fill_ratio_needed = A->info.fill_ratio_needed;
1365: info->factor_mallocs = A->info.factor_mallocs;
1366: } else {
1367: info->fill_ratio_given = 0;
1368: info->fill_ratio_needed = 0;
1369: info->factor_mallocs = 0;
1370: }
1371: return(0);
1372: }
1377: PetscErrorCode MatZeroEntries_SeqSBAIJ(Mat A)
1378: {
1379: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
1383: PetscMemzero(a->a,a->bs2*a->i[a->mbs]*sizeof(MatScalar));
1384: return(0);
1385: }
1389: /*
1390: This code does not work since it only checks the upper triangular part of
1391: the matrix. Hence it is not listed in the function table.
1392: */
1393: PetscErrorCode MatGetRowMaxAbs_SeqSBAIJ(Mat A,Vec v,PetscInt idx[])
1394: {
1395: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
1397: PetscInt i,j,n,row,col,bs,*ai,*aj,mbs;
1398: PetscReal atmp;
1399: MatScalar *aa;
1400: PetscScalar *x;
1401: PetscInt ncols,brow,bcol,krow,kcol;
1404: if (idx) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Send email to petsc-maint@mcs.anl.gov");
1405: if (A->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
1406: bs = A->rmap->bs;
1407: aa = a->a;
1408: ai = a->i;
1409: aj = a->j;
1410: mbs = a->mbs;
1412: VecSet(v,0.0);
1413: VecGetArray(v,&x);
1414: VecGetLocalSize(v,&n);
1415: if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
1416: for (i=0; i<mbs; i++) {
1417: ncols = ai[1] - ai[0]; ai++;
1418: brow = bs*i;
1419: for (j=0; j<ncols; j++){
1420: bcol = bs*(*aj);
1421: for (kcol=0; kcol<bs; kcol++){
1422: col = bcol + kcol; /* col index */
1423: for (krow=0; krow<bs; krow++){
1424: atmp = PetscAbsScalar(*aa); aa++;
1425: row = brow + krow; /* row index */
1426: if (PetscRealPart(x[row]) < atmp) x[row] = atmp;
1427: if (*aj > i && PetscRealPart(x[col]) < atmp) x[col] = atmp;
1428: }
1429: }
1430: aj++;
1431: }
1432: }
1433: VecRestoreArray(v,&x);
1434: return(0);
1435: }