Actual source code: sbaij2.c
petsc-3.4.5 2014-06-29
2: #include <../src/mat/impls/baij/seq/baij.h>
3: #include <petsc-private/kernels/blockinvert.h>
4: #include <petscbt.h>
5: #include <../src/mat/impls/sbaij/seq/sbaij.h>
6: #include <petscblaslapack.h>
10: PetscErrorCode MatIncreaseOverlap_SeqSBAIJ(Mat A,PetscInt is_max,IS is[],PetscInt ov)
11: {
12: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
14: PetscInt brow,i,j,k,l,mbs,n,*nidx,isz,bcol,bcol_max,start,end,*ai,*aj,bs,*nidx2;
15: const PetscInt *idx;
16: PetscBT table_out,table_in;
19: if (ov < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative overlap specified");
20: mbs = a->mbs;
21: ai = a->i;
22: aj = a->j;
23: bs = A->rmap->bs;
24: PetscBTCreate(mbs,&table_out);
25: PetscMalloc((mbs+1)*sizeof(PetscInt),&nidx);
26: PetscMalloc((A->rmap->N+1)*sizeof(PetscInt),&nidx2);
27: PetscBTCreate(mbs,&table_in);
29: for (i=0; i<is_max; i++) { /* for each is */
30: isz = 0;
31: PetscBTMemzero(mbs,table_out);
33: /* Extract the indices, assume there can be duplicate entries */
34: ISGetIndices(is[i],&idx);
35: ISGetLocalSize(is[i],&n);
37: /* Enter these into the temp arrays i.e mark table_out[brow], enter brow into new index */
38: bcol_max = 0;
39: for (j=0; j<n; ++j) {
40: brow = idx[j]/bs; /* convert the indices into block indices */
41: if (brow >= mbs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"index greater than mat-dim");
42: if (!PetscBTLookupSet(table_out,brow)) {
43: nidx[isz++] = brow;
44: if (bcol_max < brow) bcol_max = brow;
45: }
46: }
47: ISRestoreIndices(is[i],&idx);
48: ISDestroy(&is[i]);
50: k = 0;
51: for (j=0; j<ov; j++) { /* for each overlap */
52: /* set table_in for lookup - only mark entries that are added onto nidx in (j-1)-th overlap */
53: PetscBTMemzero(mbs,table_in);
54: for (l=k; l<isz; l++) { PetscBTSet(table_in,nidx[l]); }
56: n = isz; /* length of the updated is[i] */
57: for (brow=0; brow<mbs; brow++) {
58: start = ai[brow]; end = ai[brow+1];
59: if (PetscBTLookup(table_in,brow)) { /* brow is on nidx - row search: collect all bcol in this brow */
60: for (l = start; l<end; l++) {
61: bcol = aj[l];
62: if (!PetscBTLookupSet(table_out,bcol)) {
63: nidx[isz++] = bcol;
64: if (bcol_max < bcol) bcol_max = bcol;
65: }
66: }
67: k++;
68: if (k >= n) break; /* for (brow=0; brow<mbs; brow++) */
69: } else { /* brow is not on nidx - col serach: add brow onto nidx if there is a bcol in nidx */
70: for (l = start; l<end; l++) {
71: bcol = aj[l];
72: if (bcol > bcol_max) break;
73: if (PetscBTLookup(table_in,bcol)) {
74: if (!PetscBTLookupSet(table_out,brow)) nidx[isz++] = brow;
75: break; /* for l = start; l<end ; l++) */
76: }
77: }
78: }
79: }
80: } /* for each overlap */
82: /* expand the Index Set */
83: for (j=0; j<isz; j++) {
84: for (k=0; k<bs; k++) nidx2[j*bs+k] = nidx[j]*bs+k;
85: }
86: ISCreateGeneral(PETSC_COMM_SELF,isz*bs,nidx2,PETSC_COPY_VALUES,is+i);
87: }
88: PetscBTDestroy(&table_out);
89: PetscFree(nidx);
90: PetscFree(nidx2);
91: PetscBTDestroy(&table_in);
92: return(0);
93: }
97: PetscErrorCode MatGetSubMatrix_SeqSBAIJ_Private(Mat A,IS isrow,IS iscol,MatReuse scall,Mat *B)
98: {
99: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data,*c;
101: PetscInt *smap,i,k,kstart,kend,oldcols = a->mbs,*lens;
102: PetscInt row,mat_i,*mat_j,tcol,*mat_ilen;
103: PetscInt nrows,*ssmap,bs=A->rmap->bs,bs2=a->bs2;
104: const PetscInt *irow,*aj = a->j,*ai = a->i;
105: MatScalar *mat_a;
106: Mat C;
107: PetscBool flag,sorted;
110: if (isrow != iscol) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"For symmetric format, iscol must equal isro");
111: ISSorted(iscol,&sorted);
112: if (!sorted) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"IS is not sorted");
114: ISGetIndices(isrow,&irow);
115: ISGetSize(isrow,&nrows);
117: PetscMalloc(oldcols*sizeof(PetscInt),&smap);
118: PetscMemzero(smap,oldcols*sizeof(PetscInt));
119: ssmap = smap;
120: PetscMalloc((1+nrows)*sizeof(PetscInt),&lens);
121: for (i=0; i<nrows; i++) smap[irow[i]] = i+1; /* nrows = ncols */
122: /* determine lens of each row */
123: for (i=0; i<nrows; i++) {
124: kstart = ai[irow[i]];
125: kend = kstart + a->ilen[irow[i]];
126: lens[i] = 0;
127: for (k=kstart; k<kend; k++) {
128: if (ssmap[aj[k]]) lens[i]++;
129: }
130: }
131: /* Create and fill new matrix */
132: if (scall == MAT_REUSE_MATRIX) {
133: c = (Mat_SeqSBAIJ*)((*B)->data);
135: if (c->mbs!=nrows || (*B)->rmap->bs!=bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Submatrix wrong size");
136: PetscMemcmp(c->ilen,lens,c->mbs *sizeof(PetscInt),&flag);
137: if (!flag) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. wrong no of nonzeros");
138: PetscMemzero(c->ilen,c->mbs*sizeof(PetscInt));
139: C = *B;
140: } else {
141: MatCreate(PetscObjectComm((PetscObject)A),&C);
142: MatSetSizes(C,nrows*bs,nrows*bs,PETSC_DETERMINE,PETSC_DETERMINE);
143: MatSetType(C,((PetscObject)A)->type_name);
144: MatSeqSBAIJSetPreallocation_SeqSBAIJ(C,bs,0,lens);
145: }
146: c = (Mat_SeqSBAIJ*)(C->data);
147: for (i=0; i<nrows; i++) {
148: row = irow[i];
149: kstart = ai[row];
150: kend = kstart + a->ilen[row];
151: mat_i = c->i[i];
152: mat_j = c->j + mat_i;
153: mat_a = c->a + mat_i*bs2;
154: mat_ilen = c->ilen + i;
155: for (k=kstart; k<kend; k++) {
156: if ((tcol=ssmap[a->j[k]])) {
157: *mat_j++ = tcol - 1;
158: PetscMemcpy(mat_a,a->a+k*bs2,bs2*sizeof(MatScalar));
159: mat_a += bs2;
160: (*mat_ilen)++;
161: }
162: }
163: }
165: /* Free work space */
166: PetscFree(smap);
167: PetscFree(lens);
168: MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);
169: MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);
171: ISRestoreIndices(isrow,&irow);
172: *B = C;
173: return(0);
174: }
178: PetscErrorCode MatGetSubMatrix_SeqSBAIJ(Mat A,IS isrow,IS iscol,MatReuse scall,Mat *B)
179: {
180: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
181: IS is1;
183: PetscInt *vary,*iary,nrows,i,bs=A->rmap->bs,count;
184: const PetscInt *irow;
187: if (isrow != iscol) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"For symmetric format, iscol must equal isro");
189: ISGetIndices(isrow,&irow);
190: ISGetSize(isrow,&nrows);
192: /* Verify if the indices corespond to each element in a block
193: and form the IS with compressed IS */
194: PetscMalloc2(a->mbs,PetscInt,&vary,a->mbs,PetscInt,&iary);
195: PetscMemzero(vary,a->mbs*sizeof(PetscInt));
196: for (i=0; i<nrows; i++) vary[irow[i]/bs]++;
198: count = 0;
199: for (i=0; i<a->mbs; i++) {
200: if (vary[i]!=0 && vary[i]!=bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Index set does not match blocks");
201: if (vary[i]==bs) iary[count++] = i;
202: }
203: ISRestoreIndices(isrow,&irow);
204: ISCreateGeneral(PETSC_COMM_SELF,count,iary,PETSC_COPY_VALUES,&is1);
205: PetscFree2(vary,iary);
207: MatGetSubMatrix_SeqSBAIJ_Private(A,is1,is1,scall,B);
208: ISDestroy(&is1);
209: return(0);
210: }
214: PetscErrorCode MatGetSubMatrices_SeqSBAIJ(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[])
215: {
217: PetscInt i;
220: if (scall == MAT_INITIAL_MATRIX) {
221: PetscMalloc((n+1)*sizeof(Mat),B);
222: }
224: for (i=0; i<n; i++) {
225: MatGetSubMatrix_SeqSBAIJ(A,irow[i],icol[i],scall,&(*B)[i]);
226: }
227: return(0);
228: }
230: /* -------------------------------------------------------*/
231: /* Should check that shapes of vectors and matrices match */
232: /* -------------------------------------------------------*/
236: PetscErrorCode MatMult_SeqSBAIJ_2(Mat A,Vec xx,Vec zz)
237: {
238: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
239: PetscScalar *x,*z,*xb,x1,x2,zero=0.0;
240: MatScalar *v;
242: PetscInt mbs =a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
243: PetscInt nonzerorow=0;
246: VecSet(zz,zero);
247: VecGetArray(xx,&x);
248: VecGetArray(zz,&z);
250: v = a->a;
251: xb = x;
253: for (i=0; i<mbs; i++) {
254: n = ai[1] - ai[0]; /* length of i_th block row of A */
255: x1 = xb[0]; x2 = xb[1];
256: ib = aj + *ai;
257: jmin = 0;
258: nonzerorow += (n>0);
259: if (*ib == i) { /* (diag of A)*x */
260: z[2*i] += v[0]*x1 + v[2]*x2;
261: z[2*i+1] += v[2]*x1 + v[3]*x2;
262: v += 4; jmin++;
263: }
264: PetscPrefetchBlock(ib+jmin+n,n,0,PETSC_PREFETCH_HINT_NTA); /* Indices for the next row (assumes same size as this one) */
265: PetscPrefetchBlock(v+4*n,4*n,0,PETSC_PREFETCH_HINT_NTA); /* Entries for the next row */
266: for (j=jmin; j<n; j++) {
267: /* (strict lower triangular part of A)*x */
268: cval = ib[j]*2;
269: z[cval] += v[0]*x1 + v[1]*x2;
270: z[cval+1] += v[2]*x1 + v[3]*x2;
271: /* (strict upper triangular part of A)*x */
272: z[2*i] += v[0]*x[cval] + v[2]*x[cval+1];
273: z[2*i+1] += v[1]*x[cval] + v[3]*x[cval+1];
274: v += 4;
275: }
276: xb +=2; ai++;
277: }
279: VecRestoreArray(xx,&x);
280: VecRestoreArray(zz,&z);
281: PetscLogFlops(8.0*(a->nz*2.0 - nonzerorow) - nonzerorow);
282: return(0);
283: }
287: PetscErrorCode MatMult_SeqSBAIJ_3(Mat A,Vec xx,Vec zz)
288: {
289: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
290: PetscScalar *x,*z,*xb,x1,x2,x3,zero=0.0;
291: MatScalar *v;
293: PetscInt mbs =a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
294: PetscInt nonzerorow=0;
297: VecSet(zz,zero);
298: VecGetArray(xx,&x);
299: VecGetArray(zz,&z);
301: v = a->a;
302: xb = x;
304: for (i=0; i<mbs; i++) {
305: n = ai[1] - ai[0]; /* length of i_th block row of A */
306: x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
307: ib = aj + *ai;
308: jmin = 0;
309: nonzerorow += (n>0);
310: if (*ib == i) { /* (diag of A)*x */
311: z[3*i] += v[0]*x1 + v[3]*x2 + v[6]*x3;
312: z[3*i+1] += v[3]*x1 + v[4]*x2 + v[7]*x3;
313: z[3*i+2] += v[6]*x1 + v[7]*x2 + v[8]*x3;
314: v += 9; jmin++;
315: }
316: PetscPrefetchBlock(ib+jmin+n,n,0,PETSC_PREFETCH_HINT_NTA); /* Indices for the next row (assumes same size as this one) */
317: PetscPrefetchBlock(v+9*n,9*n,0,PETSC_PREFETCH_HINT_NTA); /* Entries for the next row */
318: for (j=jmin; j<n; j++) {
319: /* (strict lower triangular part of A)*x */
320: cval = ib[j]*3;
321: z[cval] += v[0]*x1 + v[1]*x2 + v[2]*x3;
322: z[cval+1] += v[3]*x1 + v[4]*x2 + v[5]*x3;
323: z[cval+2] += v[6]*x1 + v[7]*x2 + v[8]*x3;
324: /* (strict upper triangular part of A)*x */
325: z[3*i] += v[0]*x[cval] + v[3]*x[cval+1]+ v[6]*x[cval+2];
326: z[3*i+1] += v[1]*x[cval] + v[4]*x[cval+1]+ v[7]*x[cval+2];
327: z[3*i+2] += v[2]*x[cval] + v[5]*x[cval+1]+ v[8]*x[cval+2];
328: v += 9;
329: }
330: xb +=3; ai++;
331: }
333: VecRestoreArray(xx,&x);
334: VecRestoreArray(zz,&z);
335: PetscLogFlops(18.0*(a->nz*2.0 - nonzerorow) - nonzerorow);
336: return(0);
337: }
341: PetscErrorCode MatMult_SeqSBAIJ_4(Mat A,Vec xx,Vec zz)
342: {
343: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
344: PetscScalar *x,*z,*xb,x1,x2,x3,x4,zero=0.0;
345: MatScalar *v;
347: PetscInt mbs =a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
348: PetscInt nonzerorow=0;
351: VecSet(zz,zero);
352: VecGetArray(xx,&x);
353: VecGetArray(zz,&z);
355: v = a->a;
356: xb = x;
358: for (i=0; i<mbs; i++) {
359: n = ai[1] - ai[0]; /* length of i_th block row of A */
360: x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
361: ib = aj + *ai;
362: jmin = 0;
363: nonzerorow += (n>0);
364: if (*ib == i) { /* (diag of A)*x */
365: z[4*i] += v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4;
366: z[4*i+1] += v[4]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4;
367: z[4*i+2] += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[14]*x4;
368: z[4*i+3] += v[12]*x1+ v[13]*x2+ v[14]*x3 + v[15]*x4;
369: v += 16; jmin++;
370: }
371: PetscPrefetchBlock(ib+jmin+n,n,0,PETSC_PREFETCH_HINT_NTA); /* Indices for the next row (assumes same size as this one) */
372: PetscPrefetchBlock(v+16*n,16*n,0,PETSC_PREFETCH_HINT_NTA); /* Entries for the next row */
373: for (j=jmin; j<n; j++) {
374: /* (strict lower triangular part of A)*x */
375: cval = ib[j]*4;
376: z[cval] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4;
377: z[cval+1] += v[4]*x1 + v[5]*x2 + v[6]*x3 + v[7]*x4;
378: z[cval+2] += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[11]*x4;
379: z[cval+3] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4;
380: /* (strict upper triangular part of A)*x */
381: z[4*i] += v[0]*x[cval] + v[4]*x[cval+1]+ v[8]*x[cval+2] + v[12]*x[cval+3];
382: z[4*i+1] += v[1]*x[cval] + v[5]*x[cval+1]+ v[9]*x[cval+2] + v[13]*x[cval+3];
383: z[4*i+2] += v[2]*x[cval] + v[6]*x[cval+1]+ v[10]*x[cval+2]+ v[14]*x[cval+3];
384: z[4*i+3] += v[3]*x[cval] + v[7]*x[cval+1]+ v[11]*x[cval+2]+ v[15]*x[cval+3];
385: v += 16;
386: }
387: xb +=4; ai++;
388: }
390: VecRestoreArray(xx,&x);
391: VecRestoreArray(zz,&z);
392: PetscLogFlops(32.0*(a->nz*2.0 - nonzerorow) - nonzerorow);
393: return(0);
394: }
398: PetscErrorCode MatMult_SeqSBAIJ_5(Mat A,Vec xx,Vec zz)
399: {
400: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
401: PetscScalar *x,*z,*xb,x1,x2,x3,x4,x5,zero=0.0;
402: MatScalar *v;
404: PetscInt mbs =a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
405: PetscInt nonzerorow=0;
408: VecSet(zz,zero);
409: VecGetArray(xx,&x);
410: VecGetArray(zz,&z);
412: v = a->a;
413: xb = x;
415: for (i=0; i<mbs; i++) {
416: n = ai[1] - ai[0]; /* length of i_th block row of A */
417: x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4];
418: ib = aj + *ai;
419: jmin = 0;
420: nonzerorow += (n>0);
421: if (*ib == i) { /* (diag of A)*x */
422: z[5*i] += v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4+ v[20]*x5;
423: z[5*i+1] += v[5]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4+ v[21]*x5;
424: z[5*i+2] += v[10]*x1 +v[11]*x2 + v[12]*x3 + v[17]*x4+ v[22]*x5;
425: z[5*i+3] += v[15]*x1 +v[16]*x2 + v[17]*x3 + v[18]*x4+ v[23]*x5;
426: z[5*i+4] += v[20]*x1 +v[21]*x2 + v[22]*x3 + v[23]*x4+ v[24]*x5;
427: v += 25; jmin++;
428: }
429: PetscPrefetchBlock(ib+jmin+n,n,0,PETSC_PREFETCH_HINT_NTA); /* Indices for the next row (assumes same size as this one) */
430: PetscPrefetchBlock(v+25*n,25*n,0,PETSC_PREFETCH_HINT_NTA); /* Entries for the next row */
431: for (j=jmin; j<n; j++) {
432: /* (strict lower triangular part of A)*x */
433: cval = ib[j]*5;
434: z[cval] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4 + v[4]*x5;
435: z[cval+1] += v[5]*x1 + v[6]*x2 + v[7]*x3 + v[8]*x4 + v[9]*x5;
436: z[cval+2] += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4+ v[14]*x5;
437: z[cval+3] += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4+ v[19]*x5;
438: z[cval+4] += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4+ v[24]*x5;
439: /* (strict upper triangular part of A)*x */
440: z[5*i] +=v[0]*x[cval]+v[5]*x[cval+1]+v[10]*x[cval+2]+v[15]*x[cval+3]+v[20]*x[cval+4];
441: z[5*i+1] +=v[1]*x[cval]+v[6]*x[cval+1]+v[11]*x[cval+2]+v[16]*x[cval+3]+v[21]*x[cval+4];
442: z[5*i+2] +=v[2]*x[cval]+v[7]*x[cval+1]+v[12]*x[cval+2]+v[17]*x[cval+3]+v[22]*x[cval+4];
443: z[5*i+3] +=v[3]*x[cval]+v[8]*x[cval+1]+v[13]*x[cval+2]+v[18]*x[cval+3]+v[23]*x[cval+4];
444: z[5*i+4] +=v[4]*x[cval]+v[9]*x[cval+1]+v[14]*x[cval+2]+v[19]*x[cval+3]+v[24]*x[cval+4];
445: v += 25;
446: }
447: xb +=5; ai++;
448: }
450: VecRestoreArray(xx,&x);
451: VecRestoreArray(zz,&z);
452: PetscLogFlops(50.0*(a->nz*2.0 - nonzerorow) - nonzerorow);
453: return(0);
454: }
459: PetscErrorCode MatMult_SeqSBAIJ_6(Mat A,Vec xx,Vec zz)
460: {
461: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
462: PetscScalar *x,*z,*xb,x1,x2,x3,x4,x5,x6,zero=0.0;
463: MatScalar *v;
465: PetscInt mbs =a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
466: PetscInt nonzerorow=0;
469: VecSet(zz,zero);
470: VecGetArray(xx,&x);
471: VecGetArray(zz,&z);
473: v = a->a;
474: xb = x;
476: for (i=0; i<mbs; i++) {
477: n = ai[1] - ai[0]; /* length of i_th block row of A */
478: x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4]; x6=xb[5];
479: ib = aj + *ai;
480: jmin = 0;
481: nonzerorow += (n>0);
482: if (*ib == i) { /* (diag of A)*x */
483: z[6*i] += v[0]*x1 + v[6]*x2 + v[12]*x3 + v[18]*x4+ v[24]*x5 + v[30]*x6;
484: z[6*i+1] += v[6]*x1 + v[7]*x2 + v[13]*x3 + v[19]*x4+ v[25]*x5 + v[31]*x6;
485: z[6*i+2] += v[12]*x1 +v[13]*x2 + v[14]*x3 + v[20]*x4+ v[26]*x5 + v[32]*x6;
486: z[6*i+3] += v[18]*x1 +v[19]*x2 + v[20]*x3 + v[21]*x4+ v[27]*x5 + v[33]*x6;
487: z[6*i+4] += v[24]*x1 +v[25]*x2 + v[26]*x3 + v[27]*x4+ v[28]*x5 + v[34]*x6;
488: z[6*i+5] += v[30]*x1 +v[31]*x2 + v[32]*x3 + v[33]*x4+ v[34]*x5 + v[35]*x6;
489: v += 36; jmin++;
490: }
491: PetscPrefetchBlock(ib+jmin+n,n,0,PETSC_PREFETCH_HINT_NTA); /* Indices for the next row (assumes same size as this one) */
492: PetscPrefetchBlock(v+36*n,36*n,0,PETSC_PREFETCH_HINT_NTA); /* Entries for the next row */
493: for (j=jmin; j<n; j++) {
494: /* (strict lower triangular part of A)*x */
495: cval = ib[j]*6;
496: z[cval] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4+ v[4]*x5 + v[5]*x6;
497: z[cval+1] += v[6]*x1 + v[7]*x2 + v[8]*x3 + v[9]*x4+ v[10]*x5 + v[11]*x6;
498: z[cval+2] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4+ v[16]*x5 + v[17]*x6;
499: z[cval+3] += v[18]*x1 + v[19]*x2 + v[20]*x3 + v[21]*x4+ v[22]*x5 + v[23]*x6;
500: z[cval+4] += v[24]*x1 + v[25]*x2 + v[26]*x3 + v[27]*x4+ v[28]*x5 + v[29]*x6;
501: z[cval+5] += v[30]*x1 + v[31]*x2 + v[32]*x3 + v[33]*x4+ v[34]*x5 + v[35]*x6;
502: /* (strict upper triangular part of A)*x */
503: z[6*i] +=v[0]*x[cval]+v[6]*x[cval+1]+v[12]*x[cval+2]+v[18]*x[cval+3]+v[24]*x[cval+4]+v[30]*x[cval+5];
504: z[6*i+1] +=v[1]*x[cval]+v[7]*x[cval+1]+v[13]*x[cval+2]+v[19]*x[cval+3]+v[25]*x[cval+4]+v[31]*x[cval+5];
505: z[6*i+2] +=v[2]*x[cval]+v[8]*x[cval+1]+v[14]*x[cval+2]+v[20]*x[cval+3]+v[26]*x[cval+4]+v[32]*x[cval+5];
506: z[6*i+3] +=v[3]*x[cval]+v[9]*x[cval+1]+v[15]*x[cval+2]+v[21]*x[cval+3]+v[27]*x[cval+4]+v[33]*x[cval+5];
507: z[6*i+4] +=v[4]*x[cval]+v[10]*x[cval+1]+v[16]*x[cval+2]+v[22]*x[cval+3]+v[28]*x[cval+4]+v[34]*x[cval+5];
508: z[6*i+5] +=v[5]*x[cval]+v[11]*x[cval+1]+v[17]*x[cval+2]+v[23]*x[cval+3]+v[29]*x[cval+4]+v[35]*x[cval+5];
509: v += 36;
510: }
511: xb +=6; ai++;
512: }
514: VecRestoreArray(xx,&x);
515: VecRestoreArray(zz,&z);
516: PetscLogFlops(72.0*(a->nz*2.0 - nonzerorow) - nonzerorow);
517: return(0);
518: }
521: PetscErrorCode MatMult_SeqSBAIJ_7(Mat A,Vec xx,Vec zz)
522: {
523: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
524: PetscScalar *x,*z,*xb,x1,x2,x3,x4,x5,x6,x7,zero=0.0;
525: MatScalar *v;
527: PetscInt mbs =a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
528: PetscInt nonzerorow=0;
531: VecSet(zz,zero);
532: VecGetArray(xx,&x);
533: VecGetArray(zz,&z);
535: v = a->a;
536: xb = x;
538: for (i=0; i<mbs; i++) {
539: n = ai[1] - ai[0]; /* length of i_th block row of A */
540: x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4]; x6=xb[5]; x7=xb[6];
541: ib = aj + *ai;
542: jmin = 0;
543: nonzerorow += (n>0);
544: if (*ib == i) { /* (diag of A)*x */
545: z[7*i] += v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4+ v[28]*x5 + v[35]*x6+ v[42]*x7;
546: z[7*i+1] += v[7]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4+ v[29]*x5 + v[36]*x6+ v[43]*x7;
547: z[7*i+2] += v[14]*x1+ v[15]*x2 +v[16]*x3 + v[23]*x4+ v[30]*x5 + v[37]*x6+ v[44]*x7;
548: z[7*i+3] += v[21]*x1+ v[22]*x2 +v[23]*x3 + v[24]*x4+ v[31]*x5 + v[38]*x6+ v[45]*x7;
549: z[7*i+4] += v[28]*x1+ v[29]*x2 +v[30]*x3 + v[31]*x4+ v[32]*x5 + v[39]*x6+ v[46]*x7;
550: z[7*i+5] += v[35]*x1+ v[36]*x2 +v[37]*x3 + v[38]*x4+ v[39]*x5 + v[40]*x6+ v[47]*x7;
551: z[7*i+6] += v[42]*x1+ v[43]*x2 +v[44]*x3 + v[45]*x4+ v[46]*x5 + v[47]*x6+ v[48]*x7;
552: v += 49; jmin++;
553: }
554: PetscPrefetchBlock(ib+jmin+n,n,0,PETSC_PREFETCH_HINT_NTA); /* Indices for the next row (assumes same size as this one) */
555: PetscPrefetchBlock(v+49*n,49*n,0,PETSC_PREFETCH_HINT_NTA); /* Entries for the next row */
556: for (j=jmin; j<n; j++) {
557: /* (strict lower triangular part of A)*x */
558: cval = ib[j]*7;
559: z[cval] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4+ v[4]*x5 + v[5]*x6+ v[6]*x7;
560: z[cval+1] += v[7]*x1 + v[8]*x2 + v[9]*x3 + v[10]*x4+ v[11]*x5 + v[12]*x6+ v[13]*x7;
561: z[cval+2] += v[14]*x1 + v[15]*x2 + v[16]*x3 + v[17]*x4+ v[18]*x5 + v[19]*x6+ v[20]*x7;
562: z[cval+3] += v[21]*x1 + v[22]*x2 + v[23]*x3 + v[24]*x4+ v[25]*x5 + v[26]*x6+ v[27]*x7;
563: z[cval+4] += v[28]*x1 + v[29]*x2 + v[30]*x3 + v[31]*x4+ v[32]*x5 + v[33]*x6+ v[34]*x7;
564: z[cval+5] += v[35]*x1 + v[36]*x2 + v[37]*x3 + v[38]*x4+ v[39]*x5 + v[40]*x6+ v[41]*x7;
565: z[cval+6] += v[42]*x1 + v[43]*x2 + v[44]*x3 + v[45]*x4+ v[46]*x5 + v[47]*x6+ v[48]*x7;
566: /* (strict upper triangular part of A)*x */
567: z[7*i] +=v[0]*x[cval]+v[7]*x[cval+1]+v[14]*x[cval+2]+v[21]*x[cval+3]+v[28]*x[cval+4]+v[35]*x[cval+5]+v[42]*x[cval+6];
568: z[7*i+1]+=v[1]*x[cval]+v[8]*x[cval+1]+v[15]*x[cval+2]+v[22]*x[cval+3]+v[29]*x[cval+4]+v[36]*x[cval+5]+v[43]*x[cval+6];
569: z[7*i+2]+=v[2]*x[cval]+v[9]*x[cval+1]+v[16]*x[cval+2]+v[23]*x[cval+3]+v[30]*x[cval+4]+v[37]*x[cval+5]+v[44]*x[cval+6];
570: z[7*i+3]+=v[3]*x[cval]+v[10]*x[cval+1]+v[17]*x[cval+2]+v[24]*x[cval+3]+v[31]*x[cval+4]+v[38]*x[cval+5]+v[45]*x[cval+6];
571: z[7*i+4]+=v[4]*x[cval]+v[11]*x[cval+1]+v[18]*x[cval+2]+v[25]*x[cval+3]+v[32]*x[cval+4]+v[39]*x[cval+5]+v[46]*x[cval+6];
572: z[7*i+5]+=v[5]*x[cval]+v[12]*x[cval+1]+v[19]*x[cval+2]+v[26]*x[cval+3]+v[33]*x[cval+4]+v[40]*x[cval+5]+v[47]*x[cval+6];
573: z[7*i+6]+=v[6]*x[cval]+v[13]*x[cval+1]+v[20]*x[cval+2]+v[27]*x[cval+3]+v[34]*x[cval+4]+v[41]*x[cval+5]+v[48]*x[cval+6];
574: v += 49;
575: }
576: xb +=7; ai++;
577: }
578: VecRestoreArray(xx,&x);
579: VecRestoreArray(zz,&z);
580: PetscLogFlops(98.0*(a->nz*2.0 - nonzerorow) - nonzerorow);
581: return(0);
582: }
584: /*
585: This will not work with MatScalar == float because it calls the BLAS
586: */
589: PetscErrorCode MatMult_SeqSBAIJ_N(Mat A,Vec xx,Vec zz)
590: {
591: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
592: PetscScalar *x,*x_ptr,*z,*z_ptr,*xb,*zb,*work,*workt,zero=0.0;
593: MatScalar *v;
595: PetscInt mbs =a->mbs,i,*idx,*aj,*ii,bs=A->rmap->bs,j,n,bs2=a->bs2,ncols,k;
596: PetscInt nonzerorow=0;
599: VecSet(zz,zero);
600: VecGetArray(xx,&x); x_ptr=x;
601: VecGetArray(zz,&z); z_ptr=z;
603: aj = a->j;
604: v = a->a;
605: ii = a->i;
607: if (!a->mult_work) {
608: PetscMalloc((A->rmap->N+1)*sizeof(PetscScalar),&a->mult_work);
609: }
610: work = a->mult_work;
612: for (i=0; i<mbs; i++) {
613: n = ii[1] - ii[0]; ncols = n*bs;
614: workt = work; idx=aj+ii[0];
615: nonzerorow += (n>0);
617: /* upper triangular part */
618: for (j=0; j<n; j++) {
619: xb = x_ptr + bs*(*idx++);
620: for (k=0; k<bs; k++) workt[k] = xb[k];
621: workt += bs;
622: }
623: /* z(i*bs:(i+1)*bs-1) += A(i,:)*x */
624: PetscKernel_w_gets_w_plus_Ar_times_v(bs,ncols,work,v,z);
626: /* strict lower triangular part */
627: idx = aj+ii[0];
628: if (*idx == i) {
629: ncols -= bs; v += bs2; idx++; n--;
630: }
632: if (ncols > 0) {
633: workt = work;
634: PetscMemzero(workt,ncols*sizeof(PetscScalar));
635: PetscKernel_w_gets_w_plus_trans_Ar_times_v(bs,ncols,x,v,workt);
636: for (j=0; j<n; j++) {
637: zb = z_ptr + bs*(*idx++);
638: for (k=0; k<bs; k++) zb[k] += workt[k];
639: workt += bs;
640: }
641: }
642: x += bs; v += n*bs2; z += bs; ii++;
643: }
645: VecRestoreArray(xx,&x);
646: VecRestoreArray(zz,&z);
647: PetscLogFlops(2.0*(a->nz*2.0 - nonzerorow)*bs2 - nonzerorow);
648: return(0);
649: }
653: PetscErrorCode MatMultAdd_SeqSBAIJ_1(Mat A,Vec xx,Vec yy,Vec zz)
654: {
655: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
656: PetscScalar *x,*z,*xb,x1;
657: MatScalar *v;
659: PetscInt mbs =a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
660: PetscInt nonzerorow=0;
663: VecCopy(yy,zz);
664: VecGetArray(xx,&x);
665: VecGetArray(zz,&z);
666: v = a->a;
667: xb = x;
669: for (i=0; i<mbs; i++) {
670: n = ai[1] - ai[0]; /* length of i_th row of A */
671: x1 = xb[0];
672: ib = aj + *ai;
673: jmin = 0;
674: nonzerorow += (n>0);
675: if (*ib == i) { /* (diag of A)*x */
676: z[i] += *v++ * x[*ib++]; jmin++;
677: }
678: for (j=jmin; j<n; j++) {
679: cval = *ib;
680: z[cval] += *v * x1; /* (strict lower triangular part of A)*x */
681: z[i] += *v++ * x[*ib++]; /* (strict upper triangular part of A)*x */
682: }
683: xb++; ai++;
684: }
686: VecRestoreArray(xx,&x);
687: VecRestoreArray(zz,&z);
689: PetscLogFlops(2.0*(a->nz*2.0 - nonzerorow));
690: return(0);
691: }
695: PetscErrorCode MatMultAdd_SeqSBAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
696: {
697: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
698: PetscScalar *x,*z,*xb,x1,x2;
699: MatScalar *v;
701: PetscInt mbs =a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
702: PetscInt nonzerorow=0;
705: VecCopy(yy,zz);
706: VecGetArray(xx,&x);
707: VecGetArray(zz,&z);
709: v = a->a;
710: xb = x;
712: for (i=0; i<mbs; i++) {
713: n = ai[1] - ai[0]; /* length of i_th block row of A */
714: x1 = xb[0]; x2 = xb[1];
715: ib = aj + *ai;
716: jmin = 0;
717: nonzerorow += (n>0);
718: if (*ib == i) { /* (diag of A)*x */
719: z[2*i] += v[0]*x1 + v[2]*x2;
720: z[2*i+1] += v[2]*x1 + v[3]*x2;
721: v += 4; jmin++;
722: }
723: PetscPrefetchBlock(ib+jmin+n,n,0,PETSC_PREFETCH_HINT_NTA); /* Indices for the next row (assumes same size as this one) */
724: PetscPrefetchBlock(v+4*n,4*n,0,PETSC_PREFETCH_HINT_NTA); /* Entries for the next row */
725: for (j=jmin; j<n; j++) {
726: /* (strict lower triangular part of A)*x */
727: cval = ib[j]*2;
728: z[cval] += v[0]*x1 + v[1]*x2;
729: z[cval+1] += v[2]*x1 + v[3]*x2;
730: /* (strict upper triangular part of A)*x */
731: z[2*i] += v[0]*x[cval] + v[2]*x[cval+1];
732: z[2*i+1] += v[1]*x[cval] + v[3]*x[cval+1];
733: v += 4;
734: }
735: xb +=2; ai++;
736: }
737: VecRestoreArray(xx,&x);
738: VecRestoreArray(zz,&z);
740: PetscLogFlops(4.0*(a->nz*2.0 - nonzerorow));
741: return(0);
742: }
746: PetscErrorCode MatMultAdd_SeqSBAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
747: {
748: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
749: PetscScalar *x,*z,*xb,x1,x2,x3;
750: MatScalar *v;
752: PetscInt mbs =a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
753: PetscInt nonzerorow=0;
756: VecCopy(yy,zz);
757: VecGetArray(xx,&x);
758: VecGetArray(zz,&z);
760: v = a->a;
761: xb = x;
763: for (i=0; i<mbs; i++) {
764: n = ai[1] - ai[0]; /* length of i_th block row of A */
765: x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
766: ib = aj + *ai;
767: jmin = 0;
768: nonzerorow += (n>0);
769: if (*ib == i) { /* (diag of A)*x */
770: z[3*i] += v[0]*x1 + v[3]*x2 + v[6]*x3;
771: z[3*i+1] += v[3]*x1 + v[4]*x2 + v[7]*x3;
772: z[3*i+2] += v[6]*x1 + v[7]*x2 + v[8]*x3;
773: v += 9; jmin++;
774: }
775: PetscPrefetchBlock(ib+jmin+n,n,0,PETSC_PREFETCH_HINT_NTA); /* Indices for the next row (assumes same size as this one) */
776: PetscPrefetchBlock(v+9*n,9*n,0,PETSC_PREFETCH_HINT_NTA); /* Entries for the next row */
777: for (j=jmin; j<n; j++) {
778: /* (strict lower triangular part of A)*x */
779: cval = ib[j]*3;
780: z[cval] += v[0]*x1 + v[1]*x2 + v[2]*x3;
781: z[cval+1] += v[3]*x1 + v[4]*x2 + v[5]*x3;
782: z[cval+2] += v[6]*x1 + v[7]*x2 + v[8]*x3;
783: /* (strict upper triangular part of A)*x */
784: z[3*i] += v[0]*x[cval] + v[3]*x[cval+1]+ v[6]*x[cval+2];
785: z[3*i+1] += v[1]*x[cval] + v[4]*x[cval+1]+ v[7]*x[cval+2];
786: z[3*i+2] += v[2]*x[cval] + v[5]*x[cval+1]+ v[8]*x[cval+2];
787: v += 9;
788: }
789: xb +=3; ai++;
790: }
792: VecRestoreArray(xx,&x);
793: VecRestoreArray(zz,&z);
795: PetscLogFlops(18.0*(a->nz*2.0 - nonzerorow));
796: return(0);
797: }
801: PetscErrorCode MatMultAdd_SeqSBAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
802: {
803: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
804: PetscScalar *x,*z,*xb,x1,x2,x3,x4;
805: MatScalar *v;
807: PetscInt mbs =a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
808: PetscInt nonzerorow=0;
811: VecCopy(yy,zz);
812: VecGetArray(xx,&x);
813: VecGetArray(zz,&z);
815: v = a->a;
816: xb = x;
818: for (i=0; i<mbs; i++) {
819: n = ai[1] - ai[0]; /* length of i_th block row of A */
820: x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
821: ib = aj + *ai;
822: jmin = 0;
823: nonzerorow += (n>0);
824: if (*ib == i) { /* (diag of A)*x */
825: z[4*i] += v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4;
826: z[4*i+1] += v[4]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4;
827: z[4*i+2] += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[14]*x4;
828: z[4*i+3] += v[12]*x1+ v[13]*x2+ v[14]*x3 + v[15]*x4;
829: v += 16; jmin++;
830: }
831: PetscPrefetchBlock(ib+jmin+n,n,0,PETSC_PREFETCH_HINT_NTA); /* Indices for the next row (assumes same size as this one) */
832: PetscPrefetchBlock(v+16*n,16*n,0,PETSC_PREFETCH_HINT_NTA); /* Entries for the next row */
833: for (j=jmin; j<n; j++) {
834: /* (strict lower triangular part of A)*x */
835: cval = ib[j]*4;
836: z[cval] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4;
837: z[cval+1] += v[4]*x1 + v[5]*x2 + v[6]*x3 + v[7]*x4;
838: z[cval+2] += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[11]*x4;
839: z[cval+3] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4;
840: /* (strict upper triangular part of A)*x */
841: z[4*i] += v[0]*x[cval] + v[4]*x[cval+1]+ v[8]*x[cval+2] + v[12]*x[cval+3];
842: z[4*i+1] += v[1]*x[cval] + v[5]*x[cval+1]+ v[9]*x[cval+2] + v[13]*x[cval+3];
843: z[4*i+2] += v[2]*x[cval] + v[6]*x[cval+1]+ v[10]*x[cval+2]+ v[14]*x[cval+3];
844: z[4*i+3] += v[3]*x[cval] + v[7]*x[cval+1]+ v[11]*x[cval+2]+ v[15]*x[cval+3];
845: v += 16;
846: }
847: xb +=4; ai++;
848: }
850: VecRestoreArray(xx,&x);
851: VecRestoreArray(zz,&z);
853: PetscLogFlops(32.0*(a->nz*2.0 - nonzerorow));
854: return(0);
855: }
859: PetscErrorCode MatMultAdd_SeqSBAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
860: {
861: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
862: PetscScalar *x,*z,*xb,x1,x2,x3,x4,x5;
863: MatScalar *v;
865: PetscInt mbs =a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
866: PetscInt nonzerorow=0;
869: VecCopy(yy,zz);
870: VecGetArray(xx,&x);
871: VecGetArray(zz,&z);
873: v = a->a;
874: xb = x;
876: for (i=0; i<mbs; i++) {
877: n = ai[1] - ai[0]; /* length of i_th block row of A */
878: x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4];
879: ib = aj + *ai;
880: jmin = 0;
881: nonzerorow += (n>0);
882: if (*ib == i) { /* (diag of A)*x */
883: z[5*i] += v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4+ v[20]*x5;
884: z[5*i+1] += v[5]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4+ v[21]*x5;
885: z[5*i+2] += v[10]*x1 +v[11]*x2 + v[12]*x3 + v[17]*x4+ v[22]*x5;
886: z[5*i+3] += v[15]*x1 +v[16]*x2 + v[17]*x3 + v[18]*x4+ v[23]*x5;
887: z[5*i+4] += v[20]*x1 +v[21]*x2 + v[22]*x3 + v[23]*x4+ v[24]*x5;
888: v += 25; jmin++;
889: }
890: PetscPrefetchBlock(ib+jmin+n,n,0,PETSC_PREFETCH_HINT_NTA); /* Indices for the next row (assumes same size as this one) */
891: PetscPrefetchBlock(v+25*n,25*n,0,PETSC_PREFETCH_HINT_NTA); /* Entries for the next row */
892: for (j=jmin; j<n; j++) {
893: /* (strict lower triangular part of A)*x */
894: cval = ib[j]*5;
895: z[cval] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4 + v[4]*x5;
896: z[cval+1] += v[5]*x1 + v[6]*x2 + v[7]*x3 + v[8]*x4 + v[9]*x5;
897: z[cval+2] += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4+ v[14]*x5;
898: z[cval+3] += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4+ v[19]*x5;
899: z[cval+4] += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4+ v[24]*x5;
900: /* (strict upper triangular part of A)*x */
901: z[5*i] +=v[0]*x[cval]+v[5]*x[cval+1]+v[10]*x[cval+2]+v[15]*x[cval+3]+v[20]*x[cval+4];
902: z[5*i+1] +=v[1]*x[cval]+v[6]*x[cval+1]+v[11]*x[cval+2]+v[16]*x[cval+3]+v[21]*x[cval+4];
903: z[5*i+2] +=v[2]*x[cval]+v[7]*x[cval+1]+v[12]*x[cval+2]+v[17]*x[cval+3]+v[22]*x[cval+4];
904: z[5*i+3] +=v[3]*x[cval]+v[8]*x[cval+1]+v[13]*x[cval+2]+v[18]*x[cval+3]+v[23]*x[cval+4];
905: z[5*i+4] +=v[4]*x[cval]+v[9]*x[cval+1]+v[14]*x[cval+2]+v[19]*x[cval+3]+v[24]*x[cval+4];
906: v += 25;
907: }
908: xb +=5; ai++;
909: }
911: VecRestoreArray(xx,&x);
912: VecRestoreArray(zz,&z);
914: PetscLogFlops(50.0*(a->nz*2.0 - nonzerorow));
915: return(0);
916: }
919: PetscErrorCode MatMultAdd_SeqSBAIJ_6(Mat A,Vec xx,Vec yy,Vec zz)
920: {
921: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
922: PetscScalar *x,*z,*xb,x1,x2,x3,x4,x5,x6;
923: MatScalar *v;
925: PetscInt mbs =a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
926: PetscInt nonzerorow=0;
929: VecCopy(yy,zz);
930: VecGetArray(xx,&x);
931: VecGetArray(zz,&z);
933: v = a->a;
934: xb = x;
936: for (i=0; i<mbs; i++) {
937: n = ai[1] - ai[0]; /* length of i_th block row of A */
938: x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4]; x6=xb[5];
939: ib = aj + *ai;
940: jmin = 0;
941: nonzerorow += (n>0);
942: if (*ib == i) { /* (diag of A)*x */
943: z[6*i] += v[0]*x1 + v[6]*x2 + v[12]*x3 + v[18]*x4+ v[24]*x5 + v[30]*x6;
944: z[6*i+1] += v[6]*x1 + v[7]*x2 + v[13]*x3 + v[19]*x4+ v[25]*x5 + v[31]*x6;
945: z[6*i+2] += v[12]*x1 +v[13]*x2 + v[14]*x3 + v[20]*x4+ v[26]*x5 + v[32]*x6;
946: z[6*i+3] += v[18]*x1 +v[19]*x2 + v[20]*x3 + v[21]*x4+ v[27]*x5 + v[33]*x6;
947: z[6*i+4] += v[24]*x1 +v[25]*x2 + v[26]*x3 + v[27]*x4+ v[28]*x5 + v[34]*x6;
948: z[6*i+5] += v[30]*x1 +v[31]*x2 + v[32]*x3 + v[33]*x4+ v[34]*x5 + v[35]*x6;
949: v += 36; jmin++;
950: }
951: PetscPrefetchBlock(ib+jmin+n,n,0,PETSC_PREFETCH_HINT_NTA); /* Indices for the next row (assumes same size as this one) */
952: PetscPrefetchBlock(v+36*n,36*n,0,PETSC_PREFETCH_HINT_NTA); /* Entries for the next row */
953: for (j=jmin; j<n; j++) {
954: /* (strict lower triangular part of A)*x */
955: cval = ib[j]*6;
956: z[cval] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4+ v[4]*x5 + v[5]*x6;
957: z[cval+1] += v[6]*x1 + v[7]*x2 + v[8]*x3 + v[9]*x4+ v[10]*x5 + v[11]*x6;
958: z[cval+2] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4+ v[16]*x5 + v[17]*x6;
959: z[cval+3] += v[18]*x1 + v[19]*x2 + v[20]*x3 + v[21]*x4+ v[22]*x5 + v[23]*x6;
960: z[cval+4] += v[24]*x1 + v[25]*x2 + v[26]*x3 + v[27]*x4+ v[28]*x5 + v[29]*x6;
961: z[cval+5] += v[30]*x1 + v[31]*x2 + v[32]*x3 + v[33]*x4+ v[34]*x5 + v[35]*x6;
962: /* (strict upper triangular part of A)*x */
963: z[6*i] +=v[0]*x[cval]+v[6]*x[cval+1]+v[12]*x[cval+2]+v[18]*x[cval+3]+v[24]*x[cval+4]+v[30]*x[cval+5];
964: z[6*i+1] +=v[1]*x[cval]+v[7]*x[cval+1]+v[13]*x[cval+2]+v[19]*x[cval+3]+v[25]*x[cval+4]+v[31]*x[cval+5];
965: z[6*i+2] +=v[2]*x[cval]+v[8]*x[cval+1]+v[14]*x[cval+2]+v[20]*x[cval+3]+v[26]*x[cval+4]+v[32]*x[cval+5];
966: z[6*i+3] +=v[3]*x[cval]+v[9]*x[cval+1]+v[15]*x[cval+2]+v[21]*x[cval+3]+v[27]*x[cval+4]+v[33]*x[cval+5];
967: z[6*i+4] +=v[4]*x[cval]+v[10]*x[cval+1]+v[16]*x[cval+2]+v[22]*x[cval+3]+v[28]*x[cval+4]+v[34]*x[cval+5];
968: z[6*i+5] +=v[5]*x[cval]+v[11]*x[cval+1]+v[17]*x[cval+2]+v[23]*x[cval+3]+v[29]*x[cval+4]+v[35]*x[cval+5];
969: v += 36;
970: }
971: xb +=6; ai++;
972: }
974: VecRestoreArray(xx,&x);
975: VecRestoreArray(zz,&z);
977: PetscLogFlops(72.0*(a->nz*2.0 - nonzerorow));
978: return(0);
979: }
983: PetscErrorCode MatMultAdd_SeqSBAIJ_7(Mat A,Vec xx,Vec yy,Vec zz)
984: {
985: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
986: PetscScalar *x,*z,*xb,x1,x2,x3,x4,x5,x6,x7;
987: MatScalar *v;
989: PetscInt mbs =a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
990: PetscInt nonzerorow=0;
993: VecCopy(yy,zz);
994: VecGetArray(xx,&x);
995: VecGetArray(zz,&z);
997: v = a->a;
998: xb = x;
1000: for (i=0; i<mbs; i++) {
1001: n = ai[1] - ai[0]; /* length of i_th block row of A */
1002: x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4]; x6=xb[5]; x7=xb[6];
1003: ib = aj + *ai;
1004: jmin = 0;
1005: nonzerorow += (n>0);
1006: if (*ib == i) { /* (diag of A)*x */
1007: z[7*i] += v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4+ v[28]*x5 + v[35]*x6+ v[42]*x7;
1008: z[7*i+1] += v[7]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4+ v[29]*x5 + v[36]*x6+ v[43]*x7;
1009: z[7*i+2] += v[14]*x1+ v[15]*x2 +v[16]*x3 + v[23]*x4+ v[30]*x5 + v[37]*x6+ v[44]*x7;
1010: z[7*i+3] += v[21]*x1+ v[22]*x2 +v[23]*x3 + v[24]*x4+ v[31]*x5 + v[38]*x6+ v[45]*x7;
1011: z[7*i+4] += v[28]*x1+ v[29]*x2 +v[30]*x3 + v[31]*x4+ v[32]*x5 + v[39]*x6+ v[46]*x7;
1012: z[7*i+5] += v[35]*x1+ v[36]*x2 +v[37]*x3 + v[38]*x4+ v[39]*x5 + v[40]*x6+ v[47]*x7;
1013: z[7*i+6] += v[42]*x1+ v[43]*x2 +v[44]*x3 + v[45]*x4+ v[46]*x5 + v[47]*x6+ v[48]*x7;
1014: v += 49; jmin++;
1015: }
1016: PetscPrefetchBlock(ib+jmin+n,n,0,PETSC_PREFETCH_HINT_NTA); /* Indices for the next row (assumes same size as this one) */
1017: PetscPrefetchBlock(v+49*n,49*n,0,PETSC_PREFETCH_HINT_NTA); /* Entries for the next row */
1018: for (j=jmin; j<n; j++) {
1019: /* (strict lower triangular part of A)*x */
1020: cval = ib[j]*7;
1021: z[cval] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4+ v[4]*x5 + v[5]*x6+ v[6]*x7;
1022: z[cval+1] += v[7]*x1 + v[8]*x2 + v[9]*x3 + v[10]*x4+ v[11]*x5 + v[12]*x6+ v[13]*x7;
1023: z[cval+2] += v[14]*x1 + v[15]*x2 + v[16]*x3 + v[17]*x4+ v[18]*x5 + v[19]*x6+ v[20]*x7;
1024: z[cval+3] += v[21]*x1 + v[22]*x2 + v[23]*x3 + v[24]*x4+ v[25]*x5 + v[26]*x6+ v[27]*x7;
1025: z[cval+4] += v[28]*x1 + v[29]*x2 + v[30]*x3 + v[31]*x4+ v[32]*x5 + v[33]*x6+ v[34]*x7;
1026: z[cval+5] += v[35]*x1 + v[36]*x2 + v[37]*x3 + v[38]*x4+ v[39]*x5 + v[40]*x6+ v[41]*x7;
1027: z[cval+6] += v[42]*x1 + v[43]*x2 + v[44]*x3 + v[45]*x4+ v[46]*x5 + v[47]*x6+ v[48]*x7;
1028: /* (strict upper triangular part of A)*x */
1029: z[7*i] +=v[0]*x[cval]+v[7]*x[cval+1]+v[14]*x[cval+2]+v[21]*x[cval+3]+v[28]*x[cval+4]+v[35]*x[cval+5]+v[42]*x[cval+6];
1030: z[7*i+1]+=v[1]*x[cval]+v[8]*x[cval+1]+v[15]*x[cval+2]+v[22]*x[cval+3]+v[29]*x[cval+4]+v[36]*x[cval+5]+v[43]*x[cval+6];
1031: z[7*i+2]+=v[2]*x[cval]+v[9]*x[cval+1]+v[16]*x[cval+2]+v[23]*x[cval+3]+v[30]*x[cval+4]+v[37]*x[cval+5]+v[44]*x[cval+6];
1032: z[7*i+3]+=v[3]*x[cval]+v[10]*x[cval+1]+v[17]*x[cval+2]+v[24]*x[cval+3]+v[31]*x[cval+4]+v[38]*x[cval+5]+v[45]*x[cval+6];
1033: z[7*i+4]+=v[4]*x[cval]+v[11]*x[cval+1]+v[18]*x[cval+2]+v[25]*x[cval+3]+v[32]*x[cval+4]+v[39]*x[cval+5]+v[46]*x[cval+6];
1034: z[7*i+5]+=v[5]*x[cval]+v[12]*x[cval+1]+v[19]*x[cval+2]+v[26]*x[cval+3]+v[33]*x[cval+4]+v[40]*x[cval+5]+v[47]*x[cval+6];
1035: z[7*i+6]+=v[6]*x[cval]+v[13]*x[cval+1]+v[20]*x[cval+2]+v[27]*x[cval+3]+v[34]*x[cval+4]+v[41]*x[cval+5]+v[48]*x[cval+6];
1036: v += 49;
1037: }
1038: xb +=7; ai++;
1039: }
1041: VecRestoreArray(xx,&x);
1042: VecRestoreArray(zz,&z);
1044: PetscLogFlops(98.0*(a->nz*2.0 - nonzerorow));
1045: return(0);
1046: }
1050: PetscErrorCode MatMultAdd_SeqSBAIJ_N(Mat A,Vec xx,Vec yy,Vec zz)
1051: {
1052: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
1053: PetscScalar *x,*x_ptr,*z,*z_ptr=0,*xb,*zb,*work,*workt;
1054: MatScalar *v;
1056: PetscInt mbs =a->mbs,i,*idx,*aj,*ii,bs=A->rmap->bs,j,n,bs2=a->bs2,ncols,k;
1057: PetscInt nonzerorow=0;
1060: VecCopy(yy,zz);
1061: VecGetArray(xx,&x); x_ptr=x;
1062: VecGetArray(zz,&z); z_ptr=z;
1064: aj = a->j;
1065: v = a->a;
1066: ii = a->i;
1068: if (!a->mult_work) {
1069: PetscMalloc((A->rmap->n+1)*sizeof(PetscScalar),&a->mult_work);
1070: }
1071: work = a->mult_work;
1074: for (i=0; i<mbs; i++) {
1075: n = ii[1] - ii[0]; ncols = n*bs;
1076: workt = work; idx=aj+ii[0];
1077: nonzerorow += (n>0);
1079: /* upper triangular part */
1080: for (j=0; j<n; j++) {
1081: xb = x_ptr + bs*(*idx++);
1082: for (k=0; k<bs; k++) workt[k] = xb[k];
1083: workt += bs;
1084: }
1085: /* z(i*bs:(i+1)*bs-1) += A(i,:)*x */
1086: PetscKernel_w_gets_w_plus_Ar_times_v(bs,ncols,work,v,z);
1088: /* strict lower triangular part */
1089: idx = aj+ii[0];
1090: if (*idx == i) {
1091: ncols -= bs; v += bs2; idx++; n--;
1092: }
1093: if (ncols > 0) {
1094: workt = work;
1095: PetscMemzero(workt,ncols*sizeof(PetscScalar));
1096: PetscKernel_w_gets_w_plus_trans_Ar_times_v(bs,ncols,x,v,workt);
1097: for (j=0; j<n; j++) {
1098: zb = z_ptr + bs*(*idx++);
1099: for (k=0; k<bs; k++) zb[k] += workt[k];
1100: workt += bs;
1101: }
1102: }
1104: x += bs; v += n*bs2; z += bs; ii++;
1105: }
1107: VecRestoreArray(xx,&x);
1108: VecRestoreArray(zz,&z);
1110: PetscLogFlops(2.0*(a->nz*2.0 - nonzerorow));
1111: return(0);
1112: }
1116: PetscErrorCode MatScale_SeqSBAIJ(Mat inA,PetscScalar alpha)
1117: {
1118: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)inA->data;
1119: PetscScalar oalpha = alpha;
1121: PetscBLASInt one = 1,totalnz;
1124: PetscBLASIntCast(a->bs2*a->nz,&totalnz);
1125: PetscStackCallBLAS("BLASscal",BLASscal_(&totalnz,&oalpha,a->a,&one));
1126: PetscLogFlops(totalnz);
1127: return(0);
1128: }
1132: PetscErrorCode MatNorm_SeqSBAIJ(Mat A,NormType type,PetscReal *norm)
1133: {
1134: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
1135: MatScalar *v = a->a;
1136: PetscReal sum_diag = 0.0, sum_off = 0.0, *sum;
1137: PetscInt i,j,k,bs = A->rmap->bs,bs2=a->bs2,k1,mbs=a->mbs,*aj=a->j;
1139: PetscInt *jl,*il,jmin,jmax,nexti,ik,*col;
1142: if (type == NORM_FROBENIUS) {
1143: for (k=0; k<mbs; k++) {
1144: jmin = a->i[k]; jmax = a->i[k+1];
1145: col = aj + jmin;
1146: if (*col == k) { /* diagonal block */
1147: for (i=0; i<bs2; i++) {
1148: sum_diag += PetscRealPart(PetscConj(*v)*(*v)); v++;
1149: }
1150: jmin++;
1151: }
1152: for (j=jmin; j<jmax; j++) { /* off-diagonal blocks */
1153: for (i=0; i<bs2; i++) {
1154: sum_off += PetscRealPart(PetscConj(*v)*(*v)); v++;
1155: }
1156: }
1157: }
1158: *norm = PetscSqrtReal(sum_diag + 2*sum_off);
1159: } else if (type == NORM_INFINITY || type == NORM_1) { /* maximum row/column sum */
1160: PetscMalloc3(bs,PetscReal,&sum,mbs,PetscInt,&il,mbs,PetscInt,&jl);
1161: for (i=0; i<mbs; i++) jl[i] = mbs;
1162: il[0] = 0;
1164: *norm = 0.0;
1165: for (k=0; k<mbs; k++) { /* k_th block row */
1166: for (j=0; j<bs; j++) sum[j]=0.0;
1167: /*-- col sum --*/
1168: i = jl[k]; /* first |A(i,k)| to be added */
1169: /* jl[k]=i: first nozero element in row i for submatrix A(1:k,k:n) (active window)
1170: at step k */
1171: while (i<mbs) {
1172: nexti = jl[i]; /* next block row to be added */
1173: ik = il[i]; /* block index of A(i,k) in the array a */
1174: for (j=0; j<bs; j++) {
1175: v = a->a + ik*bs2 + j*bs;
1176: for (k1=0; k1<bs; k1++) {
1177: sum[j] += PetscAbsScalar(*v); v++;
1178: }
1179: }
1180: /* update il, jl */
1181: jmin = ik + 1; /* block index of array a: points to the next nonzero of A in row i */
1182: jmax = a->i[i+1];
1183: if (jmin < jmax) {
1184: il[i] = jmin;
1185: j = a->j[jmin];
1186: jl[i] = jl[j]; jl[j]=i;
1187: }
1188: i = nexti;
1189: }
1190: /*-- row sum --*/
1191: jmin = a->i[k]; jmax = a->i[k+1];
1192: for (i=jmin; i<jmax; i++) {
1193: for (j=0; j<bs; j++) {
1194: v = a->a + i*bs2 + j;
1195: for (k1=0; k1<bs; k1++) {
1196: sum[j] += PetscAbsScalar(*v); v += bs;
1197: }
1198: }
1199: }
1200: /* add k_th block row to il, jl */
1201: col = aj+jmin;
1202: if (*col == k) jmin++;
1203: if (jmin < jmax) {
1204: il[k] = jmin;
1205: j = a->j[jmin]; jl[k] = jl[j]; jl[j] = k;
1206: }
1207: for (j=0; j<bs; j++) {
1208: if (sum[j] > *norm) *norm = sum[j];
1209: }
1210: }
1211: PetscFree3(sum,il,jl);
1212: } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for this norm yet");
1213: return(0);
1214: }
1218: PetscErrorCode MatEqual_SeqSBAIJ(Mat A,Mat B,PetscBool * flg)
1219: {
1220: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data,*b = (Mat_SeqSBAIJ*)B->data;
1224: /* If the matrix/block dimensions are not equal, or no of nonzeros or shift */
1225: if ((A->rmap->N != B->rmap->N) || (A->cmap->n != B->cmap->n) || (A->rmap->bs != B->rmap->bs)|| (a->nz != b->nz)) {
1226: *flg = PETSC_FALSE;
1227: return(0);
1228: }
1230: /* if the a->i are the same */
1231: PetscMemcmp(a->i,b->i,(a->mbs+1)*sizeof(PetscInt),flg);
1232: if (!*flg) return(0);
1234: /* if a->j are the same */
1235: PetscMemcmp(a->j,b->j,(a->nz)*sizeof(PetscInt),flg);
1236: if (!*flg) return(0);
1238: /* if a->a are the same */
1239: PetscMemcmp(a->a,b->a,(a->nz)*(A->rmap->bs)*(A->rmap->bs)*sizeof(PetscScalar),flg);
1240: return(0);
1241: }
1245: PetscErrorCode MatGetDiagonal_SeqSBAIJ(Mat A,Vec v)
1246: {
1247: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
1249: PetscInt i,j,k,row,bs,*ai,*aj,ambs,bs2;
1250: PetscScalar *x,zero = 0.0;
1251: MatScalar *aa,*aa_j;
1254: bs = A->rmap->bs;
1255: if (A->factortype && bs>1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix with bs>1");
1257: aa = a->a;
1258: ambs = a->mbs;
1260: if (A->factortype == MAT_FACTOR_CHOLESKY || A->factortype == MAT_FACTOR_ICC) {
1261: PetscInt *diag=a->diag;
1262: aa = a->a;
1263: ambs = a->mbs;
1264: VecGetArray(v,&x);
1265: for (i=0; i<ambs; i++) x[i] = 1.0/aa[diag[i]];
1266: VecRestoreArray(v,&x);
1267: return(0);
1268: }
1270: ai = a->i;
1271: aj = a->j;
1272: bs2 = a->bs2;
1273: VecSet(v,zero);
1274: VecGetArray(v,&x);
1275: for (i=0; i<ambs; i++) {
1276: j=ai[i];
1277: if (aj[j] == i) { /* if this is a diagonal element */
1278: row = i*bs;
1279: aa_j = aa + j*bs2;
1280: for (k=0; k<bs2; k+=(bs+1),row++) x[row] = aa_j[k];
1281: }
1282: }
1283: VecRestoreArray(v,&x);
1284: return(0);
1285: }
1289: PetscErrorCode MatDiagonalScale_SeqSBAIJ(Mat A,Vec ll,Vec rr)
1290: {
1291: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
1292: PetscScalar *l,x,*li,*ri;
1293: MatScalar *aa,*v;
1295: PetscInt i,j,k,lm,M,m,*ai,*aj,mbs,tmp,bs,bs2;
1296: PetscBool flg;
1299: if (ll != rr) {
1300: VecEqual(ll,rr,&flg);
1301: if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be same\n");
1302: }
1303: if (!ll) return(0);
1304: ai = a->i;
1305: aj = a->j;
1306: aa = a->a;
1307: m = A->rmap->N;
1308: bs = A->rmap->bs;
1309: mbs = a->mbs;
1310: bs2 = a->bs2;
1312: VecGetArray(ll,&l);
1313: VecGetLocalSize(ll,&lm);
1314: if (lm != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left scaling vector wrong length");
1315: for (i=0; i<mbs; i++) { /* for each block row */
1316: M = ai[i+1] - ai[i];
1317: li = l + i*bs;
1318: v = aa + bs2*ai[i];
1319: for (j=0; j<M; j++) { /* for each block */
1320: ri = l + bs*aj[ai[i]+j];
1321: for (k=0; k<bs; k++) {
1322: x = ri[k];
1323: for (tmp=0; tmp<bs; tmp++) (*v++) *= li[tmp]*x;
1324: }
1325: }
1326: }
1327: VecRestoreArray(ll,&l);
1328: PetscLogFlops(2.0*a->nz);
1329: return(0);
1330: }
1334: PetscErrorCode MatGetInfo_SeqSBAIJ(Mat A,MatInfoType flag,MatInfo *info)
1335: {
1336: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
1339: info->block_size = a->bs2;
1340: info->nz_allocated = a->bs2*a->maxnz; /*num. of nonzeros in upper triangular part */
1341: info->nz_used = a->bs2*a->nz; /*num. of nonzeros in upper triangular part */
1342: info->nz_unneeded = (double)(info->nz_allocated - info->nz_used);
1343: info->assemblies = A->num_ass;
1344: info->mallocs = A->info.mallocs;
1345: info->memory = ((PetscObject)A)->mem;
1346: if (A->factortype) {
1347: info->fill_ratio_given = A->info.fill_ratio_given;
1348: info->fill_ratio_needed = A->info.fill_ratio_needed;
1349: info->factor_mallocs = A->info.factor_mallocs;
1350: } else {
1351: info->fill_ratio_given = 0;
1352: info->fill_ratio_needed = 0;
1353: info->factor_mallocs = 0;
1354: }
1355: return(0);
1356: }
1361: PetscErrorCode MatZeroEntries_SeqSBAIJ(Mat A)
1362: {
1363: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
1367: PetscMemzero(a->a,a->bs2*a->i[a->mbs]*sizeof(MatScalar));
1368: return(0);
1369: }
1373: /*
1374: This code does not work since it only checks the upper triangular part of
1375: the matrix. Hence it is not listed in the function table.
1376: */
1377: PetscErrorCode MatGetRowMaxAbs_SeqSBAIJ(Mat A,Vec v,PetscInt idx[])
1378: {
1379: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
1381: PetscInt i,j,n,row,col,bs,*ai,*aj,mbs;
1382: PetscReal atmp;
1383: MatScalar *aa;
1384: PetscScalar *x;
1385: PetscInt ncols,brow,bcol,krow,kcol;
1388: if (idx) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Send email to petsc-maint@mcs.anl.gov");
1389: if (A->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
1390: bs = A->rmap->bs;
1391: aa = a->a;
1392: ai = a->i;
1393: aj = a->j;
1394: mbs = a->mbs;
1396: VecSet(v,0.0);
1397: VecGetArray(v,&x);
1398: VecGetLocalSize(v,&n);
1399: if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
1400: for (i=0; i<mbs; i++) {
1401: ncols = ai[1] - ai[0]; ai++;
1402: brow = bs*i;
1403: for (j=0; j<ncols; j++) {
1404: bcol = bs*(*aj);
1405: for (kcol=0; kcol<bs; kcol++) {
1406: col = bcol + kcol; /* col index */
1407: for (krow=0; krow<bs; krow++) {
1408: atmp = PetscAbsScalar(*aa); aa++;
1409: row = brow + krow; /* row index */
1410: if (PetscRealPart(x[row]) < atmp) x[row] = atmp;
1411: if (*aj > i && PetscRealPart(x[col]) < atmp) x[col] = atmp;
1412: }
1413: }
1414: aj++;
1415: }
1416: }
1417: VecRestoreArray(v,&x);
1418: return(0);
1419: }