Actual source code: baij.c
2: /*
3: Defines the basic matrix operations for the BAIJ (compressed row)
4: matrix storage format.
5: */
6: #include <../src/mat/impls/baij/seq/baij.h>
7: #include <petscblaslapack.h>
8: #include <petsc/private/kernels/blockinvert.h>
9: #include <petsc/private/kernels/blockmatmult.h>
11: #if defined(PETSC_HAVE_HYPRE)
12: PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*);
13: #endif
15: #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
16: PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBAIJMKL(Mat,MatType,MatReuse,Mat*);
17: #endif
18: PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*);
20: PetscErrorCode MatGetColumnReductions_SeqBAIJ(Mat A,PetscInt type,PetscReal *reductions)
21: {
23: Mat_SeqBAIJ *a_aij = (Mat_SeqBAIJ*) A->data;
24: PetscInt m,n,i;
25: PetscInt ib,jb,bs = A->rmap->bs;
26: MatScalar *a_val = a_aij->a;
29: MatGetSize(A,&m,&n);
30: for (i=0; i<n; i++) reductions[i] = 0.0;
31: if (type == NORM_2) {
32: for (i=a_aij->i[0]; i<a_aij->i[A->rmap->n/bs]; i++) {
33: for (jb=0; jb<bs; jb++) {
34: for (ib=0; ib<bs; ib++) {
35: reductions[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val * *a_val);
36: a_val++;
37: }
38: }
39: }
40: } else if (type == NORM_1) {
41: for (i=a_aij->i[0]; i<a_aij->i[A->rmap->n/bs]; i++) {
42: for (jb=0; jb<bs; jb++) {
43: for (ib=0; ib<bs; ib++) {
44: reductions[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val);
45: a_val++;
46: }
47: }
48: }
49: } else if (type == NORM_INFINITY) {
50: for (i=a_aij->i[0]; i<a_aij->i[A->rmap->n/bs]; i++) {
51: for (jb=0; jb<bs; jb++) {
52: for (ib=0; ib<bs; ib++) {
53: int col = A->cmap->rstart + a_aij->j[i] * bs + jb;
54: reductions[col] = PetscMax(PetscAbsScalar(*a_val), reductions[col]);
55: a_val++;
56: }
57: }
58: }
59: } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) {
60: for (i=a_aij->i[0]; i<a_aij->i[A->rmap->n/bs]; i++) {
61: for (jb=0; jb<bs; jb++) {
62: for (ib=0; ib<bs; ib++) {
63: reductions[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscRealPart(*a_val);
64: a_val++;
65: }
66: }
67: }
68: } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) {
69: for (i=a_aij->i[0]; i<a_aij->i[A->rmap->n/bs]; i++) {
70: for (jb=0; jb<bs; jb++) {
71: for (ib=0; ib<bs; ib++) {
72: reductions[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscImaginaryPart(*a_val);
73: a_val++;
74: }
75: }
76: }
77: } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown reduction type");
78: if (type == NORM_2) {
79: for (i=0; i<n; i++) reductions[i] = PetscSqrtReal(reductions[i]);
80: } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
81: for (i=0; i<n; i++) reductions[i] /= m;
82: }
83: return(0);
84: }
86: PetscErrorCode MatInvertBlockDiagonal_SeqBAIJ(Mat A,const PetscScalar **values)
87: {
88: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*) A->data;
90: PetscInt *diag_offset,i,bs = A->rmap->bs,mbs = a->mbs,ipvt[5],bs2 = bs*bs,*v_pivots;
91: MatScalar *v = a->a,*odiag,*diag,work[25],*v_work;
92: PetscReal shift = 0.0;
93: PetscBool allowzeropivot,zeropivotdetected=PETSC_FALSE;
96: allowzeropivot = PetscNot(A->erroriffailure);
98: if (a->idiagvalid) {
99: if (values) *values = a->idiag;
100: return(0);
101: }
102: MatMarkDiagonal_SeqBAIJ(A);
103: diag_offset = a->diag;
104: if (!a->idiag) {
105: PetscMalloc1(bs2*mbs,&a->idiag);
106: PetscLogObjectMemory((PetscObject)A,bs2*mbs*sizeof(PetscScalar));
107: }
108: diag = a->idiag;
109: if (values) *values = a->idiag;
110: /* factor and invert each block */
111: switch (bs) {
112: case 1:
113: for (i=0; i<mbs; i++) {
114: odiag = v + 1*diag_offset[i];
115: diag[0] = odiag[0];
117: if (PetscAbsScalar(diag[0] + shift) < PETSC_MACHINE_EPSILON) {
118: if (allowzeropivot) {
119: A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
120: A->factorerror_zeropivot_value = PetscAbsScalar(diag[0]);
121: A->factorerror_zeropivot_row = i;
122: PetscInfo1(A,"Zero pivot, row %D\n",i);
123: } else SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot, row %D pivot value %g tolerance %g",i,(double)PetscAbsScalar(diag[0]),(double)PETSC_MACHINE_EPSILON);
124: }
126: diag[0] = (PetscScalar)1.0 / (diag[0] + shift);
127: diag += 1;
128: }
129: break;
130: case 2:
131: for (i=0; i<mbs; i++) {
132: odiag = v + 4*diag_offset[i];
133: diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
134: PetscKernel_A_gets_inverse_A_2(diag,shift,allowzeropivot,&zeropivotdetected);
135: if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
136: diag += 4;
137: }
138: break;
139: case 3:
140: for (i=0; i<mbs; i++) {
141: odiag = v + 9*diag_offset[i];
142: diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
143: diag[4] = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7];
144: diag[8] = odiag[8];
145: PetscKernel_A_gets_inverse_A_3(diag,shift,allowzeropivot,&zeropivotdetected);
146: if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
147: diag += 9;
148: }
149: break;
150: case 4:
151: for (i=0; i<mbs; i++) {
152: odiag = v + 16*diag_offset[i];
153: PetscArraycpy(diag,odiag,16);
154: PetscKernel_A_gets_inverse_A_4(diag,shift,allowzeropivot,&zeropivotdetected);
155: if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
156: diag += 16;
157: }
158: break;
159: case 5:
160: for (i=0; i<mbs; i++) {
161: odiag = v + 25*diag_offset[i];
162: PetscArraycpy(diag,odiag,25);
163: PetscKernel_A_gets_inverse_A_5(diag,ipvt,work,shift,allowzeropivot,&zeropivotdetected);
164: if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
165: diag += 25;
166: }
167: break;
168: case 6:
169: for (i=0; i<mbs; i++) {
170: odiag = v + 36*diag_offset[i];
171: PetscArraycpy(diag,odiag,36);
172: PetscKernel_A_gets_inverse_A_6(diag,shift,allowzeropivot,&zeropivotdetected);
173: if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
174: diag += 36;
175: }
176: break;
177: case 7:
178: for (i=0; i<mbs; i++) {
179: odiag = v + 49*diag_offset[i];
180: PetscArraycpy(diag,odiag,49);
181: PetscKernel_A_gets_inverse_A_7(diag,shift,allowzeropivot,&zeropivotdetected);
182: if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
183: diag += 49;
184: }
185: break;
186: default:
187: PetscMalloc2(bs,&v_work,bs,&v_pivots);
188: for (i=0; i<mbs; i++) {
189: odiag = v + bs2*diag_offset[i];
190: PetscArraycpy(diag,odiag,bs2);
191: PetscKernel_A_gets_inverse_A(bs,diag,v_pivots,v_work,allowzeropivot,&zeropivotdetected);
192: if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
193: diag += bs2;
194: }
195: PetscFree2(v_work,v_pivots);
196: }
197: a->idiagvalid = PETSC_TRUE;
198: return(0);
199: }
201: PetscErrorCode MatSOR_SeqBAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
202: {
203: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
204: PetscScalar *x,*work,*w,*workt,*t;
205: const MatScalar *v,*aa = a->a, *idiag;
206: const PetscScalar *b,*xb;
207: PetscScalar s[7], xw[7]={0}; /* avoid some compilers thinking xw is uninitialized */
208: PetscErrorCode ierr;
209: PetscInt m = a->mbs,i,i2,nz,bs = A->rmap->bs,bs2 = bs*bs,k,j,idx,it;
210: const PetscInt *diag,*ai = a->i,*aj = a->j,*vi;
213: its = its*lits;
214: if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
215: if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
216: if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for diagonal shift");
217: if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for non-trivial relaxation factor");
218: if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for applying upper or lower triangular parts");
220: if (!a->idiagvalid) {MatInvertBlockDiagonal(A,NULL);}
222: if (!m) return(0);
223: diag = a->diag;
224: idiag = a->idiag;
225: k = PetscMax(A->rmap->n,A->cmap->n);
226: if (!a->mult_work) {
227: PetscMalloc1(k+1,&a->mult_work);
228: }
229: if (!a->sor_workt) {
230: PetscMalloc1(k,&a->sor_workt);
231: }
232: if (!a->sor_work) {
233: PetscMalloc1(bs,&a->sor_work);
234: }
235: work = a->mult_work;
236: t = a->sor_workt;
237: w = a->sor_work;
239: VecGetArray(xx,&x);
240: VecGetArrayRead(bb,&b);
242: if (flag & SOR_ZERO_INITIAL_GUESS) {
243: if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
244: switch (bs) {
245: case 1:
246: PetscKernel_v_gets_A_times_w_1(x,idiag,b);
247: t[0] = b[0];
248: i2 = 1;
249: idiag += 1;
250: for (i=1; i<m; i++) {
251: v = aa + ai[i];
252: vi = aj + ai[i];
253: nz = diag[i] - ai[i];
254: s[0] = b[i2];
255: for (j=0; j<nz; j++) {
256: xw[0] = x[vi[j]];
257: PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
258: }
259: t[i2] = s[0];
260: PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
261: x[i2] = xw[0];
262: idiag += 1;
263: i2 += 1;
264: }
265: break;
266: case 2:
267: PetscKernel_v_gets_A_times_w_2(x,idiag,b);
268: t[0] = b[0]; t[1] = b[1];
269: i2 = 2;
270: idiag += 4;
271: for (i=1; i<m; i++) {
272: v = aa + 4*ai[i];
273: vi = aj + ai[i];
274: nz = diag[i] - ai[i];
275: s[0] = b[i2]; s[1] = b[i2+1];
276: for (j=0; j<nz; j++) {
277: idx = 2*vi[j];
278: it = 4*j;
279: xw[0] = x[idx]; xw[1] = x[1+idx];
280: PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
281: }
282: t[i2] = s[0]; t[i2+1] = s[1];
283: PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
284: x[i2] = xw[0]; x[i2+1] = xw[1];
285: idiag += 4;
286: i2 += 2;
287: }
288: break;
289: case 3:
290: PetscKernel_v_gets_A_times_w_3(x,idiag,b);
291: t[0] = b[0]; t[1] = b[1]; t[2] = b[2];
292: i2 = 3;
293: idiag += 9;
294: for (i=1; i<m; i++) {
295: v = aa + 9*ai[i];
296: vi = aj + ai[i];
297: nz = diag[i] - ai[i];
298: s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2];
299: while (nz--) {
300: idx = 3*(*vi++);
301: xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
302: PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
303: v += 9;
304: }
305: t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2];
306: PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
307: x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
308: idiag += 9;
309: i2 += 3;
310: }
311: break;
312: case 4:
313: PetscKernel_v_gets_A_times_w_4(x,idiag,b);
314: t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3];
315: i2 = 4;
316: idiag += 16;
317: for (i=1; i<m; i++) {
318: v = aa + 16*ai[i];
319: vi = aj + ai[i];
320: nz = diag[i] - ai[i];
321: s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3];
322: while (nz--) {
323: idx = 4*(*vi++);
324: xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
325: PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
326: v += 16;
327: }
328: t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; t[i2 + 3] = s[3];
329: PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
330: x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3];
331: idiag += 16;
332: i2 += 4;
333: }
334: break;
335: case 5:
336: PetscKernel_v_gets_A_times_w_5(x,idiag,b);
337: t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; t[4] = b[4];
338: i2 = 5;
339: idiag += 25;
340: for (i=1; i<m; i++) {
341: v = aa + 25*ai[i];
342: vi = aj + ai[i];
343: nz = diag[i] - ai[i];
344: s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4];
345: while (nz--) {
346: idx = 5*(*vi++);
347: xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
348: PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
349: v += 25;
350: }
351: t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; t[i2+3] = s[3]; t[i2+4] = s[4];
352: PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
353: x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4];
354: idiag += 25;
355: i2 += 5;
356: }
357: break;
358: case 6:
359: PetscKernel_v_gets_A_times_w_6(x,idiag,b);
360: t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; t[4] = b[4]; t[5] = b[5];
361: i2 = 6;
362: idiag += 36;
363: for (i=1; i<m; i++) {
364: v = aa + 36*ai[i];
365: vi = aj + ai[i];
366: nz = diag[i] - ai[i];
367: s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5];
368: while (nz--) {
369: idx = 6*(*vi++);
370: xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
371: xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
372: PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
373: v += 36;
374: }
375: t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2];
376: t[i2+3] = s[3]; t[i2+4] = s[4]; t[i2+5] = s[5];
377: PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
378: x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5];
379: idiag += 36;
380: i2 += 6;
381: }
382: break;
383: case 7:
384: PetscKernel_v_gets_A_times_w_7(x,idiag,b);
385: t[0] = b[0]; t[1] = b[1]; t[2] = b[2];
386: t[3] = b[3]; t[4] = b[4]; t[5] = b[5]; t[6] = b[6];
387: i2 = 7;
388: idiag += 49;
389: for (i=1; i<m; i++) {
390: v = aa + 49*ai[i];
391: vi = aj + ai[i];
392: nz = diag[i] - ai[i];
393: s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2];
394: s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6];
395: while (nz--) {
396: idx = 7*(*vi++);
397: xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
398: xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
399: PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
400: v += 49;
401: }
402: t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2];
403: t[i2+3] = s[3]; t[i2+4] = s[4]; t[i2+5] = s[5]; t[i2+6] = s[6];
404: PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
405: x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
406: x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6];
407: idiag += 49;
408: i2 += 7;
409: }
410: break;
411: default:
412: PetscKernel_w_gets_Ar_times_v(bs,bs,b,idiag,x);
413: PetscArraycpy(t,b,bs);
414: i2 = bs;
415: idiag += bs2;
416: for (i=1; i<m; i++) {
417: v = aa + bs2*ai[i];
418: vi = aj + ai[i];
419: nz = diag[i] - ai[i];
421: PetscArraycpy(w,b+i2,bs);
422: /* copy all rows of x that are needed into contiguous space */
423: workt = work;
424: for (j=0; j<nz; j++) {
425: PetscArraycpy(workt,x + bs*(*vi++),bs);
426: workt += bs;
427: }
428: PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
429: PetscArraycpy(t+i2,w,bs);
430: PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
432: idiag += bs2;
433: i2 += bs;
434: }
435: break;
436: }
437: /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
438: PetscLogFlops(1.0*bs2*a->nz);
439: xb = t;
440: }
441: else xb = b;
442: if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
443: idiag = a->idiag+bs2*(a->mbs-1);
444: i2 = bs * (m-1);
445: switch (bs) {
446: case 1:
447: s[0] = xb[i2];
448: PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
449: x[i2] = xw[0];
450: i2 -= 1;
451: for (i=m-2; i>=0; i--) {
452: v = aa + (diag[i]+1);
453: vi = aj + diag[i] + 1;
454: nz = ai[i+1] - diag[i] - 1;
455: s[0] = xb[i2];
456: for (j=0; j<nz; j++) {
457: xw[0] = x[vi[j]];
458: PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
459: }
460: PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
461: x[i2] = xw[0];
462: idiag -= 1;
463: i2 -= 1;
464: }
465: break;
466: case 2:
467: s[0] = xb[i2]; s[1] = xb[i2+1];
468: PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
469: x[i2] = xw[0]; x[i2+1] = xw[1];
470: i2 -= 2;
471: idiag -= 4;
472: for (i=m-2; i>=0; i--) {
473: v = aa + 4*(diag[i] + 1);
474: vi = aj + diag[i] + 1;
475: nz = ai[i+1] - diag[i] - 1;
476: s[0] = xb[i2]; s[1] = xb[i2+1];
477: for (j=0; j<nz; j++) {
478: idx = 2*vi[j];
479: it = 4*j;
480: xw[0] = x[idx]; xw[1] = x[1+idx];
481: PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
482: }
483: PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
484: x[i2] = xw[0]; x[i2+1] = xw[1];
485: idiag -= 4;
486: i2 -= 2;
487: }
488: break;
489: case 3:
490: s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2];
491: PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
492: x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
493: i2 -= 3;
494: idiag -= 9;
495: for (i=m-2; i>=0; i--) {
496: v = aa + 9*(diag[i]+1);
497: vi = aj + diag[i] + 1;
498: nz = ai[i+1] - diag[i] - 1;
499: s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2];
500: while (nz--) {
501: idx = 3*(*vi++);
502: xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
503: PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
504: v += 9;
505: }
506: PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
507: x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
508: idiag -= 9;
509: i2 -= 3;
510: }
511: break;
512: case 4:
513: s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3];
514: PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
515: x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3];
516: i2 -= 4;
517: idiag -= 16;
518: for (i=m-2; i>=0; i--) {
519: v = aa + 16*(diag[i]+1);
520: vi = aj + diag[i] + 1;
521: nz = ai[i+1] - diag[i] - 1;
522: s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3];
523: while (nz--) {
524: idx = 4*(*vi++);
525: xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
526: PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
527: v += 16;
528: }
529: PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
530: x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3];
531: idiag -= 16;
532: i2 -= 4;
533: }
534: break;
535: case 5:
536: s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4];
537: PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
538: x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4];
539: i2 -= 5;
540: idiag -= 25;
541: for (i=m-2; i>=0; i--) {
542: v = aa + 25*(diag[i]+1);
543: vi = aj + diag[i] + 1;
544: nz = ai[i+1] - diag[i] - 1;
545: s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4];
546: while (nz--) {
547: idx = 5*(*vi++);
548: xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
549: PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
550: v += 25;
551: }
552: PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
553: x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4];
554: idiag -= 25;
555: i2 -= 5;
556: }
557: break;
558: case 6:
559: s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5];
560: PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
561: x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5];
562: i2 -= 6;
563: idiag -= 36;
564: for (i=m-2; i>=0; i--) {
565: v = aa + 36*(diag[i]+1);
566: vi = aj + diag[i] + 1;
567: nz = ai[i+1] - diag[i] - 1;
568: s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5];
569: while (nz--) {
570: idx = 6*(*vi++);
571: xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
572: xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
573: PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
574: v += 36;
575: }
576: PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
577: x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5];
578: idiag -= 36;
579: i2 -= 6;
580: }
581: break;
582: case 7:
583: s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2];
584: s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; s[6] = xb[i2+6];
585: PetscKernel_v_gets_A_times_w_7(x,idiag,b);
586: x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
587: x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6];
588: i2 -= 7;
589: idiag -= 49;
590: for (i=m-2; i>=0; i--) {
591: v = aa + 49*(diag[i]+1);
592: vi = aj + diag[i] + 1;
593: nz = ai[i+1] - diag[i] - 1;
594: s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2];
595: s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; s[6] = xb[i2+6];
596: while (nz--) {
597: idx = 7*(*vi++);
598: xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
599: xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
600: PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
601: v += 49;
602: }
603: PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
604: x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
605: x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6];
606: idiag -= 49;
607: i2 -= 7;
608: }
609: break;
610: default:
611: PetscArraycpy(w,xb+i2,bs);
612: PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
613: i2 -= bs;
614: idiag -= bs2;
615: for (i=m-2; i>=0; i--) {
616: v = aa + bs2*(diag[i]+1);
617: vi = aj + diag[i] + 1;
618: nz = ai[i+1] - diag[i] - 1;
620: PetscArraycpy(w,xb+i2,bs);
621: /* copy all rows of x that are needed into contiguous space */
622: workt = work;
623: for (j=0; j<nz; j++) {
624: PetscArraycpy(workt,x + bs*(*vi++),bs);
625: workt += bs;
626: }
627: PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
628: PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
630: idiag -= bs2;
631: i2 -= bs;
632: }
633: break;
634: }
635: PetscLogFlops(1.0*bs2*(a->nz));
636: }
637: its--;
638: }
639: while (its--) {
640: if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
641: idiag = a->idiag;
642: i2 = 0;
643: switch (bs) {
644: case 1:
645: for (i=0; i<m; i++) {
646: v = aa + ai[i];
647: vi = aj + ai[i];
648: nz = ai[i+1] - ai[i];
649: s[0] = b[i2];
650: for (j=0; j<nz; j++) {
651: xw[0] = x[vi[j]];
652: PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
653: }
654: PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
655: x[i2] += xw[0];
656: idiag += 1;
657: i2 += 1;
658: }
659: break;
660: case 2:
661: for (i=0; i<m; i++) {
662: v = aa + 4*ai[i];
663: vi = aj + ai[i];
664: nz = ai[i+1] - ai[i];
665: s[0] = b[i2]; s[1] = b[i2+1];
666: for (j=0; j<nz; j++) {
667: idx = 2*vi[j];
668: it = 4*j;
669: xw[0] = x[idx]; xw[1] = x[1+idx];
670: PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
671: }
672: PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
673: x[i2] += xw[0]; x[i2+1] += xw[1];
674: idiag += 4;
675: i2 += 2;
676: }
677: break;
678: case 3:
679: for (i=0; i<m; i++) {
680: v = aa + 9*ai[i];
681: vi = aj + ai[i];
682: nz = ai[i+1] - ai[i];
683: s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2];
684: while (nz--) {
685: idx = 3*(*vi++);
686: xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
687: PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
688: v += 9;
689: }
690: PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
691: x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
692: idiag += 9;
693: i2 += 3;
694: }
695: break;
696: case 4:
697: for (i=0; i<m; i++) {
698: v = aa + 16*ai[i];
699: vi = aj + ai[i];
700: nz = ai[i+1] - ai[i];
701: s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3];
702: while (nz--) {
703: idx = 4*(*vi++);
704: xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
705: PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
706: v += 16;
707: }
708: PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
709: x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3];
710: idiag += 16;
711: i2 += 4;
712: }
713: break;
714: case 5:
715: for (i=0; i<m; i++) {
716: v = aa + 25*ai[i];
717: vi = aj + ai[i];
718: nz = ai[i+1] - ai[i];
719: s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4];
720: while (nz--) {
721: idx = 5*(*vi++);
722: xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
723: PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
724: v += 25;
725: }
726: PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
727: x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; x[i2+4] += xw[4];
728: idiag += 25;
729: i2 += 5;
730: }
731: break;
732: case 6:
733: for (i=0; i<m; i++) {
734: v = aa + 36*ai[i];
735: vi = aj + ai[i];
736: nz = ai[i+1] - ai[i];
737: s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5];
738: while (nz--) {
739: idx = 6*(*vi++);
740: xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
741: xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
742: PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
743: v += 36;
744: }
745: PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
746: x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
747: x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5];
748: idiag += 36;
749: i2 += 6;
750: }
751: break;
752: case 7:
753: for (i=0; i<m; i++) {
754: v = aa + 49*ai[i];
755: vi = aj + ai[i];
756: nz = ai[i+1] - ai[i];
757: s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2];
758: s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6];
759: while (nz--) {
760: idx = 7*(*vi++);
761: xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
762: xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
763: PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
764: v += 49;
765: }
766: PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
767: x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
768: x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; x[i2+6] += xw[6];
769: idiag += 49;
770: i2 += 7;
771: }
772: break;
773: default:
774: for (i=0; i<m; i++) {
775: v = aa + bs2*ai[i];
776: vi = aj + ai[i];
777: nz = ai[i+1] - ai[i];
779: PetscArraycpy(w,b+i2,bs);
780: /* copy all rows of x that are needed into contiguous space */
781: workt = work;
782: for (j=0; j<nz; j++) {
783: PetscArraycpy(workt,x + bs*(*vi++),bs);
784: workt += bs;
785: }
786: PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
787: PetscKernel_w_gets_w_plus_Ar_times_v(bs,bs,w,idiag,x+i2);
789: idiag += bs2;
790: i2 += bs;
791: }
792: break;
793: }
794: PetscLogFlops(2.0*bs2*a->nz);
795: }
796: if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
797: idiag = a->idiag+bs2*(a->mbs-1);
798: i2 = bs * (m-1);
799: switch (bs) {
800: case 1:
801: for (i=m-1; i>=0; i--) {
802: v = aa + ai[i];
803: vi = aj + ai[i];
804: nz = ai[i+1] - ai[i];
805: s[0] = b[i2];
806: for (j=0; j<nz; j++) {
807: xw[0] = x[vi[j]];
808: PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
809: }
810: PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
811: x[i2] += xw[0];
812: idiag -= 1;
813: i2 -= 1;
814: }
815: break;
816: case 2:
817: for (i=m-1; i>=0; i--) {
818: v = aa + 4*ai[i];
819: vi = aj + ai[i];
820: nz = ai[i+1] - ai[i];
821: s[0] = b[i2]; s[1] = b[i2+1];
822: for (j=0; j<nz; j++) {
823: idx = 2*vi[j];
824: it = 4*j;
825: xw[0] = x[idx]; xw[1] = x[1+idx];
826: PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
827: }
828: PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
829: x[i2] += xw[0]; x[i2+1] += xw[1];
830: idiag -= 4;
831: i2 -= 2;
832: }
833: break;
834: case 3:
835: for (i=m-1; i>=0; i--) {
836: v = aa + 9*ai[i];
837: vi = aj + ai[i];
838: nz = ai[i+1] - ai[i];
839: s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2];
840: while (nz--) {
841: idx = 3*(*vi++);
842: xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
843: PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
844: v += 9;
845: }
846: PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
847: x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
848: idiag -= 9;
849: i2 -= 3;
850: }
851: break;
852: case 4:
853: for (i=m-1; i>=0; i--) {
854: v = aa + 16*ai[i];
855: vi = aj + ai[i];
856: nz = ai[i+1] - ai[i];
857: s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3];
858: while (nz--) {
859: idx = 4*(*vi++);
860: xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
861: PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
862: v += 16;
863: }
864: PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
865: x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3];
866: idiag -= 16;
867: i2 -= 4;
868: }
869: break;
870: case 5:
871: for (i=m-1; i>=0; i--) {
872: v = aa + 25*ai[i];
873: vi = aj + ai[i];
874: nz = ai[i+1] - ai[i];
875: s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4];
876: while (nz--) {
877: idx = 5*(*vi++);
878: xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
879: PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
880: v += 25;
881: }
882: PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
883: x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; x[i2+4] += xw[4];
884: idiag -= 25;
885: i2 -= 5;
886: }
887: break;
888: case 6:
889: for (i=m-1; i>=0; i--) {
890: v = aa + 36*ai[i];
891: vi = aj + ai[i];
892: nz = ai[i+1] - ai[i];
893: s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5];
894: while (nz--) {
895: idx = 6*(*vi++);
896: xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
897: xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
898: PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
899: v += 36;
900: }
901: PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
902: x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
903: x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5];
904: idiag -= 36;
905: i2 -= 6;
906: }
907: break;
908: case 7:
909: for (i=m-1; i>=0; i--) {
910: v = aa + 49*ai[i];
911: vi = aj + ai[i];
912: nz = ai[i+1] - ai[i];
913: s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2];
914: s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6];
915: while (nz--) {
916: idx = 7*(*vi++);
917: xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
918: xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
919: PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
920: v += 49;
921: }
922: PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
923: x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
924: x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; x[i2+6] += xw[6];
925: idiag -= 49;
926: i2 -= 7;
927: }
928: break;
929: default:
930: for (i=m-1; i>=0; i--) {
931: v = aa + bs2*ai[i];
932: vi = aj + ai[i];
933: nz = ai[i+1] - ai[i];
935: PetscArraycpy(w,b+i2,bs);
936: /* copy all rows of x that are needed into contiguous space */
937: workt = work;
938: for (j=0; j<nz; j++) {
939: PetscArraycpy(workt,x + bs*(*vi++),bs);
940: workt += bs;
941: }
942: PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
943: PetscKernel_w_gets_w_plus_Ar_times_v(bs,bs,w,idiag,x+i2);
945: idiag -= bs2;
946: i2 -= bs;
947: }
948: break;
949: }
950: PetscLogFlops(2.0*bs2*(a->nz));
951: }
952: }
953: VecRestoreArray(xx,&x);
954: VecRestoreArrayRead(bb,&b);
955: return(0);
956: }
958: /*
959: Special version for direct calls from Fortran (Used in PETSc-fun3d)
960: */
961: #if defined(PETSC_HAVE_FORTRAN_CAPS)
962: #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4
963: #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
964: #define matsetvaluesblocked4_ matsetvaluesblocked4
965: #endif
967: PETSC_EXTERN void matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[])
968: {
969: Mat A = *AA;
970: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
971: PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn;
972: PetscInt *ai =a->i,*ailen=a->ilen;
973: PetscInt *aj =a->j,stepval,lastcol = -1;
974: const PetscScalar *value = v;
975: MatScalar *ap,*aa = a->a,*bap;
976: PetscErrorCode ierr;
979: if (A->rmap->bs != 4) SETERRABORT(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4");
980: stepval = (n-1)*4;
981: for (k=0; k<m; k++) { /* loop over added rows */
982: row = im[k];
983: rp = aj + ai[row];
984: ap = aa + 16*ai[row];
985: nrow = ailen[row];
986: low = 0;
987: high = nrow;
988: for (l=0; l<n; l++) { /* loop over added columns */
989: col = in[l];
990: if (col <= lastcol) low = 0;
991: else high = nrow;
992: lastcol = col;
993: value = v + k*(stepval+4 + l)*4;
994: while (high-low > 7) {
995: t = (low+high)/2;
996: if (rp[t] > col) high = t;
997: else low = t;
998: }
999: for (i=low; i<high; i++) {
1000: if (rp[i] > col) break;
1001: if (rp[i] == col) {
1002: bap = ap + 16*i;
1003: for (ii=0; ii<4; ii++,value+=stepval) {
1004: for (jj=ii; jj<16; jj+=4) {
1005: bap[jj] += *value++;
1006: }
1007: }
1008: goto noinsert2;
1009: }
1010: }
1011: N = nrow++ - 1;
1012: high++; /* added new column index thus must search to one higher than before */
1013: /* shift up all the later entries in this row */
1014: for (ii=N; ii>=i; ii--) {
1015: rp[ii+1] = rp[ii];
1016: PetscArraycpy(ap+16*(ii+1),ap+16*(ii),16);CHKERRV(ierr);
1017: }
1018: if (N >= i) {
1019: PetscArrayzero(ap+16*i,16);CHKERRV(ierr);
1020: }
1021: rp[i] = col;
1022: bap = ap + 16*i;
1023: for (ii=0; ii<4; ii++,value+=stepval) {
1024: for (jj=ii; jj<16; jj+=4) {
1025: bap[jj] = *value++;
1026: }
1027: }
1028: noinsert2:;
1029: low = i;
1030: }
1031: ailen[row] = nrow;
1032: }
1033: PetscFunctionReturnVoid();
1034: }
1036: #if defined(PETSC_HAVE_FORTRAN_CAPS)
1037: #define matsetvalues4_ MATSETVALUES4
1038: #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
1039: #define matsetvalues4_ matsetvalues4
1040: #endif
1042: PETSC_EXTERN void matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v)
1043: {
1044: Mat A = *AA;
1045: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1046: PetscInt *rp,k,low,high,t,row,nrow,i,col,l,N,n = *nn,m = *mm;
1047: PetscInt *ai=a->i,*ailen=a->ilen;
1048: PetscInt *aj=a->j,brow,bcol;
1049: PetscInt ridx,cidx,lastcol = -1;
1050: MatScalar *ap,value,*aa=a->a,*bap;
1054: for (k=0; k<m; k++) { /* loop over added rows */
1055: row = im[k]; brow = row/4;
1056: rp = aj + ai[brow];
1057: ap = aa + 16*ai[brow];
1058: nrow = ailen[brow];
1059: low = 0;
1060: high = nrow;
1061: for (l=0; l<n; l++) { /* loop over added columns */
1062: col = in[l]; bcol = col/4;
1063: ridx = row % 4; cidx = col % 4;
1064: value = v[l + k*n];
1065: if (col <= lastcol) low = 0;
1066: else high = nrow;
1067: lastcol = col;
1068: while (high-low > 7) {
1069: t = (low+high)/2;
1070: if (rp[t] > bcol) high = t;
1071: else low = t;
1072: }
1073: for (i=low; i<high; i++) {
1074: if (rp[i] > bcol) break;
1075: if (rp[i] == bcol) {
1076: bap = ap + 16*i + 4*cidx + ridx;
1077: *bap += value;
1078: goto noinsert1;
1079: }
1080: }
1081: N = nrow++ - 1;
1082: high++; /* added new column thus must search to one higher than before */
1083: /* shift up all the later entries in this row */
1084: PetscArraymove(rp+i+1,rp+i,N-i+1);CHKERRV(ierr);
1085: PetscArraymove(ap+16*i+16,ap+16*i,16*(N-i+1));CHKERRV(ierr);
1086: PetscArrayzero(ap+16*i,16);CHKERRV(ierr);
1087: rp[i] = bcol;
1088: ap[16*i + 4*cidx + ridx] = value;
1089: noinsert1:;
1090: low = i;
1091: }
1092: ailen[brow] = nrow;
1093: }
1094: PetscFunctionReturnVoid();
1095: }
1097: /*
1098: Checks for missing diagonals
1099: */
1100: PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A,PetscBool *missing,PetscInt *d)
1101: {
1102: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1104: PetscInt *diag,*ii = a->i,i;
1107: MatMarkDiagonal_SeqBAIJ(A);
1108: *missing = PETSC_FALSE;
1109: if (A->rmap->n > 0 && !ii) {
1110: *missing = PETSC_TRUE;
1111: if (d) *d = 0;
1112: PetscInfo(A,"Matrix has no entries therefore is missing diagonal\n");
1113: } else {
1114: PetscInt n;
1115: n = PetscMin(a->mbs, a->nbs);
1116: diag = a->diag;
1117: for (i=0; i<n; i++) {
1118: if (diag[i] >= ii[i+1]) {
1119: *missing = PETSC_TRUE;
1120: if (d) *d = i;
1121: PetscInfo1(A,"Matrix is missing block diagonal number %D\n",i);
1122: break;
1123: }
1124: }
1125: }
1126: return(0);
1127: }
1129: PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A)
1130: {
1131: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1133: PetscInt i,j,m = a->mbs;
1136: if (!a->diag) {
1137: PetscMalloc1(m,&a->diag);
1138: PetscLogObjectMemory((PetscObject)A,m*sizeof(PetscInt));
1139: a->free_diag = PETSC_TRUE;
1140: }
1141: for (i=0; i<m; i++) {
1142: a->diag[i] = a->i[i+1];
1143: for (j=a->i[i]; j<a->i[i+1]; j++) {
1144: if (a->j[j] == i) {
1145: a->diag[i] = j;
1146: break;
1147: }
1148: }
1149: }
1150: return(0);
1151: }
1153: static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *inia[],const PetscInt *inja[],PetscBool *done)
1154: {
1155: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1157: PetscInt i,j,n = a->mbs,nz = a->i[n],*tia,*tja,bs = A->rmap->bs,k,l,cnt;
1158: PetscInt **ia = (PetscInt**)inia,**ja = (PetscInt**)inja;
1161: *nn = n;
1162: if (!ia) return(0);
1163: if (symmetric) {
1164: MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,PETSC_TRUE,0,0,&tia,&tja);
1165: nz = tia[n];
1166: } else {
1167: tia = a->i; tja = a->j;
1168: }
1170: if (!blockcompressed && bs > 1) {
1171: (*nn) *= bs;
1172: /* malloc & create the natural set of indices */
1173: PetscMalloc1((n+1)*bs,ia);
1174: if (n) {
1175: (*ia)[0] = oshift;
1176: for (j=1; j<bs; j++) {
1177: (*ia)[j] = (tia[1]-tia[0])*bs+(*ia)[j-1];
1178: }
1179: }
1181: for (i=1; i<n; i++) {
1182: (*ia)[i*bs] = (tia[i]-tia[i-1])*bs + (*ia)[i*bs-1];
1183: for (j=1; j<bs; j++) {
1184: (*ia)[i*bs+j] = (tia[i+1]-tia[i])*bs + (*ia)[i*bs+j-1];
1185: }
1186: }
1187: if (n) {
1188: (*ia)[n*bs] = (tia[n]-tia[n-1])*bs + (*ia)[n*bs-1];
1189: }
1191: if (inja) {
1192: PetscMalloc1(nz*bs*bs,ja);
1193: cnt = 0;
1194: for (i=0; i<n; i++) {
1195: for (j=0; j<bs; j++) {
1196: for (k=tia[i]; k<tia[i+1]; k++) {
1197: for (l=0; l<bs; l++) {
1198: (*ja)[cnt++] = bs*tja[k] + l;
1199: }
1200: }
1201: }
1202: }
1203: }
1205: if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */
1206: PetscFree(tia);
1207: PetscFree(tja);
1208: }
1209: } else if (oshift == 1) {
1210: if (symmetric) {
1211: nz = tia[A->rmap->n/bs];
1212: /* add 1 to i and j indices */
1213: for (i=0; i<A->rmap->n/bs+1; i++) tia[i] = tia[i] + 1;
1214: *ia = tia;
1215: if (ja) {
1216: for (i=0; i<nz; i++) tja[i] = tja[i] + 1;
1217: *ja = tja;
1218: }
1219: } else {
1220: nz = a->i[A->rmap->n/bs];
1221: /* malloc space and add 1 to i and j indices */
1222: PetscMalloc1(A->rmap->n/bs+1,ia);
1223: for (i=0; i<A->rmap->n/bs+1; i++) (*ia)[i] = a->i[i] + 1;
1224: if (ja) {
1225: PetscMalloc1(nz,ja);
1226: for (i=0; i<nz; i++) (*ja)[i] = a->j[i] + 1;
1227: }
1228: }
1229: } else {
1230: *ia = tia;
1231: if (ja) *ja = tja;
1232: }
1233: return(0);
1234: }
1236: static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool *done)
1237: {
1241: if (!ia) return(0);
1242: if ((!blockcompressed && A->rmap->bs > 1) || (symmetric || oshift == 1)) {
1243: PetscFree(*ia);
1244: if (ja) {PetscFree(*ja);}
1245: }
1246: return(0);
1247: }
1249: PetscErrorCode MatDestroy_SeqBAIJ(Mat A)
1250: {
1251: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1255: #if defined(PETSC_USE_LOG)
1256: PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap->N,A->cmap->n,a->nz);
1257: #endif
1258: MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);
1259: ISDestroy(&a->row);
1260: ISDestroy(&a->col);
1261: if (a->free_diag) {PetscFree(a->diag);}
1262: PetscFree(a->idiag);
1263: if (a->free_imax_ilen) {PetscFree2(a->imax,a->ilen);}
1264: PetscFree(a->solve_work);
1265: PetscFree(a->mult_work);
1266: PetscFree(a->sor_workt);
1267: PetscFree(a->sor_work);
1268: ISDestroy(&a->icol);
1269: PetscFree(a->saved_values);
1270: PetscFree2(a->compressedrow.i,a->compressedrow.rindex);
1272: MatDestroy(&a->sbaijMat);
1273: MatDestroy(&a->parent);
1274: PetscFree(A->data);
1276: PetscObjectChangeTypeName((PetscObject)A,NULL);
1277: PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJGetArray_C",NULL);
1278: PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJRestoreArray_C",NULL);
1279: PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C",NULL);
1280: PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C",NULL);
1281: PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C",NULL);
1282: PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C",NULL);
1283: PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C",NULL);
1284: PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C",NULL);
1285: PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocationCSR_C",NULL);
1286: PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqbstrm_C",NULL);
1287: PetscObjectComposeFunction((PetscObject)A,"MatIsTranspose_C",NULL);
1288: #if defined(PETSC_HAVE_HYPRE)
1289: PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_hypre_C",NULL);
1290: #endif
1291: PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_is_C",NULL);
1292: return(0);
1293: }
1295: PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op,PetscBool flg)
1296: {
1297: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1301: switch (op) {
1302: case MAT_ROW_ORIENTED:
1303: a->roworiented = flg;
1304: break;
1305: case MAT_KEEP_NONZERO_PATTERN:
1306: a->keepnonzeropattern = flg;
1307: break;
1308: case MAT_NEW_NONZERO_LOCATIONS:
1309: a->nonew = (flg ? 0 : 1);
1310: break;
1311: case MAT_NEW_NONZERO_LOCATION_ERR:
1312: a->nonew = (flg ? -1 : 0);
1313: break;
1314: case MAT_NEW_NONZERO_ALLOCATION_ERR:
1315: a->nonew = (flg ? -2 : 0);
1316: break;
1317: case MAT_UNUSED_NONZERO_LOCATION_ERR:
1318: a->nounused = (flg ? -1 : 0);
1319: break;
1320: case MAT_FORCE_DIAGONAL_ENTRIES:
1321: case MAT_IGNORE_OFF_PROC_ENTRIES:
1322: case MAT_USE_HASH_TABLE:
1323: case MAT_SORTED_FULL:
1324: PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);
1325: break;
1326: case MAT_SPD:
1327: case MAT_SYMMETRIC:
1328: case MAT_STRUCTURALLY_SYMMETRIC:
1329: case MAT_HERMITIAN:
1330: case MAT_SYMMETRY_ETERNAL:
1331: case MAT_SUBMAT_SINGLEIS:
1332: case MAT_STRUCTURE_ONLY:
1333: /* These options are handled directly by MatSetOption() */
1334: break;
1335: default:
1336: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1337: }
1338: return(0);
1339: }
1341: /* used for both SeqBAIJ and SeqSBAIJ matrices */
1342: PetscErrorCode MatGetRow_SeqBAIJ_private(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v,PetscInt *ai,PetscInt *aj,PetscScalar *aa)
1343: {
1345: PetscInt itmp,i,j,k,M,bn,bp,*idx_i,bs,bs2;
1346: MatScalar *aa_i;
1347: PetscScalar *v_i;
1350: bs = A->rmap->bs;
1351: bs2 = bs*bs;
1352: if (row < 0 || row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row);
1354: bn = row/bs; /* Block number */
1355: bp = row % bs; /* Block Position */
1356: M = ai[bn+1] - ai[bn];
1357: *nz = bs*M;
1359: if (v) {
1360: *v = NULL;
1361: if (*nz) {
1362: PetscMalloc1(*nz,v);
1363: for (i=0; i<M; i++) { /* for each block in the block row */
1364: v_i = *v + i*bs;
1365: aa_i = aa + bs2*(ai[bn] + i);
1366: for (j=bp,k=0; j<bs2; j+=bs,k++) v_i[k] = aa_i[j];
1367: }
1368: }
1369: }
1371: if (idx) {
1372: *idx = NULL;
1373: if (*nz) {
1374: PetscMalloc1(*nz,idx);
1375: for (i=0; i<M; i++) { /* for each block in the block row */
1376: idx_i = *idx + i*bs;
1377: itmp = bs*aj[ai[bn] + i];
1378: for (j=0; j<bs; j++) idx_i[j] = itmp++;
1379: }
1380: }
1381: }
1382: return(0);
1383: }
1385: PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1386: {
1387: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1391: MatGetRow_SeqBAIJ_private(A,row,nz,idx,v,a->i,a->j,a->a);
1392: return(0);
1393: }
1395: PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1396: {
1400: if (nz) *nz = 0;
1401: if (idx) {PetscFree(*idx);}
1402: if (v) {PetscFree(*v);}
1403: return(0);
1404: }
1406: PetscErrorCode MatTranspose_SeqBAIJ(Mat A,MatReuse reuse,Mat *B)
1407: {
1408: Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data,*at;
1409: Mat C;
1411: PetscInt i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap->bs,mbs=a->mbs,nbs=a->nbs,*atfill;
1412: PetscInt bs2=a->bs2,*ati,*atj,anzj,kr;
1413: MatScalar *ata,*aa=a->a;
1416: PetscCalloc1(1+nbs,&atfill);
1417: if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
1418: for (i=0; i<ai[mbs]; i++) atfill[aj[i]] += 1; /* count num of non-zeros in row aj[i] */
1420: MatCreate(PetscObjectComm((PetscObject)A),&C);
1421: MatSetSizes(C,A->cmap->n,A->rmap->N,A->cmap->n,A->rmap->N);
1422: MatSetType(C,((PetscObject)A)->type_name);
1423: MatSeqBAIJSetPreallocation(C,bs,0,atfill);
1425: at = (Mat_SeqBAIJ*)C->data;
1426: ati = at->i;
1427: for (i=0; i<nbs; i++) at->ilen[i] = at->imax[i] = ati[i+1] - ati[i];
1428: } else {
1429: C = *B;
1430: at = (Mat_SeqBAIJ*)C->data;
1431: ati = at->i;
1432: }
1434: atj = at->j;
1435: ata = at->a;
1437: /* Copy ati into atfill so we have locations of the next free space in atj */
1438: PetscArraycpy(atfill,ati,nbs);
1440: /* Walk through A row-wise and mark nonzero entries of A^T. */
1441: for (i=0; i<mbs; i++) {
1442: anzj = ai[i+1] - ai[i];
1443: for (j=0; j<anzj; j++) {
1444: atj[atfill[*aj]] = i;
1445: for (kr=0; kr<bs; kr++) {
1446: for (k=0; k<bs; k++) {
1447: ata[bs2*atfill[*aj]+k*bs+kr] = *aa++;
1448: }
1449: }
1450: atfill[*aj++] += 1;
1451: }
1452: }
1453: MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);
1454: MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);
1456: /* Clean up temporary space and complete requests. */
1457: PetscFree(atfill);
1459: if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
1460: MatSetBlockSizes(C,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));
1461: *B = C;
1462: } else {
1463: MatHeaderMerge(A,&C);
1464: }
1465: return(0);
1466: }
1468: PetscErrorCode MatIsTranspose_SeqBAIJ(Mat A,Mat B,PetscReal tol,PetscBool *f)
1469: {
1471: Mat Btrans;
1474: *f = PETSC_FALSE;
1475: MatTranspose_SeqBAIJ(A,MAT_INITIAL_MATRIX,&Btrans);
1476: MatEqual_SeqBAIJ(B,Btrans,f);
1477: MatDestroy(&Btrans);
1478: return(0);
1479: }
1481: /* Used for both SeqBAIJ and SeqSBAIJ matrices */
1482: PetscErrorCode MatView_SeqBAIJ_Binary(Mat mat,PetscViewer viewer)
1483: {
1484: Mat_SeqBAIJ *A = (Mat_SeqBAIJ*)mat->data;
1485: PetscInt header[4],M,N,m,bs,nz,cnt,i,j,k,l;
1486: PetscInt *rowlens,*colidxs;
1487: PetscScalar *matvals;
1491: PetscViewerSetUp(viewer);
1493: M = mat->rmap->N;
1494: N = mat->cmap->N;
1495: m = mat->rmap->n;
1496: bs = mat->rmap->bs;
1497: nz = bs*bs*A->nz;
1499: /* write matrix header */
1500: header[0] = MAT_FILE_CLASSID;
1501: header[1] = M; header[2] = N; header[3] = nz;
1502: PetscViewerBinaryWrite(viewer,header,4,PETSC_INT);
1504: /* store row lengths */
1505: PetscMalloc1(m,&rowlens);
1506: for (cnt=0, i=0; i<A->mbs; i++)
1507: for (j=0; j<bs; j++)
1508: rowlens[cnt++] = bs*(A->i[i+1] - A->i[i]);
1509: PetscViewerBinaryWrite(viewer,rowlens,m,PETSC_INT);
1510: PetscFree(rowlens);
1512: /* store column indices */
1513: PetscMalloc1(nz,&colidxs);
1514: for (cnt=0, i=0; i<A->mbs; i++)
1515: for (k=0; k<bs; k++)
1516: for (j=A->i[i]; j<A->i[i+1]; j++)
1517: for (l=0; l<bs; l++)
1518: colidxs[cnt++] = bs*A->j[j] + l;
1519: if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);
1520: PetscViewerBinaryWrite(viewer,colidxs,nz,PETSC_INT);
1521: PetscFree(colidxs);
1523: /* store nonzero values */
1524: PetscMalloc1(nz,&matvals);
1525: for (cnt=0, i=0; i<A->mbs; i++)
1526: for (k=0; k<bs; k++)
1527: for (j=A->i[i]; j<A->i[i+1]; j++)
1528: for (l=0; l<bs; l++)
1529: matvals[cnt++] = A->a[bs*(bs*j + l) + k];
1530: if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);
1531: PetscViewerBinaryWrite(viewer,matvals,nz,PETSC_SCALAR);
1532: PetscFree(matvals);
1534: /* write block size option to the viewer's .info file */
1535: MatView_Binary_BlockSizes(mat,viewer);
1536: return(0);
1537: }
1539: static PetscErrorCode MatView_SeqBAIJ_ASCII_structonly(Mat A,PetscViewer viewer)
1540: {
1542: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1543: PetscInt i,bs = A->rmap->bs,k;
1546: PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);
1547: for (i=0; i<a->mbs; i++) {
1548: PetscViewerASCIIPrintf(viewer,"row %D-%D:",i*bs,i*bs+bs-1);
1549: for (k=a->i[i]; k<a->i[i+1]; k++) {
1550: PetscViewerASCIIPrintf(viewer," (%D-%D) ",bs*a->j[k],bs*a->j[k]+bs-1);
1551: }
1552: PetscViewerASCIIPrintf(viewer,"\n");
1553: }
1554: PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);
1555: return(0);
1556: }
1558: static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer)
1559: {
1560: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1561: PetscErrorCode ierr;
1562: PetscInt i,j,bs = A->rmap->bs,k,l,bs2=a->bs2;
1563: PetscViewerFormat format;
1566: if (A->structure_only) {
1567: MatView_SeqBAIJ_ASCII_structonly(A,viewer);
1568: return(0);
1569: }
1571: PetscViewerGetFormat(viewer,&format);
1572: if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1573: PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);
1574: } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
1575: const char *matname;
1576: Mat aij;
1577: MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);
1578: PetscObjectGetName((PetscObject)A,&matname);
1579: PetscObjectSetName((PetscObject)aij,matname);
1580: MatView(aij,viewer);
1581: MatDestroy(&aij);
1582: } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1583: return(0);
1584: } else if (format == PETSC_VIEWER_ASCII_COMMON) {
1585: PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);
1586: for (i=0; i<a->mbs; i++) {
1587: for (j=0; j<bs; j++) {
1588: PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);
1589: for (k=a->i[i]; k<a->i[i+1]; k++) {
1590: for (l=0; l<bs; l++) {
1591: #if defined(PETSC_USE_COMPLEX)
1592: if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1593: PetscViewerASCIIPrintf(viewer," (%D, %g + %gi) ",bs*a->j[k]+l,
1594: (double)PetscRealPart(a->a[bs2*k + l*bs + j]),(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));
1595: } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1596: PetscViewerASCIIPrintf(viewer," (%D, %g - %gi) ",bs*a->j[k]+l,
1597: (double)PetscRealPart(a->a[bs2*k + l*bs + j]),-(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));
1598: } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1599: PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)PetscRealPart(a->a[bs2*k + l*bs + j]));
1600: }
1601: #else
1602: if (a->a[bs2*k + l*bs + j] != 0.0) {
1603: PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)a->a[bs2*k + l*bs + j]);
1604: }
1605: #endif
1606: }
1607: }
1608: PetscViewerASCIIPrintf(viewer,"\n");
1609: }
1610: }
1611: PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);
1612: } else {
1613: PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);
1614: for (i=0; i<a->mbs; i++) {
1615: for (j=0; j<bs; j++) {
1616: PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);
1617: for (k=a->i[i]; k<a->i[i+1]; k++) {
1618: for (l=0; l<bs; l++) {
1619: #if defined(PETSC_USE_COMPLEX)
1620: if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
1621: PetscViewerASCIIPrintf(viewer," (%D, %g + %g i) ",bs*a->j[k]+l,
1622: (double)PetscRealPart(a->a[bs2*k + l*bs + j]),(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));
1623: } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
1624: PetscViewerASCIIPrintf(viewer," (%D, %g - %g i) ",bs*a->j[k]+l,
1625: (double)PetscRealPart(a->a[bs2*k + l*bs + j]),-(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));
1626: } else {
1627: PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)PetscRealPart(a->a[bs2*k + l*bs + j]));
1628: }
1629: #else
1630: PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)a->a[bs2*k + l*bs + j]);
1631: #endif
1632: }
1633: }
1634: PetscViewerASCIIPrintf(viewer,"\n");
1635: }
1636: }
1637: PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);
1638: }
1639: PetscViewerFlush(viewer);
1640: return(0);
1641: }
1643: #include <petscdraw.h>
1644: static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
1645: {
1646: Mat A = (Mat) Aa;
1647: Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data;
1648: PetscErrorCode ierr;
1649: PetscInt row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap->bs,bs2=a->bs2;
1650: PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r;
1651: MatScalar *aa;
1652: PetscViewer viewer;
1653: PetscViewerFormat format;
1656: PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);
1657: PetscViewerGetFormat(viewer,&format);
1658: PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);
1660: /* loop over matrix elements drawing boxes */
1662: if (format != PETSC_VIEWER_DRAW_CONTOUR) {
1663: PetscDrawCollectiveBegin(draw);
1664: /* Blue for negative, Cyan for zero and Red for positive */
1665: color = PETSC_DRAW_BLUE;
1666: for (i=0,row=0; i<mbs; i++,row+=bs) {
1667: for (j=a->i[i]; j<a->i[i+1]; j++) {
1668: y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1669: x_l = a->j[j]*bs; x_r = x_l + 1.0;
1670: aa = a->a + j*bs2;
1671: for (k=0; k<bs; k++) {
1672: for (l=0; l<bs; l++) {
1673: if (PetscRealPart(*aa++) >= 0.) continue;
1674: PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
1675: }
1676: }
1677: }
1678: }
1679: color = PETSC_DRAW_CYAN;
1680: for (i=0,row=0; i<mbs; i++,row+=bs) {
1681: for (j=a->i[i]; j<a->i[i+1]; j++) {
1682: y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1683: x_l = a->j[j]*bs; x_r = x_l + 1.0;
1684: aa = a->a + j*bs2;
1685: for (k=0; k<bs; k++) {
1686: for (l=0; l<bs; l++) {
1687: if (PetscRealPart(*aa++) != 0.) continue;
1688: PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
1689: }
1690: }
1691: }
1692: }
1693: color = PETSC_DRAW_RED;
1694: for (i=0,row=0; i<mbs; i++,row+=bs) {
1695: for (j=a->i[i]; j<a->i[i+1]; j++) {
1696: y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1697: x_l = a->j[j]*bs; x_r = x_l + 1.0;
1698: aa = a->a + j*bs2;
1699: for (k=0; k<bs; k++) {
1700: for (l=0; l<bs; l++) {
1701: if (PetscRealPart(*aa++) <= 0.) continue;
1702: PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
1703: }
1704: }
1705: }
1706: }
1707: PetscDrawCollectiveEnd(draw);
1708: } else {
1709: /* use contour shading to indicate magnitude of values */
1710: /* first determine max of all nonzero values */
1711: PetscReal minv = 0.0, maxv = 0.0;
1712: PetscDraw popup;
1714: for (i=0; i<a->nz*a->bs2; i++) {
1715: if (PetscAbsScalar(a->a[i]) > maxv) maxv = PetscAbsScalar(a->a[i]);
1716: }
1717: if (minv >= maxv) maxv = minv + PETSC_SMALL;
1718: PetscDrawGetPopup(draw,&popup);
1719: PetscDrawScalePopup(popup,0.0,maxv);
1721: PetscDrawCollectiveBegin(draw);
1722: for (i=0,row=0; i<mbs; i++,row+=bs) {
1723: for (j=a->i[i]; j<a->i[i+1]; j++) {
1724: y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1725: x_l = a->j[j]*bs; x_r = x_l + 1.0;
1726: aa = a->a + j*bs2;
1727: for (k=0; k<bs; k++) {
1728: for (l=0; l<bs; l++) {
1729: MatScalar v = *aa++;
1730: color = PetscDrawRealToColor(PetscAbsScalar(v),minv,maxv);
1731: PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
1732: }
1733: }
1734: }
1735: }
1736: PetscDrawCollectiveEnd(draw);
1737: }
1738: return(0);
1739: }
1741: static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer)
1742: {
1744: PetscReal xl,yl,xr,yr,w,h;
1745: PetscDraw draw;
1746: PetscBool isnull;
1749: PetscViewerDrawGetDraw(viewer,0,&draw);
1750: PetscDrawIsNull(draw,&isnull);
1751: if (isnull) return(0);
1753: xr = A->cmap->n; yr = A->rmap->N; h = yr/10.0; w = xr/10.0;
1754: xr += w; yr += h; xl = -w; yl = -h;
1755: PetscDrawSetCoordinates(draw,xl,yl,xr,yr);
1756: PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);
1757: PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);
1758: PetscObjectCompose((PetscObject)A,"Zoomviewer",NULL);
1759: PetscDrawSave(draw);
1760: return(0);
1761: }
1763: PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer)
1764: {
1766: PetscBool iascii,isbinary,isdraw;
1769: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);
1770: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);
1771: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);
1772: if (iascii) {
1773: MatView_SeqBAIJ_ASCII(A,viewer);
1774: } else if (isbinary) {
1775: MatView_SeqBAIJ_Binary(A,viewer);
1776: } else if (isdraw) {
1777: MatView_SeqBAIJ_Draw(A,viewer);
1778: } else {
1779: Mat B;
1780: MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);
1781: MatView(B,viewer);
1782: MatDestroy(&B);
1783: }
1784: return(0);
1785: }
1787: PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[])
1788: {
1789: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1790: PetscInt *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
1791: PetscInt *ai = a->i,*ailen = a->ilen;
1792: PetscInt brow,bcol,ridx,cidx,bs=A->rmap->bs,bs2=a->bs2;
1793: MatScalar *ap,*aa = a->a;
1796: for (k=0; k<m; k++) { /* loop over rows */
1797: row = im[k]; brow = row/bs;
1798: if (row < 0) {v += n; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
1799: if (row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row);
1800: rp = aj + ai[brow]; ap = aa + bs2*ai[brow];
1801: nrow = ailen[brow];
1802: for (l=0; l<n; l++) { /* loop over columns */
1803: if (in[l] < 0) {v++; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
1804: if (in[l] >= A->cmap->n) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]);
1805: col = in[l];
1806: bcol = col/bs;
1807: cidx = col%bs;
1808: ridx = row%bs;
1809: high = nrow;
1810: low = 0; /* assume unsorted */
1811: while (high-low > 5) {
1812: t = (low+high)/2;
1813: if (rp[t] > bcol) high = t;
1814: else low = t;
1815: }
1816: for (i=low; i<high; i++) {
1817: if (rp[i] > bcol) break;
1818: if (rp[i] == bcol) {
1819: *v++ = ap[bs2*i+bs*cidx+ridx];
1820: goto finished;
1821: }
1822: }
1823: *v++ = 0.0;
1824: finished:;
1825: }
1826: }
1827: return(0);
1828: }
1830: PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
1831: {
1832: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1833: PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1;
1834: PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen;
1835: PetscErrorCode ierr;
1836: PetscInt *aj =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs,stepval;
1837: PetscBool roworiented=a->roworiented;
1838: const PetscScalar *value = v;
1839: MatScalar *ap=NULL,*aa = a->a,*bap;
1842: if (roworiented) {
1843: stepval = (n-1)*bs;
1844: } else {
1845: stepval = (m-1)*bs;
1846: }
1847: for (k=0; k<m; k++) { /* loop over added rows */
1848: row = im[k];
1849: if (row < 0) continue;
1850: if (PetscUnlikelyDebug(row >= a->mbs)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block row index too large %D max %D",row,a->mbs-1);
1851: rp = aj + ai[row];
1852: if (!A->structure_only) ap = aa + bs2*ai[row];
1853: rmax = imax[row];
1854: nrow = ailen[row];
1855: low = 0;
1856: high = nrow;
1857: for (l=0; l<n; l++) { /* loop over added columns */
1858: if (in[l] < 0) continue;
1859: if (PetscUnlikelyDebug(in[l] >= a->nbs)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block column index too large %D max %D",in[l],a->nbs-1);
1860: col = in[l];
1861: if (!A->structure_only) {
1862: if (roworiented) {
1863: value = v + (k*(stepval+bs) + l)*bs;
1864: } else {
1865: value = v + (l*(stepval+bs) + k)*bs;
1866: }
1867: }
1868: if (col <= lastcol) low = 0;
1869: else high = nrow;
1870: lastcol = col;
1871: while (high-low > 7) {
1872: t = (low+high)/2;
1873: if (rp[t] > col) high = t;
1874: else low = t;
1875: }
1876: for (i=low; i<high; i++) {
1877: if (rp[i] > col) break;
1878: if (rp[i] == col) {
1879: if (A->structure_only) goto noinsert2;
1880: bap = ap + bs2*i;
1881: if (roworiented) {
1882: if (is == ADD_VALUES) {
1883: for (ii=0; ii<bs; ii++,value+=stepval) {
1884: for (jj=ii; jj<bs2; jj+=bs) {
1885: bap[jj] += *value++;
1886: }
1887: }
1888: } else {
1889: for (ii=0; ii<bs; ii++,value+=stepval) {
1890: for (jj=ii; jj<bs2; jj+=bs) {
1891: bap[jj] = *value++;
1892: }
1893: }
1894: }
1895: } else {
1896: if (is == ADD_VALUES) {
1897: for (ii=0; ii<bs; ii++,value+=bs+stepval) {
1898: for (jj=0; jj<bs; jj++) {
1899: bap[jj] += value[jj];
1900: }
1901: bap += bs;
1902: }
1903: } else {
1904: for (ii=0; ii<bs; ii++,value+=bs+stepval) {
1905: for (jj=0; jj<bs; jj++) {
1906: bap[jj] = value[jj];
1907: }
1908: bap += bs;
1909: }
1910: }
1911: }
1912: goto noinsert2;
1913: }
1914: }
1915: if (nonew == 1) goto noinsert2;
1916: if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new blocked index new nonzero block (%D, %D) in the matrix", row, col);
1917: if (A->structure_only) {
1918: MatSeqXAIJReallocateAIJ_structure_only(A,a->mbs,bs2,nrow,row,col,rmax,ai,aj,rp,imax,nonew,MatScalar);
1919: } else {
1920: MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
1921: }
1922: N = nrow++ - 1; high++;
1923: /* shift up all the later entries in this row */
1924: PetscArraymove(rp+i+1,rp+i,N-i+1);
1925: rp[i] = col;
1926: if (!A->structure_only) {
1927: PetscArraymove(ap+bs2*(i+1),ap+bs2*i,bs2*(N-i+1));
1928: bap = ap + bs2*i;
1929: if (roworiented) {
1930: for (ii=0; ii<bs; ii++,value+=stepval) {
1931: for (jj=ii; jj<bs2; jj+=bs) {
1932: bap[jj] = *value++;
1933: }
1934: }
1935: } else {
1936: for (ii=0; ii<bs; ii++,value+=stepval) {
1937: for (jj=0; jj<bs; jj++) {
1938: *bap++ = *value++;
1939: }
1940: }
1941: }
1942: }
1943: noinsert2:;
1944: low = i;
1945: }
1946: ailen[row] = nrow;
1947: }
1948: return(0);
1949: }
1951: PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
1952: {
1953: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1954: PetscInt fshift = 0,i,*ai = a->i,*aj = a->j,*imax = a->imax;
1955: PetscInt m = A->rmap->N,*ip,N,*ailen = a->ilen;
1957: PetscInt mbs = a->mbs,bs2 = a->bs2,rmax = 0;
1958: MatScalar *aa = a->a,*ap;
1959: PetscReal ratio=0.6;
1962: if (mode == MAT_FLUSH_ASSEMBLY) return(0);
1964: if (m) rmax = ailen[0];
1965: for (i=1; i<mbs; i++) {
1966: /* move each row back by the amount of empty slots (fshift) before it*/
1967: fshift += imax[i-1] - ailen[i-1];
1968: rmax = PetscMax(rmax,ailen[i]);
1969: if (fshift) {
1970: ip = aj + ai[i];
1971: ap = aa + bs2*ai[i];
1972: N = ailen[i];
1973: PetscArraymove(ip-fshift,ip,N);
1974: if (!A->structure_only) {
1975: PetscArraymove(ap-bs2*fshift,ap,bs2*N);
1976: }
1977: }
1978: ai[i] = ai[i-1] + ailen[i-1];
1979: }
1980: if (mbs) {
1981: fshift += imax[mbs-1] - ailen[mbs-1];
1982: ai[mbs] = ai[mbs-1] + ailen[mbs-1];
1983: }
1985: /* reset ilen and imax for each row */
1986: a->nonzerorowcnt = 0;
1987: if (A->structure_only) {
1988: PetscFree2(a->imax,a->ilen);
1989: } else { /* !A->structure_only */
1990: for (i=0; i<mbs; i++) {
1991: ailen[i] = imax[i] = ai[i+1] - ai[i];
1992: a->nonzerorowcnt += ((ai[i+1] - ai[i]) > 0);
1993: }
1994: }
1995: a->nz = ai[mbs];
1997: /* diagonals may have moved, so kill the diagonal pointers */
1998: a->idiagvalid = PETSC_FALSE;
1999: if (fshift && a->diag) {
2000: PetscFree(a->diag);
2001: PetscLogObjectMemory((PetscObject)A,-(mbs+1)*sizeof(PetscInt));
2002: a->diag = NULL;
2003: }
2004: if (fshift && a->nounused == -1) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "Unused space detected in matrix: %D X %D block size %D, %D unneeded", m, A->cmap->n, A->rmap->bs, fshift*bs2);
2005: PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap->n,A->rmap->bs,fshift*bs2,a->nz*bs2);
2006: PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);
2007: PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);
2009: A->info.mallocs += a->reallocs;
2010: a->reallocs = 0;
2011: A->info.nz_unneeded = (PetscReal)fshift*bs2;
2012: a->rmax = rmax;
2014: if (!A->structure_only) {
2015: MatCheckCompressedRow(A,a->nonzerorowcnt,&a->compressedrow,a->i,mbs,ratio);
2016: }
2017: return(0);
2018: }
2020: /*
2021: This function returns an array of flags which indicate the locations of contiguous
2022: blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9]
2023: then the resulting sizes = [3,1,1,3,1] corresponding to sets [(0,1,2),(3),(5),(6,7,8),(9)]
2024: Assume: sizes should be long enough to hold all the values.
2025: */
2026: static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max)
2027: {
2028: PetscInt i,j,k,row;
2029: PetscBool flg;
2032: for (i=0,j=0; i<n; j++) {
2033: row = idx[i];
2034: if (row%bs!=0) { /* Not the beginning of a block */
2035: sizes[j] = 1;
2036: i++;
2037: } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */
2038: sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */
2039: i++;
2040: } else { /* Begining of the block, so check if the complete block exists */
2041: flg = PETSC_TRUE;
2042: for (k=1; k<bs; k++) {
2043: if (row+k != idx[i+k]) { /* break in the block */
2044: flg = PETSC_FALSE;
2045: break;
2046: }
2047: }
2048: if (flg) { /* No break in the bs */
2049: sizes[j] = bs;
2050: i += bs;
2051: } else {
2052: sizes[j] = 1;
2053: i++;
2054: }
2055: }
2056: }
2057: *bs_max = j;
2058: return(0);
2059: }
2061: PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b)
2062: {
2063: Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data;
2064: PetscErrorCode ierr;
2065: PetscInt i,j,k,count,*rows;
2066: PetscInt bs=A->rmap->bs,bs2=baij->bs2,*sizes,row,bs_max;
2067: PetscScalar zero = 0.0;
2068: MatScalar *aa;
2069: const PetscScalar *xx;
2070: PetscScalar *bb;
2073: /* fix right hand side if needed */
2074: if (x && b) {
2075: VecGetArrayRead(x,&xx);
2076: VecGetArray(b,&bb);
2077: for (i=0; i<is_n; i++) {
2078: bb[is_idx[i]] = diag*xx[is_idx[i]];
2079: }
2080: VecRestoreArrayRead(x,&xx);
2081: VecRestoreArray(b,&bb);
2082: }
2084: /* Make a copy of the IS and sort it */
2085: /* allocate memory for rows,sizes */
2086: PetscMalloc2(is_n,&rows,2*is_n,&sizes);
2088: /* copy IS values to rows, and sort them */
2089: for (i=0; i<is_n; i++) rows[i] = is_idx[i];
2090: PetscSortInt(is_n,rows);
2092: if (baij->keepnonzeropattern) {
2093: for (i=0; i<is_n; i++) sizes[i] = 1;
2094: bs_max = is_n;
2095: } else {
2096: MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);
2097: A->nonzerostate++;
2098: }
2100: for (i=0,j=0; i<bs_max; j+=sizes[i],i++) {
2101: row = rows[j];
2102: if (row < 0 || row > A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row);
2103: count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
2104: aa = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
2105: if (sizes[i] == bs && !baij->keepnonzeropattern) {
2106: if (diag != (PetscScalar)0.0) {
2107: if (baij->ilen[row/bs] > 0) {
2108: baij->ilen[row/bs] = 1;
2109: baij->j[baij->i[row/bs]] = row/bs;
2111: PetscArrayzero(aa,count*bs);
2112: }
2113: /* Now insert all the diagonal values for this bs */
2114: for (k=0; k<bs; k++) {
2115: (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);
2116: }
2117: } else { /* (diag == 0.0) */
2118: baij->ilen[row/bs] = 0;
2119: } /* end (diag == 0.0) */
2120: } else { /* (sizes[i] != bs) */
2121: if (PetscUnlikelyDebug(sizes[i] != 1)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal Error. Value should be 1");
2122: for (k=0; k<count; k++) {
2123: aa[0] = zero;
2124: aa += bs;
2125: }
2126: if (diag != (PetscScalar)0.0) {
2127: (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);
2128: }
2129: }
2130: }
2132: PetscFree2(rows,sizes);
2133: MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);
2134: return(0);
2135: }
2137: PetscErrorCode MatZeroRowsColumns_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b)
2138: {
2139: Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data;
2140: PetscErrorCode ierr;
2141: PetscInt i,j,k,count;
2142: PetscInt bs =A->rmap->bs,bs2=baij->bs2,row,col;
2143: PetscScalar zero = 0.0;
2144: MatScalar *aa;
2145: const PetscScalar *xx;
2146: PetscScalar *bb;
2147: PetscBool *zeroed,vecs = PETSC_FALSE;
2150: /* fix right hand side if needed */
2151: if (x && b) {
2152: VecGetArrayRead(x,&xx);
2153: VecGetArray(b,&bb);
2154: vecs = PETSC_TRUE;
2155: }
2157: /* zero the columns */
2158: PetscCalloc1(A->rmap->n,&zeroed);
2159: for (i=0; i<is_n; i++) {
2160: if (is_idx[i] < 0 || is_idx[i] >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",is_idx[i]);
2161: zeroed[is_idx[i]] = PETSC_TRUE;
2162: }
2163: for (i=0; i<A->rmap->N; i++) {
2164: if (!zeroed[i]) {
2165: row = i/bs;
2166: for (j=baij->i[row]; j<baij->i[row+1]; j++) {
2167: for (k=0; k<bs; k++) {
2168: col = bs*baij->j[j] + k;
2169: if (zeroed[col]) {
2170: aa = ((MatScalar*)(baij->a)) + j*bs2 + (i%bs) + bs*k;
2171: if (vecs) bb[i] -= aa[0]*xx[col];
2172: aa[0] = 0.0;
2173: }
2174: }
2175: }
2176: } else if (vecs) bb[i] = diag*xx[i];
2177: }
2178: PetscFree(zeroed);
2179: if (vecs) {
2180: VecRestoreArrayRead(x,&xx);
2181: VecRestoreArray(b,&bb);
2182: }
2184: /* zero the rows */
2185: for (i=0; i<is_n; i++) {
2186: row = is_idx[i];
2187: count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
2188: aa = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
2189: for (k=0; k<count; k++) {
2190: aa[0] = zero;
2191: aa += bs;
2192: }
2193: if (diag != (PetscScalar)0.0) {
2194: (*A->ops->setvalues)(A,1,&row,1,&row,&diag,INSERT_VALUES);
2195: }
2196: }
2197: MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);
2198: return(0);
2199: }
2201: PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
2202: {
2203: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2204: PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1;
2205: PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen;
2206: PetscInt *aj =a->j,nonew=a->nonew,bs=A->rmap->bs,brow,bcol;
2208: PetscInt ridx,cidx,bs2=a->bs2;
2209: PetscBool roworiented=a->roworiented;
2210: MatScalar *ap=NULL,value=0.0,*aa=a->a,*bap;
2213: for (k=0; k<m; k++) { /* loop over added rows */
2214: row = im[k];
2215: brow = row/bs;
2216: if (row < 0) continue;
2217: if (PetscUnlikelyDebug(row >= A->rmap->N)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->N-1);
2218: rp = aj + ai[brow];
2219: if (!A->structure_only) ap = aa + bs2*ai[brow];
2220: rmax = imax[brow];
2221: nrow = ailen[brow];
2222: low = 0;
2223: high = nrow;
2224: for (l=0; l<n; l++) { /* loop over added columns */
2225: if (in[l] < 0) continue;
2226: if (PetscUnlikelyDebug(in[l] >= A->cmap->n)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap->n-1);
2227: col = in[l]; bcol = col/bs;
2228: ridx = row % bs; cidx = col % bs;
2229: if (!A->structure_only) {
2230: if (roworiented) {
2231: value = v[l + k*n];
2232: } else {
2233: value = v[k + l*m];
2234: }
2235: }
2236: if (col <= lastcol) low = 0; else high = nrow;
2237: lastcol = col;
2238: while (high-low > 7) {
2239: t = (low+high)/2;
2240: if (rp[t] > bcol) high = t;
2241: else low = t;
2242: }
2243: for (i=low; i<high; i++) {
2244: if (rp[i] > bcol) break;
2245: if (rp[i] == bcol) {
2246: bap = ap + bs2*i + bs*cidx + ridx;
2247: if (!A->structure_only) {
2248: if (is == ADD_VALUES) *bap += value;
2249: else *bap = value;
2250: }
2251: goto noinsert1;
2252: }
2253: }
2254: if (nonew == 1) goto noinsert1;
2255: if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
2256: if (A->structure_only) {
2257: MatSeqXAIJReallocateAIJ_structure_only(A,a->mbs,bs2,nrow,brow,bcol,rmax,ai,aj,rp,imax,nonew,MatScalar);
2258: } else {
2259: MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
2260: }
2261: N = nrow++ - 1; high++;
2262: /* shift up all the later entries in this row */
2263: PetscArraymove(rp+i+1,rp+i,N-i+1);
2264: rp[i] = bcol;
2265: if (!A->structure_only) {
2266: PetscArraymove(ap+bs2*(i+1),ap+bs2*i,bs2*(N-i+1));
2267: PetscArrayzero(ap+bs2*i,bs2);
2268: ap[bs2*i + bs*cidx + ridx] = value;
2269: }
2270: a->nz++;
2271: A->nonzerostate++;
2272: noinsert1:;
2273: low = i;
2274: }
2275: ailen[brow] = nrow;
2276: }
2277: return(0);
2278: }
2280: PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,const MatFactorInfo *info)
2281: {
2282: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inA->data;
2283: Mat outA;
2285: PetscBool row_identity,col_identity;
2288: if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU");
2289: ISIdentity(row,&row_identity);
2290: ISIdentity(col,&col_identity);
2291: if (!row_identity || !col_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU");
2293: outA = inA;
2294: inA->factortype = MAT_FACTOR_LU;
2295: PetscFree(inA->solvertype);
2296: PetscStrallocpy(MATSOLVERPETSC,&inA->solvertype);
2298: MatMarkDiagonal_SeqBAIJ(inA);
2300: PetscObjectReference((PetscObject)row);
2301: ISDestroy(&a->row);
2302: a->row = row;
2303: PetscObjectReference((PetscObject)col);
2304: ISDestroy(&a->col);
2305: a->col = col;
2307: /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
2308: ISDestroy(&a->icol);
2309: ISInvertPermutation(col,PETSC_DECIDE,&a->icol);
2310: PetscLogObjectParent((PetscObject)inA,(PetscObject)a->icol);
2312: MatSeqBAIJSetNumericFactorization_inplace(inA,(PetscBool)(row_identity && col_identity));
2313: if (!a->solve_work) {
2314: PetscMalloc1(inA->rmap->N+inA->rmap->bs,&a->solve_work);
2315: PetscLogObjectMemory((PetscObject)inA,(inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar));
2316: }
2317: MatLUFactorNumeric(outA,inA,info);
2318: return(0);
2319: }
2321: PetscErrorCode MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices)
2322: {
2323: Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)mat->data;
2324: PetscInt i,nz,mbs;
2327: nz = baij->maxnz;
2328: mbs = baij->mbs;
2329: for (i=0; i<nz; i++) {
2330: baij->j[i] = indices[i];
2331: }
2332: baij->nz = nz;
2333: for (i=0; i<mbs; i++) {
2334: baij->ilen[i] = baij->imax[i];
2335: }
2336: return(0);
2337: }
2339: /*@
2340: MatSeqBAIJSetColumnIndices - Set the column indices for all the rows
2341: in the matrix.
2343: Input Parameters:
2344: + mat - the SeqBAIJ matrix
2345: - indices - the column indices
2347: Level: advanced
2349: Notes:
2350: This can be called if you have precomputed the nonzero structure of the
2351: matrix and want to provide it to the matrix object to improve the performance
2352: of the MatSetValues() operation.
2354: You MUST have set the correct numbers of nonzeros per row in the call to
2355: MatCreateSeqBAIJ(), and the columns indices MUST be sorted.
2357: MUST be called before any calls to MatSetValues();
2359: @*/
2360: PetscErrorCode MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices)
2361: {
2367: PetscUseMethod(mat,"MatSeqBAIJSetColumnIndices_C",(Mat,PetscInt*),(mat,indices));
2368: return(0);
2369: }
2371: PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[])
2372: {
2373: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2375: PetscInt i,j,n,row,bs,*ai,*aj,mbs;
2376: PetscReal atmp;
2377: PetscScalar *x,zero = 0.0;
2378: MatScalar *aa;
2379: PetscInt ncols,brow,krow,kcol;
2382: if (A->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
2383: bs = A->rmap->bs;
2384: aa = a->a;
2385: ai = a->i;
2386: aj = a->j;
2387: mbs = a->mbs;
2389: VecSet(v,zero);
2390: VecGetArray(v,&x);
2391: VecGetLocalSize(v,&n);
2392: if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
2393: for (i=0; i<mbs; i++) {
2394: ncols = ai[1] - ai[0]; ai++;
2395: brow = bs*i;
2396: for (j=0; j<ncols; j++) {
2397: for (kcol=0; kcol<bs; kcol++) {
2398: for (krow=0; krow<bs; krow++) {
2399: atmp = PetscAbsScalar(*aa);aa++;
2400: row = brow + krow; /* row index */
2401: if (PetscAbsScalar(x[row]) < atmp) {x[row] = atmp; if (idx) idx[row] = bs*(*aj) + kcol;}
2402: }
2403: }
2404: aj++;
2405: }
2406: }
2407: VecRestoreArray(v,&x);
2408: return(0);
2409: }
2411: PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str)
2412: {
2416: /* If the two matrices have the same copy implementation, use fast copy. */
2417: if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) {
2418: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2419: Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)B->data;
2420: PetscInt ambs=a->mbs,bmbs=b->mbs,abs=A->rmap->bs,bbs=B->rmap->bs,bs2=abs*abs;
2422: if (a->i[ambs] != b->i[bmbs]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Number of nonzero blocks in matrices A %D and B %D are different",a->i[ambs],b->i[bmbs]);
2423: if (abs != bbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Block size A %D and B %D are different",abs,bbs);
2424: PetscArraycpy(b->a,a->a,bs2*a->i[ambs]);
2425: PetscObjectStateIncrease((PetscObject)B);
2426: } else {
2427: MatCopy_Basic(A,B,str);
2428: }
2429: return(0);
2430: }
2432: PetscErrorCode MatSetUp_SeqBAIJ(Mat A)
2433: {
2437: MatSeqBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,NULL);
2438: return(0);
2439: }
2441: static PetscErrorCode MatSeqBAIJGetArray_SeqBAIJ(Mat A,PetscScalar *array[])
2442: {
2443: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2446: *array = a->a;
2447: return(0);
2448: }
2450: static PetscErrorCode MatSeqBAIJRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[])
2451: {
2453: *array = NULL;
2454: return(0);
2455: }
2457: PetscErrorCode MatAXPYGetPreallocation_SeqBAIJ(Mat Y,Mat X,PetscInt *nnz)
2458: {
2459: PetscInt bs = Y->rmap->bs,mbs = Y->rmap->N/bs;
2460: Mat_SeqBAIJ *x = (Mat_SeqBAIJ*)X->data;
2461: Mat_SeqBAIJ *y = (Mat_SeqBAIJ*)Y->data;
2465: /* Set the number of nonzeros in the new matrix */
2466: MatAXPYGetPreallocation_SeqX_private(mbs,x->i,x->j,y->i,y->j,nnz);
2467: return(0);
2468: }
2470: PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2471: {
2472: Mat_SeqBAIJ *x = (Mat_SeqBAIJ*)X->data,*y = (Mat_SeqBAIJ*)Y->data;
2474: PetscInt bs=Y->rmap->bs,bs2=bs*bs;
2475: PetscBLASInt one=1;
2478: if (str == UNKNOWN_NONZERO_PATTERN || (PetscDefined(USE_DEBUG) && str == SAME_NONZERO_PATTERN)) {
2479: PetscBool e = x->nz == y->nz && x->mbs == y->mbs && bs == X->rmap->bs ? PETSC_TRUE : PETSC_FALSE;
2480: if (e) {
2481: PetscArraycmp(x->i,y->i,x->mbs+1,&e);
2482: if (e) {
2483: PetscArraycmp(x->j,y->j,x->i[x->mbs],&e);
2484: if (e) str = SAME_NONZERO_PATTERN;
2485: }
2486: }
2487: if (!e && str == SAME_NONZERO_PATTERN) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"MatStructure is not SAME_NONZERO_PATTERN");
2488: }
2489: if (str == SAME_NONZERO_PATTERN) {
2490: PetscScalar alpha = a;
2491: PetscBLASInt bnz;
2492: PetscBLASIntCast(x->nz*bs2,&bnz);
2493: PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2494: PetscObjectStateIncrease((PetscObject)Y);
2495: } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2496: MatAXPY_Basic(Y,a,X,str);
2497: } else {
2498: Mat B;
2499: PetscInt *nnz;
2500: if (bs != X->rmap->bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrices must have same block size");
2501: PetscMalloc1(Y->rmap->N,&nnz);
2502: MatCreate(PetscObjectComm((PetscObject)Y),&B);
2503: PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);
2504: MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);
2505: MatSetBlockSizesFromMats(B,Y,Y);
2506: MatSetType(B,(MatType) ((PetscObject)Y)->type_name);
2507: MatAXPYGetPreallocation_SeqBAIJ(Y,X,nnz);
2508: MatSeqBAIJSetPreallocation(B,bs,0,nnz);
2509: MatAXPY_BasicWithPreallocation(B,Y,a,X,str);
2510: MatHeaderReplace(Y,&B);
2511: PetscFree(nnz);
2512: }
2513: return(0);
2514: }
2516: PETSC_INTERN PetscErrorCode MatConjugate_SeqBAIJ(Mat A)
2517: {
2518: #if defined(PETSC_USE_COMPLEX)
2519: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2520: PetscInt i,nz = a->bs2*a->i[a->mbs];
2521: MatScalar *aa = a->a;
2524: for (i=0; i<nz; i++) aa[i] = PetscConj(aa[i]);
2525: #else
2527: #endif
2528: return(0);
2529: }
2531: PetscErrorCode MatRealPart_SeqBAIJ(Mat A)
2532: {
2533: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2534: PetscInt i,nz = a->bs2*a->i[a->mbs];
2535: MatScalar *aa = a->a;
2538: for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]);
2539: return(0);
2540: }
2542: PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A)
2543: {
2544: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2545: PetscInt i,nz = a->bs2*a->i[a->mbs];
2546: MatScalar *aa = a->a;
2549: for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]);
2550: return(0);
2551: }
2553: /*
2554: Code almost identical to MatGetColumnIJ_SeqAIJ() should share common code
2555: */
2556: PetscErrorCode MatGetColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool *done)
2557: {
2558: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2560: PetscInt bs = A->rmap->bs,i,*collengths,*cia,*cja,n = A->cmap->n/bs,m = A->rmap->n/bs;
2561: PetscInt nz = a->i[m],row,*jj,mr,col;
2564: *nn = n;
2565: if (!ia) return(0);
2566: if (symmetric) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not for BAIJ matrices");
2567: else {
2568: PetscCalloc1(n,&collengths);
2569: PetscMalloc1(n+1,&cia);
2570: PetscMalloc1(nz,&cja);
2571: jj = a->j;
2572: for (i=0; i<nz; i++) {
2573: collengths[jj[i]]++;
2574: }
2575: cia[0] = oshift;
2576: for (i=0; i<n; i++) {
2577: cia[i+1] = cia[i] + collengths[i];
2578: }
2579: PetscArrayzero(collengths,n);
2580: jj = a->j;
2581: for (row=0; row<m; row++) {
2582: mr = a->i[row+1] - a->i[row];
2583: for (i=0; i<mr; i++) {
2584: col = *jj++;
2586: cja[cia[col] + collengths[col]++ - oshift] = row + oshift;
2587: }
2588: }
2589: PetscFree(collengths);
2590: *ia = cia; *ja = cja;
2591: }
2592: return(0);
2593: }
2595: PetscErrorCode MatRestoreColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscBool *done)
2596: {
2600: if (!ia) return(0);
2601: PetscFree(*ia);
2602: PetscFree(*ja);
2603: return(0);
2604: }
2606: /*
2607: MatGetColumnIJ_SeqBAIJ_Color() and MatRestoreColumnIJ_SeqBAIJ_Color() are customized from
2608: MatGetColumnIJ_SeqBAIJ() and MatRestoreColumnIJ_SeqBAIJ() by adding an output
2609: spidx[], index of a->a, to be used in MatTransposeColoringCreate() and MatFDColoringCreate()
2610: */
2611: PetscErrorCode MatGetColumnIJ_SeqBAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscInt *spidx[],PetscBool *done)
2612: {
2613: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2615: PetscInt i,*collengths,*cia,*cja,n=a->nbs,m=a->mbs;
2616: PetscInt nz = a->i[m],row,*jj,mr,col;
2617: PetscInt *cspidx;
2620: *nn = n;
2621: if (!ia) return(0);
2623: PetscCalloc1(n,&collengths);
2624: PetscMalloc1(n+1,&cia);
2625: PetscMalloc1(nz,&cja);
2626: PetscMalloc1(nz,&cspidx);
2627: jj = a->j;
2628: for (i=0; i<nz; i++) {
2629: collengths[jj[i]]++;
2630: }
2631: cia[0] = oshift;
2632: for (i=0; i<n; i++) {
2633: cia[i+1] = cia[i] + collengths[i];
2634: }
2635: PetscArrayzero(collengths,n);
2636: jj = a->j;
2637: for (row=0; row<m; row++) {
2638: mr = a->i[row+1] - a->i[row];
2639: for (i=0; i<mr; i++) {
2640: col = *jj++;
2641: cspidx[cia[col] + collengths[col] - oshift] = a->i[row] + i; /* index of a->j */
2642: cja[cia[col] + collengths[col]++ - oshift] = row + oshift;
2643: }
2644: }
2645: PetscFree(collengths);
2646: *ia = cia;
2647: *ja = cja;
2648: *spidx = cspidx;
2649: return(0);
2650: }
2652: PetscErrorCode MatRestoreColumnIJ_SeqBAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscInt *spidx[],PetscBool *done)
2653: {
2657: MatRestoreColumnIJ_SeqBAIJ(A,oshift,symmetric,inodecompressed,n,ia,ja,done);
2658: PetscFree(*spidx);
2659: return(0);
2660: }
2662: PetscErrorCode MatShift_SeqBAIJ(Mat Y,PetscScalar a)
2663: {
2665: Mat_SeqBAIJ *aij = (Mat_SeqBAIJ*)Y->data;
2668: if (!Y->preallocated || !aij->nz) {
2669: MatSeqBAIJSetPreallocation(Y,Y->rmap->bs,1,NULL);
2670: }
2671: MatShift_Basic(Y,a);
2672: return(0);
2673: }
2675: /* -------------------------------------------------------------------*/
2676: static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ,
2677: MatGetRow_SeqBAIJ,
2678: MatRestoreRow_SeqBAIJ,
2679: MatMult_SeqBAIJ_N,
2680: /* 4*/ MatMultAdd_SeqBAIJ_N,
2681: MatMultTranspose_SeqBAIJ,
2682: MatMultTransposeAdd_SeqBAIJ,
2683: NULL,
2684: NULL,
2685: NULL,
2686: /* 10*/ NULL,
2687: MatLUFactor_SeqBAIJ,
2688: NULL,
2689: NULL,
2690: MatTranspose_SeqBAIJ,
2691: /* 15*/ MatGetInfo_SeqBAIJ,
2692: MatEqual_SeqBAIJ,
2693: MatGetDiagonal_SeqBAIJ,
2694: MatDiagonalScale_SeqBAIJ,
2695: MatNorm_SeqBAIJ,
2696: /* 20*/ NULL,
2697: MatAssemblyEnd_SeqBAIJ,
2698: MatSetOption_SeqBAIJ,
2699: MatZeroEntries_SeqBAIJ,
2700: /* 24*/ MatZeroRows_SeqBAIJ,
2701: NULL,
2702: NULL,
2703: NULL,
2704: NULL,
2705: /* 29*/ MatSetUp_SeqBAIJ,
2706: NULL,
2707: NULL,
2708: NULL,
2709: NULL,
2710: /* 34*/ MatDuplicate_SeqBAIJ,
2711: NULL,
2712: NULL,
2713: MatILUFactor_SeqBAIJ,
2714: NULL,
2715: /* 39*/ MatAXPY_SeqBAIJ,
2716: MatCreateSubMatrices_SeqBAIJ,
2717: MatIncreaseOverlap_SeqBAIJ,
2718: MatGetValues_SeqBAIJ,
2719: MatCopy_SeqBAIJ,
2720: /* 44*/ NULL,
2721: MatScale_SeqBAIJ,
2722: MatShift_SeqBAIJ,
2723: NULL,
2724: MatZeroRowsColumns_SeqBAIJ,
2725: /* 49*/ NULL,
2726: MatGetRowIJ_SeqBAIJ,
2727: MatRestoreRowIJ_SeqBAIJ,
2728: MatGetColumnIJ_SeqBAIJ,
2729: MatRestoreColumnIJ_SeqBAIJ,
2730: /* 54*/ MatFDColoringCreate_SeqXAIJ,
2731: NULL,
2732: NULL,
2733: NULL,
2734: MatSetValuesBlocked_SeqBAIJ,
2735: /* 59*/ MatCreateSubMatrix_SeqBAIJ,
2736: MatDestroy_SeqBAIJ,
2737: MatView_SeqBAIJ,
2738: NULL,
2739: NULL,
2740: /* 64*/ NULL,
2741: NULL,
2742: NULL,
2743: NULL,
2744: NULL,
2745: /* 69*/ MatGetRowMaxAbs_SeqBAIJ,
2746: NULL,
2747: MatConvert_Basic,
2748: NULL,
2749: NULL,
2750: /* 74*/ NULL,
2751: MatFDColoringApply_BAIJ,
2752: NULL,
2753: NULL,
2754: NULL,
2755: /* 79*/ NULL,
2756: NULL,
2757: NULL,
2758: NULL,
2759: MatLoad_SeqBAIJ,
2760: /* 84*/ NULL,
2761: NULL,
2762: NULL,
2763: NULL,
2764: NULL,
2765: /* 89*/ NULL,
2766: NULL,
2767: NULL,
2768: NULL,
2769: NULL,
2770: /* 94*/ NULL,
2771: NULL,
2772: NULL,
2773: NULL,
2774: NULL,
2775: /* 99*/ NULL,
2776: NULL,
2777: NULL,
2778: MatConjugate_SeqBAIJ,
2779: NULL,
2780: /*104*/ NULL,
2781: MatRealPart_SeqBAIJ,
2782: MatImaginaryPart_SeqBAIJ,
2783: NULL,
2784: NULL,
2785: /*109*/ NULL,
2786: NULL,
2787: NULL,
2788: NULL,
2789: MatMissingDiagonal_SeqBAIJ,
2790: /*114*/ NULL,
2791: NULL,
2792: NULL,
2793: NULL,
2794: NULL,
2795: /*119*/ NULL,
2796: NULL,
2797: MatMultHermitianTranspose_SeqBAIJ,
2798: MatMultHermitianTransposeAdd_SeqBAIJ,
2799: NULL,
2800: /*124*/ NULL,
2801: MatGetColumnReductions_SeqBAIJ,
2802: MatInvertBlockDiagonal_SeqBAIJ,
2803: NULL,
2804: NULL,
2805: /*129*/ NULL,
2806: NULL,
2807: NULL,
2808: NULL,
2809: NULL,
2810: /*134*/ NULL,
2811: NULL,
2812: NULL,
2813: NULL,
2814: NULL,
2815: /*139*/ MatSetBlockSizes_Default,
2816: NULL,
2817: NULL,
2818: MatFDColoringSetUp_SeqXAIJ,
2819: NULL,
2820: /*144*/MatCreateMPIMatConcatenateSeqMat_SeqBAIJ,
2821: MatDestroySubMatrices_SeqBAIJ
2822: };
2824: PetscErrorCode MatStoreValues_SeqBAIJ(Mat mat)
2825: {
2826: Mat_SeqBAIJ *aij = (Mat_SeqBAIJ*)mat->data;
2827: PetscInt nz = aij->i[aij->mbs]*aij->bs2;
2831: if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
2833: /* allocate space for values if not already there */
2834: if (!aij->saved_values) {
2835: PetscMalloc1(nz+1,&aij->saved_values);
2836: PetscLogObjectMemory((PetscObject)mat,(nz+1)*sizeof(PetscScalar));
2837: }
2839: /* copy values over */
2840: PetscArraycpy(aij->saved_values,aij->a,nz);
2841: return(0);
2842: }
2844: PetscErrorCode MatRetrieveValues_SeqBAIJ(Mat mat)
2845: {
2846: Mat_SeqBAIJ *aij = (Mat_SeqBAIJ*)mat->data;
2848: PetscInt nz = aij->i[aij->mbs]*aij->bs2;
2851: if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
2852: if (!aij->saved_values) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatStoreValues(A);first");
2854: /* copy values over */
2855: PetscArraycpy(aij->a,aij->saved_values,nz);
2856: return(0);
2857: }
2859: PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*);
2860: PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*);
2862: PetscErrorCode MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz)
2863: {
2864: Mat_SeqBAIJ *b;
2866: PetscInt i,mbs,nbs,bs2;
2867: PetscBool flg = PETSC_FALSE,skipallocation = PETSC_FALSE,realalloc = PETSC_FALSE;
2870: if (nz >= 0 || nnz) realalloc = PETSC_TRUE;
2871: if (nz == MAT_SKIP_ALLOCATION) {
2872: skipallocation = PETSC_TRUE;
2873: nz = 0;
2874: }
2876: MatSetBlockSize(B,PetscAbs(bs));
2877: PetscLayoutSetUp(B->rmap);
2878: PetscLayoutSetUp(B->cmap);
2879: PetscLayoutGetBlockSize(B->rmap,&bs);
2881: B->preallocated = PETSC_TRUE;
2883: mbs = B->rmap->n/bs;
2884: nbs = B->cmap->n/bs;
2885: bs2 = bs*bs;
2887: if (mbs*bs!=B->rmap->n || nbs*bs!=B->cmap->n) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap->N,B->cmap->n,bs);
2889: if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2890: if (nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz);
2891: if (nnz) {
2892: for (i=0; i<mbs; i++) {
2893: if (nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]);
2894: if (nnz[i] > nbs) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs);
2895: }
2896: }
2898: b = (Mat_SeqBAIJ*)B->data;
2899: PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat");
2900: PetscOptionsBool("-mat_no_unroll","Do not optimize for block size (slow)",NULL,flg,&flg,NULL);
2901: PetscOptionsEnd();
2903: if (!flg) {
2904: switch (bs) {
2905: case 1:
2906: B->ops->mult = MatMult_SeqBAIJ_1;
2907: B->ops->multadd = MatMultAdd_SeqBAIJ_1;
2908: break;
2909: case 2:
2910: B->ops->mult = MatMult_SeqBAIJ_2;
2911: B->ops->multadd = MatMultAdd_SeqBAIJ_2;
2912: break;
2913: case 3:
2914: B->ops->mult = MatMult_SeqBAIJ_3;
2915: B->ops->multadd = MatMultAdd_SeqBAIJ_3;
2916: break;
2917: case 4:
2918: B->ops->mult = MatMult_SeqBAIJ_4;
2919: B->ops->multadd = MatMultAdd_SeqBAIJ_4;
2920: break;
2921: case 5:
2922: B->ops->mult = MatMult_SeqBAIJ_5;
2923: B->ops->multadd = MatMultAdd_SeqBAIJ_5;
2924: break;
2925: case 6:
2926: B->ops->mult = MatMult_SeqBAIJ_6;
2927: B->ops->multadd = MatMultAdd_SeqBAIJ_6;
2928: break;
2929: case 7:
2930: B->ops->mult = MatMult_SeqBAIJ_7;
2931: B->ops->multadd = MatMultAdd_SeqBAIJ_7;
2932: break;
2933: case 9:
2934: {
2935: PetscInt version = 1;
2936: PetscOptionsGetInt(NULL,((PetscObject)B)->prefix,"-mat_baij_mult_version",&version,NULL);
2937: switch (version) {
2938: #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX2__) && defined(__FMA__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES)
2939: case 1:
2940: B->ops->mult = MatMult_SeqBAIJ_9_AVX2;
2941: B->ops->multadd = MatMultAdd_SeqBAIJ_9_AVX2;
2942: PetscInfo1((PetscObject)B,"Using AVX2 for MatMult for BAIJ for blocksize %D\n",bs);
2943: break;
2944: #endif
2945: default:
2946: B->ops->mult = MatMult_SeqBAIJ_N;
2947: B->ops->multadd = MatMultAdd_SeqBAIJ_N;
2948: PetscInfo1((PetscObject)B,"Using BLAS for MatMult for BAIJ for blocksize %D\n",bs);
2949: break;
2950: }
2951: break;
2952: }
2953: case 11:
2954: B->ops->mult = MatMult_SeqBAIJ_11;
2955: B->ops->multadd = MatMultAdd_SeqBAIJ_11;
2956: break;
2957: case 12:
2958: {
2959: PetscInt version = 1;
2960: PetscOptionsGetInt(NULL,((PetscObject)B)->prefix,"-mat_baij_mult_version",&version,NULL);
2961: switch (version) {
2962: case 1:
2963: B->ops->mult = MatMult_SeqBAIJ_12_ver1;
2964: B->ops->multadd = MatMultAdd_SeqBAIJ_12_ver1;
2965: PetscInfo2((PetscObject)B,"Using version %D of MatMult for BAIJ for blocksize %D\n",version,bs);
2966: break;
2967: case 2:
2968: B->ops->mult = MatMult_SeqBAIJ_12_ver2;
2969: B->ops->multadd = MatMultAdd_SeqBAIJ_12_ver2;
2970: PetscInfo2((PetscObject)B,"Using version %D of MatMult for BAIJ for blocksize %D\n",version,bs);
2971: break;
2972: #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX2__) && defined(__FMA__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES)
2973: case 3:
2974: B->ops->mult = MatMult_SeqBAIJ_12_AVX2;
2975: B->ops->multadd = MatMultAdd_SeqBAIJ_12_ver1;
2976: PetscInfo1((PetscObject)B,"Using AVX2 for MatMult for BAIJ for blocksize %D\n",bs);
2977: break;
2978: #endif
2979: default:
2980: B->ops->mult = MatMult_SeqBAIJ_N;
2981: B->ops->multadd = MatMultAdd_SeqBAIJ_N;
2982: PetscInfo1((PetscObject)B,"Using BLAS for MatMult for BAIJ for blocksize %D\n",bs);
2983: break;
2984: }
2985: break;
2986: }
2987: case 15:
2988: {
2989: PetscInt version = 1;
2990: PetscOptionsGetInt(NULL,((PetscObject)B)->prefix,"-mat_baij_mult_version",&version,NULL);
2991: switch (version) {
2992: case 1:
2993: B->ops->mult = MatMult_SeqBAIJ_15_ver1;
2994: PetscInfo2((PetscObject)B,"Using version %D of MatMult for BAIJ for blocksize %D\n",version,bs);
2995: break;
2996: case 2:
2997: B->ops->mult = MatMult_SeqBAIJ_15_ver2;
2998: PetscInfo2((PetscObject)B,"Using version %D of MatMult for BAIJ for blocksize %D\n",version,bs);
2999: break;
3000: case 3:
3001: B->ops->mult = MatMult_SeqBAIJ_15_ver3;
3002: PetscInfo2((PetscObject)B,"Using version %D of MatMult for BAIJ for blocksize %D\n",version,bs);
3003: break;
3004: case 4:
3005: B->ops->mult = MatMult_SeqBAIJ_15_ver4;
3006: PetscInfo2((PetscObject)B,"Using version %D of MatMult for BAIJ for blocksize %D\n",version,bs);
3007: break;
3008: default:
3009: B->ops->mult = MatMult_SeqBAIJ_N;
3010: PetscInfo1((PetscObject)B,"Using BLAS for MatMult for BAIJ for blocksize %D\n",bs);
3011: break;
3012: }
3013: B->ops->multadd = MatMultAdd_SeqBAIJ_N;
3014: break;
3015: }
3016: default:
3017: B->ops->mult = MatMult_SeqBAIJ_N;
3018: B->ops->multadd = MatMultAdd_SeqBAIJ_N;
3019: PetscInfo1((PetscObject)B,"Using BLAS for MatMult for BAIJ for blocksize %D\n",bs);
3020: break;
3021: }
3022: }
3023: B->ops->sor = MatSOR_SeqBAIJ;
3024: b->mbs = mbs;
3025: b->nbs = nbs;
3026: if (!skipallocation) {
3027: if (!b->imax) {
3028: PetscMalloc2(mbs,&b->imax,mbs,&b->ilen);
3029: PetscLogObjectMemory((PetscObject)B,2*mbs*sizeof(PetscInt));
3031: b->free_imax_ilen = PETSC_TRUE;
3032: }
3033: /* b->ilen will count nonzeros in each block row so far. */
3034: for (i=0; i<mbs; i++) b->ilen[i] = 0;
3035: if (!nnz) {
3036: if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
3037: else if (nz < 0) nz = 1;
3038: nz = PetscMin(nz,nbs);
3039: for (i=0; i<mbs; i++) b->imax[i] = nz;
3040: PetscIntMultError(nz,mbs,&nz);
3041: } else {
3042: PetscInt64 nz64 = 0;
3043: for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz64 += nnz[i];}
3044: PetscIntCast(nz64,&nz);
3045: }
3047: /* allocate the matrix space */
3048: MatSeqXAIJFreeAIJ(B,&b->a,&b->j,&b->i);
3049: if (B->structure_only) {
3050: PetscMalloc1(nz,&b->j);
3051: PetscMalloc1(B->rmap->N+1,&b->i);
3052: PetscLogObjectMemory((PetscObject)B,(B->rmap->N+1)*sizeof(PetscInt)+nz*sizeof(PetscInt));
3053: } else {
3054: PetscInt nzbs2 = 0;
3055: PetscIntMultError(nz,bs2,&nzbs2);
3056: PetscMalloc3(nzbs2,&b->a,nz,&b->j,B->rmap->N+1,&b->i);
3057: PetscLogObjectMemory((PetscObject)B,(B->rmap->N+1)*sizeof(PetscInt)+nz*(bs2*sizeof(PetscScalar)+sizeof(PetscInt)));
3058: PetscArrayzero(b->a,nz*bs2);
3059: }
3060: PetscArrayzero(b->j,nz);
3062: if (B->structure_only) {
3063: b->singlemalloc = PETSC_FALSE;
3064: b->free_a = PETSC_FALSE;
3065: } else {
3066: b->singlemalloc = PETSC_TRUE;
3067: b->free_a = PETSC_TRUE;
3068: }
3069: b->free_ij = PETSC_TRUE;
3071: b->i[0] = 0;
3072: for (i=1; i<mbs+1; i++) {
3073: b->i[i] = b->i[i-1] + b->imax[i-1];
3074: }
3076: } else {
3077: b->free_a = PETSC_FALSE;
3078: b->free_ij = PETSC_FALSE;
3079: }
3081: b->bs2 = bs2;
3082: b->mbs = mbs;
3083: b->nz = 0;
3084: b->maxnz = nz;
3085: B->info.nz_unneeded = (PetscReal)b->maxnz*bs2;
3086: B->was_assembled = PETSC_FALSE;
3087: B->assembled = PETSC_FALSE;
3088: if (realalloc) {MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);}
3089: return(0);
3090: }
3092: PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
3093: {
3094: PetscInt i,m,nz,nz_max=0,*nnz;
3095: PetscScalar *values=NULL;
3096: PetscBool roworiented = ((Mat_SeqBAIJ*)B->data)->roworiented;
3100: if (bs < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs);
3101: PetscLayoutSetBlockSize(B->rmap,bs);
3102: PetscLayoutSetBlockSize(B->cmap,bs);
3103: PetscLayoutSetUp(B->rmap);
3104: PetscLayoutSetUp(B->cmap);
3105: PetscLayoutGetBlockSize(B->rmap,&bs);
3106: m = B->rmap->n/bs;
3108: if (ii[0] != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %D",ii[0]);
3109: PetscMalloc1(m+1, &nnz);
3110: for (i=0; i<m; i++) {
3111: nz = ii[i+1]- ii[i];
3112: if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "Local row %D has a negative number of columns %D",i,nz);
3113: nz_max = PetscMax(nz_max, nz);
3114: nnz[i] = nz;
3115: }
3116: MatSeqBAIJSetPreallocation(B,bs,0,nnz);
3117: PetscFree(nnz);
3119: values = (PetscScalar*)V;
3120: if (!values) {
3121: PetscCalloc1(bs*bs*(nz_max+1),&values);
3122: }
3123: for (i=0; i<m; i++) {
3124: PetscInt ncols = ii[i+1] - ii[i];
3125: const PetscInt *icols = jj + ii[i];
3126: if (bs == 1 || !roworiented) {
3127: const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
3128: MatSetValuesBlocked_SeqBAIJ(B,1,&i,ncols,icols,svals,INSERT_VALUES);
3129: } else {
3130: PetscInt j;
3131: for (j=0; j<ncols; j++) {
3132: const PetscScalar *svals = values + (V ? (bs*bs*(ii[i]+j)) : 0);
3133: MatSetValuesBlocked_SeqBAIJ(B,1,&i,1,&icols[j],svals,INSERT_VALUES);
3134: }
3135: }
3136: }
3137: if (!V) { PetscFree(values); }
3138: MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
3139: MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
3140: MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);
3141: return(0);
3142: }
3144: /*@C
3145: MatSeqBAIJGetArray - gives access to the array where the data for a MATSEQBAIJ matrix is stored
3147: Not Collective
3149: Input Parameter:
3150: . mat - a MATSEQBAIJ matrix
3152: Output Parameter:
3153: . array - pointer to the data
3155: Level: intermediate
3157: .seealso: MatSeqBAIJRestoreArray(), MatSeqAIJGetArray(), MatSeqAIJRestoreArray()
3158: @*/
3159: PetscErrorCode MatSeqBAIJGetArray(Mat A,PetscScalar **array)
3160: {
3164: PetscUseMethod(A,"MatSeqBAIJGetArray_C",(Mat,PetscScalar**),(A,array));
3165: return(0);
3166: }
3168: /*@C
3169: MatSeqBAIJRestoreArray - returns access to the array where the data for a MATSEQBAIJ matrix is stored obtained by MatSeqBAIJGetArray()
3171: Not Collective
3173: Input Parameters:
3174: + mat - a MATSEQBAIJ matrix
3175: - array - pointer to the data
3177: Level: intermediate
3179: .seealso: MatSeqBAIJGetArray(), MatSeqAIJGetArray(), MatSeqAIJRestoreArray()
3180: @*/
3181: PetscErrorCode MatSeqBAIJRestoreArray(Mat A,PetscScalar **array)
3182: {
3186: PetscUseMethod(A,"MatSeqBAIJRestoreArray_C",(Mat,PetscScalar**),(A,array));
3187: return(0);
3188: }
3190: /*MC
3191: MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on
3192: block sparse compressed row format.
3194: Options Database Keys:
3195: + -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions()
3196: - -mat_baij_mult_version version - indicate the version of the matrix-vector product to use (0 often indicates using BLAS)
3198: Level: beginner
3200: Notes:
3201: MatSetOptions(,MAT_STRUCTURE_ONLY,PETSC_TRUE) may be called for this matrix type. In this no
3202: space is allocated for the nonzero entries and any entries passed with MatSetValues() are ignored
3204: Run with -info to see what version of the matrix-vector product is being used
3206: .seealso: MatCreateSeqBAIJ()
3207: M*/
3209: PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBSTRM(Mat, MatType,MatReuse,Mat*);
3211: PETSC_EXTERN PetscErrorCode MatCreate_SeqBAIJ(Mat B)
3212: {
3214: PetscMPIInt size;
3215: Mat_SeqBAIJ *b;
3218: MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);
3219: if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Comm must be of size 1");
3221: PetscNewLog(B,&b);
3222: B->data = (void*)b;
3223: PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));
3225: b->row = NULL;
3226: b->col = NULL;
3227: b->icol = NULL;
3228: b->reallocs = 0;
3229: b->saved_values = NULL;
3231: b->roworiented = PETSC_TRUE;
3232: b->nonew = 0;
3233: b->diag = NULL;
3234: B->spptr = NULL;
3235: B->info.nz_unneeded = (PetscReal)b->maxnz*b->bs2;
3236: b->keepnonzeropattern = PETSC_FALSE;
3238: PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJGetArray_C",MatSeqBAIJGetArray_SeqBAIJ);
3239: PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJRestoreArray_C",MatSeqBAIJRestoreArray_SeqBAIJ);
3240: PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_SeqBAIJ);
3241: PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_SeqBAIJ);
3242: PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",MatSeqBAIJSetColumnIndices_SeqBAIJ);
3243: PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqaij_C",MatConvert_SeqBAIJ_SeqAIJ);
3244: PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C",MatConvert_SeqBAIJ_SeqSBAIJ);
3245: PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",MatSeqBAIJSetPreallocation_SeqBAIJ);
3246: PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C",MatSeqBAIJSetPreallocationCSR_SeqBAIJ);
3247: PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_SeqBAIJ);
3248: #if defined(PETSC_HAVE_HYPRE)
3249: PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_hypre_C",MatConvert_AIJ_HYPRE);
3250: #endif
3251: PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_is_C",MatConvert_XAIJ_IS);
3252: PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);
3253: return(0);
3254: }
3256: PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C,Mat A,MatDuplicateOption cpvalues,PetscBool mallocmatspace)
3257: {
3258: Mat_SeqBAIJ *c = (Mat_SeqBAIJ*)C->data,*a = (Mat_SeqBAIJ*)A->data;
3260: PetscInt i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2;
3263: if (a->i[mbs] != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupt matrix");
3265: if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3266: c->imax = a->imax;
3267: c->ilen = a->ilen;
3268: c->free_imax_ilen = PETSC_FALSE;
3269: } else {
3270: PetscMalloc2(mbs,&c->imax,mbs,&c->ilen);
3271: PetscLogObjectMemory((PetscObject)C,2*mbs*sizeof(PetscInt));
3272: for (i=0; i<mbs; i++) {
3273: c->imax[i] = a->imax[i];
3274: c->ilen[i] = a->ilen[i];
3275: }
3276: c->free_imax_ilen = PETSC_TRUE;
3277: }
3279: /* allocate the matrix space */
3280: if (mallocmatspace) {
3281: if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3282: PetscCalloc1(bs2*nz,&c->a);
3283: PetscLogObjectMemory((PetscObject)C,a->i[mbs]*bs2*sizeof(PetscScalar));
3285: c->i = a->i;
3286: c->j = a->j;
3287: c->singlemalloc = PETSC_FALSE;
3288: c->free_a = PETSC_TRUE;
3289: c->free_ij = PETSC_FALSE;
3290: c->parent = A;
3291: C->preallocated = PETSC_TRUE;
3292: C->assembled = PETSC_TRUE;
3294: PetscObjectReference((PetscObject)A);
3295: MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);
3296: MatSetOption(C,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);
3297: } else {
3298: PetscMalloc3(bs2*nz,&c->a,nz,&c->j,mbs+1,&c->i);
3299: PetscLogObjectMemory((PetscObject)C,a->i[mbs]*(bs2*sizeof(PetscScalar)+sizeof(PetscInt))+(mbs+1)*sizeof(PetscInt));
3301: c->singlemalloc = PETSC_TRUE;
3302: c->free_a = PETSC_TRUE;
3303: c->free_ij = PETSC_TRUE;
3305: PetscArraycpy(c->i,a->i,mbs+1);
3306: if (mbs > 0) {
3307: PetscArraycpy(c->j,a->j,nz);
3308: if (cpvalues == MAT_COPY_VALUES) {
3309: PetscArraycpy(c->a,a->a,bs2*nz);
3310: } else {
3311: PetscArrayzero(c->a,bs2*nz);
3312: }
3313: }
3314: C->preallocated = PETSC_TRUE;
3315: C->assembled = PETSC_TRUE;
3316: }
3317: }
3319: c->roworiented = a->roworiented;
3320: c->nonew = a->nonew;
3322: PetscLayoutReference(A->rmap,&C->rmap);
3323: PetscLayoutReference(A->cmap,&C->cmap);
3325: c->bs2 = a->bs2;
3326: c->mbs = a->mbs;
3327: c->nbs = a->nbs;
3329: if (a->diag) {
3330: if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3331: c->diag = a->diag;
3332: c->free_diag = PETSC_FALSE;
3333: } else {
3334: PetscMalloc1(mbs+1,&c->diag);
3335: PetscLogObjectMemory((PetscObject)C,(mbs+1)*sizeof(PetscInt));
3336: for (i=0; i<mbs; i++) c->diag[i] = a->diag[i];
3337: c->free_diag = PETSC_TRUE;
3338: }
3339: } else c->diag = NULL;
3341: c->nz = a->nz;
3342: c->maxnz = a->nz; /* Since we allocate exactly the right amount */
3343: c->solve_work = NULL;
3344: c->mult_work = NULL;
3345: c->sor_workt = NULL;
3346: c->sor_work = NULL;
3348: c->compressedrow.use = a->compressedrow.use;
3349: c->compressedrow.nrows = a->compressedrow.nrows;
3350: if (a->compressedrow.use) {
3351: i = a->compressedrow.nrows;
3352: PetscMalloc2(i+1,&c->compressedrow.i,i+1,&c->compressedrow.rindex);
3353: PetscLogObjectMemory((PetscObject)C,(2*i+1)*sizeof(PetscInt));
3354: PetscArraycpy(c->compressedrow.i,a->compressedrow.i,i+1);
3355: PetscArraycpy(c->compressedrow.rindex,a->compressedrow.rindex,i);
3356: } else {
3357: c->compressedrow.use = PETSC_FALSE;
3358: c->compressedrow.i = NULL;
3359: c->compressedrow.rindex = NULL;
3360: }
3361: C->nonzerostate = A->nonzerostate;
3363: PetscFunctionListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);
3364: return(0);
3365: }
3367: PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
3368: {
3372: MatCreate(PetscObjectComm((PetscObject)A),B);
3373: MatSetSizes(*B,A->rmap->N,A->cmap->n,A->rmap->N,A->cmap->n);
3374: MatSetType(*B,MATSEQBAIJ);
3375: MatDuplicateNoCreate_SeqBAIJ(*B,A,cpvalues,PETSC_TRUE);
3376: return(0);
3377: }
3379: /* Used for both SeqBAIJ and SeqSBAIJ matrices */
3380: PetscErrorCode MatLoad_SeqBAIJ_Binary(Mat mat,PetscViewer viewer)
3381: {
3382: PetscInt header[4],M,N,nz,bs,m,n,mbs,nbs,rows,cols,sum,i,j,k;
3383: PetscInt *rowidxs,*colidxs;
3384: PetscScalar *matvals;
3388: PetscViewerSetUp(viewer);
3390: /* read matrix header */
3391: PetscViewerBinaryRead(viewer,header,4,NULL,PETSC_INT);
3392: if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Not a matrix object in file");
3393: M = header[1]; N = header[2]; nz = header[3];
3394: if (M < 0) SETERRQ1(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix row size (%D) in file is negative",M);
3395: if (N < 0) SETERRQ1(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix column size (%D) in file is negative",N);
3396: if (nz < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk, cannot load as SeqBAIJ");
3398: /* set block sizes from the viewer's .info file */
3399: MatLoad_Binary_BlockSizes(mat,viewer);
3400: /* set local and global sizes if not set already */
3401: if (mat->rmap->n < 0) mat->rmap->n = M;
3402: if (mat->cmap->n < 0) mat->cmap->n = N;
3403: if (mat->rmap->N < 0) mat->rmap->N = M;
3404: if (mat->cmap->N < 0) mat->cmap->N = N;
3405: PetscLayoutSetUp(mat->rmap);
3406: PetscLayoutSetUp(mat->cmap);
3408: /* check if the matrix sizes are correct */
3409: MatGetSize(mat,&rows,&cols);
3410: if (M != rows || N != cols) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%D, %D) than the input matrix (%D, %D)",M,N,rows,cols);
3411: MatGetBlockSize(mat,&bs);
3412: MatGetLocalSize(mat,&m,&n);
3413: mbs = m/bs; nbs = n/bs;
3415: /* read in row lengths, column indices and nonzero values */
3416: PetscMalloc1(m+1,&rowidxs);
3417: PetscViewerBinaryRead(viewer,rowidxs+1,m,NULL,PETSC_INT);
3418: rowidxs[0] = 0; for (i=0; i<m; i++) rowidxs[i+1] += rowidxs[i];
3419: sum = rowidxs[m];
3420: if (sum != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent matrix data in file: nonzeros = %D, sum-row-lengths = %D\n",nz,sum);
3422: /* read in column indices and nonzero values */
3423: PetscMalloc2(rowidxs[m],&colidxs,nz,&matvals);
3424: PetscViewerBinaryRead(viewer,colidxs,rowidxs[m],NULL,PETSC_INT);
3425: PetscViewerBinaryRead(viewer,matvals,rowidxs[m],NULL,PETSC_SCALAR);
3427: { /* preallocate matrix storage */
3428: PetscBT bt; /* helper bit set to count nonzeros */
3429: PetscInt *nnz;
3430: PetscBool sbaij;
3432: PetscBTCreate(nbs,&bt);
3433: PetscCalloc1(mbs,&nnz);
3434: PetscObjectTypeCompare((PetscObject)mat,MATSEQSBAIJ,&sbaij);
3435: for (i=0; i<mbs; i++) {
3436: PetscBTMemzero(nbs,bt);
3437: for (k=0; k<bs; k++) {
3438: PetscInt row = bs*i + k;
3439: for (j=rowidxs[row]; j<rowidxs[row+1]; j++) {
3440: PetscInt col = colidxs[j];
3441: if (!sbaij || col >= row)
3442: if (!PetscBTLookupSet(bt,col/bs)) nnz[i]++;
3443: }
3444: }
3445: }
3446: PetscBTDestroy(&bt);
3447: MatSeqBAIJSetPreallocation(mat,bs,0,nnz);
3448: MatSeqSBAIJSetPreallocation(mat,bs,0,nnz);
3449: PetscFree(nnz);
3450: }
3452: /* store matrix values */
3453: for (i=0; i<m; i++) {
3454: PetscInt row = i, s = rowidxs[i], e = rowidxs[i+1];
3455: (*mat->ops->setvalues)(mat,1,&row,e-s,colidxs+s,matvals+s,INSERT_VALUES);
3456: }
3458: PetscFree(rowidxs);
3459: PetscFree2(colidxs,matvals);
3460: MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);
3461: MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);
3462: return(0);
3463: }
3465: PetscErrorCode MatLoad_SeqBAIJ(Mat mat,PetscViewer viewer)
3466: {
3468: PetscBool isbinary;
3471: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);
3472: if (!isbinary) SETERRQ2(PetscObjectComm((PetscObject)viewer),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)mat)->type_name);
3473: MatLoad_SeqBAIJ_Binary(mat,viewer);
3474: return(0);
3475: }
3477: /*@C
3478: MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
3479: compressed row) format. For good matrix assembly performance the
3480: user should preallocate the matrix storage by setting the parameter nz
3481: (or the array nnz). By setting these parameters accurately, performance
3482: during matrix assembly can be increased by more than a factor of 50.
3484: Collective
3486: Input Parameters:
3487: + comm - MPI communicator, set to PETSC_COMM_SELF
3488: . bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
3489: blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
3490: . m - number of rows
3491: . n - number of columns
3492: . nz - number of nonzero blocks per block row (same for all rows)
3493: - nnz - array containing the number of nonzero blocks in the various block rows
3494: (possibly different for each block row) or NULL
3496: Output Parameter:
3497: . A - the matrix
3499: It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3500: MatXXXXSetPreallocation() paradigm instead of this routine directly.
3501: [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3503: Options Database Keys:
3504: + -mat_no_unroll - uses code that does not unroll the loops in the
3505: block calculations (much slower)
3506: - -mat_block_size - size of the blocks to use
3508: Level: intermediate
3510: Notes:
3511: The number of rows and columns must be divisible by blocksize.
3513: If the nnz parameter is given then the nz parameter is ignored
3515: A nonzero block is any block that as 1 or more nonzeros in it
3517: The block AIJ format is fully compatible with standard Fortran 77
3518: storage. That is, the stored row and column indices can begin at
3519: either one (as in Fortran) or zero. See the users' manual for details.
3521: Specify the preallocated storage with either nz or nnz (not both).
3522: Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory
3523: allocation. See Users-Manual: ch_mat for details.
3524: matrices.
3526: .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ()
3527: @*/
3528: PetscErrorCode MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
3529: {
3533: MatCreate(comm,A);
3534: MatSetSizes(*A,m,n,m,n);
3535: MatSetType(*A,MATSEQBAIJ);
3536: MatSeqBAIJSetPreallocation(*A,bs,nz,(PetscInt*)nnz);
3537: return(0);
3538: }
3540: /*@C
3541: MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros
3542: per row in the matrix. For good matrix assembly performance the
3543: user should preallocate the matrix storage by setting the parameter nz
3544: (or the array nnz). By setting these parameters accurately, performance
3545: during matrix assembly can be increased by more than a factor of 50.
3547: Collective
3549: Input Parameters:
3550: + B - the matrix
3551: . bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
3552: blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
3553: . nz - number of block nonzeros per block row (same for all rows)
3554: - nnz - array containing the number of block nonzeros in the various block rows
3555: (possibly different for each block row) or NULL
3557: Options Database Keys:
3558: + -mat_no_unroll - uses code that does not unroll the loops in the
3559: block calculations (much slower)
3560: - -mat_block_size - size of the blocks to use
3562: Level: intermediate
3564: Notes:
3565: If the nnz parameter is given then the nz parameter is ignored
3567: You can call MatGetInfo() to get information on how effective the preallocation was;
3568: for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3569: You can also run with the option -info and look for messages with the string
3570: malloc in them to see if additional memory allocation was needed.
3572: The block AIJ format is fully compatible with standard Fortran 77
3573: storage. That is, the stored row and column indices can begin at
3574: either one (as in Fortran) or zero. See the users' manual for details.
3576: Specify the preallocated storage with either nz or nnz (not both).
3577: Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory
3578: allocation. See Users-Manual: ch_mat for details.
3580: .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ(), MatGetInfo()
3581: @*/
3582: PetscErrorCode MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[])
3583: {
3590: PetscTryMethod(B,"MatSeqBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[]),(B,bs,nz,nnz));
3591: return(0);
3592: }
3594: /*@C
3595: MatSeqBAIJSetPreallocationCSR - Creates a sparse parallel matrix in BAIJ format using the given nonzero structure and (optional) numerical values
3597: Collective
3599: Input Parameters:
3600: + B - the matrix
3601: . i - the indices into j for the start of each local row (starts with zero)
3602: . j - the column indices for each local row (starts with zero) these must be sorted for each row
3603: - v - optional values in the matrix
3605: Level: advanced
3607: Notes:
3608: The order of the entries in values is specified by the MatOption MAT_ROW_ORIENTED. For example, C programs
3609: may want to use the default MAT_ROW_ORIENTED=PETSC_TRUE and use an array v[nnz][bs][bs] where the second index is
3610: over rows within a block and the last index is over columns within a block row. Fortran programs will likely set
3611: MAT_ROW_ORIENTED=PETSC_FALSE and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a
3612: block column and the second index is over columns within a block.
3614: Though this routine has Preallocation() in the name it also sets the exact nonzero locations of the matrix entries and usually the numerical values as well
3616: .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatSeqBAIJSetPreallocation(), MATSEQBAIJ
3617: @*/
3618: PetscErrorCode MatSeqBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3619: {
3626: PetscTryMethod(B,"MatSeqBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));
3627: return(0);
3628: }
3630: /*@
3631: MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements provided by the user.
3633: Collective
3635: Input Parameters:
3636: + comm - must be an MPI communicator of size 1
3637: . bs - size of block
3638: . m - number of rows
3639: . n - number of columns
3640: . i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row block row of the matrix
3641: . j - column indices
3642: - a - matrix values
3644: Output Parameter:
3645: . mat - the matrix
3647: Level: advanced
3649: Notes:
3650: The i, j, and a arrays are not copied by this routine, the user must free these arrays
3651: once the matrix is destroyed
3653: You cannot set new nonzero locations into this matrix, that will generate an error.
3655: The i and j indices are 0 based
3657: When block size is greater than 1 the matrix values must be stored using the BAIJ storage format (see the BAIJ code to determine this).
3659: The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is
3660: the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first
3661: block, followed by the second column of the first block etc etc. That is, the blocks are contiguous in memory
3662: with column-major ordering within blocks.
3664: .seealso: MatCreate(), MatCreateBAIJ(), MatCreateSeqBAIJ()
3666: @*/
3667: PetscErrorCode MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt i[],PetscInt j[],PetscScalar a[],Mat *mat)
3668: {
3670: PetscInt ii;
3671: Mat_SeqBAIJ *baij;
3674: if (bs != 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs);
3675: if (m > 0 && i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3677: MatCreate(comm,mat);
3678: MatSetSizes(*mat,m,n,m,n);
3679: MatSetType(*mat,MATSEQBAIJ);
3680: MatSeqBAIJSetPreallocation(*mat,bs,MAT_SKIP_ALLOCATION,NULL);
3681: baij = (Mat_SeqBAIJ*)(*mat)->data;
3682: PetscMalloc2(m,&baij->imax,m,&baij->ilen);
3683: PetscLogObjectMemory((PetscObject)*mat,2*m*sizeof(PetscInt));
3685: baij->i = i;
3686: baij->j = j;
3687: baij->a = a;
3689: baij->singlemalloc = PETSC_FALSE;
3690: baij->nonew = -1; /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/
3691: baij->free_a = PETSC_FALSE;
3692: baij->free_ij = PETSC_FALSE;
3694: for (ii=0; ii<m; ii++) {
3695: baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii];
3696: if (PetscUnlikelyDebug(i[ii+1] - i[ii] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]);
3697: }
3698: if (PetscDefined(USE_DEBUG)) {
3699: for (ii=0; ii<baij->i[m]; ii++) {
3700: if (j[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]);
3701: if (j[ii] > n - 1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]);
3702: }
3703: }
3705: MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);
3706: MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);
3707: return(0);
3708: }
3710: PetscErrorCode MatCreateMPIMatConcatenateSeqMat_SeqBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
3711: {
3713: PetscMPIInt size;
3716: MPI_Comm_size(comm,&size);
3717: if (size == 1 && scall == MAT_REUSE_MATRIX) {
3718: MatCopy(inmat,*outmat,SAME_NONZERO_PATTERN);
3719: } else {
3720: MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(comm,inmat,n,scall,outmat);
3721: }
3722: return(0);
3723: }