Actual source code: baij.c
2: /*
3: Defines the basic matrix operations for the BAIJ (compressed row)
4: matrix storage format.
5: */
6: #include <../src/mat/impls/baij/seq/baij.h>
7: #include <petscblaslapack.h>
8: #include <petsc/private/kernels/blockinvert.h>
9: #include <petsc/private/kernels/blockmatmult.h>
11: #if defined(PETSC_HAVE_HYPRE)
12: PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*);
13: #endif
15: #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
16: PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBAIJMKL(Mat,MatType,MatReuse,Mat*);
17: #endif
18: PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*);
20: PetscErrorCode MatGetColumnReductions_SeqBAIJ(Mat A,PetscInt type,PetscReal *reductions)
21: {
22: Mat_SeqBAIJ *a_aij = (Mat_SeqBAIJ*) A->data;
23: PetscInt m,n,i;
24: PetscInt ib,jb,bs = A->rmap->bs;
25: MatScalar *a_val = a_aij->a;
27: MatGetSize(A,&m,&n);
28: for (i=0; i<n; i++) reductions[i] = 0.0;
29: if (type == NORM_2) {
30: for (i=a_aij->i[0]; i<a_aij->i[A->rmap->n/bs]; i++) {
31: for (jb=0; jb<bs; jb++) {
32: for (ib=0; ib<bs; ib++) {
33: reductions[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val * *a_val);
34: a_val++;
35: }
36: }
37: }
38: } else if (type == NORM_1) {
39: for (i=a_aij->i[0]; i<a_aij->i[A->rmap->n/bs]; i++) {
40: for (jb=0; jb<bs; jb++) {
41: for (ib=0; ib<bs; ib++) {
42: reductions[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val);
43: a_val++;
44: }
45: }
46: }
47: } else if (type == NORM_INFINITY) {
48: for (i=a_aij->i[0]; i<a_aij->i[A->rmap->n/bs]; i++) {
49: for (jb=0; jb<bs; jb++) {
50: for (ib=0; ib<bs; ib++) {
51: int col = A->cmap->rstart + a_aij->j[i] * bs + jb;
52: reductions[col] = PetscMax(PetscAbsScalar(*a_val), reductions[col]);
53: a_val++;
54: }
55: }
56: }
57: } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) {
58: for (i=a_aij->i[0]; i<a_aij->i[A->rmap->n/bs]; i++) {
59: for (jb=0; jb<bs; jb++) {
60: for (ib=0; ib<bs; ib++) {
61: reductions[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscRealPart(*a_val);
62: a_val++;
63: }
64: }
65: }
66: } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) {
67: for (i=a_aij->i[0]; i<a_aij->i[A->rmap->n/bs]; i++) {
68: for (jb=0; jb<bs; jb++) {
69: for (ib=0; ib<bs; ib++) {
70: reductions[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscImaginaryPart(*a_val);
71: a_val++;
72: }
73: }
74: }
75: } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown reduction type");
76: if (type == NORM_2) {
77: for (i=0; i<n; i++) reductions[i] = PetscSqrtReal(reductions[i]);
78: } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
79: for (i=0; i<n; i++) reductions[i] /= m;
80: }
81: return 0;
82: }
84: PetscErrorCode MatInvertBlockDiagonal_SeqBAIJ(Mat A,const PetscScalar **values)
85: {
86: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*) A->data;
87: PetscInt *diag_offset,i,bs = A->rmap->bs,mbs = a->mbs,ipvt[5],bs2 = bs*bs,*v_pivots;
88: MatScalar *v = a->a,*odiag,*diag,work[25],*v_work;
89: PetscReal shift = 0.0;
90: PetscBool allowzeropivot,zeropivotdetected=PETSC_FALSE;
92: allowzeropivot = PetscNot(A->erroriffailure);
94: if (a->idiagvalid) {
95: if (values) *values = a->idiag;
96: return 0;
97: }
98: MatMarkDiagonal_SeqBAIJ(A);
99: diag_offset = a->diag;
100: if (!a->idiag) {
101: PetscMalloc1(bs2*mbs,&a->idiag);
102: PetscLogObjectMemory((PetscObject)A,bs2*mbs*sizeof(PetscScalar));
103: }
104: diag = a->idiag;
105: if (values) *values = a->idiag;
106: /* factor and invert each block */
107: switch (bs) {
108: case 1:
109: for (i=0; i<mbs; i++) {
110: odiag = v + 1*diag_offset[i];
111: diag[0] = odiag[0];
113: if (PetscAbsScalar(diag[0] + shift) < PETSC_MACHINE_EPSILON) {
114: if (allowzeropivot) {
115: A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
116: A->factorerror_zeropivot_value = PetscAbsScalar(diag[0]);
117: A->factorerror_zeropivot_row = i;
118: PetscInfo(A,"Zero pivot, row %" PetscInt_FMT "\n",i);
119: } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot, row %" PetscInt_FMT " pivot value %g tolerance %g",i,(double)PetscAbsScalar(diag[0]),(double)PETSC_MACHINE_EPSILON);
120: }
122: diag[0] = (PetscScalar)1.0 / (diag[0] + shift);
123: diag += 1;
124: }
125: break;
126: case 2:
127: for (i=0; i<mbs; i++) {
128: odiag = v + 4*diag_offset[i];
129: diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
130: PetscKernel_A_gets_inverse_A_2(diag,shift,allowzeropivot,&zeropivotdetected);
131: if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
132: diag += 4;
133: }
134: break;
135: case 3:
136: for (i=0; i<mbs; i++) {
137: odiag = v + 9*diag_offset[i];
138: diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
139: diag[4] = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7];
140: diag[8] = odiag[8];
141: PetscKernel_A_gets_inverse_A_3(diag,shift,allowzeropivot,&zeropivotdetected);
142: if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
143: diag += 9;
144: }
145: break;
146: case 4:
147: for (i=0; i<mbs; i++) {
148: odiag = v + 16*diag_offset[i];
149: PetscArraycpy(diag,odiag,16);
150: PetscKernel_A_gets_inverse_A_4(diag,shift,allowzeropivot,&zeropivotdetected);
151: if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
152: diag += 16;
153: }
154: break;
155: case 5:
156: for (i=0; i<mbs; i++) {
157: odiag = v + 25*diag_offset[i];
158: PetscArraycpy(diag,odiag,25);
159: PetscKernel_A_gets_inverse_A_5(diag,ipvt,work,shift,allowzeropivot,&zeropivotdetected);
160: if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
161: diag += 25;
162: }
163: break;
164: case 6:
165: for (i=0; i<mbs; i++) {
166: odiag = v + 36*diag_offset[i];
167: PetscArraycpy(diag,odiag,36);
168: PetscKernel_A_gets_inverse_A_6(diag,shift,allowzeropivot,&zeropivotdetected);
169: if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
170: diag += 36;
171: }
172: break;
173: case 7:
174: for (i=0; i<mbs; i++) {
175: odiag = v + 49*diag_offset[i];
176: PetscArraycpy(diag,odiag,49);
177: PetscKernel_A_gets_inverse_A_7(diag,shift,allowzeropivot,&zeropivotdetected);
178: if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
179: diag += 49;
180: }
181: break;
182: default:
183: PetscMalloc2(bs,&v_work,bs,&v_pivots);
184: for (i=0; i<mbs; i++) {
185: odiag = v + bs2*diag_offset[i];
186: PetscArraycpy(diag,odiag,bs2);
187: PetscKernel_A_gets_inverse_A(bs,diag,v_pivots,v_work,allowzeropivot,&zeropivotdetected);
188: if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
189: diag += bs2;
190: }
191: PetscFree2(v_work,v_pivots);
192: }
193: a->idiagvalid = PETSC_TRUE;
194: return 0;
195: }
197: PetscErrorCode MatSOR_SeqBAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
198: {
199: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
200: PetscScalar *x,*work,*w,*workt,*t;
201: const MatScalar *v,*aa = a->a, *idiag;
202: const PetscScalar *b,*xb;
203: PetscScalar s[7], xw[7]={0}; /* avoid some compilers thinking xw is uninitialized */
204: PetscInt m = a->mbs,i,i2,nz,bs = A->rmap->bs,bs2 = bs*bs,k,j,idx,it;
205: const PetscInt *diag,*ai = a->i,*aj = a->j,*vi;
207: its = its*lits;
214: if (!a->idiagvalid) MatInvertBlockDiagonal(A,NULL);
216: if (!m) return 0;
217: diag = a->diag;
218: idiag = a->idiag;
219: k = PetscMax(A->rmap->n,A->cmap->n);
220: if (!a->mult_work) {
221: PetscMalloc1(k+1,&a->mult_work);
222: }
223: if (!a->sor_workt) {
224: PetscMalloc1(k,&a->sor_workt);
225: }
226: if (!a->sor_work) {
227: PetscMalloc1(bs,&a->sor_work);
228: }
229: work = a->mult_work;
230: t = a->sor_workt;
231: w = a->sor_work;
233: VecGetArray(xx,&x);
234: VecGetArrayRead(bb,&b);
236: if (flag & SOR_ZERO_INITIAL_GUESS) {
237: if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
238: switch (bs) {
239: case 1:
240: PetscKernel_v_gets_A_times_w_1(x,idiag,b);
241: t[0] = b[0];
242: i2 = 1;
243: idiag += 1;
244: for (i=1; i<m; i++) {
245: v = aa + ai[i];
246: vi = aj + ai[i];
247: nz = diag[i] - ai[i];
248: s[0] = b[i2];
249: for (j=0; j<nz; j++) {
250: xw[0] = x[vi[j]];
251: PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
252: }
253: t[i2] = s[0];
254: PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
255: x[i2] = xw[0];
256: idiag += 1;
257: i2 += 1;
258: }
259: break;
260: case 2:
261: PetscKernel_v_gets_A_times_w_2(x,idiag,b);
262: t[0] = b[0]; t[1] = b[1];
263: i2 = 2;
264: idiag += 4;
265: for (i=1; i<m; i++) {
266: v = aa + 4*ai[i];
267: vi = aj + ai[i];
268: nz = diag[i] - ai[i];
269: s[0] = b[i2]; s[1] = b[i2+1];
270: for (j=0; j<nz; j++) {
271: idx = 2*vi[j];
272: it = 4*j;
273: xw[0] = x[idx]; xw[1] = x[1+idx];
274: PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
275: }
276: t[i2] = s[0]; t[i2+1] = s[1];
277: PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
278: x[i2] = xw[0]; x[i2+1] = xw[1];
279: idiag += 4;
280: i2 += 2;
281: }
282: break;
283: case 3:
284: PetscKernel_v_gets_A_times_w_3(x,idiag,b);
285: t[0] = b[0]; t[1] = b[1]; t[2] = b[2];
286: i2 = 3;
287: idiag += 9;
288: for (i=1; i<m; i++) {
289: v = aa + 9*ai[i];
290: vi = aj + ai[i];
291: nz = diag[i] - ai[i];
292: s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2];
293: while (nz--) {
294: idx = 3*(*vi++);
295: xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
296: PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
297: v += 9;
298: }
299: t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2];
300: PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
301: x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
302: idiag += 9;
303: i2 += 3;
304: }
305: break;
306: case 4:
307: PetscKernel_v_gets_A_times_w_4(x,idiag,b);
308: t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3];
309: i2 = 4;
310: idiag += 16;
311: for (i=1; i<m; i++) {
312: v = aa + 16*ai[i];
313: vi = aj + ai[i];
314: nz = diag[i] - ai[i];
315: s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3];
316: while (nz--) {
317: idx = 4*(*vi++);
318: xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
319: PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
320: v += 16;
321: }
322: t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; t[i2 + 3] = s[3];
323: PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
324: x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3];
325: idiag += 16;
326: i2 += 4;
327: }
328: break;
329: case 5:
330: PetscKernel_v_gets_A_times_w_5(x,idiag,b);
331: t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; t[4] = b[4];
332: i2 = 5;
333: idiag += 25;
334: for (i=1; i<m; i++) {
335: v = aa + 25*ai[i];
336: vi = aj + ai[i];
337: nz = diag[i] - ai[i];
338: s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4];
339: while (nz--) {
340: idx = 5*(*vi++);
341: xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
342: PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
343: v += 25;
344: }
345: t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; t[i2+3] = s[3]; t[i2+4] = s[4];
346: PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
347: x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4];
348: idiag += 25;
349: i2 += 5;
350: }
351: break;
352: case 6:
353: PetscKernel_v_gets_A_times_w_6(x,idiag,b);
354: t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; t[4] = b[4]; t[5] = b[5];
355: i2 = 6;
356: idiag += 36;
357: for (i=1; i<m; i++) {
358: v = aa + 36*ai[i];
359: vi = aj + ai[i];
360: nz = diag[i] - ai[i];
361: s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5];
362: while (nz--) {
363: idx = 6*(*vi++);
364: xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
365: xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
366: PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
367: v += 36;
368: }
369: t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2];
370: t[i2+3] = s[3]; t[i2+4] = s[4]; t[i2+5] = s[5];
371: PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
372: x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5];
373: idiag += 36;
374: i2 += 6;
375: }
376: break;
377: case 7:
378: PetscKernel_v_gets_A_times_w_7(x,idiag,b);
379: t[0] = b[0]; t[1] = b[1]; t[2] = b[2];
380: t[3] = b[3]; t[4] = b[4]; t[5] = b[5]; t[6] = b[6];
381: i2 = 7;
382: idiag += 49;
383: for (i=1; i<m; i++) {
384: v = aa + 49*ai[i];
385: vi = aj + ai[i];
386: nz = diag[i] - ai[i];
387: s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2];
388: s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6];
389: while (nz--) {
390: idx = 7*(*vi++);
391: xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
392: xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
393: PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
394: v += 49;
395: }
396: t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2];
397: t[i2+3] = s[3]; t[i2+4] = s[4]; t[i2+5] = s[5]; t[i2+6] = s[6];
398: PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
399: x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
400: x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6];
401: idiag += 49;
402: i2 += 7;
403: }
404: break;
405: default:
406: PetscKernel_w_gets_Ar_times_v(bs,bs,b,idiag,x);
407: PetscArraycpy(t,b,bs);
408: i2 = bs;
409: idiag += bs2;
410: for (i=1; i<m; i++) {
411: v = aa + bs2*ai[i];
412: vi = aj + ai[i];
413: nz = diag[i] - ai[i];
415: PetscArraycpy(w,b+i2,bs);
416: /* copy all rows of x that are needed into contiguous space */
417: workt = work;
418: for (j=0; j<nz; j++) {
419: PetscArraycpy(workt,x + bs*(*vi++),bs);
420: workt += bs;
421: }
422: PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
423: PetscArraycpy(t+i2,w,bs);
424: PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
426: idiag += bs2;
427: i2 += bs;
428: }
429: break;
430: }
431: /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
432: PetscLogFlops(1.0*bs2*a->nz);
433: xb = t;
434: }
435: else xb = b;
436: if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
437: idiag = a->idiag+bs2*(a->mbs-1);
438: i2 = bs * (m-1);
439: switch (bs) {
440: case 1:
441: s[0] = xb[i2];
442: PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
443: x[i2] = xw[0];
444: i2 -= 1;
445: for (i=m-2; i>=0; i--) {
446: v = aa + (diag[i]+1);
447: vi = aj + diag[i] + 1;
448: nz = ai[i+1] - diag[i] - 1;
449: s[0] = xb[i2];
450: for (j=0; j<nz; j++) {
451: xw[0] = x[vi[j]];
452: PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
453: }
454: PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
455: x[i2] = xw[0];
456: idiag -= 1;
457: i2 -= 1;
458: }
459: break;
460: case 2:
461: s[0] = xb[i2]; s[1] = xb[i2+1];
462: PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
463: x[i2] = xw[0]; x[i2+1] = xw[1];
464: i2 -= 2;
465: idiag -= 4;
466: for (i=m-2; i>=0; i--) {
467: v = aa + 4*(diag[i] + 1);
468: vi = aj + diag[i] + 1;
469: nz = ai[i+1] - diag[i] - 1;
470: s[0] = xb[i2]; s[1] = xb[i2+1];
471: for (j=0; j<nz; j++) {
472: idx = 2*vi[j];
473: it = 4*j;
474: xw[0] = x[idx]; xw[1] = x[1+idx];
475: PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
476: }
477: PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
478: x[i2] = xw[0]; x[i2+1] = xw[1];
479: idiag -= 4;
480: i2 -= 2;
481: }
482: break;
483: case 3:
484: s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2];
485: PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
486: x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
487: i2 -= 3;
488: idiag -= 9;
489: for (i=m-2; i>=0; i--) {
490: v = aa + 9*(diag[i]+1);
491: vi = aj + diag[i] + 1;
492: nz = ai[i+1] - diag[i] - 1;
493: s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2];
494: while (nz--) {
495: idx = 3*(*vi++);
496: xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
497: PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
498: v += 9;
499: }
500: PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
501: x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
502: idiag -= 9;
503: i2 -= 3;
504: }
505: break;
506: case 4:
507: s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3];
508: PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
509: x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3];
510: i2 -= 4;
511: idiag -= 16;
512: for (i=m-2; i>=0; i--) {
513: v = aa + 16*(diag[i]+1);
514: vi = aj + diag[i] + 1;
515: nz = ai[i+1] - diag[i] - 1;
516: s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3];
517: while (nz--) {
518: idx = 4*(*vi++);
519: xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
520: PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
521: v += 16;
522: }
523: PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
524: x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3];
525: idiag -= 16;
526: i2 -= 4;
527: }
528: break;
529: case 5:
530: s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4];
531: PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
532: x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4];
533: i2 -= 5;
534: idiag -= 25;
535: for (i=m-2; i>=0; i--) {
536: v = aa + 25*(diag[i]+1);
537: vi = aj + diag[i] + 1;
538: nz = ai[i+1] - diag[i] - 1;
539: s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4];
540: while (nz--) {
541: idx = 5*(*vi++);
542: xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
543: PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
544: v += 25;
545: }
546: PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
547: x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4];
548: idiag -= 25;
549: i2 -= 5;
550: }
551: break;
552: case 6:
553: s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5];
554: PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
555: x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5];
556: i2 -= 6;
557: idiag -= 36;
558: for (i=m-2; i>=0; i--) {
559: v = aa + 36*(diag[i]+1);
560: vi = aj + diag[i] + 1;
561: nz = ai[i+1] - diag[i] - 1;
562: s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5];
563: while (nz--) {
564: idx = 6*(*vi++);
565: xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
566: xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
567: PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
568: v += 36;
569: }
570: PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
571: x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5];
572: idiag -= 36;
573: i2 -= 6;
574: }
575: break;
576: case 7:
577: s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2];
578: s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; s[6] = xb[i2+6];
579: PetscKernel_v_gets_A_times_w_7(x,idiag,b);
580: x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
581: x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6];
582: i2 -= 7;
583: idiag -= 49;
584: for (i=m-2; i>=0; i--) {
585: v = aa + 49*(diag[i]+1);
586: vi = aj + diag[i] + 1;
587: nz = ai[i+1] - diag[i] - 1;
588: s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2];
589: s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; s[6] = xb[i2+6];
590: while (nz--) {
591: idx = 7*(*vi++);
592: xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
593: xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
594: PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
595: v += 49;
596: }
597: PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
598: x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
599: x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6];
600: idiag -= 49;
601: i2 -= 7;
602: }
603: break;
604: default:
605: PetscArraycpy(w,xb+i2,bs);
606: PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
607: i2 -= bs;
608: idiag -= bs2;
609: for (i=m-2; i>=0; i--) {
610: v = aa + bs2*(diag[i]+1);
611: vi = aj + diag[i] + 1;
612: nz = ai[i+1] - diag[i] - 1;
614: PetscArraycpy(w,xb+i2,bs);
615: /* copy all rows of x that are needed into contiguous space */
616: workt = work;
617: for (j=0; j<nz; j++) {
618: PetscArraycpy(workt,x + bs*(*vi++),bs);
619: workt += bs;
620: }
621: PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
622: PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
624: idiag -= bs2;
625: i2 -= bs;
626: }
627: break;
628: }
629: PetscLogFlops(1.0*bs2*(a->nz));
630: }
631: its--;
632: }
633: while (its--) {
634: if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
635: idiag = a->idiag;
636: i2 = 0;
637: switch (bs) {
638: case 1:
639: for (i=0; i<m; i++) {
640: v = aa + ai[i];
641: vi = aj + ai[i];
642: nz = ai[i+1] - ai[i];
643: s[0] = b[i2];
644: for (j=0; j<nz; j++) {
645: xw[0] = x[vi[j]];
646: PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
647: }
648: PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
649: x[i2] += xw[0];
650: idiag += 1;
651: i2 += 1;
652: }
653: break;
654: case 2:
655: for (i=0; i<m; i++) {
656: v = aa + 4*ai[i];
657: vi = aj + ai[i];
658: nz = ai[i+1] - ai[i];
659: s[0] = b[i2]; s[1] = b[i2+1];
660: for (j=0; j<nz; j++) {
661: idx = 2*vi[j];
662: it = 4*j;
663: xw[0] = x[idx]; xw[1] = x[1+idx];
664: PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
665: }
666: PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
667: x[i2] += xw[0]; x[i2+1] += xw[1];
668: idiag += 4;
669: i2 += 2;
670: }
671: break;
672: case 3:
673: for (i=0; i<m; i++) {
674: v = aa + 9*ai[i];
675: vi = aj + ai[i];
676: nz = ai[i+1] - ai[i];
677: s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2];
678: while (nz--) {
679: idx = 3*(*vi++);
680: xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
681: PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
682: v += 9;
683: }
684: PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
685: x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
686: idiag += 9;
687: i2 += 3;
688: }
689: break;
690: case 4:
691: for (i=0; i<m; i++) {
692: v = aa + 16*ai[i];
693: vi = aj + ai[i];
694: nz = ai[i+1] - ai[i];
695: s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3];
696: while (nz--) {
697: idx = 4*(*vi++);
698: xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
699: PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
700: v += 16;
701: }
702: PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
703: x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3];
704: idiag += 16;
705: i2 += 4;
706: }
707: break;
708: case 5:
709: for (i=0; i<m; i++) {
710: v = aa + 25*ai[i];
711: vi = aj + ai[i];
712: nz = ai[i+1] - ai[i];
713: s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4];
714: while (nz--) {
715: idx = 5*(*vi++);
716: xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
717: PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
718: v += 25;
719: }
720: PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
721: x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; x[i2+4] += xw[4];
722: idiag += 25;
723: i2 += 5;
724: }
725: break;
726: case 6:
727: for (i=0; i<m; i++) {
728: v = aa + 36*ai[i];
729: vi = aj + ai[i];
730: nz = ai[i+1] - ai[i];
731: s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5];
732: while (nz--) {
733: idx = 6*(*vi++);
734: xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
735: xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
736: PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
737: v += 36;
738: }
739: PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
740: x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
741: x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5];
742: idiag += 36;
743: i2 += 6;
744: }
745: break;
746: case 7:
747: for (i=0; i<m; i++) {
748: v = aa + 49*ai[i];
749: vi = aj + ai[i];
750: nz = ai[i+1] - ai[i];
751: s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2];
752: s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6];
753: while (nz--) {
754: idx = 7*(*vi++);
755: xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
756: xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
757: PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
758: v += 49;
759: }
760: PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
761: x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
762: x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; x[i2+6] += xw[6];
763: idiag += 49;
764: i2 += 7;
765: }
766: break;
767: default:
768: for (i=0; i<m; i++) {
769: v = aa + bs2*ai[i];
770: vi = aj + ai[i];
771: nz = ai[i+1] - ai[i];
773: PetscArraycpy(w,b+i2,bs);
774: /* copy all rows of x that are needed into contiguous space */
775: workt = work;
776: for (j=0; j<nz; j++) {
777: PetscArraycpy(workt,x + bs*(*vi++),bs);
778: workt += bs;
779: }
780: PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
781: PetscKernel_w_gets_w_plus_Ar_times_v(bs,bs,w,idiag,x+i2);
783: idiag += bs2;
784: i2 += bs;
785: }
786: break;
787: }
788: PetscLogFlops(2.0*bs2*a->nz);
789: }
790: if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
791: idiag = a->idiag+bs2*(a->mbs-1);
792: i2 = bs * (m-1);
793: switch (bs) {
794: case 1:
795: for (i=m-1; i>=0; i--) {
796: v = aa + ai[i];
797: vi = aj + ai[i];
798: nz = ai[i+1] - ai[i];
799: s[0] = b[i2];
800: for (j=0; j<nz; j++) {
801: xw[0] = x[vi[j]];
802: PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
803: }
804: PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
805: x[i2] += xw[0];
806: idiag -= 1;
807: i2 -= 1;
808: }
809: break;
810: case 2:
811: for (i=m-1; i>=0; i--) {
812: v = aa + 4*ai[i];
813: vi = aj + ai[i];
814: nz = ai[i+1] - ai[i];
815: s[0] = b[i2]; s[1] = b[i2+1];
816: for (j=0; j<nz; j++) {
817: idx = 2*vi[j];
818: it = 4*j;
819: xw[0] = x[idx]; xw[1] = x[1+idx];
820: PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
821: }
822: PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
823: x[i2] += xw[0]; x[i2+1] += xw[1];
824: idiag -= 4;
825: i2 -= 2;
826: }
827: break;
828: case 3:
829: for (i=m-1; i>=0; i--) {
830: v = aa + 9*ai[i];
831: vi = aj + ai[i];
832: nz = ai[i+1] - ai[i];
833: s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2];
834: while (nz--) {
835: idx = 3*(*vi++);
836: xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
837: PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
838: v += 9;
839: }
840: PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
841: x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
842: idiag -= 9;
843: i2 -= 3;
844: }
845: break;
846: case 4:
847: for (i=m-1; i>=0; i--) {
848: v = aa + 16*ai[i];
849: vi = aj + ai[i];
850: nz = ai[i+1] - ai[i];
851: s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3];
852: while (nz--) {
853: idx = 4*(*vi++);
854: xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
855: PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
856: v += 16;
857: }
858: PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
859: x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3];
860: idiag -= 16;
861: i2 -= 4;
862: }
863: break;
864: case 5:
865: for (i=m-1; i>=0; i--) {
866: v = aa + 25*ai[i];
867: vi = aj + ai[i];
868: nz = ai[i+1] - ai[i];
869: s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4];
870: while (nz--) {
871: idx = 5*(*vi++);
872: xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
873: PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
874: v += 25;
875: }
876: PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
877: x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; x[i2+4] += xw[4];
878: idiag -= 25;
879: i2 -= 5;
880: }
881: break;
882: case 6:
883: for (i=m-1; i>=0; i--) {
884: v = aa + 36*ai[i];
885: vi = aj + ai[i];
886: nz = ai[i+1] - ai[i];
887: s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5];
888: while (nz--) {
889: idx = 6*(*vi++);
890: xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
891: xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
892: PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
893: v += 36;
894: }
895: PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
896: x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
897: x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5];
898: idiag -= 36;
899: i2 -= 6;
900: }
901: break;
902: case 7:
903: for (i=m-1; i>=0; i--) {
904: v = aa + 49*ai[i];
905: vi = aj + ai[i];
906: nz = ai[i+1] - ai[i];
907: s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2];
908: s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6];
909: while (nz--) {
910: idx = 7*(*vi++);
911: xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
912: xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
913: PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
914: v += 49;
915: }
916: PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
917: x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
918: x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; x[i2+6] += xw[6];
919: idiag -= 49;
920: i2 -= 7;
921: }
922: break;
923: default:
924: for (i=m-1; i>=0; i--) {
925: v = aa + bs2*ai[i];
926: vi = aj + ai[i];
927: nz = ai[i+1] - ai[i];
929: PetscArraycpy(w,b+i2,bs);
930: /* copy all rows of x that are needed into contiguous space */
931: workt = work;
932: for (j=0; j<nz; j++) {
933: PetscArraycpy(workt,x + bs*(*vi++),bs);
934: workt += bs;
935: }
936: PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
937: PetscKernel_w_gets_w_plus_Ar_times_v(bs,bs,w,idiag,x+i2);
939: idiag -= bs2;
940: i2 -= bs;
941: }
942: break;
943: }
944: PetscLogFlops(2.0*bs2*(a->nz));
945: }
946: }
947: VecRestoreArray(xx,&x);
948: VecRestoreArrayRead(bb,&b);
949: return 0;
950: }
952: /*
953: Special version for direct calls from Fortran (Used in PETSc-fun3d)
954: */
955: #if defined(PETSC_HAVE_FORTRAN_CAPS)
956: #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4
957: #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
958: #define matsetvaluesblocked4_ matsetvaluesblocked4
959: #endif
961: PETSC_EXTERN void matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[])
962: {
963: Mat A = *AA;
964: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
965: PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn;
966: PetscInt *ai =a->i,*ailen=a->ilen;
967: PetscInt *aj =a->j,stepval,lastcol = -1;
968: const PetscScalar *value = v;
969: MatScalar *ap,*aa = a->a,*bap;
971: if (A->rmap->bs != 4) SETERRABORT(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4");
972: stepval = (n-1)*4;
973: for (k=0; k<m; k++) { /* loop over added rows */
974: row = im[k];
975: rp = aj + ai[row];
976: ap = aa + 16*ai[row];
977: nrow = ailen[row];
978: low = 0;
979: high = nrow;
980: for (l=0; l<n; l++) { /* loop over added columns */
981: col = in[l];
982: if (col <= lastcol) low = 0;
983: else high = nrow;
984: lastcol = col;
985: value = v + k*(stepval+4 + l)*4;
986: while (high-low > 7) {
987: t = (low+high)/2;
988: if (rp[t] > col) high = t;
989: else low = t;
990: }
991: for (i=low; i<high; i++) {
992: if (rp[i] > col) break;
993: if (rp[i] == col) {
994: bap = ap + 16*i;
995: for (ii=0; ii<4; ii++,value+=stepval) {
996: for (jj=ii; jj<16; jj+=4) {
997: bap[jj] += *value++;
998: }
999: }
1000: goto noinsert2;
1001: }
1002: }
1003: N = nrow++ - 1;
1004: high++; /* added new column index thus must search to one higher than before */
1005: /* shift up all the later entries in this row */
1006: for (ii=N; ii>=i; ii--) {
1007: rp[ii+1] = rp[ii];
1008: PetscArraycpy(ap+16*(ii+1),ap+16*(ii),16);
1009: }
1010: if (N >= i) {
1011: PetscArrayzero(ap+16*i,16);
1012: }
1013: rp[i] = col;
1014: bap = ap + 16*i;
1015: for (ii=0; ii<4; ii++,value+=stepval) {
1016: for (jj=ii; jj<16; jj+=4) {
1017: bap[jj] = *value++;
1018: }
1019: }
1020: noinsert2:;
1021: low = i;
1022: }
1023: ailen[row] = nrow;
1024: }
1025: return;
1026: }
1028: #if defined(PETSC_HAVE_FORTRAN_CAPS)
1029: #define matsetvalues4_ MATSETVALUES4
1030: #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
1031: #define matsetvalues4_ matsetvalues4
1032: #endif
1034: PETSC_EXTERN void matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v)
1035: {
1036: Mat A = *AA;
1037: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1038: PetscInt *rp,k,low,high,t,row,nrow,i,col,l,N,n = *nn,m = *mm;
1039: PetscInt *ai=a->i,*ailen=a->ilen;
1040: PetscInt *aj=a->j,brow,bcol;
1041: PetscInt ridx,cidx,lastcol = -1;
1042: MatScalar *ap,value,*aa=a->a,*bap;
1044: for (k=0; k<m; k++) { /* loop over added rows */
1045: row = im[k]; brow = row/4;
1046: rp = aj + ai[brow];
1047: ap = aa + 16*ai[brow];
1048: nrow = ailen[brow];
1049: low = 0;
1050: high = nrow;
1051: for (l=0; l<n; l++) { /* loop over added columns */
1052: col = in[l]; bcol = col/4;
1053: ridx = row % 4; cidx = col % 4;
1054: value = v[l + k*n];
1055: if (col <= lastcol) low = 0;
1056: else high = nrow;
1057: lastcol = col;
1058: while (high-low > 7) {
1059: t = (low+high)/2;
1060: if (rp[t] > bcol) high = t;
1061: else low = t;
1062: }
1063: for (i=low; i<high; i++) {
1064: if (rp[i] > bcol) break;
1065: if (rp[i] == bcol) {
1066: bap = ap + 16*i + 4*cidx + ridx;
1067: *bap += value;
1068: goto noinsert1;
1069: }
1070: }
1071: N = nrow++ - 1;
1072: high++; /* added new column thus must search to one higher than before */
1073: /* shift up all the later entries in this row */
1074: PetscArraymove(rp+i+1,rp+i,N-i+1);
1075: PetscArraymove(ap+16*i+16,ap+16*i,16*(N-i+1));
1076: PetscArrayzero(ap+16*i,16);
1077: rp[i] = bcol;
1078: ap[16*i + 4*cidx + ridx] = value;
1079: noinsert1:;
1080: low = i;
1081: }
1082: ailen[brow] = nrow;
1083: }
1084: return;
1085: }
1087: /*
1088: Checks for missing diagonals
1089: */
1090: PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A,PetscBool *missing,PetscInt *d)
1091: {
1092: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1093: PetscInt *diag,*ii = a->i,i;
1095: MatMarkDiagonal_SeqBAIJ(A);
1096: *missing = PETSC_FALSE;
1097: if (A->rmap->n > 0 && !ii) {
1098: *missing = PETSC_TRUE;
1099: if (d) *d = 0;
1100: PetscInfo(A,"Matrix has no entries therefore is missing diagonal\n");
1101: } else {
1102: PetscInt n;
1103: n = PetscMin(a->mbs, a->nbs);
1104: diag = a->diag;
1105: for (i=0; i<n; i++) {
1106: if (diag[i] >= ii[i+1]) {
1107: *missing = PETSC_TRUE;
1108: if (d) *d = i;
1109: PetscInfo(A,"Matrix is missing block diagonal number %" PetscInt_FMT "\n",i);
1110: break;
1111: }
1112: }
1113: }
1114: return 0;
1115: }
1117: PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A)
1118: {
1119: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1120: PetscInt i,j,m = a->mbs;
1122: if (!a->diag) {
1123: PetscMalloc1(m,&a->diag);
1124: PetscLogObjectMemory((PetscObject)A,m*sizeof(PetscInt));
1125: a->free_diag = PETSC_TRUE;
1126: }
1127: for (i=0; i<m; i++) {
1128: a->diag[i] = a->i[i+1];
1129: for (j=a->i[i]; j<a->i[i+1]; j++) {
1130: if (a->j[j] == i) {
1131: a->diag[i] = j;
1132: break;
1133: }
1134: }
1135: }
1136: return 0;
1137: }
1139: static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *inia[],const PetscInt *inja[],PetscBool *done)
1140: {
1141: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1142: PetscInt i,j,n = a->mbs,nz = a->i[n],*tia,*tja,bs = A->rmap->bs,k,l,cnt;
1143: PetscInt **ia = (PetscInt**)inia,**ja = (PetscInt**)inja;
1145: *nn = n;
1146: if (!ia) return 0;
1147: if (symmetric) {
1148: MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,PETSC_TRUE,0,0,&tia,&tja);
1149: nz = tia[n];
1150: } else {
1151: tia = a->i; tja = a->j;
1152: }
1154: if (!blockcompressed && bs > 1) {
1155: (*nn) *= bs;
1156: /* malloc & create the natural set of indices */
1157: PetscMalloc1((n+1)*bs,ia);
1158: if (n) {
1159: (*ia)[0] = oshift;
1160: for (j=1; j<bs; j++) {
1161: (*ia)[j] = (tia[1]-tia[0])*bs+(*ia)[j-1];
1162: }
1163: }
1165: for (i=1; i<n; i++) {
1166: (*ia)[i*bs] = (tia[i]-tia[i-1])*bs + (*ia)[i*bs-1];
1167: for (j=1; j<bs; j++) {
1168: (*ia)[i*bs+j] = (tia[i+1]-tia[i])*bs + (*ia)[i*bs+j-1];
1169: }
1170: }
1171: if (n) {
1172: (*ia)[n*bs] = (tia[n]-tia[n-1])*bs + (*ia)[n*bs-1];
1173: }
1175: if (inja) {
1176: PetscMalloc1(nz*bs*bs,ja);
1177: cnt = 0;
1178: for (i=0; i<n; i++) {
1179: for (j=0; j<bs; j++) {
1180: for (k=tia[i]; k<tia[i+1]; k++) {
1181: for (l=0; l<bs; l++) {
1182: (*ja)[cnt++] = bs*tja[k] + l;
1183: }
1184: }
1185: }
1186: }
1187: }
1189: if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */
1190: PetscFree(tia);
1191: PetscFree(tja);
1192: }
1193: } else if (oshift == 1) {
1194: if (symmetric) {
1195: nz = tia[A->rmap->n/bs];
1196: /* add 1 to i and j indices */
1197: for (i=0; i<A->rmap->n/bs+1; i++) tia[i] = tia[i] + 1;
1198: *ia = tia;
1199: if (ja) {
1200: for (i=0; i<nz; i++) tja[i] = tja[i] + 1;
1201: *ja = tja;
1202: }
1203: } else {
1204: nz = a->i[A->rmap->n/bs];
1205: /* malloc space and add 1 to i and j indices */
1206: PetscMalloc1(A->rmap->n/bs+1,ia);
1207: for (i=0; i<A->rmap->n/bs+1; i++) (*ia)[i] = a->i[i] + 1;
1208: if (ja) {
1209: PetscMalloc1(nz,ja);
1210: for (i=0; i<nz; i++) (*ja)[i] = a->j[i] + 1;
1211: }
1212: }
1213: } else {
1214: *ia = tia;
1215: if (ja) *ja = tja;
1216: }
1217: return 0;
1218: }
1220: static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool *done)
1221: {
1222: if (!ia) return 0;
1223: if ((!blockcompressed && A->rmap->bs > 1) || (symmetric || oshift == 1)) {
1224: PetscFree(*ia);
1225: if (ja) PetscFree(*ja);
1226: }
1227: return 0;
1228: }
1230: PetscErrorCode MatDestroy_SeqBAIJ(Mat A)
1231: {
1232: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1234: #if defined(PETSC_USE_LOG)
1235: PetscLogObjectState((PetscObject)A,"Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT ", NZ=%" PetscInt_FMT,A->rmap->N,A->cmap->n,a->nz);
1236: #endif
1237: MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);
1238: ISDestroy(&a->row);
1239: ISDestroy(&a->col);
1240: if (a->free_diag) PetscFree(a->diag);
1241: PetscFree(a->idiag);
1242: if (a->free_imax_ilen) PetscFree2(a->imax,a->ilen);
1243: PetscFree(a->solve_work);
1244: PetscFree(a->mult_work);
1245: PetscFree(a->sor_workt);
1246: PetscFree(a->sor_work);
1247: ISDestroy(&a->icol);
1248: PetscFree(a->saved_values);
1249: PetscFree2(a->compressedrow.i,a->compressedrow.rindex);
1251: MatDestroy(&a->sbaijMat);
1252: MatDestroy(&a->parent);
1253: PetscFree(A->data);
1255: PetscObjectChangeTypeName((PetscObject)A,NULL);
1256: PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJGetArray_C",NULL);
1257: PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJRestoreArray_C",NULL);
1258: PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C",NULL);
1259: PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C",NULL);
1260: PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C",NULL);
1261: PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C",NULL);
1262: PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C",NULL);
1263: PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C",NULL);
1264: PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocationCSR_C",NULL);
1265: PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqbstrm_C",NULL);
1266: PetscObjectComposeFunction((PetscObject)A,"MatIsTranspose_C",NULL);
1267: #if defined(PETSC_HAVE_HYPRE)
1268: PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_hypre_C",NULL);
1269: #endif
1270: PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_is_C",NULL);
1271: return 0;
1272: }
1274: PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op,PetscBool flg)
1275: {
1276: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1278: switch (op) {
1279: case MAT_ROW_ORIENTED:
1280: a->roworiented = flg;
1281: break;
1282: case MAT_KEEP_NONZERO_PATTERN:
1283: a->keepnonzeropattern = flg;
1284: break;
1285: case MAT_NEW_NONZERO_LOCATIONS:
1286: a->nonew = (flg ? 0 : 1);
1287: break;
1288: case MAT_NEW_NONZERO_LOCATION_ERR:
1289: a->nonew = (flg ? -1 : 0);
1290: break;
1291: case MAT_NEW_NONZERO_ALLOCATION_ERR:
1292: a->nonew = (flg ? -2 : 0);
1293: break;
1294: case MAT_UNUSED_NONZERO_LOCATION_ERR:
1295: a->nounused = (flg ? -1 : 0);
1296: break;
1297: case MAT_FORCE_DIAGONAL_ENTRIES:
1298: case MAT_IGNORE_OFF_PROC_ENTRIES:
1299: case MAT_USE_HASH_TABLE:
1300: case MAT_SORTED_FULL:
1301: PetscInfo(A,"Option %s ignored\n",MatOptions[op]);
1302: break;
1303: case MAT_SPD:
1304: case MAT_SYMMETRIC:
1305: case MAT_STRUCTURALLY_SYMMETRIC:
1306: case MAT_HERMITIAN:
1307: case MAT_SYMMETRY_ETERNAL:
1308: case MAT_SUBMAT_SINGLEIS:
1309: case MAT_STRUCTURE_ONLY:
1310: /* These options are handled directly by MatSetOption() */
1311: break;
1312: default:
1313: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1314: }
1315: return 0;
1316: }
1318: /* used for both SeqBAIJ and SeqSBAIJ matrices */
1319: PetscErrorCode MatGetRow_SeqBAIJ_private(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v,PetscInt *ai,PetscInt *aj,PetscScalar *aa)
1320: {
1321: PetscInt itmp,i,j,k,M,bn,bp,*idx_i,bs,bs2;
1322: MatScalar *aa_i;
1323: PetscScalar *v_i;
1325: bs = A->rmap->bs;
1326: bs2 = bs*bs;
1329: bn = row/bs; /* Block number */
1330: bp = row % bs; /* Block Position */
1331: M = ai[bn+1] - ai[bn];
1332: *nz = bs*M;
1334: if (v) {
1335: *v = NULL;
1336: if (*nz) {
1337: PetscMalloc1(*nz,v);
1338: for (i=0; i<M; i++) { /* for each block in the block row */
1339: v_i = *v + i*bs;
1340: aa_i = aa + bs2*(ai[bn] + i);
1341: for (j=bp,k=0; j<bs2; j+=bs,k++) v_i[k] = aa_i[j];
1342: }
1343: }
1344: }
1346: if (idx) {
1347: *idx = NULL;
1348: if (*nz) {
1349: PetscMalloc1(*nz,idx);
1350: for (i=0; i<M; i++) { /* for each block in the block row */
1351: idx_i = *idx + i*bs;
1352: itmp = bs*aj[ai[bn] + i];
1353: for (j=0; j<bs; j++) idx_i[j] = itmp++;
1354: }
1355: }
1356: }
1357: return 0;
1358: }
1360: PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1361: {
1362: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1364: MatGetRow_SeqBAIJ_private(A,row,nz,idx,v,a->i,a->j,a->a);
1365: return 0;
1366: }
1368: PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1369: {
1370: if (nz) *nz = 0;
1371: if (idx) PetscFree(*idx);
1372: if (v) PetscFree(*v);
1373: return 0;
1374: }
1376: PetscErrorCode MatTranspose_SeqBAIJ(Mat A,MatReuse reuse,Mat *B)
1377: {
1378: Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data,*at;
1379: Mat C;
1380: PetscInt i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap->bs,mbs=a->mbs,nbs=a->nbs,*atfill;
1381: PetscInt bs2=a->bs2,*ati,*atj,anzj,kr;
1382: MatScalar *ata,*aa=a->a;
1384: PetscCalloc1(1+nbs,&atfill);
1385: if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
1386: for (i=0; i<ai[mbs]; i++) atfill[aj[i]] += 1; /* count num of non-zeros in row aj[i] */
1388: MatCreate(PetscObjectComm((PetscObject)A),&C);
1389: MatSetSizes(C,A->cmap->n,A->rmap->N,A->cmap->n,A->rmap->N);
1390: MatSetType(C,((PetscObject)A)->type_name);
1391: MatSeqBAIJSetPreallocation(C,bs,0,atfill);
1393: at = (Mat_SeqBAIJ*)C->data;
1394: ati = at->i;
1395: for (i=0; i<nbs; i++) at->ilen[i] = at->imax[i] = ati[i+1] - ati[i];
1396: } else {
1397: C = *B;
1398: at = (Mat_SeqBAIJ*)C->data;
1399: ati = at->i;
1400: }
1402: atj = at->j;
1403: ata = at->a;
1405: /* Copy ati into atfill so we have locations of the next free space in atj */
1406: PetscArraycpy(atfill,ati,nbs);
1408: /* Walk through A row-wise and mark nonzero entries of A^T. */
1409: for (i=0; i<mbs; i++) {
1410: anzj = ai[i+1] - ai[i];
1411: for (j=0; j<anzj; j++) {
1412: atj[atfill[*aj]] = i;
1413: for (kr=0; kr<bs; kr++) {
1414: for (k=0; k<bs; k++) {
1415: ata[bs2*atfill[*aj]+k*bs+kr] = *aa++;
1416: }
1417: }
1418: atfill[*aj++] += 1;
1419: }
1420: }
1421: MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);
1422: MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);
1424: /* Clean up temporary space and complete requests. */
1425: PetscFree(atfill);
1427: if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
1428: MatSetBlockSizes(C,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));
1429: *B = C;
1430: } else {
1431: MatHeaderMerge(A,&C);
1432: }
1433: return 0;
1434: }
1436: PetscErrorCode MatIsTranspose_SeqBAIJ(Mat A,Mat B,PetscReal tol,PetscBool *f)
1437: {
1438: Mat Btrans;
1440: *f = PETSC_FALSE;
1441: MatTranspose_SeqBAIJ(A,MAT_INITIAL_MATRIX,&Btrans);
1442: MatEqual_SeqBAIJ(B,Btrans,f);
1443: MatDestroy(&Btrans);
1444: return 0;
1445: }
1447: /* Used for both SeqBAIJ and SeqSBAIJ matrices */
1448: PetscErrorCode MatView_SeqBAIJ_Binary(Mat mat,PetscViewer viewer)
1449: {
1450: Mat_SeqBAIJ *A = (Mat_SeqBAIJ*)mat->data;
1451: PetscInt header[4],M,N,m,bs,nz,cnt,i,j,k,l;
1452: PetscInt *rowlens,*colidxs;
1453: PetscScalar *matvals;
1455: PetscViewerSetUp(viewer);
1457: M = mat->rmap->N;
1458: N = mat->cmap->N;
1459: m = mat->rmap->n;
1460: bs = mat->rmap->bs;
1461: nz = bs*bs*A->nz;
1463: /* write matrix header */
1464: header[0] = MAT_FILE_CLASSID;
1465: header[1] = M; header[2] = N; header[3] = nz;
1466: PetscViewerBinaryWrite(viewer,header,4,PETSC_INT);
1468: /* store row lengths */
1469: PetscMalloc1(m,&rowlens);
1470: for (cnt=0, i=0; i<A->mbs; i++)
1471: for (j=0; j<bs; j++)
1472: rowlens[cnt++] = bs*(A->i[i+1] - A->i[i]);
1473: PetscViewerBinaryWrite(viewer,rowlens,m,PETSC_INT);
1474: PetscFree(rowlens);
1476: /* store column indices */
1477: PetscMalloc1(nz,&colidxs);
1478: for (cnt=0, i=0; i<A->mbs; i++)
1479: for (k=0; k<bs; k++)
1480: for (j=A->i[i]; j<A->i[i+1]; j++)
1481: for (l=0; l<bs; l++)
1482: colidxs[cnt++] = bs*A->j[j] + l;
1484: PetscViewerBinaryWrite(viewer,colidxs,nz,PETSC_INT);
1485: PetscFree(colidxs);
1487: /* store nonzero values */
1488: PetscMalloc1(nz,&matvals);
1489: for (cnt=0, i=0; i<A->mbs; i++)
1490: for (k=0; k<bs; k++)
1491: for (j=A->i[i]; j<A->i[i+1]; j++)
1492: for (l=0; l<bs; l++)
1493: matvals[cnt++] = A->a[bs*(bs*j + l) + k];
1495: PetscViewerBinaryWrite(viewer,matvals,nz,PETSC_SCALAR);
1496: PetscFree(matvals);
1498: /* write block size option to the viewer's .info file */
1499: MatView_Binary_BlockSizes(mat,viewer);
1500: return 0;
1501: }
1503: static PetscErrorCode MatView_SeqBAIJ_ASCII_structonly(Mat A,PetscViewer viewer)
1504: {
1505: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1506: PetscInt i,bs = A->rmap->bs,k;
1508: PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);
1509: for (i=0; i<a->mbs; i++) {
1510: PetscViewerASCIIPrintf(viewer,"row %" PetscInt_FMT "-%" PetscInt_FMT ":",i*bs,i*bs+bs-1);
1511: for (k=a->i[i]; k<a->i[i+1]; k++) {
1512: PetscViewerASCIIPrintf(viewer," (%" PetscInt_FMT "-%" PetscInt_FMT ") ",bs*a->j[k],bs*a->j[k]+bs-1);
1513: }
1514: PetscViewerASCIIPrintf(viewer,"\n");
1515: }
1516: PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);
1517: return 0;
1518: }
1520: static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer)
1521: {
1522: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1523: PetscInt i,j,bs = A->rmap->bs,k,l,bs2=a->bs2;
1524: PetscViewerFormat format;
1526: if (A->structure_only) {
1527: MatView_SeqBAIJ_ASCII_structonly(A,viewer);
1528: return 0;
1529: }
1531: PetscViewerGetFormat(viewer,&format);
1532: if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1533: PetscViewerASCIIPrintf(viewer," block size is %" PetscInt_FMT "\n",bs);
1534: } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
1535: const char *matname;
1536: Mat aij;
1537: MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);
1538: PetscObjectGetName((PetscObject)A,&matname);
1539: PetscObjectSetName((PetscObject)aij,matname);
1540: MatView(aij,viewer);
1541: MatDestroy(&aij);
1542: } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1543: return 0;
1544: } else if (format == PETSC_VIEWER_ASCII_COMMON) {
1545: PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);
1546: for (i=0; i<a->mbs; i++) {
1547: for (j=0; j<bs; j++) {
1548: PetscViewerASCIIPrintf(viewer,"row %" PetscInt_FMT ":",i*bs+j);
1549: for (k=a->i[i]; k<a->i[i+1]; k++) {
1550: for (l=0; l<bs; l++) {
1551: #if defined(PETSC_USE_COMPLEX)
1552: if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1553: PetscCall(PetscViewerASCIIPrintf(viewer," (%" PetscInt_FMT ", %g + %gi) ",bs*a->j[k]+l,
1554: (double)PetscRealPart(a->a[bs2*k + l*bs + j]),(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j])));
1555: } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1556: PetscCall(PetscViewerASCIIPrintf(viewer," (%" PetscInt_FMT ", %g - %gi) ",bs*a->j[k]+l,
1557: (double)PetscRealPart(a->a[bs2*k + l*bs + j]),-(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j])));
1558: } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1559: PetscViewerASCIIPrintf(viewer," (%" PetscInt_FMT ", %g) ",bs*a->j[k]+l,(double)PetscRealPart(a->a[bs2*k + l*bs + j]));
1560: }
1561: #else
1562: if (a->a[bs2*k + l*bs + j] != 0.0) {
1563: PetscViewerASCIIPrintf(viewer," (%" PetscInt_FMT ", %g) ",bs*a->j[k]+l,(double)a->a[bs2*k + l*bs + j]);
1564: }
1565: #endif
1566: }
1567: }
1568: PetscViewerASCIIPrintf(viewer,"\n");
1569: }
1570: }
1571: PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);
1572: } else {
1573: PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);
1574: for (i=0; i<a->mbs; i++) {
1575: for (j=0; j<bs; j++) {
1576: PetscViewerASCIIPrintf(viewer,"row %" PetscInt_FMT ":",i*bs+j);
1577: for (k=a->i[i]; k<a->i[i+1]; k++) {
1578: for (l=0; l<bs; l++) {
1579: #if defined(PETSC_USE_COMPLEX)
1580: if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
1581: PetscCall(PetscViewerASCIIPrintf(viewer," (%" PetscInt_FMT ", %g + %g i) ",bs*a->j[k]+l,
1582: (double)PetscRealPart(a->a[bs2*k + l*bs + j]),(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j])));
1583: } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
1584: PetscCall(PetscViewerASCIIPrintf(viewer," (%" PetscInt_FMT ", %g - %g i) ",bs*a->j[k]+l,
1585: (double)PetscRealPart(a->a[bs2*k + l*bs + j]),-(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j])));
1586: } else {
1587: PetscViewerASCIIPrintf(viewer," (%" PetscInt_FMT ", %g) ",bs*a->j[k]+l,(double)PetscRealPart(a->a[bs2*k + l*bs + j]));
1588: }
1589: #else
1590: PetscViewerASCIIPrintf(viewer," (%" PetscInt_FMT ", %g) ",bs*a->j[k]+l,(double)a->a[bs2*k + l*bs + j]);
1591: #endif
1592: }
1593: }
1594: PetscViewerASCIIPrintf(viewer,"\n");
1595: }
1596: }
1597: PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);
1598: }
1599: PetscViewerFlush(viewer);
1600: return 0;
1601: }
1603: #include <petscdraw.h>
1604: static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
1605: {
1606: Mat A = (Mat) Aa;
1607: Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data;
1608: PetscInt row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap->bs,bs2=a->bs2;
1609: PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r;
1610: MatScalar *aa;
1611: PetscViewer viewer;
1612: PetscViewerFormat format;
1614: PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);
1615: PetscViewerGetFormat(viewer,&format);
1616: PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);
1618: /* loop over matrix elements drawing boxes */
1620: if (format != PETSC_VIEWER_DRAW_CONTOUR) {
1622: PetscDrawCollectiveBegin(draw);
1623: /* Blue for negative, Cyan for zero and Red for positive */
1624: color = PETSC_DRAW_BLUE;
1625: for (i=0,row=0; i<mbs; i++,row+=bs) {
1626: for (j=a->i[i]; j<a->i[i+1]; j++) {
1627: y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1628: x_l = a->j[j]*bs; x_r = x_l + 1.0;
1629: aa = a->a + j*bs2;
1630: for (k=0; k<bs; k++) {
1631: for (l=0; l<bs; l++) {
1632: if (PetscRealPart(*aa++) >= 0.) continue;
1633: PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
1634: }
1635: }
1636: }
1637: }
1638: color = PETSC_DRAW_CYAN;
1639: for (i=0,row=0; i<mbs; i++,row+=bs) {
1640: for (j=a->i[i]; j<a->i[i+1]; j++) {
1641: y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1642: x_l = a->j[j]*bs; x_r = x_l + 1.0;
1643: aa = a->a + j*bs2;
1644: for (k=0; k<bs; k++) {
1645: for (l=0; l<bs; l++) {
1646: if (PetscRealPart(*aa++) != 0.) continue;
1647: PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
1648: }
1649: }
1650: }
1651: }
1652: color = PETSC_DRAW_RED;
1653: for (i=0,row=0; i<mbs; i++,row+=bs) {
1654: for (j=a->i[i]; j<a->i[i+1]; j++) {
1655: y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1656: x_l = a->j[j]*bs; x_r = x_l + 1.0;
1657: aa = a->a + j*bs2;
1658: for (k=0; k<bs; k++) {
1659: for (l=0; l<bs; l++) {
1660: if (PetscRealPart(*aa++) <= 0.) continue;
1661: PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
1662: }
1663: }
1664: }
1665: }
1666: PetscDrawCollectiveEnd(draw);
1667: } else {
1668: /* use contour shading to indicate magnitude of values */
1669: /* first determine max of all nonzero values */
1670: PetscReal minv = 0.0, maxv = 0.0;
1671: PetscDraw popup;
1674: for (i=0; i<a->nz*a->bs2; i++) {
1675: if (PetscAbsScalar(a->a[i]) > maxv) maxv = PetscAbsScalar(a->a[i]);
1676: }
1677: if (minv >= maxv) maxv = minv + PETSC_SMALL;
1678: PetscDrawGetPopup(draw,&popup);
1679: PetscDrawScalePopup(popup,0.0,maxv);
1681: PetscDrawCollectiveBegin(draw);
1682: for (i=0,row=0; i<mbs; i++,row+=bs) {
1683: for (j=a->i[i]; j<a->i[i+1]; j++) {
1684: y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1685: x_l = a->j[j]*bs; x_r = x_l + 1.0;
1686: aa = a->a + j*bs2;
1687: for (k=0; k<bs; k++) {
1688: for (l=0; l<bs; l++) {
1689: MatScalar v = *aa++;
1690: color = PetscDrawRealToColor(PetscAbsScalar(v),minv,maxv);
1691: PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
1692: }
1693: }
1694: }
1695: }
1696: PetscDrawCollectiveEnd(draw);
1697: }
1698: return 0;
1699: }
1701: static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer)
1702: {
1703: PetscReal xl,yl,xr,yr,w,h;
1704: PetscDraw draw;
1705: PetscBool isnull;
1707: PetscViewerDrawGetDraw(viewer,0,&draw);
1708: PetscDrawIsNull(draw,&isnull);
1709: if (isnull) return 0;
1711: xr = A->cmap->n; yr = A->rmap->N; h = yr/10.0; w = xr/10.0;
1712: xr += w; yr += h; xl = -w; yl = -h;
1713: PetscDrawSetCoordinates(draw,xl,yl,xr,yr);
1714: PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);
1715: PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);
1716: PetscObjectCompose((PetscObject)A,"Zoomviewer",NULL);
1717: PetscDrawSave(draw);
1718: return 0;
1719: }
1721: PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer)
1722: {
1723: PetscBool iascii,isbinary,isdraw;
1725: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);
1726: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);
1727: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);
1728: if (iascii) {
1729: MatView_SeqBAIJ_ASCII(A,viewer);
1730: } else if (isbinary) {
1731: MatView_SeqBAIJ_Binary(A,viewer);
1732: } else if (isdraw) {
1733: MatView_SeqBAIJ_Draw(A,viewer);
1734: } else {
1735: Mat B;
1736: MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);
1737: MatView(B,viewer);
1738: MatDestroy(&B);
1739: }
1740: return 0;
1741: }
1743: PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[])
1744: {
1745: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1746: PetscInt *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
1747: PetscInt *ai = a->i,*ailen = a->ilen;
1748: PetscInt brow,bcol,ridx,cidx,bs=A->rmap->bs,bs2=a->bs2;
1749: MatScalar *ap,*aa = a->a;
1751: for (k=0; k<m; k++) { /* loop over rows */
1752: row = im[k]; brow = row/bs;
1753: if (row < 0) {v += n; continue;} /* negative row */
1755: rp = aj + ai[brow]; ap = aa + bs2*ai[brow];
1756: nrow = ailen[brow];
1757: for (l=0; l<n; l++) { /* loop over columns */
1758: if (in[l] < 0) {v++; continue;} /* negative column */
1760: col = in[l];
1761: bcol = col/bs;
1762: cidx = col%bs;
1763: ridx = row%bs;
1764: high = nrow;
1765: low = 0; /* assume unsorted */
1766: while (high-low > 5) {
1767: t = (low+high)/2;
1768: if (rp[t] > bcol) high = t;
1769: else low = t;
1770: }
1771: for (i=low; i<high; i++) {
1772: if (rp[i] > bcol) break;
1773: if (rp[i] == bcol) {
1774: *v++ = ap[bs2*i+bs*cidx+ridx];
1775: goto finished;
1776: }
1777: }
1778: *v++ = 0.0;
1779: finished:;
1780: }
1781: }
1782: return 0;
1783: }
1785: PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
1786: {
1787: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1788: PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1;
1789: PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen;
1790: PetscInt *aj =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs,stepval;
1791: PetscBool roworiented=a->roworiented;
1792: const PetscScalar *value = v;
1793: MatScalar *ap=NULL,*aa = a->a,*bap;
1795: if (roworiented) {
1796: stepval = (n-1)*bs;
1797: } else {
1798: stepval = (m-1)*bs;
1799: }
1800: for (k=0; k<m; k++) { /* loop over added rows */
1801: row = im[k];
1802: if (row < 0) continue;
1804: rp = aj + ai[row];
1805: if (!A->structure_only) ap = aa + bs2*ai[row];
1806: rmax = imax[row];
1807: nrow = ailen[row];
1808: low = 0;
1809: high = nrow;
1810: for (l=0; l<n; l++) { /* loop over added columns */
1811: if (in[l] < 0) continue;
1813: col = in[l];
1814: if (!A->structure_only) {
1815: if (roworiented) {
1816: value = v + (k*(stepval+bs) + l)*bs;
1817: } else {
1818: value = v + (l*(stepval+bs) + k)*bs;
1819: }
1820: }
1821: if (col <= lastcol) low = 0;
1822: else high = nrow;
1823: lastcol = col;
1824: while (high-low > 7) {
1825: t = (low+high)/2;
1826: if (rp[t] > col) high = t;
1827: else low = t;
1828: }
1829: for (i=low; i<high; i++) {
1830: if (rp[i] > col) break;
1831: if (rp[i] == col) {
1832: if (A->structure_only) goto noinsert2;
1833: bap = ap + bs2*i;
1834: if (roworiented) {
1835: if (is == ADD_VALUES) {
1836: for (ii=0; ii<bs; ii++,value+=stepval) {
1837: for (jj=ii; jj<bs2; jj+=bs) {
1838: bap[jj] += *value++;
1839: }
1840: }
1841: } else {
1842: for (ii=0; ii<bs; ii++,value+=stepval) {
1843: for (jj=ii; jj<bs2; jj+=bs) {
1844: bap[jj] = *value++;
1845: }
1846: }
1847: }
1848: } else {
1849: if (is == ADD_VALUES) {
1850: for (ii=0; ii<bs; ii++,value+=bs+stepval) {
1851: for (jj=0; jj<bs; jj++) {
1852: bap[jj] += value[jj];
1853: }
1854: bap += bs;
1855: }
1856: } else {
1857: for (ii=0; ii<bs; ii++,value+=bs+stepval) {
1858: for (jj=0; jj<bs; jj++) {
1859: bap[jj] = value[jj];
1860: }
1861: bap += bs;
1862: }
1863: }
1864: }
1865: goto noinsert2;
1866: }
1867: }
1868: if (nonew == 1) goto noinsert2;
1870: if (A->structure_only) {
1871: MatSeqXAIJReallocateAIJ_structure_only(A,a->mbs,bs2,nrow,row,col,rmax,ai,aj,rp,imax,nonew,MatScalar);
1872: } else {
1873: MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
1874: }
1875: N = nrow++ - 1; high++;
1876: /* shift up all the later entries in this row */
1877: PetscArraymove(rp+i+1,rp+i,N-i+1);
1878: rp[i] = col;
1879: if (!A->structure_only) {
1880: PetscArraymove(ap+bs2*(i+1),ap+bs2*i,bs2*(N-i+1));
1881: bap = ap + bs2*i;
1882: if (roworiented) {
1883: for (ii=0; ii<bs; ii++,value+=stepval) {
1884: for (jj=ii; jj<bs2; jj+=bs) {
1885: bap[jj] = *value++;
1886: }
1887: }
1888: } else {
1889: for (ii=0; ii<bs; ii++,value+=stepval) {
1890: for (jj=0; jj<bs; jj++) {
1891: *bap++ = *value++;
1892: }
1893: }
1894: }
1895: }
1896: noinsert2:;
1897: low = i;
1898: }
1899: ailen[row] = nrow;
1900: }
1901: return 0;
1902: }
1904: PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
1905: {
1906: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1907: PetscInt fshift = 0,i,*ai = a->i,*aj = a->j,*imax = a->imax;
1908: PetscInt m = A->rmap->N,*ip,N,*ailen = a->ilen;
1909: PetscInt mbs = a->mbs,bs2 = a->bs2,rmax = 0;
1910: MatScalar *aa = a->a,*ap;
1911: PetscReal ratio=0.6;
1913: if (mode == MAT_FLUSH_ASSEMBLY) return 0;
1915: if (m) rmax = ailen[0];
1916: for (i=1; i<mbs; i++) {
1917: /* move each row back by the amount of empty slots (fshift) before it*/
1918: fshift += imax[i-1] - ailen[i-1];
1919: rmax = PetscMax(rmax,ailen[i]);
1920: if (fshift) {
1921: ip = aj + ai[i];
1922: ap = aa + bs2*ai[i];
1923: N = ailen[i];
1924: PetscArraymove(ip-fshift,ip,N);
1925: if (!A->structure_only) {
1926: PetscArraymove(ap-bs2*fshift,ap,bs2*N);
1927: }
1928: }
1929: ai[i] = ai[i-1] + ailen[i-1];
1930: }
1931: if (mbs) {
1932: fshift += imax[mbs-1] - ailen[mbs-1];
1933: ai[mbs] = ai[mbs-1] + ailen[mbs-1];
1934: }
1936: /* reset ilen and imax for each row */
1937: a->nonzerorowcnt = 0;
1938: if (A->structure_only) {
1939: PetscFree2(a->imax,a->ilen);
1940: } else { /* !A->structure_only */
1941: for (i=0; i<mbs; i++) {
1942: ailen[i] = imax[i] = ai[i+1] - ai[i];
1943: a->nonzerorowcnt += ((ai[i+1] - ai[i]) > 0);
1944: }
1945: }
1946: a->nz = ai[mbs];
1948: /* diagonals may have moved, so kill the diagonal pointers */
1949: a->idiagvalid = PETSC_FALSE;
1950: if (fshift && a->diag) {
1951: PetscFree(a->diag);
1952: PetscLogObjectMemory((PetscObject)A,-(mbs+1)*sizeof(PetscInt));
1953: a->diag = NULL;
1954: }
1956: PetscInfo(A,"Matrix size: %" PetscInt_FMT " X %" PetscInt_FMT ", block size %" PetscInt_FMT "; storage space: %" PetscInt_FMT " unneeded, %" PetscInt_FMT " used\n",m,A->cmap->n,A->rmap->bs,fshift*bs2,a->nz*bs2);
1957: PetscInfo(A,"Number of mallocs during MatSetValues is %" PetscInt_FMT "\n",a->reallocs);
1958: PetscInfo(A,"Most nonzeros blocks in any row is %" PetscInt_FMT "\n",rmax);
1960: A->info.mallocs += a->reallocs;
1961: a->reallocs = 0;
1962: A->info.nz_unneeded = (PetscReal)fshift*bs2;
1963: a->rmax = rmax;
1965: if (!A->structure_only) {
1966: MatCheckCompressedRow(A,a->nonzerorowcnt,&a->compressedrow,a->i,mbs,ratio);
1967: }
1968: return 0;
1969: }
1971: /*
1972: This function returns an array of flags which indicate the locations of contiguous
1973: blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9]
1974: then the resulting sizes = [3,1,1,3,1] corresponding to sets [(0,1,2),(3),(5),(6,7,8),(9)]
1975: Assume: sizes should be long enough to hold all the values.
1976: */
1977: static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max)
1978: {
1979: PetscInt i,j,k,row;
1980: PetscBool flg;
1982: for (i=0,j=0; i<n; j++) {
1983: row = idx[i];
1984: if (row%bs!=0) { /* Not the beginning of a block */
1985: sizes[j] = 1;
1986: i++;
1987: } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */
1988: sizes[j] = 1; /* Also makes sure at least 'bs' values exist for next else */
1989: i++;
1990: } else { /* Beginning of the block, so check if the complete block exists */
1991: flg = PETSC_TRUE;
1992: for (k=1; k<bs; k++) {
1993: if (row+k != idx[i+k]) { /* break in the block */
1994: flg = PETSC_FALSE;
1995: break;
1996: }
1997: }
1998: if (flg) { /* No break in the bs */
1999: sizes[j] = bs;
2000: i += bs;
2001: } else {
2002: sizes[j] = 1;
2003: i++;
2004: }
2005: }
2006: }
2007: *bs_max = j;
2008: return 0;
2009: }
2011: PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b)
2012: {
2013: Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data;
2014: PetscInt i,j,k,count,*rows;
2015: PetscInt bs=A->rmap->bs,bs2=baij->bs2,*sizes,row,bs_max;
2016: PetscScalar zero = 0.0;
2017: MatScalar *aa;
2018: const PetscScalar *xx;
2019: PetscScalar *bb;
2021: /* fix right hand side if needed */
2022: if (x && b) {
2023: VecGetArrayRead(x,&xx);
2024: VecGetArray(b,&bb);
2025: for (i=0; i<is_n; i++) {
2026: bb[is_idx[i]] = diag*xx[is_idx[i]];
2027: }
2028: VecRestoreArrayRead(x,&xx);
2029: VecRestoreArray(b,&bb);
2030: }
2032: /* Make a copy of the IS and sort it */
2033: /* allocate memory for rows,sizes */
2034: PetscMalloc2(is_n,&rows,2*is_n,&sizes);
2036: /* copy IS values to rows, and sort them */
2037: for (i=0; i<is_n; i++) rows[i] = is_idx[i];
2038: PetscSortInt(is_n,rows);
2040: if (baij->keepnonzeropattern) {
2041: for (i=0; i<is_n; i++) sizes[i] = 1;
2042: bs_max = is_n;
2043: } else {
2044: MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);
2045: A->nonzerostate++;
2046: }
2048: for (i=0,j=0; i<bs_max; j+=sizes[i],i++) {
2049: row = rows[j];
2051: count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
2052: aa = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
2053: if (sizes[i] == bs && !baij->keepnonzeropattern) {
2054: if (diag != (PetscScalar)0.0) {
2055: if (baij->ilen[row/bs] > 0) {
2056: baij->ilen[row/bs] = 1;
2057: baij->j[baij->i[row/bs]] = row/bs;
2059: PetscArrayzero(aa,count*bs);
2060: }
2061: /* Now insert all the diagonal values for this bs */
2062: for (k=0; k<bs; k++) {
2063: (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);
2064: }
2065: } else { /* (diag == 0.0) */
2066: baij->ilen[row/bs] = 0;
2067: } /* end (diag == 0.0) */
2068: } else { /* (sizes[i] != bs) */
2069: PetscAssert(sizes[i] == 1,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal Error. Value should be 1");
2070: for (k=0; k<count; k++) {
2071: aa[0] = zero;
2072: aa += bs;
2073: }
2074: if (diag != (PetscScalar)0.0) {
2075: (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);
2076: }
2077: }
2078: }
2080: PetscFree2(rows,sizes);
2081: MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);
2082: return 0;
2083: }
2085: PetscErrorCode MatZeroRowsColumns_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b)
2086: {
2087: Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data;
2088: PetscInt i,j,k,count;
2089: PetscInt bs =A->rmap->bs,bs2=baij->bs2,row,col;
2090: PetscScalar zero = 0.0;
2091: MatScalar *aa;
2092: const PetscScalar *xx;
2093: PetscScalar *bb;
2094: PetscBool *zeroed,vecs = PETSC_FALSE;
2096: /* fix right hand side if needed */
2097: if (x && b) {
2098: VecGetArrayRead(x,&xx);
2099: VecGetArray(b,&bb);
2100: vecs = PETSC_TRUE;
2101: }
2103: /* zero the columns */
2104: PetscCalloc1(A->rmap->n,&zeroed);
2105: for (i=0; i<is_n; i++) {
2107: zeroed[is_idx[i]] = PETSC_TRUE;
2108: }
2109: for (i=0; i<A->rmap->N; i++) {
2110: if (!zeroed[i]) {
2111: row = i/bs;
2112: for (j=baij->i[row]; j<baij->i[row+1]; j++) {
2113: for (k=0; k<bs; k++) {
2114: col = bs*baij->j[j] + k;
2115: if (zeroed[col]) {
2116: aa = ((MatScalar*)(baij->a)) + j*bs2 + (i%bs) + bs*k;
2117: if (vecs) bb[i] -= aa[0]*xx[col];
2118: aa[0] = 0.0;
2119: }
2120: }
2121: }
2122: } else if (vecs) bb[i] = diag*xx[i];
2123: }
2124: PetscFree(zeroed);
2125: if (vecs) {
2126: VecRestoreArrayRead(x,&xx);
2127: VecRestoreArray(b,&bb);
2128: }
2130: /* zero the rows */
2131: for (i=0; i<is_n; i++) {
2132: row = is_idx[i];
2133: count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
2134: aa = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
2135: for (k=0; k<count; k++) {
2136: aa[0] = zero;
2137: aa += bs;
2138: }
2139: if (diag != (PetscScalar)0.0) {
2140: (*A->ops->setvalues)(A,1,&row,1,&row,&diag,INSERT_VALUES);
2141: }
2142: }
2143: MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);
2144: return 0;
2145: }
2147: PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
2148: {
2149: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2150: PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1;
2151: PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen;
2152: PetscInt *aj =a->j,nonew=a->nonew,bs=A->rmap->bs,brow,bcol;
2153: PetscInt ridx,cidx,bs2=a->bs2;
2154: PetscBool roworiented=a->roworiented;
2155: MatScalar *ap=NULL,value=0.0,*aa=a->a,*bap;
2157: for (k=0; k<m; k++) { /* loop over added rows */
2158: row = im[k];
2159: brow = row/bs;
2160: if (row < 0) continue;
2162: rp = aj + ai[brow];
2163: if (!A->structure_only) ap = aa + bs2*ai[brow];
2164: rmax = imax[brow];
2165: nrow = ailen[brow];
2166: low = 0;
2167: high = nrow;
2168: for (l=0; l<n; l++) { /* loop over added columns */
2169: if (in[l] < 0) continue;
2171: col = in[l]; bcol = col/bs;
2172: ridx = row % bs; cidx = col % bs;
2173: if (!A->structure_only) {
2174: if (roworiented) {
2175: value = v[l + k*n];
2176: } else {
2177: value = v[k + l*m];
2178: }
2179: }
2180: if (col <= lastcol) low = 0; else high = nrow;
2181: lastcol = col;
2182: while (high-low > 7) {
2183: t = (low+high)/2;
2184: if (rp[t] > bcol) high = t;
2185: else low = t;
2186: }
2187: for (i=low; i<high; i++) {
2188: if (rp[i] > bcol) break;
2189: if (rp[i] == bcol) {
2190: bap = ap + bs2*i + bs*cidx + ridx;
2191: if (!A->structure_only) {
2192: if (is == ADD_VALUES) *bap += value;
2193: else *bap = value;
2194: }
2195: goto noinsert1;
2196: }
2197: }
2198: if (nonew == 1) goto noinsert1;
2200: if (A->structure_only) {
2201: MatSeqXAIJReallocateAIJ_structure_only(A,a->mbs,bs2,nrow,brow,bcol,rmax,ai,aj,rp,imax,nonew,MatScalar);
2202: } else {
2203: MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
2204: }
2205: N = nrow++ - 1; high++;
2206: /* shift up all the later entries in this row */
2207: PetscArraymove(rp+i+1,rp+i,N-i+1);
2208: rp[i] = bcol;
2209: if (!A->structure_only) {
2210: PetscArraymove(ap+bs2*(i+1),ap+bs2*i,bs2*(N-i+1));
2211: PetscArrayzero(ap+bs2*i,bs2);
2212: ap[bs2*i + bs*cidx + ridx] = value;
2213: }
2214: a->nz++;
2215: A->nonzerostate++;
2216: noinsert1:;
2217: low = i;
2218: }
2219: ailen[brow] = nrow;
2220: }
2221: return 0;
2222: }
2224: PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,const MatFactorInfo *info)
2225: {
2226: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inA->data;
2227: Mat outA;
2228: PetscBool row_identity,col_identity;
2231: ISIdentity(row,&row_identity);
2232: ISIdentity(col,&col_identity);
2235: outA = inA;
2236: inA->factortype = MAT_FACTOR_LU;
2237: PetscFree(inA->solvertype);
2238: PetscStrallocpy(MATSOLVERPETSC,&inA->solvertype);
2240: MatMarkDiagonal_SeqBAIJ(inA);
2242: PetscObjectReference((PetscObject)row);
2243: ISDestroy(&a->row);
2244: a->row = row;
2245: PetscObjectReference((PetscObject)col);
2246: ISDestroy(&a->col);
2247: a->col = col;
2249: /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
2250: ISDestroy(&a->icol);
2251: ISInvertPermutation(col,PETSC_DECIDE,&a->icol);
2252: PetscLogObjectParent((PetscObject)inA,(PetscObject)a->icol);
2254: MatSeqBAIJSetNumericFactorization_inplace(inA,(PetscBool)(row_identity && col_identity));
2255: if (!a->solve_work) {
2256: PetscMalloc1(inA->rmap->N+inA->rmap->bs,&a->solve_work);
2257: PetscLogObjectMemory((PetscObject)inA,(inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar));
2258: }
2259: MatLUFactorNumeric(outA,inA,info);
2260: return 0;
2261: }
2263: PetscErrorCode MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices)
2264: {
2265: Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)mat->data;
2266: PetscInt i,nz,mbs;
2268: nz = baij->maxnz;
2269: mbs = baij->mbs;
2270: for (i=0; i<nz; i++) {
2271: baij->j[i] = indices[i];
2272: }
2273: baij->nz = nz;
2274: for (i=0; i<mbs; i++) {
2275: baij->ilen[i] = baij->imax[i];
2276: }
2277: return 0;
2278: }
2280: /*@
2281: MatSeqBAIJSetColumnIndices - Set the column indices for all the rows
2282: in the matrix.
2284: Input Parameters:
2285: + mat - the SeqBAIJ matrix
2286: - indices - the column indices
2288: Level: advanced
2290: Notes:
2291: This can be called if you have precomputed the nonzero structure of the
2292: matrix and want to provide it to the matrix object to improve the performance
2293: of the MatSetValues() operation.
2295: You MUST have set the correct numbers of nonzeros per row in the call to
2296: MatCreateSeqBAIJ(), and the columns indices MUST be sorted.
2298: MUST be called before any calls to MatSetValues();
2300: @*/
2301: PetscErrorCode MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices)
2302: {
2305: PetscUseMethod(mat,"MatSeqBAIJSetColumnIndices_C",(Mat,PetscInt*),(mat,indices));
2306: return 0;
2307: }
2309: PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[])
2310: {
2311: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2312: PetscInt i,j,n,row,bs,*ai,*aj,mbs;
2313: PetscReal atmp;
2314: PetscScalar *x,zero = 0.0;
2315: MatScalar *aa;
2316: PetscInt ncols,brow,krow,kcol;
2318: /* why is this not a macro???????????????????????????????????????????????????????????????? */
2320: bs = A->rmap->bs;
2321: aa = a->a;
2322: ai = a->i;
2323: aj = a->j;
2324: mbs = a->mbs;
2326: VecSet(v,zero);
2327: VecGetArray(v,&x);
2328: VecGetLocalSize(v,&n);
2330: for (i=0; i<mbs; i++) {
2331: ncols = ai[1] - ai[0]; ai++;
2332: brow = bs*i;
2333: for (j=0; j<ncols; j++) {
2334: for (kcol=0; kcol<bs; kcol++) {
2335: for (krow=0; krow<bs; krow++) {
2336: atmp = PetscAbsScalar(*aa);aa++;
2337: row = brow + krow; /* row index */
2338: if (PetscAbsScalar(x[row]) < atmp) {x[row] = atmp; if (idx) idx[row] = bs*(*aj) + kcol;}
2339: }
2340: }
2341: aj++;
2342: }
2343: }
2344: VecRestoreArray(v,&x);
2345: return 0;
2346: }
2348: PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str)
2349: {
2350: /* If the two matrices have the same copy implementation, use fast copy. */
2351: if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) {
2352: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2353: Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)B->data;
2354: PetscInt ambs=a->mbs,bmbs=b->mbs,abs=A->rmap->bs,bbs=B->rmap->bs,bs2=abs*abs;
2358: PetscArraycpy(b->a,a->a,bs2*a->i[ambs]);
2359: PetscObjectStateIncrease((PetscObject)B);
2360: } else {
2361: MatCopy_Basic(A,B,str);
2362: }
2363: return 0;
2364: }
2366: PetscErrorCode MatSetUp_SeqBAIJ(Mat A)
2367: {
2368: MatSeqBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,NULL);
2369: return 0;
2370: }
2372: static PetscErrorCode MatSeqBAIJGetArray_SeqBAIJ(Mat A,PetscScalar *array[])
2373: {
2374: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2376: *array = a->a;
2377: return 0;
2378: }
2380: static PetscErrorCode MatSeqBAIJRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[])
2381: {
2382: *array = NULL;
2383: return 0;
2384: }
2386: PetscErrorCode MatAXPYGetPreallocation_SeqBAIJ(Mat Y,Mat X,PetscInt *nnz)
2387: {
2388: PetscInt bs = Y->rmap->bs,mbs = Y->rmap->N/bs;
2389: Mat_SeqBAIJ *x = (Mat_SeqBAIJ*)X->data;
2390: Mat_SeqBAIJ *y = (Mat_SeqBAIJ*)Y->data;
2392: /* Set the number of nonzeros in the new matrix */
2393: MatAXPYGetPreallocation_SeqX_private(mbs,x->i,x->j,y->i,y->j,nnz);
2394: return 0;
2395: }
2397: PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2398: {
2399: Mat_SeqBAIJ *x = (Mat_SeqBAIJ*)X->data,*y = (Mat_SeqBAIJ*)Y->data;
2400: PetscInt bs=Y->rmap->bs,bs2=bs*bs;
2401: PetscBLASInt one=1;
2403: if (str == UNKNOWN_NONZERO_PATTERN || (PetscDefined(USE_DEBUG) && str == SAME_NONZERO_PATTERN)) {
2404: PetscBool e = x->nz == y->nz && x->mbs == y->mbs && bs == X->rmap->bs ? PETSC_TRUE : PETSC_FALSE;
2405: if (e) {
2406: PetscArraycmp(x->i,y->i,x->mbs+1,&e);
2407: if (e) {
2408: PetscArraycmp(x->j,y->j,x->i[x->mbs],&e);
2409: if (e) str = SAME_NONZERO_PATTERN;
2410: }
2411: }
2413: }
2414: if (str == SAME_NONZERO_PATTERN) {
2415: PetscScalar alpha = a;
2416: PetscBLASInt bnz;
2417: PetscBLASIntCast(x->nz*bs2,&bnz);
2418: PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2419: PetscObjectStateIncrease((PetscObject)Y);
2420: } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2421: MatAXPY_Basic(Y,a,X,str);
2422: } else {
2423: Mat B;
2424: PetscInt *nnz;
2426: PetscMalloc1(Y->rmap->N,&nnz);
2427: MatCreate(PetscObjectComm((PetscObject)Y),&B);
2428: PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);
2429: MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);
2430: MatSetBlockSizesFromMats(B,Y,Y);
2431: MatSetType(B,(MatType) ((PetscObject)Y)->type_name);
2432: MatAXPYGetPreallocation_SeqBAIJ(Y,X,nnz);
2433: MatSeqBAIJSetPreallocation(B,bs,0,nnz);
2434: MatAXPY_BasicWithPreallocation(B,Y,a,X,str);
2435: MatHeaderMerge(Y,&B);
2436: PetscFree(nnz);
2437: }
2438: return 0;
2439: }
2441: PETSC_INTERN PetscErrorCode MatConjugate_SeqBAIJ(Mat A)
2442: {
2443: #if defined(PETSC_USE_COMPLEX)
2444: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2445: PetscInt i,nz = a->bs2*a->i[a->mbs];
2446: MatScalar *aa = a->a;
2448: for (i=0; i<nz; i++) aa[i] = PetscConj(aa[i]);
2449: #else
2450: #endif
2451: return 0;
2452: }
2454: PetscErrorCode MatRealPart_SeqBAIJ(Mat A)
2455: {
2456: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2457: PetscInt i,nz = a->bs2*a->i[a->mbs];
2458: MatScalar *aa = a->a;
2460: for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]);
2461: return 0;
2462: }
2464: PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A)
2465: {
2466: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2467: PetscInt i,nz = a->bs2*a->i[a->mbs];
2468: MatScalar *aa = a->a;
2470: for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]);
2471: return 0;
2472: }
2474: /*
2475: Code almost identical to MatGetColumnIJ_SeqAIJ() should share common code
2476: */
2477: PetscErrorCode MatGetColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool *done)
2478: {
2479: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2480: PetscInt bs = A->rmap->bs,i,*collengths,*cia,*cja,n = A->cmap->n/bs,m = A->rmap->n/bs;
2481: PetscInt nz = a->i[m],row,*jj,mr,col;
2483: *nn = n;
2484: if (!ia) return 0;
2486: else {
2487: PetscCalloc1(n,&collengths);
2488: PetscMalloc1(n+1,&cia);
2489: PetscMalloc1(nz,&cja);
2490: jj = a->j;
2491: for (i=0; i<nz; i++) {
2492: collengths[jj[i]]++;
2493: }
2494: cia[0] = oshift;
2495: for (i=0; i<n; i++) {
2496: cia[i+1] = cia[i] + collengths[i];
2497: }
2498: PetscArrayzero(collengths,n);
2499: jj = a->j;
2500: for (row=0; row<m; row++) {
2501: mr = a->i[row+1] - a->i[row];
2502: for (i=0; i<mr; i++) {
2503: col = *jj++;
2505: cja[cia[col] + collengths[col]++ - oshift] = row + oshift;
2506: }
2507: }
2508: PetscFree(collengths);
2509: *ia = cia; *ja = cja;
2510: }
2511: return 0;
2512: }
2514: PetscErrorCode MatRestoreColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscBool *done)
2515: {
2516: if (!ia) return 0;
2517: PetscFree(*ia);
2518: PetscFree(*ja);
2519: return 0;
2520: }
2522: /*
2523: MatGetColumnIJ_SeqBAIJ_Color() and MatRestoreColumnIJ_SeqBAIJ_Color() are customized from
2524: MatGetColumnIJ_SeqBAIJ() and MatRestoreColumnIJ_SeqBAIJ() by adding an output
2525: spidx[], index of a->a, to be used in MatTransposeColoringCreate() and MatFDColoringCreate()
2526: */
2527: PetscErrorCode MatGetColumnIJ_SeqBAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscInt *spidx[],PetscBool *done)
2528: {
2529: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2530: PetscInt i,*collengths,*cia,*cja,n=a->nbs,m=a->mbs;
2531: PetscInt nz = a->i[m],row,*jj,mr,col;
2532: PetscInt *cspidx;
2534: *nn = n;
2535: if (!ia) return 0;
2537: PetscCalloc1(n,&collengths);
2538: PetscMalloc1(n+1,&cia);
2539: PetscMalloc1(nz,&cja);
2540: PetscMalloc1(nz,&cspidx);
2541: jj = a->j;
2542: for (i=0; i<nz; i++) {
2543: collengths[jj[i]]++;
2544: }
2545: cia[0] = oshift;
2546: for (i=0; i<n; i++) {
2547: cia[i+1] = cia[i] + collengths[i];
2548: }
2549: PetscArrayzero(collengths,n);
2550: jj = a->j;
2551: for (row=0; row<m; row++) {
2552: mr = a->i[row+1] - a->i[row];
2553: for (i=0; i<mr; i++) {
2554: col = *jj++;
2555: cspidx[cia[col] + collengths[col] - oshift] = a->i[row] + i; /* index of a->j */
2556: cja[cia[col] + collengths[col]++ - oshift] = row + oshift;
2557: }
2558: }
2559: PetscFree(collengths);
2560: *ia = cia;
2561: *ja = cja;
2562: *spidx = cspidx;
2563: return 0;
2564: }
2566: PetscErrorCode MatRestoreColumnIJ_SeqBAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscInt *spidx[],PetscBool *done)
2567: {
2568: MatRestoreColumnIJ_SeqBAIJ(A,oshift,symmetric,inodecompressed,n,ia,ja,done);
2569: PetscFree(*spidx);
2570: return 0;
2571: }
2573: PetscErrorCode MatShift_SeqBAIJ(Mat Y,PetscScalar a)
2574: {
2575: Mat_SeqBAIJ *aij = (Mat_SeqBAIJ*)Y->data;
2577: if (!Y->preallocated || !aij->nz) {
2578: MatSeqBAIJSetPreallocation(Y,Y->rmap->bs,1,NULL);
2579: }
2580: MatShift_Basic(Y,a);
2581: return 0;
2582: }
2584: /* -------------------------------------------------------------------*/
2585: static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ,
2586: MatGetRow_SeqBAIJ,
2587: MatRestoreRow_SeqBAIJ,
2588: MatMult_SeqBAIJ_N,
2589: /* 4*/ MatMultAdd_SeqBAIJ_N,
2590: MatMultTranspose_SeqBAIJ,
2591: MatMultTransposeAdd_SeqBAIJ,
2592: NULL,
2593: NULL,
2594: NULL,
2595: /* 10*/ NULL,
2596: MatLUFactor_SeqBAIJ,
2597: NULL,
2598: NULL,
2599: MatTranspose_SeqBAIJ,
2600: /* 15*/ MatGetInfo_SeqBAIJ,
2601: MatEqual_SeqBAIJ,
2602: MatGetDiagonal_SeqBAIJ,
2603: MatDiagonalScale_SeqBAIJ,
2604: MatNorm_SeqBAIJ,
2605: /* 20*/ NULL,
2606: MatAssemblyEnd_SeqBAIJ,
2607: MatSetOption_SeqBAIJ,
2608: MatZeroEntries_SeqBAIJ,
2609: /* 24*/ MatZeroRows_SeqBAIJ,
2610: NULL,
2611: NULL,
2612: NULL,
2613: NULL,
2614: /* 29*/ MatSetUp_SeqBAIJ,
2615: NULL,
2616: NULL,
2617: NULL,
2618: NULL,
2619: /* 34*/ MatDuplicate_SeqBAIJ,
2620: NULL,
2621: NULL,
2622: MatILUFactor_SeqBAIJ,
2623: NULL,
2624: /* 39*/ MatAXPY_SeqBAIJ,
2625: MatCreateSubMatrices_SeqBAIJ,
2626: MatIncreaseOverlap_SeqBAIJ,
2627: MatGetValues_SeqBAIJ,
2628: MatCopy_SeqBAIJ,
2629: /* 44*/ NULL,
2630: MatScale_SeqBAIJ,
2631: MatShift_SeqBAIJ,
2632: NULL,
2633: MatZeroRowsColumns_SeqBAIJ,
2634: /* 49*/ NULL,
2635: MatGetRowIJ_SeqBAIJ,
2636: MatRestoreRowIJ_SeqBAIJ,
2637: MatGetColumnIJ_SeqBAIJ,
2638: MatRestoreColumnIJ_SeqBAIJ,
2639: /* 54*/ MatFDColoringCreate_SeqXAIJ,
2640: NULL,
2641: NULL,
2642: NULL,
2643: MatSetValuesBlocked_SeqBAIJ,
2644: /* 59*/ MatCreateSubMatrix_SeqBAIJ,
2645: MatDestroy_SeqBAIJ,
2646: MatView_SeqBAIJ,
2647: NULL,
2648: NULL,
2649: /* 64*/ NULL,
2650: NULL,
2651: NULL,
2652: NULL,
2653: NULL,
2654: /* 69*/ MatGetRowMaxAbs_SeqBAIJ,
2655: NULL,
2656: MatConvert_Basic,
2657: NULL,
2658: NULL,
2659: /* 74*/ NULL,
2660: MatFDColoringApply_BAIJ,
2661: NULL,
2662: NULL,
2663: NULL,
2664: /* 79*/ NULL,
2665: NULL,
2666: NULL,
2667: NULL,
2668: MatLoad_SeqBAIJ,
2669: /* 84*/ NULL,
2670: NULL,
2671: NULL,
2672: NULL,
2673: NULL,
2674: /* 89*/ NULL,
2675: NULL,
2676: NULL,
2677: NULL,
2678: NULL,
2679: /* 94*/ NULL,
2680: NULL,
2681: NULL,
2682: NULL,
2683: NULL,
2684: /* 99*/ NULL,
2685: NULL,
2686: NULL,
2687: MatConjugate_SeqBAIJ,
2688: NULL,
2689: /*104*/ NULL,
2690: MatRealPart_SeqBAIJ,
2691: MatImaginaryPart_SeqBAIJ,
2692: NULL,
2693: NULL,
2694: /*109*/ NULL,
2695: NULL,
2696: NULL,
2697: NULL,
2698: MatMissingDiagonal_SeqBAIJ,
2699: /*114*/ NULL,
2700: NULL,
2701: NULL,
2702: NULL,
2703: NULL,
2704: /*119*/ NULL,
2705: NULL,
2706: MatMultHermitianTranspose_SeqBAIJ,
2707: MatMultHermitianTransposeAdd_SeqBAIJ,
2708: NULL,
2709: /*124*/ NULL,
2710: MatGetColumnReductions_SeqBAIJ,
2711: MatInvertBlockDiagonal_SeqBAIJ,
2712: NULL,
2713: NULL,
2714: /*129*/ NULL,
2715: NULL,
2716: NULL,
2717: NULL,
2718: NULL,
2719: /*134*/ NULL,
2720: NULL,
2721: NULL,
2722: NULL,
2723: NULL,
2724: /*139*/ MatSetBlockSizes_Default,
2725: NULL,
2726: NULL,
2727: MatFDColoringSetUp_SeqXAIJ,
2728: NULL,
2729: /*144*/MatCreateMPIMatConcatenateSeqMat_SeqBAIJ,
2730: MatDestroySubMatrices_SeqBAIJ,
2731: NULL,
2732: NULL
2733: };
2735: PetscErrorCode MatStoreValues_SeqBAIJ(Mat mat)
2736: {
2737: Mat_SeqBAIJ *aij = (Mat_SeqBAIJ*)mat->data;
2738: PetscInt nz = aij->i[aij->mbs]*aij->bs2;
2742: /* allocate space for values if not already there */
2743: if (!aij->saved_values) {
2744: PetscMalloc1(nz+1,&aij->saved_values);
2745: PetscLogObjectMemory((PetscObject)mat,(nz+1)*sizeof(PetscScalar));
2746: }
2748: /* copy values over */
2749: PetscArraycpy(aij->saved_values,aij->a,nz);
2750: return 0;
2751: }
2753: PetscErrorCode MatRetrieveValues_SeqBAIJ(Mat mat)
2754: {
2755: Mat_SeqBAIJ *aij = (Mat_SeqBAIJ*)mat->data;
2756: PetscInt nz = aij->i[aij->mbs]*aij->bs2;
2761: /* copy values over */
2762: PetscArraycpy(aij->a,aij->saved_values,nz);
2763: return 0;
2764: }
2766: PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*);
2767: PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*);
2769: PetscErrorCode MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz)
2770: {
2771: Mat_SeqBAIJ *b;
2773: PetscInt i,mbs,nbs,bs2;
2774: PetscBool flg = PETSC_FALSE,skipallocation = PETSC_FALSE,realalloc = PETSC_FALSE;
2776: if (nz >= 0 || nnz) realalloc = PETSC_TRUE;
2777: if (nz == MAT_SKIP_ALLOCATION) {
2778: skipallocation = PETSC_TRUE;
2779: nz = 0;
2780: }
2782: MatSetBlockSize(B,PetscAbs(bs));
2783: PetscLayoutSetUp(B->rmap);
2784: PetscLayoutSetUp(B->cmap);
2785: PetscLayoutGetBlockSize(B->rmap,&bs);
2787: B->preallocated = PETSC_TRUE;
2789: mbs = B->rmap->n/bs;
2790: nbs = B->cmap->n/bs;
2791: bs2 = bs*bs;
2795: if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2797: if (nnz) {
2798: for (i=0; i<mbs; i++) {
2801: }
2802: }
2804: b = (Mat_SeqBAIJ*)B->data;
2805: PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat");
2806: PetscOptionsBool("-mat_no_unroll","Do not optimize for block size (slow)",NULL,flg,&flg,NULL);
2807: PetscOptionsEnd();
2809: if (!flg) {
2810: switch (bs) {
2811: case 1:
2812: B->ops->mult = MatMult_SeqBAIJ_1;
2813: B->ops->multadd = MatMultAdd_SeqBAIJ_1;
2814: break;
2815: case 2:
2816: B->ops->mult = MatMult_SeqBAIJ_2;
2817: B->ops->multadd = MatMultAdd_SeqBAIJ_2;
2818: break;
2819: case 3:
2820: B->ops->mult = MatMult_SeqBAIJ_3;
2821: B->ops->multadd = MatMultAdd_SeqBAIJ_3;
2822: break;
2823: case 4:
2824: B->ops->mult = MatMult_SeqBAIJ_4;
2825: B->ops->multadd = MatMultAdd_SeqBAIJ_4;
2826: break;
2827: case 5:
2828: B->ops->mult = MatMult_SeqBAIJ_5;
2829: B->ops->multadd = MatMultAdd_SeqBAIJ_5;
2830: break;
2831: case 6:
2832: B->ops->mult = MatMult_SeqBAIJ_6;
2833: B->ops->multadd = MatMultAdd_SeqBAIJ_6;
2834: break;
2835: case 7:
2836: B->ops->mult = MatMult_SeqBAIJ_7;
2837: B->ops->multadd = MatMultAdd_SeqBAIJ_7;
2838: break;
2839: case 9:
2840: {
2841: PetscInt version = 1;
2842: PetscOptionsGetInt(NULL,((PetscObject)B)->prefix,"-mat_baij_mult_version",&version,NULL);
2843: switch (version) {
2844: #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX2__) && defined(__FMA__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES)
2845: case 1:
2846: B->ops->mult = MatMult_SeqBAIJ_9_AVX2;
2847: B->ops->multadd = MatMultAdd_SeqBAIJ_9_AVX2;
2848: PetscInfo((PetscObject)B,"Using AVX2 for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n",bs);
2849: break;
2850: #endif
2851: default:
2852: B->ops->mult = MatMult_SeqBAIJ_N;
2853: B->ops->multadd = MatMultAdd_SeqBAIJ_N;
2854: PetscInfo((PetscObject)B,"Using BLAS for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n",bs);
2855: break;
2856: }
2857: break;
2858: }
2859: case 11:
2860: B->ops->mult = MatMult_SeqBAIJ_11;
2861: B->ops->multadd = MatMultAdd_SeqBAIJ_11;
2862: break;
2863: case 12:
2864: {
2865: PetscInt version = 1;
2866: PetscOptionsGetInt(NULL,((PetscObject)B)->prefix,"-mat_baij_mult_version",&version,NULL);
2867: switch (version) {
2868: case 1:
2869: B->ops->mult = MatMult_SeqBAIJ_12_ver1;
2870: B->ops->multadd = MatMultAdd_SeqBAIJ_12_ver1;
2871: PetscInfo((PetscObject)B,"Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n",version,bs);
2872: break;
2873: case 2:
2874: B->ops->mult = MatMult_SeqBAIJ_12_ver2;
2875: B->ops->multadd = MatMultAdd_SeqBAIJ_12_ver2;
2876: PetscInfo((PetscObject)B,"Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n",version,bs);
2877: break;
2878: #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX2__) && defined(__FMA__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES)
2879: case 3:
2880: B->ops->mult = MatMult_SeqBAIJ_12_AVX2;
2881: B->ops->multadd = MatMultAdd_SeqBAIJ_12_ver1;
2882: PetscInfo((PetscObject)B,"Using AVX2 for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n",bs);
2883: break;
2884: #endif
2885: default:
2886: B->ops->mult = MatMult_SeqBAIJ_N;
2887: B->ops->multadd = MatMultAdd_SeqBAIJ_N;
2888: PetscInfo((PetscObject)B,"Using BLAS for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n",bs);
2889: break;
2890: }
2891: break;
2892: }
2893: case 15:
2894: {
2895: PetscInt version = 1;
2896: PetscOptionsGetInt(NULL,((PetscObject)B)->prefix,"-mat_baij_mult_version",&version,NULL);
2897: switch (version) {
2898: case 1:
2899: B->ops->mult = MatMult_SeqBAIJ_15_ver1;
2900: PetscInfo((PetscObject)B,"Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n",version,bs);
2901: break;
2902: case 2:
2903: B->ops->mult = MatMult_SeqBAIJ_15_ver2;
2904: PetscInfo((PetscObject)B,"Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n",version,bs);
2905: break;
2906: case 3:
2907: B->ops->mult = MatMult_SeqBAIJ_15_ver3;
2908: PetscInfo((PetscObject)B,"Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n",version,bs);
2909: break;
2910: case 4:
2911: B->ops->mult = MatMult_SeqBAIJ_15_ver4;
2912: PetscInfo((PetscObject)B,"Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n",version,bs);
2913: break;
2914: default:
2915: B->ops->mult = MatMult_SeqBAIJ_N;
2916: PetscInfo((PetscObject)B,"Using BLAS for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n",bs);
2917: break;
2918: }
2919: B->ops->multadd = MatMultAdd_SeqBAIJ_N;
2920: break;
2921: }
2922: default:
2923: B->ops->mult = MatMult_SeqBAIJ_N;
2924: B->ops->multadd = MatMultAdd_SeqBAIJ_N;
2925: PetscInfo((PetscObject)B,"Using BLAS for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n",bs);
2926: break;
2927: }
2928: }
2929: B->ops->sor = MatSOR_SeqBAIJ;
2930: b->mbs = mbs;
2931: b->nbs = nbs;
2932: if (!skipallocation) {
2933: if (!b->imax) {
2934: PetscMalloc2(mbs,&b->imax,mbs,&b->ilen);
2935: PetscLogObjectMemory((PetscObject)B,2*mbs*sizeof(PetscInt));
2937: b->free_imax_ilen = PETSC_TRUE;
2938: }
2939: /* b->ilen will count nonzeros in each block row so far. */
2940: for (i=0; i<mbs; i++) b->ilen[i] = 0;
2941: if (!nnz) {
2942: if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2943: else if (nz < 0) nz = 1;
2944: nz = PetscMin(nz,nbs);
2945: for (i=0; i<mbs; i++) b->imax[i] = nz;
2946: PetscIntMultError(nz,mbs,&nz);
2947: } else {
2948: PetscInt64 nz64 = 0;
2949: for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz64 += nnz[i];}
2950: PetscIntCast(nz64,&nz);
2951: }
2953: /* allocate the matrix space */
2954: MatSeqXAIJFreeAIJ(B,&b->a,&b->j,&b->i);
2955: if (B->structure_only) {
2956: PetscMalloc1(nz,&b->j);
2957: PetscMalloc1(B->rmap->N+1,&b->i);
2958: PetscLogObjectMemory((PetscObject)B,(B->rmap->N+1)*sizeof(PetscInt)+nz*sizeof(PetscInt));
2959: } else {
2960: PetscInt nzbs2 = 0;
2961: PetscIntMultError(nz,bs2,&nzbs2);
2962: PetscMalloc3(nzbs2,&b->a,nz,&b->j,B->rmap->N+1,&b->i);
2963: PetscLogObjectMemory((PetscObject)B,(B->rmap->N+1)*sizeof(PetscInt)+nz*(bs2*sizeof(PetscScalar)+sizeof(PetscInt)));
2964: PetscArrayzero(b->a,nz*bs2);
2965: }
2966: PetscArrayzero(b->j,nz);
2968: if (B->structure_only) {
2969: b->singlemalloc = PETSC_FALSE;
2970: b->free_a = PETSC_FALSE;
2971: } else {
2972: b->singlemalloc = PETSC_TRUE;
2973: b->free_a = PETSC_TRUE;
2974: }
2975: b->free_ij = PETSC_TRUE;
2977: b->i[0] = 0;
2978: for (i=1; i<mbs+1; i++) {
2979: b->i[i] = b->i[i-1] + b->imax[i-1];
2980: }
2982: } else {
2983: b->free_a = PETSC_FALSE;
2984: b->free_ij = PETSC_FALSE;
2985: }
2987: b->bs2 = bs2;
2988: b->mbs = mbs;
2989: b->nz = 0;
2990: b->maxnz = nz;
2991: B->info.nz_unneeded = (PetscReal)b->maxnz*bs2;
2992: B->was_assembled = PETSC_FALSE;
2993: B->assembled = PETSC_FALSE;
2994: if (realalloc) MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);
2995: return 0;
2996: }
2998: PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
2999: {
3000: PetscInt i,m,nz,nz_max=0,*nnz;
3001: PetscScalar *values=NULL;
3002: PetscBool roworiented = ((Mat_SeqBAIJ*)B->data)->roworiented;
3005: PetscLayoutSetBlockSize(B->rmap,bs);
3006: PetscLayoutSetBlockSize(B->cmap,bs);
3007: PetscLayoutSetUp(B->rmap);
3008: PetscLayoutSetUp(B->cmap);
3009: PetscLayoutGetBlockSize(B->rmap,&bs);
3010: m = B->rmap->n/bs;
3013: PetscMalloc1(m+1, &nnz);
3014: for (i=0; i<m; i++) {
3015: nz = ii[i+1]- ii[i];
3017: nz_max = PetscMax(nz_max, nz);
3018: nnz[i] = nz;
3019: }
3020: MatSeqBAIJSetPreallocation(B,bs,0,nnz);
3021: PetscFree(nnz);
3023: values = (PetscScalar*)V;
3024: if (!values) {
3025: PetscCalloc1(bs*bs*(nz_max+1),&values);
3026: }
3027: for (i=0; i<m; i++) {
3028: PetscInt ncols = ii[i+1] - ii[i];
3029: const PetscInt *icols = jj + ii[i];
3030: if (bs == 1 || !roworiented) {
3031: const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
3032: MatSetValuesBlocked_SeqBAIJ(B,1,&i,ncols,icols,svals,INSERT_VALUES);
3033: } else {
3034: PetscInt j;
3035: for (j=0; j<ncols; j++) {
3036: const PetscScalar *svals = values + (V ? (bs*bs*(ii[i]+j)) : 0);
3037: MatSetValuesBlocked_SeqBAIJ(B,1,&i,1,&icols[j],svals,INSERT_VALUES);
3038: }
3039: }
3040: }
3041: if (!V) PetscFree(values);
3042: MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
3043: MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
3044: MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);
3045: return 0;
3046: }
3048: /*@C
3049: MatSeqBAIJGetArray - gives access to the array where the data for a MATSEQBAIJ matrix is stored
3051: Not Collective
3053: Input Parameter:
3054: . mat - a MATSEQBAIJ matrix
3056: Output Parameter:
3057: . array - pointer to the data
3059: Level: intermediate
3061: .seealso: MatSeqBAIJRestoreArray(), MatSeqAIJGetArray(), MatSeqAIJRestoreArray()
3062: @*/
3063: PetscErrorCode MatSeqBAIJGetArray(Mat A,PetscScalar **array)
3064: {
3065: PetscUseMethod(A,"MatSeqBAIJGetArray_C",(Mat,PetscScalar**),(A,array));
3066: return 0;
3067: }
3069: /*@C
3070: MatSeqBAIJRestoreArray - returns access to the array where the data for a MATSEQBAIJ matrix is stored obtained by MatSeqBAIJGetArray()
3072: Not Collective
3074: Input Parameters:
3075: + mat - a MATSEQBAIJ matrix
3076: - array - pointer to the data
3078: Level: intermediate
3080: .seealso: MatSeqBAIJGetArray(), MatSeqAIJGetArray(), MatSeqAIJRestoreArray()
3081: @*/
3082: PetscErrorCode MatSeqBAIJRestoreArray(Mat A,PetscScalar **array)
3083: {
3084: PetscUseMethod(A,"MatSeqBAIJRestoreArray_C",(Mat,PetscScalar**),(A,array));
3085: return 0;
3086: }
3088: /*MC
3089: MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on
3090: block sparse compressed row format.
3092: Options Database Keys:
3093: + -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions()
3094: - -mat_baij_mult_version version - indicate the version of the matrix-vector product to use (0 often indicates using BLAS)
3096: Level: beginner
3098: Notes:
3099: MatSetOptions(,MAT_STRUCTURE_ONLY,PETSC_TRUE) may be called for this matrix type. In this no
3100: space is allocated for the nonzero entries and any entries passed with MatSetValues() are ignored
3102: Run with -info to see what version of the matrix-vector product is being used
3104: .seealso: MatCreateSeqBAIJ()
3105: M*/
3107: PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBSTRM(Mat, MatType,MatReuse,Mat*);
3109: PETSC_EXTERN PetscErrorCode MatCreate_SeqBAIJ(Mat B)
3110: {
3111: PetscMPIInt size;
3112: Mat_SeqBAIJ *b;
3114: MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);
3117: PetscNewLog(B,&b);
3118: B->data = (void*)b;
3119: PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));
3121: b->row = NULL;
3122: b->col = NULL;
3123: b->icol = NULL;
3124: b->reallocs = 0;
3125: b->saved_values = NULL;
3127: b->roworiented = PETSC_TRUE;
3128: b->nonew = 0;
3129: b->diag = NULL;
3130: B->spptr = NULL;
3131: B->info.nz_unneeded = (PetscReal)b->maxnz*b->bs2;
3132: b->keepnonzeropattern = PETSC_FALSE;
3134: PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJGetArray_C",MatSeqBAIJGetArray_SeqBAIJ);
3135: PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJRestoreArray_C",MatSeqBAIJRestoreArray_SeqBAIJ);
3136: PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_SeqBAIJ);
3137: PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_SeqBAIJ);
3138: PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",MatSeqBAIJSetColumnIndices_SeqBAIJ);
3139: PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqaij_C",MatConvert_SeqBAIJ_SeqAIJ);
3140: PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C",MatConvert_SeqBAIJ_SeqSBAIJ);
3141: PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",MatSeqBAIJSetPreallocation_SeqBAIJ);
3142: PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C",MatSeqBAIJSetPreallocationCSR_SeqBAIJ);
3143: PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_SeqBAIJ);
3144: #if defined(PETSC_HAVE_HYPRE)
3145: PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_hypre_C",MatConvert_AIJ_HYPRE);
3146: #endif
3147: PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_is_C",MatConvert_XAIJ_IS);
3148: PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);
3149: return 0;
3150: }
3152: PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C,Mat A,MatDuplicateOption cpvalues,PetscBool mallocmatspace)
3153: {
3154: Mat_SeqBAIJ *c = (Mat_SeqBAIJ*)C->data,*a = (Mat_SeqBAIJ*)A->data;
3155: PetscInt i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2;
3159: if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3160: c->imax = a->imax;
3161: c->ilen = a->ilen;
3162: c->free_imax_ilen = PETSC_FALSE;
3163: } else {
3164: PetscMalloc2(mbs,&c->imax,mbs,&c->ilen);
3165: PetscLogObjectMemory((PetscObject)C,2*mbs*sizeof(PetscInt));
3166: for (i=0; i<mbs; i++) {
3167: c->imax[i] = a->imax[i];
3168: c->ilen[i] = a->ilen[i];
3169: }
3170: c->free_imax_ilen = PETSC_TRUE;
3171: }
3173: /* allocate the matrix space */
3174: if (mallocmatspace) {
3175: if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3176: PetscCalloc1(bs2*nz,&c->a);
3177: PetscLogObjectMemory((PetscObject)C,a->i[mbs]*bs2*sizeof(PetscScalar));
3179: c->i = a->i;
3180: c->j = a->j;
3181: c->singlemalloc = PETSC_FALSE;
3182: c->free_a = PETSC_TRUE;
3183: c->free_ij = PETSC_FALSE;
3184: c->parent = A;
3185: C->preallocated = PETSC_TRUE;
3186: C->assembled = PETSC_TRUE;
3188: PetscObjectReference((PetscObject)A);
3189: MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);
3190: MatSetOption(C,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);
3191: } else {
3192: PetscMalloc3(bs2*nz,&c->a,nz,&c->j,mbs+1,&c->i);
3193: PetscLogObjectMemory((PetscObject)C,a->i[mbs]*(bs2*sizeof(PetscScalar)+sizeof(PetscInt))+(mbs+1)*sizeof(PetscInt));
3195: c->singlemalloc = PETSC_TRUE;
3196: c->free_a = PETSC_TRUE;
3197: c->free_ij = PETSC_TRUE;
3199: PetscArraycpy(c->i,a->i,mbs+1);
3200: if (mbs > 0) {
3201: PetscArraycpy(c->j,a->j,nz);
3202: if (cpvalues == MAT_COPY_VALUES) {
3203: PetscArraycpy(c->a,a->a,bs2*nz);
3204: } else {
3205: PetscArrayzero(c->a,bs2*nz);
3206: }
3207: }
3208: C->preallocated = PETSC_TRUE;
3209: C->assembled = PETSC_TRUE;
3210: }
3211: }
3213: c->roworiented = a->roworiented;
3214: c->nonew = a->nonew;
3216: PetscLayoutReference(A->rmap,&C->rmap);
3217: PetscLayoutReference(A->cmap,&C->cmap);
3219: c->bs2 = a->bs2;
3220: c->mbs = a->mbs;
3221: c->nbs = a->nbs;
3223: if (a->diag) {
3224: if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3225: c->diag = a->diag;
3226: c->free_diag = PETSC_FALSE;
3227: } else {
3228: PetscMalloc1(mbs+1,&c->diag);
3229: PetscLogObjectMemory((PetscObject)C,(mbs+1)*sizeof(PetscInt));
3230: for (i=0; i<mbs; i++) c->diag[i] = a->diag[i];
3231: c->free_diag = PETSC_TRUE;
3232: }
3233: } else c->diag = NULL;
3235: c->nz = a->nz;
3236: c->maxnz = a->nz; /* Since we allocate exactly the right amount */
3237: c->solve_work = NULL;
3238: c->mult_work = NULL;
3239: c->sor_workt = NULL;
3240: c->sor_work = NULL;
3242: c->compressedrow.use = a->compressedrow.use;
3243: c->compressedrow.nrows = a->compressedrow.nrows;
3244: if (a->compressedrow.use) {
3245: i = a->compressedrow.nrows;
3246: PetscMalloc2(i+1,&c->compressedrow.i,i+1,&c->compressedrow.rindex);
3247: PetscLogObjectMemory((PetscObject)C,(2*i+1)*sizeof(PetscInt));
3248: PetscArraycpy(c->compressedrow.i,a->compressedrow.i,i+1);
3249: PetscArraycpy(c->compressedrow.rindex,a->compressedrow.rindex,i);
3250: } else {
3251: c->compressedrow.use = PETSC_FALSE;
3252: c->compressedrow.i = NULL;
3253: c->compressedrow.rindex = NULL;
3254: }
3255: C->nonzerostate = A->nonzerostate;
3257: PetscFunctionListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);
3258: return 0;
3259: }
3261: PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
3262: {
3263: MatCreate(PetscObjectComm((PetscObject)A),B);
3264: MatSetSizes(*B,A->rmap->N,A->cmap->n,A->rmap->N,A->cmap->n);
3265: MatSetType(*B,MATSEQBAIJ);
3266: MatDuplicateNoCreate_SeqBAIJ(*B,A,cpvalues,PETSC_TRUE);
3267: return 0;
3268: }
3270: /* Used for both SeqBAIJ and SeqSBAIJ matrices */
3271: PetscErrorCode MatLoad_SeqBAIJ_Binary(Mat mat,PetscViewer viewer)
3272: {
3273: PetscInt header[4],M,N,nz,bs,m,n,mbs,nbs,rows,cols,sum,i,j,k;
3274: PetscInt *rowidxs,*colidxs;
3275: PetscScalar *matvals;
3277: PetscViewerSetUp(viewer);
3279: /* read matrix header */
3280: PetscViewerBinaryRead(viewer,header,4,NULL,PETSC_INT);
3282: M = header[1]; N = header[2]; nz = header[3];
3287: /* set block sizes from the viewer's .info file */
3288: MatLoad_Binary_BlockSizes(mat,viewer);
3289: /* set local and global sizes if not set already */
3290: if (mat->rmap->n < 0) mat->rmap->n = M;
3291: if (mat->cmap->n < 0) mat->cmap->n = N;
3292: if (mat->rmap->N < 0) mat->rmap->N = M;
3293: if (mat->cmap->N < 0) mat->cmap->N = N;
3294: PetscLayoutSetUp(mat->rmap);
3295: PetscLayoutSetUp(mat->cmap);
3297: /* check if the matrix sizes are correct */
3298: MatGetSize(mat,&rows,&cols);
3300: MatGetBlockSize(mat,&bs);
3301: MatGetLocalSize(mat,&m,&n);
3302: mbs = m/bs; nbs = n/bs;
3304: /* read in row lengths, column indices and nonzero values */
3305: PetscMalloc1(m+1,&rowidxs);
3306: PetscViewerBinaryRead(viewer,rowidxs+1,m,NULL,PETSC_INT);
3307: rowidxs[0] = 0; for (i=0; i<m; i++) rowidxs[i+1] += rowidxs[i];
3308: sum = rowidxs[m];
3311: /* read in column indices and nonzero values */
3312: PetscMalloc2(rowidxs[m],&colidxs,nz,&matvals);
3313: PetscViewerBinaryRead(viewer,colidxs,rowidxs[m],NULL,PETSC_INT);
3314: PetscViewerBinaryRead(viewer,matvals,rowidxs[m],NULL,PETSC_SCALAR);
3316: { /* preallocate matrix storage */
3317: PetscBT bt; /* helper bit set to count nonzeros */
3318: PetscInt *nnz;
3319: PetscBool sbaij;
3321: PetscBTCreate(nbs,&bt);
3322: PetscCalloc1(mbs,&nnz);
3323: PetscObjectTypeCompare((PetscObject)mat,MATSEQSBAIJ,&sbaij);
3324: for (i=0; i<mbs; i++) {
3325: PetscBTMemzero(nbs,bt);
3326: for (k=0; k<bs; k++) {
3327: PetscInt row = bs*i + k;
3328: for (j=rowidxs[row]; j<rowidxs[row+1]; j++) {
3329: PetscInt col = colidxs[j];
3330: if (!sbaij || col >= row)
3331: if (!PetscBTLookupSet(bt,col/bs)) nnz[i]++;
3332: }
3333: }
3334: }
3335: PetscBTDestroy(&bt);
3336: MatSeqBAIJSetPreallocation(mat,bs,0,nnz);
3337: MatSeqSBAIJSetPreallocation(mat,bs,0,nnz);
3338: PetscFree(nnz);
3339: }
3341: /* store matrix values */
3342: for (i=0; i<m; i++) {
3343: PetscInt row = i, s = rowidxs[i], e = rowidxs[i+1];
3344: (*mat->ops->setvalues)(mat,1,&row,e-s,colidxs+s,matvals+s,INSERT_VALUES);
3345: }
3347: PetscFree(rowidxs);
3348: PetscFree2(colidxs,matvals);
3349: MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);
3350: MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);
3351: return 0;
3352: }
3354: PetscErrorCode MatLoad_SeqBAIJ(Mat mat,PetscViewer viewer)
3355: {
3356: PetscBool isbinary;
3358: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);
3360: MatLoad_SeqBAIJ_Binary(mat,viewer);
3361: return 0;
3362: }
3364: /*@C
3365: MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
3366: compressed row) format. For good matrix assembly performance the
3367: user should preallocate the matrix storage by setting the parameter nz
3368: (or the array nnz). By setting these parameters accurately, performance
3369: during matrix assembly can be increased by more than a factor of 50.
3371: Collective
3373: Input Parameters:
3374: + comm - MPI communicator, set to PETSC_COMM_SELF
3375: . bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
3376: blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
3377: . m - number of rows
3378: . n - number of columns
3379: . nz - number of nonzero blocks per block row (same for all rows)
3380: - nnz - array containing the number of nonzero blocks in the various block rows
3381: (possibly different for each block row) or NULL
3383: Output Parameter:
3384: . A - the matrix
3386: It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3387: MatXXXXSetPreallocation() paradigm instead of this routine directly.
3388: [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3390: Options Database Keys:
3391: + -mat_no_unroll - uses code that does not unroll the loops in the
3392: block calculations (much slower)
3393: - -mat_block_size - size of the blocks to use
3395: Level: intermediate
3397: Notes:
3398: The number of rows and columns must be divisible by blocksize.
3400: If the nnz parameter is given then the nz parameter is ignored
3402: A nonzero block is any block that as 1 or more nonzeros in it
3404: The block AIJ format is fully compatible with standard Fortran 77
3405: storage. That is, the stored row and column indices can begin at
3406: either one (as in Fortran) or zero. See the users' manual for details.
3408: Specify the preallocated storage with either nz or nnz (not both).
3409: Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory
3410: allocation. See Users-Manual: ch_mat for details.
3411: matrices.
3413: .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ()
3414: @*/
3415: PetscErrorCode MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
3416: {
3417: MatCreate(comm,A);
3418: MatSetSizes(*A,m,n,m,n);
3419: MatSetType(*A,MATSEQBAIJ);
3420: MatSeqBAIJSetPreallocation(*A,bs,nz,(PetscInt*)nnz);
3421: return 0;
3422: }
3424: /*@C
3425: MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros
3426: per row in the matrix. For good matrix assembly performance the
3427: user should preallocate the matrix storage by setting the parameter nz
3428: (or the array nnz). By setting these parameters accurately, performance
3429: during matrix assembly can be increased by more than a factor of 50.
3431: Collective
3433: Input Parameters:
3434: + B - the matrix
3435: . bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
3436: blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
3437: . nz - number of block nonzeros per block row (same for all rows)
3438: - nnz - array containing the number of block nonzeros in the various block rows
3439: (possibly different for each block row) or NULL
3441: Options Database Keys:
3442: + -mat_no_unroll - uses code that does not unroll the loops in the
3443: block calculations (much slower)
3444: - -mat_block_size - size of the blocks to use
3446: Level: intermediate
3448: Notes:
3449: If the nnz parameter is given then the nz parameter is ignored
3451: You can call MatGetInfo() to get information on how effective the preallocation was;
3452: for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3453: You can also run with the option -info and look for messages with the string
3454: malloc in them to see if additional memory allocation was needed.
3456: The block AIJ format is fully compatible with standard Fortran 77
3457: storage. That is, the stored row and column indices can begin at
3458: either one (as in Fortran) or zero. See the users' manual for details.
3460: Specify the preallocated storage with either nz or nnz (not both).
3461: Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory
3462: allocation. See Users-Manual: ch_mat for details.
3464: .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ(), MatGetInfo()
3465: @*/
3466: PetscErrorCode MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[])
3467: {
3471: PetscTryMethod(B,"MatSeqBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[]),(B,bs,nz,nnz));
3472: return 0;
3473: }
3475: /*@C
3476: MatSeqBAIJSetPreallocationCSR - Creates a sparse parallel matrix in BAIJ format using the given nonzero structure and (optional) numerical values
3478: Collective
3480: Input Parameters:
3481: + B - the matrix
3482: . i - the indices into j for the start of each local row (starts with zero)
3483: . j - the column indices for each local row (starts with zero) these must be sorted for each row
3484: - v - optional values in the matrix
3486: Level: advanced
3488: Notes:
3489: The order of the entries in values is specified by the MatOption MAT_ROW_ORIENTED. For example, C programs
3490: may want to use the default MAT_ROW_ORIENTED=PETSC_TRUE and use an array v[nnz][bs][bs] where the second index is
3491: over rows within a block and the last index is over columns within a block row. Fortran programs will likely set
3492: MAT_ROW_ORIENTED=PETSC_FALSE and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a
3493: block column and the second index is over columns within a block.
3495: Though this routine has Preallocation() in the name it also sets the exact nonzero locations of the matrix entries and usually the numerical values as well
3497: .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatSeqBAIJSetPreallocation(), MATSEQBAIJ
3498: @*/
3499: PetscErrorCode MatSeqBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3500: {
3504: PetscTryMethod(B,"MatSeqBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));
3505: return 0;
3506: }
3508: /*@
3509: MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements provided by the user.
3511: Collective
3513: Input Parameters:
3514: + comm - must be an MPI communicator of size 1
3515: . bs - size of block
3516: . m - number of rows
3517: . n - number of columns
3518: . i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row block row of the matrix
3519: . j - column indices
3520: - a - matrix values
3522: Output Parameter:
3523: . mat - the matrix
3525: Level: advanced
3527: Notes:
3528: The i, j, and a arrays are not copied by this routine, the user must free these arrays
3529: once the matrix is destroyed
3531: You cannot set new nonzero locations into this matrix, that will generate an error.
3533: The i and j indices are 0 based
3535: When block size is greater than 1 the matrix values must be stored using the BAIJ storage format (see the BAIJ code to determine this).
3537: The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is
3538: the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first
3539: block, followed by the second column of the first block etc etc. That is, the blocks are contiguous in memory
3540: with column-major ordering within blocks.
3542: .seealso: MatCreate(), MatCreateBAIJ(), MatCreateSeqBAIJ()
3544: @*/
3545: PetscErrorCode MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt i[],PetscInt j[],PetscScalar a[],Mat *mat)
3546: {
3547: PetscInt ii;
3548: Mat_SeqBAIJ *baij;
3553: MatCreate(comm,mat);
3554: MatSetSizes(*mat,m,n,m,n);
3555: MatSetType(*mat,MATSEQBAIJ);
3556: MatSeqBAIJSetPreallocation(*mat,bs,MAT_SKIP_ALLOCATION,NULL);
3557: baij = (Mat_SeqBAIJ*)(*mat)->data;
3558: PetscMalloc2(m,&baij->imax,m,&baij->ilen);
3559: PetscLogObjectMemory((PetscObject)*mat,2*m*sizeof(PetscInt));
3561: baij->i = i;
3562: baij->j = j;
3563: baij->a = a;
3565: baij->singlemalloc = PETSC_FALSE;
3566: baij->nonew = -1; /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/
3567: baij->free_a = PETSC_FALSE;
3568: baij->free_ij = PETSC_FALSE;
3570: for (ii=0; ii<m; ii++) {
3571: baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii];
3573: }
3574: if (PetscDefined(USE_DEBUG)) {
3575: for (ii=0; ii<baij->i[m]; ii++) {
3578: }
3579: }
3581: MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);
3582: MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);
3583: return 0;
3584: }
3586: PetscErrorCode MatCreateMPIMatConcatenateSeqMat_SeqBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
3587: {
3588: PetscMPIInt size;
3590: MPI_Comm_size(comm,&size);
3591: if (size == 1 && scall == MAT_REUSE_MATRIX) {
3592: MatCopy(inmat,*outmat,SAME_NONZERO_PATTERN);
3593: } else {
3594: MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(comm,inmat,n,scall,outmat);
3595: }
3596: return 0;
3597: }