Actual source code: baij.c
2: /*
3: Defines the basic matrix operations for the BAIJ (compressed row)
4: matrix storage format.
5: */
6: #include <../src/mat/impls/baij/seq/baij.h>
7: #include <petscblaslapack.h>
8: #include <petsc/private/kernels/blockinvert.h>
9: #include <petsc/private/kernels/blockmatmult.h>
11: #if defined(PETSC_HAVE_HYPRE)
12: PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*);
13: #endif
15: #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
16: PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBAIJMKL(Mat,MatType,MatReuse,Mat*);
17: #endif
18: PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*);
20: PetscErrorCode MatInvertBlockDiagonal_SeqBAIJ(Mat A,const PetscScalar **values)
21: {
22: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*) A->data;
24: PetscInt *diag_offset,i,bs = A->rmap->bs,mbs = a->mbs,ipvt[5],bs2 = bs*bs,*v_pivots;
25: MatScalar *v = a->a,*odiag,*diag,work[25],*v_work;
26: PetscReal shift = 0.0;
27: PetscBool allowzeropivot,zeropivotdetected=PETSC_FALSE;
30: allowzeropivot = PetscNot(A->erroriffailure);
32: if (a->idiagvalid) {
33: if (values) *values = a->idiag;
34: return(0);
35: }
36: MatMarkDiagonal_SeqBAIJ(A);
37: diag_offset = a->diag;
38: if (!a->idiag) {
39: PetscMalloc1(bs2*mbs,&a->idiag);
40: PetscLogObjectMemory((PetscObject)A,bs2*mbs*sizeof(PetscScalar));
41: }
42: diag = a->idiag;
43: if (values) *values = a->idiag;
44: /* factor and invert each block */
45: switch (bs) {
46: case 1:
47: for (i=0; i<mbs; i++) {
48: odiag = v + 1*diag_offset[i];
49: diag[0] = odiag[0];
51: if (PetscAbsScalar(diag[0] + shift) < PETSC_MACHINE_EPSILON) {
52: if (allowzeropivot) {
53: A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
54: A->factorerror_zeropivot_value = PetscAbsScalar(diag[0]);
55: A->factorerror_zeropivot_row = i;
56: PetscInfo1(A,"Zero pivot, row %D\n",i);
57: } else SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot, row %D pivot value %g tolerance %g",i,(double)PetscAbsScalar(diag[0]),(double)PETSC_MACHINE_EPSILON);
58: }
60: diag[0] = (PetscScalar)1.0 / (diag[0] + shift);
61: diag += 1;
62: }
63: break;
64: case 2:
65: for (i=0; i<mbs; i++) {
66: odiag = v + 4*diag_offset[i];
67: diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
68: PetscKernel_A_gets_inverse_A_2(diag,shift,allowzeropivot,&zeropivotdetected);
69: if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
70: diag += 4;
71: }
72: break;
73: case 3:
74: for (i=0; i<mbs; i++) {
75: odiag = v + 9*diag_offset[i];
76: diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
77: diag[4] = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7];
78: diag[8] = odiag[8];
79: PetscKernel_A_gets_inverse_A_3(diag,shift,allowzeropivot,&zeropivotdetected);
80: if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
81: diag += 9;
82: }
83: break;
84: case 4:
85: for (i=0; i<mbs; i++) {
86: odiag = v + 16*diag_offset[i];
87: PetscArraycpy(diag,odiag,16);
88: PetscKernel_A_gets_inverse_A_4(diag,shift,allowzeropivot,&zeropivotdetected);
89: if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
90: diag += 16;
91: }
92: break;
93: case 5:
94: for (i=0; i<mbs; i++) {
95: odiag = v + 25*diag_offset[i];
96: PetscArraycpy(diag,odiag,25);
97: PetscKernel_A_gets_inverse_A_5(diag,ipvt,work,shift,allowzeropivot,&zeropivotdetected);
98: if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
99: diag += 25;
100: }
101: break;
102: case 6:
103: for (i=0; i<mbs; i++) {
104: odiag = v + 36*diag_offset[i];
105: PetscArraycpy(diag,odiag,36);
106: PetscKernel_A_gets_inverse_A_6(diag,shift,allowzeropivot,&zeropivotdetected);
107: if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
108: diag += 36;
109: }
110: break;
111: case 7:
112: for (i=0; i<mbs; i++) {
113: odiag = v + 49*diag_offset[i];
114: PetscArraycpy(diag,odiag,49);
115: PetscKernel_A_gets_inverse_A_7(diag,shift,allowzeropivot,&zeropivotdetected);
116: if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
117: diag += 49;
118: }
119: break;
120: default:
121: PetscMalloc2(bs,&v_work,bs,&v_pivots);
122: for (i=0; i<mbs; i++) {
123: odiag = v + bs2*diag_offset[i];
124: PetscArraycpy(diag,odiag,bs2);
125: PetscKernel_A_gets_inverse_A(bs,diag,v_pivots,v_work,allowzeropivot,&zeropivotdetected);
126: if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
127: diag += bs2;
128: }
129: PetscFree2(v_work,v_pivots);
130: }
131: a->idiagvalid = PETSC_TRUE;
132: return(0);
133: }
135: PetscErrorCode MatSOR_SeqBAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
136: {
137: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
138: PetscScalar *x,*work,*w,*workt,*t;
139: const MatScalar *v,*aa = a->a, *idiag;
140: const PetscScalar *b,*xb;
141: PetscScalar s[7], xw[7]={0}; /* avoid some compilers thinking xw is uninitialized */
142: PetscErrorCode ierr;
143: PetscInt m = a->mbs,i,i2,nz,bs = A->rmap->bs,bs2 = bs*bs,k,j,idx,it;
144: const PetscInt *diag,*ai = a->i,*aj = a->j,*vi;
147: its = its*lits;
148: if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
149: if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
150: if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for diagonal shift");
151: if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for non-trivial relaxation factor");
152: if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for applying upper or lower triangular parts");
154: if (!a->idiagvalid) {MatInvertBlockDiagonal(A,NULL);}
156: if (!m) return(0);
157: diag = a->diag;
158: idiag = a->idiag;
159: k = PetscMax(A->rmap->n,A->cmap->n);
160: if (!a->mult_work) {
161: PetscMalloc1(k+1,&a->mult_work);
162: }
163: if (!a->sor_workt) {
164: PetscMalloc1(k,&a->sor_workt);
165: }
166: if (!a->sor_work) {
167: PetscMalloc1(bs,&a->sor_work);
168: }
169: work = a->mult_work;
170: t = a->sor_workt;
171: w = a->sor_work;
173: VecGetArray(xx,&x);
174: VecGetArrayRead(bb,&b);
176: if (flag & SOR_ZERO_INITIAL_GUESS) {
177: if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
178: switch (bs) {
179: case 1:
180: PetscKernel_v_gets_A_times_w_1(x,idiag,b);
181: t[0] = b[0];
182: i2 = 1;
183: idiag += 1;
184: for (i=1; i<m; i++) {
185: v = aa + ai[i];
186: vi = aj + ai[i];
187: nz = diag[i] - ai[i];
188: s[0] = b[i2];
189: for (j=0; j<nz; j++) {
190: xw[0] = x[vi[j]];
191: PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
192: }
193: t[i2] = s[0];
194: PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
195: x[i2] = xw[0];
196: idiag += 1;
197: i2 += 1;
198: }
199: break;
200: case 2:
201: PetscKernel_v_gets_A_times_w_2(x,idiag,b);
202: t[0] = b[0]; t[1] = b[1];
203: i2 = 2;
204: idiag += 4;
205: for (i=1; i<m; i++) {
206: v = aa + 4*ai[i];
207: vi = aj + ai[i];
208: nz = diag[i] - ai[i];
209: s[0] = b[i2]; s[1] = b[i2+1];
210: for (j=0; j<nz; j++) {
211: idx = 2*vi[j];
212: it = 4*j;
213: xw[0] = x[idx]; xw[1] = x[1+idx];
214: PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
215: }
216: t[i2] = s[0]; t[i2+1] = s[1];
217: PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
218: x[i2] = xw[0]; x[i2+1] = xw[1];
219: idiag += 4;
220: i2 += 2;
221: }
222: break;
223: case 3:
224: PetscKernel_v_gets_A_times_w_3(x,idiag,b);
225: t[0] = b[0]; t[1] = b[1]; t[2] = b[2];
226: i2 = 3;
227: idiag += 9;
228: for (i=1; i<m; i++) {
229: v = aa + 9*ai[i];
230: vi = aj + ai[i];
231: nz = diag[i] - ai[i];
232: s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2];
233: while (nz--) {
234: idx = 3*(*vi++);
235: xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
236: PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
237: v += 9;
238: }
239: t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2];
240: PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
241: x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
242: idiag += 9;
243: i2 += 3;
244: }
245: break;
246: case 4:
247: PetscKernel_v_gets_A_times_w_4(x,idiag,b);
248: t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3];
249: i2 = 4;
250: idiag += 16;
251: for (i=1; i<m; i++) {
252: v = aa + 16*ai[i];
253: vi = aj + ai[i];
254: nz = diag[i] - ai[i];
255: s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3];
256: while (nz--) {
257: idx = 4*(*vi++);
258: xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
259: PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
260: v += 16;
261: }
262: t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; t[i2 + 3] = s[3];
263: PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
264: x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3];
265: idiag += 16;
266: i2 += 4;
267: }
268: break;
269: case 5:
270: PetscKernel_v_gets_A_times_w_5(x,idiag,b);
271: t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; t[4] = b[4];
272: i2 = 5;
273: idiag += 25;
274: for (i=1; i<m; i++) {
275: v = aa + 25*ai[i];
276: vi = aj + ai[i];
277: nz = diag[i] - ai[i];
278: s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4];
279: while (nz--) {
280: idx = 5*(*vi++);
281: xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
282: PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
283: v += 25;
284: }
285: t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; t[i2+3] = s[3]; t[i2+4] = s[4];
286: PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
287: x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4];
288: idiag += 25;
289: i2 += 5;
290: }
291: break;
292: case 6:
293: PetscKernel_v_gets_A_times_w_6(x,idiag,b);
294: t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; t[4] = b[4]; t[5] = b[5];
295: i2 = 6;
296: idiag += 36;
297: for (i=1; i<m; i++) {
298: v = aa + 36*ai[i];
299: vi = aj + ai[i];
300: nz = diag[i] - ai[i];
301: s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5];
302: while (nz--) {
303: idx = 6*(*vi++);
304: xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
305: xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
306: PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
307: v += 36;
308: }
309: t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2];
310: t[i2+3] = s[3]; t[i2+4] = s[4]; t[i2+5] = s[5];
311: PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
312: x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5];
313: idiag += 36;
314: i2 += 6;
315: }
316: break;
317: case 7:
318: PetscKernel_v_gets_A_times_w_7(x,idiag,b);
319: t[0] = b[0]; t[1] = b[1]; t[2] = b[2];
320: t[3] = b[3]; t[4] = b[4]; t[5] = b[5]; t[6] = b[6];
321: i2 = 7;
322: idiag += 49;
323: for (i=1; i<m; i++) {
324: v = aa + 49*ai[i];
325: vi = aj + ai[i];
326: nz = diag[i] - ai[i];
327: s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2];
328: s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6];
329: while (nz--) {
330: idx = 7*(*vi++);
331: xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
332: xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
333: PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
334: v += 49;
335: }
336: t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2];
337: t[i2+3] = s[3]; t[i2+4] = s[4]; t[i2+5] = s[5]; t[i2+6] = s[6];
338: PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
339: x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
340: x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6];
341: idiag += 49;
342: i2 += 7;
343: }
344: break;
345: default:
346: PetscKernel_w_gets_Ar_times_v(bs,bs,b,idiag,x);
347: PetscArraycpy(t,b,bs);
348: i2 = bs;
349: idiag += bs2;
350: for (i=1; i<m; i++) {
351: v = aa + bs2*ai[i];
352: vi = aj + ai[i];
353: nz = diag[i] - ai[i];
355: PetscArraycpy(w,b+i2,bs);
356: /* copy all rows of x that are needed into contiguous space */
357: workt = work;
358: for (j=0; j<nz; j++) {
359: PetscArraycpy(workt,x + bs*(*vi++),bs);
360: workt += bs;
361: }
362: PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
363: PetscArraycpy(t+i2,w,bs);
364: PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
366: idiag += bs2;
367: i2 += bs;
368: }
369: break;
370: }
371: /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
372: PetscLogFlops(1.0*bs2*a->nz);
373: xb = t;
374: }
375: else xb = b;
376: if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
377: idiag = a->idiag+bs2*(a->mbs-1);
378: i2 = bs * (m-1);
379: switch (bs) {
380: case 1:
381: s[0] = xb[i2];
382: PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
383: x[i2] = xw[0];
384: i2 -= 1;
385: for (i=m-2; i>=0; i--) {
386: v = aa + (diag[i]+1);
387: vi = aj + diag[i] + 1;
388: nz = ai[i+1] - diag[i] - 1;
389: s[0] = xb[i2];
390: for (j=0; j<nz; j++) {
391: xw[0] = x[vi[j]];
392: PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
393: }
394: PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
395: x[i2] = xw[0];
396: idiag -= 1;
397: i2 -= 1;
398: }
399: break;
400: case 2:
401: s[0] = xb[i2]; s[1] = xb[i2+1];
402: PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
403: x[i2] = xw[0]; x[i2+1] = xw[1];
404: i2 -= 2;
405: idiag -= 4;
406: for (i=m-2; i>=0; i--) {
407: v = aa + 4*(diag[i] + 1);
408: vi = aj + diag[i] + 1;
409: nz = ai[i+1] - diag[i] - 1;
410: s[0] = xb[i2]; s[1] = xb[i2+1];
411: for (j=0; j<nz; j++) {
412: idx = 2*vi[j];
413: it = 4*j;
414: xw[0] = x[idx]; xw[1] = x[1+idx];
415: PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
416: }
417: PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
418: x[i2] = xw[0]; x[i2+1] = xw[1];
419: idiag -= 4;
420: i2 -= 2;
421: }
422: break;
423: case 3:
424: s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2];
425: PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
426: x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
427: i2 -= 3;
428: idiag -= 9;
429: for (i=m-2; i>=0; i--) {
430: v = aa + 9*(diag[i]+1);
431: vi = aj + diag[i] + 1;
432: nz = ai[i+1] - diag[i] - 1;
433: s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2];
434: while (nz--) {
435: idx = 3*(*vi++);
436: xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
437: PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
438: v += 9;
439: }
440: PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
441: x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
442: idiag -= 9;
443: i2 -= 3;
444: }
445: break;
446: case 4:
447: s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3];
448: PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
449: x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3];
450: i2 -= 4;
451: idiag -= 16;
452: for (i=m-2; i>=0; i--) {
453: v = aa + 16*(diag[i]+1);
454: vi = aj + diag[i] + 1;
455: nz = ai[i+1] - diag[i] - 1;
456: s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3];
457: while (nz--) {
458: idx = 4*(*vi++);
459: xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
460: PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
461: v += 16;
462: }
463: PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
464: x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3];
465: idiag -= 16;
466: i2 -= 4;
467: }
468: break;
469: case 5:
470: s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4];
471: PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
472: x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4];
473: i2 -= 5;
474: idiag -= 25;
475: for (i=m-2; i>=0; i--) {
476: v = aa + 25*(diag[i]+1);
477: vi = aj + diag[i] + 1;
478: nz = ai[i+1] - diag[i] - 1;
479: s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4];
480: while (nz--) {
481: idx = 5*(*vi++);
482: xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
483: PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
484: v += 25;
485: }
486: PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
487: x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4];
488: idiag -= 25;
489: i2 -= 5;
490: }
491: break;
492: case 6:
493: s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5];
494: PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
495: x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5];
496: i2 -= 6;
497: idiag -= 36;
498: for (i=m-2; i>=0; i--) {
499: v = aa + 36*(diag[i]+1);
500: vi = aj + diag[i] + 1;
501: nz = ai[i+1] - diag[i] - 1;
502: s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5];
503: while (nz--) {
504: idx = 6*(*vi++);
505: xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
506: xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
507: PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
508: v += 36;
509: }
510: PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
511: x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5];
512: idiag -= 36;
513: i2 -= 6;
514: }
515: break;
516: case 7:
517: s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2];
518: s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; s[6] = xb[i2+6];
519: PetscKernel_v_gets_A_times_w_7(x,idiag,b);
520: x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
521: x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6];
522: i2 -= 7;
523: idiag -= 49;
524: for (i=m-2; i>=0; i--) {
525: v = aa + 49*(diag[i]+1);
526: vi = aj + diag[i] + 1;
527: nz = ai[i+1] - diag[i] - 1;
528: s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2];
529: s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; s[6] = xb[i2+6];
530: while (nz--) {
531: idx = 7*(*vi++);
532: xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
533: xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
534: PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
535: v += 49;
536: }
537: PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
538: x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
539: x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6];
540: idiag -= 49;
541: i2 -= 7;
542: }
543: break;
544: default:
545: PetscArraycpy(w,xb+i2,bs);
546: PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
547: i2 -= bs;
548: idiag -= bs2;
549: for (i=m-2; i>=0; i--) {
550: v = aa + bs2*(diag[i]+1);
551: vi = aj + diag[i] + 1;
552: nz = ai[i+1] - diag[i] - 1;
554: PetscArraycpy(w,xb+i2,bs);
555: /* copy all rows of x that are needed into contiguous space */
556: workt = work;
557: for (j=0; j<nz; j++) {
558: PetscArraycpy(workt,x + bs*(*vi++),bs);
559: workt += bs;
560: }
561: PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
562: PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
564: idiag -= bs2;
565: i2 -= bs;
566: }
567: break;
568: }
569: PetscLogFlops(1.0*bs2*(a->nz));
570: }
571: its--;
572: }
573: while (its--) {
574: if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
575: idiag = a->idiag;
576: i2 = 0;
577: switch (bs) {
578: case 1:
579: for (i=0; i<m; i++) {
580: v = aa + ai[i];
581: vi = aj + ai[i];
582: nz = ai[i+1] - ai[i];
583: s[0] = b[i2];
584: for (j=0; j<nz; j++) {
585: xw[0] = x[vi[j]];
586: PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
587: }
588: PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
589: x[i2] += xw[0];
590: idiag += 1;
591: i2 += 1;
592: }
593: break;
594: case 2:
595: for (i=0; i<m; i++) {
596: v = aa + 4*ai[i];
597: vi = aj + ai[i];
598: nz = ai[i+1] - ai[i];
599: s[0] = b[i2]; s[1] = b[i2+1];
600: for (j=0; j<nz; j++) {
601: idx = 2*vi[j];
602: it = 4*j;
603: xw[0] = x[idx]; xw[1] = x[1+idx];
604: PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
605: }
606: PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
607: x[i2] += xw[0]; x[i2+1] += xw[1];
608: idiag += 4;
609: i2 += 2;
610: }
611: break;
612: case 3:
613: for (i=0; i<m; i++) {
614: v = aa + 9*ai[i];
615: vi = aj + ai[i];
616: nz = ai[i+1] - ai[i];
617: s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2];
618: while (nz--) {
619: idx = 3*(*vi++);
620: xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
621: PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
622: v += 9;
623: }
624: PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
625: x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
626: idiag += 9;
627: i2 += 3;
628: }
629: break;
630: case 4:
631: for (i=0; i<m; i++) {
632: v = aa + 16*ai[i];
633: vi = aj + ai[i];
634: nz = ai[i+1] - ai[i];
635: s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3];
636: while (nz--) {
637: idx = 4*(*vi++);
638: xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
639: PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
640: v += 16;
641: }
642: PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
643: x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3];
644: idiag += 16;
645: i2 += 4;
646: }
647: break;
648: case 5:
649: for (i=0; i<m; i++) {
650: v = aa + 25*ai[i];
651: vi = aj + ai[i];
652: nz = ai[i+1] - ai[i];
653: s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4];
654: while (nz--) {
655: idx = 5*(*vi++);
656: xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
657: PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
658: v += 25;
659: }
660: PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
661: x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; x[i2+4] += xw[4];
662: idiag += 25;
663: i2 += 5;
664: }
665: break;
666: case 6:
667: for (i=0; i<m; i++) {
668: v = aa + 36*ai[i];
669: vi = aj + ai[i];
670: nz = ai[i+1] - ai[i];
671: s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5];
672: while (nz--) {
673: idx = 6*(*vi++);
674: xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
675: xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
676: PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
677: v += 36;
678: }
679: PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
680: x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
681: x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5];
682: idiag += 36;
683: i2 += 6;
684: }
685: break;
686: case 7:
687: for (i=0; i<m; i++) {
688: v = aa + 49*ai[i];
689: vi = aj + ai[i];
690: nz = ai[i+1] - ai[i];
691: s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2];
692: s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6];
693: while (nz--) {
694: idx = 7*(*vi++);
695: xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
696: xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
697: PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
698: v += 49;
699: }
700: PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
701: x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
702: x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; x[i2+6] += xw[6];
703: idiag += 49;
704: i2 += 7;
705: }
706: break;
707: default:
708: for (i=0; i<m; i++) {
709: v = aa + bs2*ai[i];
710: vi = aj + ai[i];
711: nz = ai[i+1] - ai[i];
713: PetscArraycpy(w,b+i2,bs);
714: /* copy all rows of x that are needed into contiguous space */
715: workt = work;
716: for (j=0; j<nz; j++) {
717: PetscArraycpy(workt,x + bs*(*vi++),bs);
718: workt += bs;
719: }
720: PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
721: PetscKernel_w_gets_w_plus_Ar_times_v(bs,bs,w,idiag,x+i2);
723: idiag += bs2;
724: i2 += bs;
725: }
726: break;
727: }
728: PetscLogFlops(2.0*bs2*a->nz);
729: }
730: if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
731: idiag = a->idiag+bs2*(a->mbs-1);
732: i2 = bs * (m-1);
733: switch (bs) {
734: case 1:
735: for (i=m-1; i>=0; i--) {
736: v = aa + ai[i];
737: vi = aj + ai[i];
738: nz = ai[i+1] - ai[i];
739: s[0] = b[i2];
740: for (j=0; j<nz; j++) {
741: xw[0] = x[vi[j]];
742: PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
743: }
744: PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
745: x[i2] += xw[0];
746: idiag -= 1;
747: i2 -= 1;
748: }
749: break;
750: case 2:
751: for (i=m-1; i>=0; i--) {
752: v = aa + 4*ai[i];
753: vi = aj + ai[i];
754: nz = ai[i+1] - ai[i];
755: s[0] = b[i2]; s[1] = b[i2+1];
756: for (j=0; j<nz; j++) {
757: idx = 2*vi[j];
758: it = 4*j;
759: xw[0] = x[idx]; xw[1] = x[1+idx];
760: PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
761: }
762: PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
763: x[i2] += xw[0]; x[i2+1] += xw[1];
764: idiag -= 4;
765: i2 -= 2;
766: }
767: break;
768: case 3:
769: for (i=m-1; i>=0; i--) {
770: v = aa + 9*ai[i];
771: vi = aj + ai[i];
772: nz = ai[i+1] - ai[i];
773: s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2];
774: while (nz--) {
775: idx = 3*(*vi++);
776: xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
777: PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
778: v += 9;
779: }
780: PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
781: x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
782: idiag -= 9;
783: i2 -= 3;
784: }
785: break;
786: case 4:
787: for (i=m-1; i>=0; i--) {
788: v = aa + 16*ai[i];
789: vi = aj + ai[i];
790: nz = ai[i+1] - ai[i];
791: s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3];
792: while (nz--) {
793: idx = 4*(*vi++);
794: xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
795: PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
796: v += 16;
797: }
798: PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
799: x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3];
800: idiag -= 16;
801: i2 -= 4;
802: }
803: break;
804: case 5:
805: for (i=m-1; i>=0; i--) {
806: v = aa + 25*ai[i];
807: vi = aj + ai[i];
808: nz = ai[i+1] - ai[i];
809: s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4];
810: while (nz--) {
811: idx = 5*(*vi++);
812: xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
813: PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
814: v += 25;
815: }
816: PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
817: x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; x[i2+4] += xw[4];
818: idiag -= 25;
819: i2 -= 5;
820: }
821: break;
822: case 6:
823: for (i=m-1; i>=0; i--) {
824: v = aa + 36*ai[i];
825: vi = aj + ai[i];
826: nz = ai[i+1] - ai[i];
827: s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5];
828: while (nz--) {
829: idx = 6*(*vi++);
830: xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
831: xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
832: PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
833: v += 36;
834: }
835: PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
836: x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
837: x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5];
838: idiag -= 36;
839: i2 -= 6;
840: }
841: break;
842: case 7:
843: for (i=m-1; i>=0; i--) {
844: v = aa + 49*ai[i];
845: vi = aj + ai[i];
846: nz = ai[i+1] - ai[i];
847: s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2];
848: s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6];
849: while (nz--) {
850: idx = 7*(*vi++);
851: xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
852: xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
853: PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
854: v += 49;
855: }
856: PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
857: x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
858: x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; x[i2+6] += xw[6];
859: idiag -= 49;
860: i2 -= 7;
861: }
862: break;
863: default:
864: for (i=m-1; i>=0; i--) {
865: v = aa + bs2*ai[i];
866: vi = aj + ai[i];
867: nz = ai[i+1] - ai[i];
869: PetscArraycpy(w,b+i2,bs);
870: /* copy all rows of x that are needed into contiguous space */
871: workt = work;
872: for (j=0; j<nz; j++) {
873: PetscArraycpy(workt,x + bs*(*vi++),bs);
874: workt += bs;
875: }
876: PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
877: PetscKernel_w_gets_w_plus_Ar_times_v(bs,bs,w,idiag,x+i2);
879: idiag -= bs2;
880: i2 -= bs;
881: }
882: break;
883: }
884: PetscLogFlops(2.0*bs2*(a->nz));
885: }
886: }
887: VecRestoreArray(xx,&x);
888: VecRestoreArrayRead(bb,&b);
889: return(0);
890: }
893: /*
894: Special version for direct calls from Fortran (Used in PETSc-fun3d)
895: */
896: #if defined(PETSC_HAVE_FORTRAN_CAPS)
897: #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4
898: #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
899: #define matsetvaluesblocked4_ matsetvaluesblocked4
900: #endif
902: PETSC_EXTERN void matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[])
903: {
904: Mat A = *AA;
905: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
906: PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn;
907: PetscInt *ai =a->i,*ailen=a->ilen;
908: PetscInt *aj =a->j,stepval,lastcol = -1;
909: const PetscScalar *value = v;
910: MatScalar *ap,*aa = a->a,*bap;
911: PetscErrorCode ierr;
914: if (A->rmap->bs != 4) SETERRABORT(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4");
915: stepval = (n-1)*4;
916: for (k=0; k<m; k++) { /* loop over added rows */
917: row = im[k];
918: rp = aj + ai[row];
919: ap = aa + 16*ai[row];
920: nrow = ailen[row];
921: low = 0;
922: high = nrow;
923: for (l=0; l<n; l++) { /* loop over added columns */
924: col = in[l];
925: if (col <= lastcol) low = 0;
926: else high = nrow;
927: lastcol = col;
928: value = v + k*(stepval+4 + l)*4;
929: while (high-low > 7) {
930: t = (low+high)/2;
931: if (rp[t] > col) high = t;
932: else low = t;
933: }
934: for (i=low; i<high; i++) {
935: if (rp[i] > col) break;
936: if (rp[i] == col) {
937: bap = ap + 16*i;
938: for (ii=0; ii<4; ii++,value+=stepval) {
939: for (jj=ii; jj<16; jj+=4) {
940: bap[jj] += *value++;
941: }
942: }
943: goto noinsert2;
944: }
945: }
946: N = nrow++ - 1;
947: high++; /* added new column index thus must search to one higher than before */
948: /* shift up all the later entries in this row */
949: for (ii=N; ii>=i; ii--) {
950: rp[ii+1] = rp[ii];
951: PetscArraycpy(ap+16*(ii+1),ap+16*(ii),16);CHKERRV(ierr);
952: }
953: if (N >= i) {
954: PetscArrayzero(ap+16*i,16);CHKERRV(ierr);
955: }
956: rp[i] = col;
957: bap = ap + 16*i;
958: for (ii=0; ii<4; ii++,value+=stepval) {
959: for (jj=ii; jj<16; jj+=4) {
960: bap[jj] = *value++;
961: }
962: }
963: noinsert2:;
964: low = i;
965: }
966: ailen[row] = nrow;
967: }
968: PetscFunctionReturnVoid();
969: }
971: #if defined(PETSC_HAVE_FORTRAN_CAPS)
972: #define matsetvalues4_ MATSETVALUES4
973: #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
974: #define matsetvalues4_ matsetvalues4
975: #endif
977: PETSC_EXTERN void matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v)
978: {
979: Mat A = *AA;
980: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
981: PetscInt *rp,k,low,high,t,row,nrow,i,col,l,N,n = *nn,m = *mm;
982: PetscInt *ai=a->i,*ailen=a->ilen;
983: PetscInt *aj=a->j,brow,bcol;
984: PetscInt ridx,cidx,lastcol = -1;
985: MatScalar *ap,value,*aa=a->a,*bap;
989: for (k=0; k<m; k++) { /* loop over added rows */
990: row = im[k]; brow = row/4;
991: rp = aj + ai[brow];
992: ap = aa + 16*ai[brow];
993: nrow = ailen[brow];
994: low = 0;
995: high = nrow;
996: for (l=0; l<n; l++) { /* loop over added columns */
997: col = in[l]; bcol = col/4;
998: ridx = row % 4; cidx = col % 4;
999: value = v[l + k*n];
1000: if (col <= lastcol) low = 0;
1001: else high = nrow;
1002: lastcol = col;
1003: while (high-low > 7) {
1004: t = (low+high)/2;
1005: if (rp[t] > bcol) high = t;
1006: else low = t;
1007: }
1008: for (i=low; i<high; i++) {
1009: if (rp[i] > bcol) break;
1010: if (rp[i] == bcol) {
1011: bap = ap + 16*i + 4*cidx + ridx;
1012: *bap += value;
1013: goto noinsert1;
1014: }
1015: }
1016: N = nrow++ - 1;
1017: high++; /* added new column thus must search to one higher than before */
1018: /* shift up all the later entries in this row */
1019: PetscArraymove(rp+i+1,rp+i,N-i+1);CHKERRV(ierr);
1020: PetscArraymove(ap+16*i+16,ap+16*i,16*(N-i+1));CHKERRV(ierr);
1021: PetscArrayzero(ap+16*i,16);CHKERRV(ierr);
1022: rp[i] = bcol;
1023: ap[16*i + 4*cidx + ridx] = value;
1024: noinsert1:;
1025: low = i;
1026: }
1027: ailen[brow] = nrow;
1028: }
1029: PetscFunctionReturnVoid();
1030: }
1032: /*
1033: Checks for missing diagonals
1034: */
1035: PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A,PetscBool *missing,PetscInt *d)
1036: {
1037: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1039: PetscInt *diag,*ii = a->i,i;
1042: MatMarkDiagonal_SeqBAIJ(A);
1043: *missing = PETSC_FALSE;
1044: if (A->rmap->n > 0 && !ii) {
1045: *missing = PETSC_TRUE;
1046: if (d) *d = 0;
1047: PetscInfo(A,"Matrix has no entries therefore is missing diagonal\n");
1048: } else {
1049: PetscInt n;
1050: n = PetscMin(a->mbs, a->nbs);
1051: diag = a->diag;
1052: for (i=0; i<n; i++) {
1053: if (diag[i] >= ii[i+1]) {
1054: *missing = PETSC_TRUE;
1055: if (d) *d = i;
1056: PetscInfo1(A,"Matrix is missing block diagonal number %D\n",i);
1057: break;
1058: }
1059: }
1060: }
1061: return(0);
1062: }
1064: PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A)
1065: {
1066: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1068: PetscInt i,j,m = a->mbs;
1071: if (!a->diag) {
1072: PetscMalloc1(m,&a->diag);
1073: PetscLogObjectMemory((PetscObject)A,m*sizeof(PetscInt));
1074: a->free_diag = PETSC_TRUE;
1075: }
1076: for (i=0; i<m; i++) {
1077: a->diag[i] = a->i[i+1];
1078: for (j=a->i[i]; j<a->i[i+1]; j++) {
1079: if (a->j[j] == i) {
1080: a->diag[i] = j;
1081: break;
1082: }
1083: }
1084: }
1085: return(0);
1086: }
1089: static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *inia[],const PetscInt *inja[],PetscBool *done)
1090: {
1091: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1093: PetscInt i,j,n = a->mbs,nz = a->i[n],*tia,*tja,bs = A->rmap->bs,k,l,cnt;
1094: PetscInt **ia = (PetscInt**)inia,**ja = (PetscInt**)inja;
1097: *nn = n;
1098: if (!ia) return(0);
1099: if (symmetric) {
1100: MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,PETSC_TRUE,0,0,&tia,&tja);
1101: nz = tia[n];
1102: } else {
1103: tia = a->i; tja = a->j;
1104: }
1106: if (!blockcompressed && bs > 1) {
1107: (*nn) *= bs;
1108: /* malloc & create the natural set of indices */
1109: PetscMalloc1((n+1)*bs,ia);
1110: if (n) {
1111: (*ia)[0] = oshift;
1112: for (j=1; j<bs; j++) {
1113: (*ia)[j] = (tia[1]-tia[0])*bs+(*ia)[j-1];
1114: }
1115: }
1117: for (i=1; i<n; i++) {
1118: (*ia)[i*bs] = (tia[i]-tia[i-1])*bs + (*ia)[i*bs-1];
1119: for (j=1; j<bs; j++) {
1120: (*ia)[i*bs+j] = (tia[i+1]-tia[i])*bs + (*ia)[i*bs+j-1];
1121: }
1122: }
1123: if (n) {
1124: (*ia)[n*bs] = (tia[n]-tia[n-1])*bs + (*ia)[n*bs-1];
1125: }
1127: if (inja) {
1128: PetscMalloc1(nz*bs*bs,ja);
1129: cnt = 0;
1130: for (i=0; i<n; i++) {
1131: for (j=0; j<bs; j++) {
1132: for (k=tia[i]; k<tia[i+1]; k++) {
1133: for (l=0; l<bs; l++) {
1134: (*ja)[cnt++] = bs*tja[k] + l;
1135: }
1136: }
1137: }
1138: }
1139: }
1141: if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */
1142: PetscFree(tia);
1143: PetscFree(tja);
1144: }
1145: } else if (oshift == 1) {
1146: if (symmetric) {
1147: nz = tia[A->rmap->n/bs];
1148: /* add 1 to i and j indices */
1149: for (i=0; i<A->rmap->n/bs+1; i++) tia[i] = tia[i] + 1;
1150: *ia = tia;
1151: if (ja) {
1152: for (i=0; i<nz; i++) tja[i] = tja[i] + 1;
1153: *ja = tja;
1154: }
1155: } else {
1156: nz = a->i[A->rmap->n/bs];
1157: /* malloc space and add 1 to i and j indices */
1158: PetscMalloc1(A->rmap->n/bs+1,ia);
1159: for (i=0; i<A->rmap->n/bs+1; i++) (*ia)[i] = a->i[i] + 1;
1160: if (ja) {
1161: PetscMalloc1(nz,ja);
1162: for (i=0; i<nz; i++) (*ja)[i] = a->j[i] + 1;
1163: }
1164: }
1165: } else {
1166: *ia = tia;
1167: if (ja) *ja = tja;
1168: }
1169: return(0);
1170: }
1172: static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool *done)
1173: {
1177: if (!ia) return(0);
1178: if ((!blockcompressed && A->rmap->bs > 1) || (symmetric || oshift == 1)) {
1179: PetscFree(*ia);
1180: if (ja) {PetscFree(*ja);}
1181: }
1182: return(0);
1183: }
1185: PetscErrorCode MatDestroy_SeqBAIJ(Mat A)
1186: {
1187: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1191: #if defined(PETSC_USE_LOG)
1192: PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap->N,A->cmap->n,a->nz);
1193: #endif
1194: MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);
1195: ISDestroy(&a->row);
1196: ISDestroy(&a->col);
1197: if (a->free_diag) {PetscFree(a->diag);}
1198: PetscFree(a->idiag);
1199: if (a->free_imax_ilen) {PetscFree2(a->imax,a->ilen);}
1200: PetscFree(a->solve_work);
1201: PetscFree(a->mult_work);
1202: PetscFree(a->sor_workt);
1203: PetscFree(a->sor_work);
1204: ISDestroy(&a->icol);
1205: PetscFree(a->saved_values);
1206: PetscFree2(a->compressedrow.i,a->compressedrow.rindex);
1208: MatDestroy(&a->sbaijMat);
1209: MatDestroy(&a->parent);
1210: PetscFree(A->data);
1212: PetscObjectChangeTypeName((PetscObject)A,NULL);
1213: PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJGetArray_C",NULL);
1214: PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJRestoreArray_C",NULL);
1215: PetscObjectComposeFunction((PetscObject)A,"MatInvertBlockDiagonal_C",NULL);
1216: PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C",NULL);
1217: PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C",NULL);
1218: PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C",NULL);
1219: PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C",NULL);
1220: PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C",NULL);
1221: PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C",NULL);
1222: PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocationCSR_C",NULL);
1223: PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqbstrm_C",NULL);
1224: PetscObjectComposeFunction((PetscObject)A,"MatIsTranspose_C",NULL);
1225: #if defined(PETSC_HAVE_HYPRE)
1226: PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_hypre_C",NULL);
1227: #endif
1228: PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_is_C",NULL);
1229: return(0);
1230: }
1232: PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op,PetscBool flg)
1233: {
1234: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1238: switch (op) {
1239: case MAT_ROW_ORIENTED:
1240: a->roworiented = flg;
1241: break;
1242: case MAT_KEEP_NONZERO_PATTERN:
1243: a->keepnonzeropattern = flg;
1244: break;
1245: case MAT_NEW_NONZERO_LOCATIONS:
1246: a->nonew = (flg ? 0 : 1);
1247: break;
1248: case MAT_NEW_NONZERO_LOCATION_ERR:
1249: a->nonew = (flg ? -1 : 0);
1250: break;
1251: case MAT_NEW_NONZERO_ALLOCATION_ERR:
1252: a->nonew = (flg ? -2 : 0);
1253: break;
1254: case MAT_UNUSED_NONZERO_LOCATION_ERR:
1255: a->nounused = (flg ? -1 : 0);
1256: break;
1257: case MAT_FORCE_DIAGONAL_ENTRIES:
1258: case MAT_IGNORE_OFF_PROC_ENTRIES:
1259: case MAT_USE_HASH_TABLE:
1260: case MAT_SORTED_FULL:
1261: PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);
1262: break;
1263: case MAT_SPD:
1264: case MAT_SYMMETRIC:
1265: case MAT_STRUCTURALLY_SYMMETRIC:
1266: case MAT_HERMITIAN:
1267: case MAT_SYMMETRY_ETERNAL:
1268: case MAT_SUBMAT_SINGLEIS:
1269: case MAT_STRUCTURE_ONLY:
1270: /* These options are handled directly by MatSetOption() */
1271: break;
1272: default:
1273: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1274: }
1275: return(0);
1276: }
1278: /* used for both SeqBAIJ and SeqSBAIJ matrices */
1279: PetscErrorCode MatGetRow_SeqBAIJ_private(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v,PetscInt *ai,PetscInt *aj,PetscScalar *aa)
1280: {
1282: PetscInt itmp,i,j,k,M,bn,bp,*idx_i,bs,bs2;
1283: MatScalar *aa_i;
1284: PetscScalar *v_i;
1287: bs = A->rmap->bs;
1288: bs2 = bs*bs;
1289: if (row < 0 || row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row);
1291: bn = row/bs; /* Block number */
1292: bp = row % bs; /* Block Position */
1293: M = ai[bn+1] - ai[bn];
1294: *nz = bs*M;
1296: if (v) {
1297: *v = NULL;
1298: if (*nz) {
1299: PetscMalloc1(*nz,v);
1300: for (i=0; i<M; i++) { /* for each block in the block row */
1301: v_i = *v + i*bs;
1302: aa_i = aa + bs2*(ai[bn] + i);
1303: for (j=bp,k=0; j<bs2; j+=bs,k++) v_i[k] = aa_i[j];
1304: }
1305: }
1306: }
1308: if (idx) {
1309: *idx = NULL;
1310: if (*nz) {
1311: PetscMalloc1(*nz,idx);
1312: for (i=0; i<M; i++) { /* for each block in the block row */
1313: idx_i = *idx + i*bs;
1314: itmp = bs*aj[ai[bn] + i];
1315: for (j=0; j<bs; j++) idx_i[j] = itmp++;
1316: }
1317: }
1318: }
1319: return(0);
1320: }
1322: PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1323: {
1324: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1328: MatGetRow_SeqBAIJ_private(A,row,nz,idx,v,a->i,a->j,a->a);
1329: return(0);
1330: }
1332: PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1333: {
1337: if (nz) *nz = 0;
1338: if (idx) {PetscFree(*idx);}
1339: if (v) {PetscFree(*v);}
1340: return(0);
1341: }
1343: PetscErrorCode MatTranspose_SeqBAIJ(Mat A,MatReuse reuse,Mat *B)
1344: {
1345: Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data,*at;
1346: Mat C;
1348: PetscInt i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap->bs,mbs=a->mbs,nbs=a->nbs,*atfill;
1349: PetscInt bs2=a->bs2,*ati,*atj,anzj,kr;
1350: MatScalar *ata,*aa=a->a;
1353: PetscCalloc1(1+nbs,&atfill);
1354: if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
1355: for (i=0; i<ai[mbs]; i++) atfill[aj[i]] += 1; /* count num of non-zeros in row aj[i] */
1357: MatCreate(PetscObjectComm((PetscObject)A),&C);
1358: MatSetSizes(C,A->cmap->n,A->rmap->N,A->cmap->n,A->rmap->N);
1359: MatSetType(C,((PetscObject)A)->type_name);
1360: MatSeqBAIJSetPreallocation(C,bs,0,atfill);
1362: at = (Mat_SeqBAIJ*)C->data;
1363: ati = at->i;
1364: for (i=0; i<nbs; i++) at->ilen[i] = at->imax[i] = ati[i+1] - ati[i];
1365: } else {
1366: C = *B;
1367: at = (Mat_SeqBAIJ*)C->data;
1368: ati = at->i;
1369: }
1371: atj = at->j;
1372: ata = at->a;
1374: /* Copy ati into atfill so we have locations of the next free space in atj */
1375: PetscArraycpy(atfill,ati,nbs);
1377: /* Walk through A row-wise and mark nonzero entries of A^T. */
1378: for (i=0; i<mbs; i++) {
1379: anzj = ai[i+1] - ai[i];
1380: for (j=0; j<anzj; j++) {
1381: atj[atfill[*aj]] = i;
1382: for (kr=0; kr<bs; kr++) {
1383: for (k=0; k<bs; k++) {
1384: ata[bs2*atfill[*aj]+k*bs+kr] = *aa++;
1385: }
1386: }
1387: atfill[*aj++] += 1;
1388: }
1389: }
1390: MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);
1391: MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);
1393: /* Clean up temporary space and complete requests. */
1394: PetscFree(atfill);
1396: if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
1397: MatSetBlockSizes(C,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));
1398: *B = C;
1399: } else {
1400: MatHeaderMerge(A,&C);
1401: }
1402: return(0);
1403: }
1405: PetscErrorCode MatIsTranspose_SeqBAIJ(Mat A,Mat B,PetscReal tol,PetscBool *f)
1406: {
1408: Mat Btrans;
1411: *f = PETSC_FALSE;
1412: MatTranspose_SeqBAIJ(A,MAT_INITIAL_MATRIX,&Btrans);
1413: MatEqual_SeqBAIJ(B,Btrans,f);
1414: MatDestroy(&Btrans);
1415: return(0);
1416: }
1418: /* Used for both SeqBAIJ and SeqSBAIJ matrices */
1419: PetscErrorCode MatView_SeqBAIJ_Binary(Mat mat,PetscViewer viewer)
1420: {
1421: Mat_SeqBAIJ *A = (Mat_SeqBAIJ*)mat->data;
1422: PetscInt header[4],M,N,m,bs,nz,cnt,i,j,k,l;
1423: PetscInt *rowlens,*colidxs;
1424: PetscScalar *matvals;
1428: PetscViewerSetUp(viewer);
1430: M = mat->rmap->N;
1431: N = mat->cmap->N;
1432: m = mat->rmap->n;
1433: bs = mat->rmap->bs;
1434: nz = bs*bs*A->nz;
1436: /* write matrix header */
1437: header[0] = MAT_FILE_CLASSID;
1438: header[1] = M; header[2] = N; header[3] = nz;
1439: PetscViewerBinaryWrite(viewer,header,4,PETSC_INT);
1441: /* store row lengths */
1442: PetscMalloc1(m,&rowlens);
1443: for (cnt=0, i=0; i<A->mbs; i++)
1444: for (j=0; j<bs; j++)
1445: rowlens[cnt++] = bs*(A->i[i+1] - A->i[i]);
1446: PetscViewerBinaryWrite(viewer,rowlens,m,PETSC_INT);
1447: PetscFree(rowlens);
1449: /* store column indices */
1450: PetscMalloc1(nz,&colidxs);
1451: for (cnt=0, i=0; i<A->mbs; i++)
1452: for (k=0; k<bs; k++)
1453: for (j=A->i[i]; j<A->i[i+1]; j++)
1454: for (l=0; l<bs; l++)
1455: colidxs[cnt++] = bs*A->j[j] + l;
1456: if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);
1457: PetscViewerBinaryWrite(viewer,colidxs,nz,PETSC_INT);
1458: PetscFree(colidxs);
1460: /* store nonzero values */
1461: PetscMalloc1(nz,&matvals);
1462: for (cnt=0, i=0; i<A->mbs; i++)
1463: for (k=0; k<bs; k++)
1464: for (j=A->i[i]; j<A->i[i+1]; j++)
1465: for (l=0; l<bs; l++)
1466: matvals[cnt++] = A->a[bs*(bs*j + l) + k];
1467: if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);
1468: PetscViewerBinaryWrite(viewer,matvals,nz,PETSC_SCALAR);
1469: PetscFree(matvals);
1471: /* write block size option to the viewer's .info file */
1472: MatView_Binary_BlockSizes(mat,viewer);
1473: return(0);
1474: }
1476: static PetscErrorCode MatView_SeqBAIJ_ASCII_structonly(Mat A,PetscViewer viewer)
1477: {
1479: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1480: PetscInt i,bs = A->rmap->bs,k;
1483: PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);
1484: for (i=0; i<a->mbs; i++) {
1485: PetscViewerASCIIPrintf(viewer,"row %D-%D:",i*bs,i*bs+bs-1);
1486: for (k=a->i[i]; k<a->i[i+1]; k++) {
1487: PetscViewerASCIIPrintf(viewer," (%D-%D) ",bs*a->j[k],bs*a->j[k]+bs-1);
1488: }
1489: PetscViewerASCIIPrintf(viewer,"\n");
1490: }
1491: PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);
1492: return(0);
1493: }
1495: static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer)
1496: {
1497: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1498: PetscErrorCode ierr;
1499: PetscInt i,j,bs = A->rmap->bs,k,l,bs2=a->bs2;
1500: PetscViewerFormat format;
1503: if (A->structure_only) {
1504: MatView_SeqBAIJ_ASCII_structonly(A,viewer);
1505: return(0);
1506: }
1508: PetscViewerGetFormat(viewer,&format);
1509: if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1510: PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);
1511: } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
1512: const char *matname;
1513: Mat aij;
1514: MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);
1515: PetscObjectGetName((PetscObject)A,&matname);
1516: PetscObjectSetName((PetscObject)aij,matname);
1517: MatView(aij,viewer);
1518: MatDestroy(&aij);
1519: } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1520: return(0);
1521: } else if (format == PETSC_VIEWER_ASCII_COMMON) {
1522: PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);
1523: for (i=0; i<a->mbs; i++) {
1524: for (j=0; j<bs; j++) {
1525: PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);
1526: for (k=a->i[i]; k<a->i[i+1]; k++) {
1527: for (l=0; l<bs; l++) {
1528: #if defined(PETSC_USE_COMPLEX)
1529: if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1530: PetscViewerASCIIPrintf(viewer," (%D, %g + %gi) ",bs*a->j[k]+l,
1531: (double)PetscRealPart(a->a[bs2*k + l*bs + j]),(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));
1532: } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1533: PetscViewerASCIIPrintf(viewer," (%D, %g - %gi) ",bs*a->j[k]+l,
1534: (double)PetscRealPart(a->a[bs2*k + l*bs + j]),-(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));
1535: } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1536: PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)PetscRealPart(a->a[bs2*k + l*bs + j]));
1537: }
1538: #else
1539: if (a->a[bs2*k + l*bs + j] != 0.0) {
1540: PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)a->a[bs2*k + l*bs + j]);
1541: }
1542: #endif
1543: }
1544: }
1545: PetscViewerASCIIPrintf(viewer,"\n");
1546: }
1547: }
1548: PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);
1549: } else {
1550: PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);
1551: for (i=0; i<a->mbs; i++) {
1552: for (j=0; j<bs; j++) {
1553: PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);
1554: for (k=a->i[i]; k<a->i[i+1]; k++) {
1555: for (l=0; l<bs; l++) {
1556: #if defined(PETSC_USE_COMPLEX)
1557: if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
1558: PetscViewerASCIIPrintf(viewer," (%D, %g + %g i) ",bs*a->j[k]+l,
1559: (double)PetscRealPart(a->a[bs2*k + l*bs + j]),(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));
1560: } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
1561: PetscViewerASCIIPrintf(viewer," (%D, %g - %g i) ",bs*a->j[k]+l,
1562: (double)PetscRealPart(a->a[bs2*k + l*bs + j]),-(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));
1563: } else {
1564: PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)PetscRealPart(a->a[bs2*k + l*bs + j]));
1565: }
1566: #else
1567: PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)a->a[bs2*k + l*bs + j]);
1568: #endif
1569: }
1570: }
1571: PetscViewerASCIIPrintf(viewer,"\n");
1572: }
1573: }
1574: PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);
1575: }
1576: PetscViewerFlush(viewer);
1577: return(0);
1578: }
1580: #include <petscdraw.h>
1581: static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
1582: {
1583: Mat A = (Mat) Aa;
1584: Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data;
1585: PetscErrorCode ierr;
1586: PetscInt row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap->bs,bs2=a->bs2;
1587: PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r;
1588: MatScalar *aa;
1589: PetscViewer viewer;
1590: PetscViewerFormat format;
1593: PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);
1594: PetscViewerGetFormat(viewer,&format);
1595: PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);
1597: /* loop over matrix elements drawing boxes */
1599: if (format != PETSC_VIEWER_DRAW_CONTOUR) {
1600: PetscDrawCollectiveBegin(draw);
1601: /* Blue for negative, Cyan for zero and Red for positive */
1602: color = PETSC_DRAW_BLUE;
1603: for (i=0,row=0; i<mbs; i++,row+=bs) {
1604: for (j=a->i[i]; j<a->i[i+1]; j++) {
1605: y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1606: x_l = a->j[j]*bs; x_r = x_l + 1.0;
1607: aa = a->a + j*bs2;
1608: for (k=0; k<bs; k++) {
1609: for (l=0; l<bs; l++) {
1610: if (PetscRealPart(*aa++) >= 0.) continue;
1611: PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
1612: }
1613: }
1614: }
1615: }
1616: color = PETSC_DRAW_CYAN;
1617: for (i=0,row=0; i<mbs; i++,row+=bs) {
1618: for (j=a->i[i]; j<a->i[i+1]; j++) {
1619: y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1620: x_l = a->j[j]*bs; x_r = x_l + 1.0;
1621: aa = a->a + j*bs2;
1622: for (k=0; k<bs; k++) {
1623: for (l=0; l<bs; l++) {
1624: if (PetscRealPart(*aa++) != 0.) continue;
1625: PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
1626: }
1627: }
1628: }
1629: }
1630: color = PETSC_DRAW_RED;
1631: for (i=0,row=0; i<mbs; i++,row+=bs) {
1632: for (j=a->i[i]; j<a->i[i+1]; j++) {
1633: y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1634: x_l = a->j[j]*bs; x_r = x_l + 1.0;
1635: aa = a->a + j*bs2;
1636: for (k=0; k<bs; k++) {
1637: for (l=0; l<bs; l++) {
1638: if (PetscRealPart(*aa++) <= 0.) continue;
1639: PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
1640: }
1641: }
1642: }
1643: }
1644: PetscDrawCollectiveEnd(draw);
1645: } else {
1646: /* use contour shading to indicate magnitude of values */
1647: /* first determine max of all nonzero values */
1648: PetscReal minv = 0.0, maxv = 0.0;
1649: PetscDraw popup;
1651: for (i=0; i<a->nz*a->bs2; i++) {
1652: if (PetscAbsScalar(a->a[i]) > maxv) maxv = PetscAbsScalar(a->a[i]);
1653: }
1654: if (minv >= maxv) maxv = minv + PETSC_SMALL;
1655: PetscDrawGetPopup(draw,&popup);
1656: PetscDrawScalePopup(popup,0.0,maxv);
1658: PetscDrawCollectiveBegin(draw);
1659: for (i=0,row=0; i<mbs; i++,row+=bs) {
1660: for (j=a->i[i]; j<a->i[i+1]; j++) {
1661: y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1662: x_l = a->j[j]*bs; x_r = x_l + 1.0;
1663: aa = a->a + j*bs2;
1664: for (k=0; k<bs; k++) {
1665: for (l=0; l<bs; l++) {
1666: MatScalar v = *aa++;
1667: color = PetscDrawRealToColor(PetscAbsScalar(v),minv,maxv);
1668: PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
1669: }
1670: }
1671: }
1672: }
1673: PetscDrawCollectiveEnd(draw);
1674: }
1675: return(0);
1676: }
1678: static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer)
1679: {
1681: PetscReal xl,yl,xr,yr,w,h;
1682: PetscDraw draw;
1683: PetscBool isnull;
1686: PetscViewerDrawGetDraw(viewer,0,&draw);
1687: PetscDrawIsNull(draw,&isnull);
1688: if (isnull) return(0);
1690: xr = A->cmap->n; yr = A->rmap->N; h = yr/10.0; w = xr/10.0;
1691: xr += w; yr += h; xl = -w; yl = -h;
1692: PetscDrawSetCoordinates(draw,xl,yl,xr,yr);
1693: PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);
1694: PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);
1695: PetscObjectCompose((PetscObject)A,"Zoomviewer",NULL);
1696: PetscDrawSave(draw);
1697: return(0);
1698: }
1700: PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer)
1701: {
1703: PetscBool iascii,isbinary,isdraw;
1706: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);
1707: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);
1708: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);
1709: if (iascii) {
1710: MatView_SeqBAIJ_ASCII(A,viewer);
1711: } else if (isbinary) {
1712: MatView_SeqBAIJ_Binary(A,viewer);
1713: } else if (isdraw) {
1714: MatView_SeqBAIJ_Draw(A,viewer);
1715: } else {
1716: Mat B;
1717: MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);
1718: MatView(B,viewer);
1719: MatDestroy(&B);
1720: }
1721: return(0);
1722: }
1725: PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[])
1726: {
1727: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1728: PetscInt *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
1729: PetscInt *ai = a->i,*ailen = a->ilen;
1730: PetscInt brow,bcol,ridx,cidx,bs=A->rmap->bs,bs2=a->bs2;
1731: MatScalar *ap,*aa = a->a;
1734: for (k=0; k<m; k++) { /* loop over rows */
1735: row = im[k]; brow = row/bs;
1736: if (row < 0) {v += n; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
1737: if (row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row);
1738: rp = aj + ai[brow]; ap = aa + bs2*ai[brow];
1739: nrow = ailen[brow];
1740: for (l=0; l<n; l++) { /* loop over columns */
1741: if (in[l] < 0) {v++; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
1742: if (in[l] >= A->cmap->n) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]);
1743: col = in[l];
1744: bcol = col/bs;
1745: cidx = col%bs;
1746: ridx = row%bs;
1747: high = nrow;
1748: low = 0; /* assume unsorted */
1749: while (high-low > 5) {
1750: t = (low+high)/2;
1751: if (rp[t] > bcol) high = t;
1752: else low = t;
1753: }
1754: for (i=low; i<high; i++) {
1755: if (rp[i] > bcol) break;
1756: if (rp[i] == bcol) {
1757: *v++ = ap[bs2*i+bs*cidx+ridx];
1758: goto finished;
1759: }
1760: }
1761: *v++ = 0.0;
1762: finished:;
1763: }
1764: }
1765: return(0);
1766: }
1768: PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
1769: {
1770: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1771: PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1;
1772: PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen;
1773: PetscErrorCode ierr;
1774: PetscInt *aj =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs,stepval;
1775: PetscBool roworiented=a->roworiented;
1776: const PetscScalar *value = v;
1777: MatScalar *ap=NULL,*aa = a->a,*bap;
1780: if (roworiented) {
1781: stepval = (n-1)*bs;
1782: } else {
1783: stepval = (m-1)*bs;
1784: }
1785: for (k=0; k<m; k++) { /* loop over added rows */
1786: row = im[k];
1787: if (row < 0) continue;
1788: if (PetscUnlikelyDebug(row >= a->mbs)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block row index too large %D max %D",row,a->mbs-1);
1789: rp = aj + ai[row];
1790: if (!A->structure_only) ap = aa + bs2*ai[row];
1791: rmax = imax[row];
1792: nrow = ailen[row];
1793: low = 0;
1794: high = nrow;
1795: for (l=0; l<n; l++) { /* loop over added columns */
1796: if (in[l] < 0) continue;
1797: if (PetscUnlikelyDebug(in[l] >= a->nbs)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block column index too large %D max %D",in[l],a->nbs-1);
1798: col = in[l];
1799: if (!A->structure_only) {
1800: if (roworiented) {
1801: value = v + (k*(stepval+bs) + l)*bs;
1802: } else {
1803: value = v + (l*(stepval+bs) + k)*bs;
1804: }
1805: }
1806: if (col <= lastcol) low = 0;
1807: else high = nrow;
1808: lastcol = col;
1809: while (high-low > 7) {
1810: t = (low+high)/2;
1811: if (rp[t] > col) high = t;
1812: else low = t;
1813: }
1814: for (i=low; i<high; i++) {
1815: if (rp[i] > col) break;
1816: if (rp[i] == col) {
1817: if (A->structure_only) goto noinsert2;
1818: bap = ap + bs2*i;
1819: if (roworiented) {
1820: if (is == ADD_VALUES) {
1821: for (ii=0; ii<bs; ii++,value+=stepval) {
1822: for (jj=ii; jj<bs2; jj+=bs) {
1823: bap[jj] += *value++;
1824: }
1825: }
1826: } else {
1827: for (ii=0; ii<bs; ii++,value+=stepval) {
1828: for (jj=ii; jj<bs2; jj+=bs) {
1829: bap[jj] = *value++;
1830: }
1831: }
1832: }
1833: } else {
1834: if (is == ADD_VALUES) {
1835: for (ii=0; ii<bs; ii++,value+=bs+stepval) {
1836: for (jj=0; jj<bs; jj++) {
1837: bap[jj] += value[jj];
1838: }
1839: bap += bs;
1840: }
1841: } else {
1842: for (ii=0; ii<bs; ii++,value+=bs+stepval) {
1843: for (jj=0; jj<bs; jj++) {
1844: bap[jj] = value[jj];
1845: }
1846: bap += bs;
1847: }
1848: }
1849: }
1850: goto noinsert2;
1851: }
1852: }
1853: if (nonew == 1) goto noinsert2;
1854: if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new blocked index new nonzero block (%D, %D) in the matrix", row, col);
1855: if (A->structure_only) {
1856: MatSeqXAIJReallocateAIJ_structure_only(A,a->mbs,bs2,nrow,row,col,rmax,ai,aj,rp,imax,nonew,MatScalar);
1857: } else {
1858: MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
1859: }
1860: N = nrow++ - 1; high++;
1861: /* shift up all the later entries in this row */
1862: PetscArraymove(rp+i+1,rp+i,N-i+1);
1863: rp[i] = col;
1864: if (!A->structure_only) {
1865: PetscArraymove(ap+bs2*(i+1),ap+bs2*i,bs2*(N-i+1));
1866: bap = ap + bs2*i;
1867: if (roworiented) {
1868: for (ii=0; ii<bs; ii++,value+=stepval) {
1869: for (jj=ii; jj<bs2; jj+=bs) {
1870: bap[jj] = *value++;
1871: }
1872: }
1873: } else {
1874: for (ii=0; ii<bs; ii++,value+=stepval) {
1875: for (jj=0; jj<bs; jj++) {
1876: *bap++ = *value++;
1877: }
1878: }
1879: }
1880: }
1881: noinsert2:;
1882: low = i;
1883: }
1884: ailen[row] = nrow;
1885: }
1886: return(0);
1887: }
1889: PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
1890: {
1891: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1892: PetscInt fshift = 0,i,*ai = a->i,*aj = a->j,*imax = a->imax;
1893: PetscInt m = A->rmap->N,*ip,N,*ailen = a->ilen;
1895: PetscInt mbs = a->mbs,bs2 = a->bs2,rmax = 0;
1896: MatScalar *aa = a->a,*ap;
1897: PetscReal ratio=0.6;
1900: if (mode == MAT_FLUSH_ASSEMBLY) return(0);
1902: if (m) rmax = ailen[0];
1903: for (i=1; i<mbs; i++) {
1904: /* move each row back by the amount of empty slots (fshift) before it*/
1905: fshift += imax[i-1] - ailen[i-1];
1906: rmax = PetscMax(rmax,ailen[i]);
1907: if (fshift) {
1908: ip = aj + ai[i];
1909: ap = aa + bs2*ai[i];
1910: N = ailen[i];
1911: PetscArraymove(ip-fshift,ip,N);
1912: if (!A->structure_only) {
1913: PetscArraymove(ap-bs2*fshift,ap,bs2*N);
1914: }
1915: }
1916: ai[i] = ai[i-1] + ailen[i-1];
1917: }
1918: if (mbs) {
1919: fshift += imax[mbs-1] - ailen[mbs-1];
1920: ai[mbs] = ai[mbs-1] + ailen[mbs-1];
1921: }
1923: /* reset ilen and imax for each row */
1924: a->nonzerorowcnt = 0;
1925: if (A->structure_only) {
1926: PetscFree2(a->imax,a->ilen);
1927: } else { /* !A->structure_only */
1928: for (i=0; i<mbs; i++) {
1929: ailen[i] = imax[i] = ai[i+1] - ai[i];
1930: a->nonzerorowcnt += ((ai[i+1] - ai[i]) > 0);
1931: }
1932: }
1933: a->nz = ai[mbs];
1935: /* diagonals may have moved, so kill the diagonal pointers */
1936: a->idiagvalid = PETSC_FALSE;
1937: if (fshift && a->diag) {
1938: PetscFree(a->diag);
1939: PetscLogObjectMemory((PetscObject)A,-(mbs+1)*sizeof(PetscInt));
1940: a->diag = NULL;
1941: }
1942: if (fshift && a->nounused == -1) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "Unused space detected in matrix: %D X %D block size %D, %D unneeded", m, A->cmap->n, A->rmap->bs, fshift*bs2);
1943: PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap->n,A->rmap->bs,fshift*bs2,a->nz*bs2);
1944: PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);
1945: PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);
1947: A->info.mallocs += a->reallocs;
1948: a->reallocs = 0;
1949: A->info.nz_unneeded = (PetscReal)fshift*bs2;
1950: a->rmax = rmax;
1952: if (!A->structure_only) {
1953: MatCheckCompressedRow(A,a->nonzerorowcnt,&a->compressedrow,a->i,mbs,ratio);
1954: }
1955: return(0);
1956: }
1958: /*
1959: This function returns an array of flags which indicate the locations of contiguous
1960: blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9]
1961: then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
1962: Assume: sizes should be long enough to hold all the values.
1963: */
1964: static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max)
1965: {
1966: PetscInt i,j,k,row;
1967: PetscBool flg;
1970: for (i=0,j=0; i<n; j++) {
1971: row = idx[i];
1972: if (row%bs!=0) { /* Not the begining of a block */
1973: sizes[j] = 1;
1974: i++;
1975: } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */
1976: sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */
1977: i++;
1978: } else { /* Begining of the block, so check if the complete block exists */
1979: flg = PETSC_TRUE;
1980: for (k=1; k<bs; k++) {
1981: if (row+k != idx[i+k]) { /* break in the block */
1982: flg = PETSC_FALSE;
1983: break;
1984: }
1985: }
1986: if (flg) { /* No break in the bs */
1987: sizes[j] = bs;
1988: i += bs;
1989: } else {
1990: sizes[j] = 1;
1991: i++;
1992: }
1993: }
1994: }
1995: *bs_max = j;
1996: return(0);
1997: }
1999: PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b)
2000: {
2001: Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data;
2002: PetscErrorCode ierr;
2003: PetscInt i,j,k,count,*rows;
2004: PetscInt bs=A->rmap->bs,bs2=baij->bs2,*sizes,row,bs_max;
2005: PetscScalar zero = 0.0;
2006: MatScalar *aa;
2007: const PetscScalar *xx;
2008: PetscScalar *bb;
2011: /* fix right hand side if needed */
2012: if (x && b) {
2013: VecGetArrayRead(x,&xx);
2014: VecGetArray(b,&bb);
2015: for (i=0; i<is_n; i++) {
2016: bb[is_idx[i]] = diag*xx[is_idx[i]];
2017: }
2018: VecRestoreArrayRead(x,&xx);
2019: VecRestoreArray(b,&bb);
2020: }
2022: /* Make a copy of the IS and sort it */
2023: /* allocate memory for rows,sizes */
2024: PetscMalloc2(is_n,&rows,2*is_n,&sizes);
2026: /* copy IS values to rows, and sort them */
2027: for (i=0; i<is_n; i++) rows[i] = is_idx[i];
2028: PetscSortInt(is_n,rows);
2030: if (baij->keepnonzeropattern) {
2031: for (i=0; i<is_n; i++) sizes[i] = 1;
2032: bs_max = is_n;
2033: } else {
2034: MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);
2035: A->nonzerostate++;
2036: }
2038: for (i=0,j=0; i<bs_max; j+=sizes[i],i++) {
2039: row = rows[j];
2040: if (row < 0 || row > A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row);
2041: count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
2042: aa = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
2043: if (sizes[i] == bs && !baij->keepnonzeropattern) {
2044: if (diag != (PetscScalar)0.0) {
2045: if (baij->ilen[row/bs] > 0) {
2046: baij->ilen[row/bs] = 1;
2047: baij->j[baij->i[row/bs]] = row/bs;
2049: PetscArrayzero(aa,count*bs);
2050: }
2051: /* Now insert all the diagonal values for this bs */
2052: for (k=0; k<bs; k++) {
2053: (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);
2054: }
2055: } else { /* (diag == 0.0) */
2056: baij->ilen[row/bs] = 0;
2057: } /* end (diag == 0.0) */
2058: } else { /* (sizes[i] != bs) */
2059: if (PetscUnlikelyDebug(sizes[i] != 1)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal Error. Value should be 1");
2060: for (k=0; k<count; k++) {
2061: aa[0] = zero;
2062: aa += bs;
2063: }
2064: if (diag != (PetscScalar)0.0) {
2065: (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);
2066: }
2067: }
2068: }
2070: PetscFree2(rows,sizes);
2071: MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);
2072: return(0);
2073: }
2075: PetscErrorCode MatZeroRowsColumns_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b)
2076: {
2077: Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data;
2078: PetscErrorCode ierr;
2079: PetscInt i,j,k,count;
2080: PetscInt bs =A->rmap->bs,bs2=baij->bs2,row,col;
2081: PetscScalar zero = 0.0;
2082: MatScalar *aa;
2083: const PetscScalar *xx;
2084: PetscScalar *bb;
2085: PetscBool *zeroed,vecs = PETSC_FALSE;
2088: /* fix right hand side if needed */
2089: if (x && b) {
2090: VecGetArrayRead(x,&xx);
2091: VecGetArray(b,&bb);
2092: vecs = PETSC_TRUE;
2093: }
2095: /* zero the columns */
2096: PetscCalloc1(A->rmap->n,&zeroed);
2097: for (i=0; i<is_n; i++) {
2098: if (is_idx[i] < 0 || is_idx[i] >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",is_idx[i]);
2099: zeroed[is_idx[i]] = PETSC_TRUE;
2100: }
2101: for (i=0; i<A->rmap->N; i++) {
2102: if (!zeroed[i]) {
2103: row = i/bs;
2104: for (j=baij->i[row]; j<baij->i[row+1]; j++) {
2105: for (k=0; k<bs; k++) {
2106: col = bs*baij->j[j] + k;
2107: if (zeroed[col]) {
2108: aa = ((MatScalar*)(baij->a)) + j*bs2 + (i%bs) + bs*k;
2109: if (vecs) bb[i] -= aa[0]*xx[col];
2110: aa[0] = 0.0;
2111: }
2112: }
2113: }
2114: } else if (vecs) bb[i] = diag*xx[i];
2115: }
2116: PetscFree(zeroed);
2117: if (vecs) {
2118: VecRestoreArrayRead(x,&xx);
2119: VecRestoreArray(b,&bb);
2120: }
2122: /* zero the rows */
2123: for (i=0; i<is_n; i++) {
2124: row = is_idx[i];
2125: count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
2126: aa = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
2127: for (k=0; k<count; k++) {
2128: aa[0] = zero;
2129: aa += bs;
2130: }
2131: if (diag != (PetscScalar)0.0) {
2132: (*A->ops->setvalues)(A,1,&row,1,&row,&diag,INSERT_VALUES);
2133: }
2134: }
2135: MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);
2136: return(0);
2137: }
2139: PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
2140: {
2141: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2142: PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1;
2143: PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen;
2144: PetscInt *aj =a->j,nonew=a->nonew,bs=A->rmap->bs,brow,bcol;
2146: PetscInt ridx,cidx,bs2=a->bs2;
2147: PetscBool roworiented=a->roworiented;
2148: MatScalar *ap=NULL,value=0.0,*aa=a->a,*bap;
2151: for (k=0; k<m; k++) { /* loop over added rows */
2152: row = im[k];
2153: brow = row/bs;
2154: if (row < 0) continue;
2155: if (PetscUnlikelyDebug(row >= A->rmap->N)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->N-1);
2156: rp = aj + ai[brow];
2157: if (!A->structure_only) ap = aa + bs2*ai[brow];
2158: rmax = imax[brow];
2159: nrow = ailen[brow];
2160: low = 0;
2161: high = nrow;
2162: for (l=0; l<n; l++) { /* loop over added columns */
2163: if (in[l] < 0) continue;
2164: if (PetscUnlikelyDebug(in[l] >= A->cmap->n)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap->n-1);
2165: col = in[l]; bcol = col/bs;
2166: ridx = row % bs; cidx = col % bs;
2167: if (!A->structure_only) {
2168: if (roworiented) {
2169: value = v[l + k*n];
2170: } else {
2171: value = v[k + l*m];
2172: }
2173: }
2174: if (col <= lastcol) low = 0; else high = nrow;
2175: lastcol = col;
2176: while (high-low > 7) {
2177: t = (low+high)/2;
2178: if (rp[t] > bcol) high = t;
2179: else low = t;
2180: }
2181: for (i=low; i<high; i++) {
2182: if (rp[i] > bcol) break;
2183: if (rp[i] == bcol) {
2184: bap = ap + bs2*i + bs*cidx + ridx;
2185: if (!A->structure_only) {
2186: if (is == ADD_VALUES) *bap += value;
2187: else *bap = value;
2188: }
2189: goto noinsert1;
2190: }
2191: }
2192: if (nonew == 1) goto noinsert1;
2193: if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
2194: if (A->structure_only) {
2195: MatSeqXAIJReallocateAIJ_structure_only(A,a->mbs,bs2,nrow,brow,bcol,rmax,ai,aj,rp,imax,nonew,MatScalar);
2196: } else {
2197: MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
2198: }
2199: N = nrow++ - 1; high++;
2200: /* shift up all the later entries in this row */
2201: PetscArraymove(rp+i+1,rp+i,N-i+1);
2202: rp[i] = bcol;
2203: if (!A->structure_only) {
2204: PetscArraymove(ap+bs2*(i+1),ap+bs2*i,bs2*(N-i+1));
2205: PetscArrayzero(ap+bs2*i,bs2);
2206: ap[bs2*i + bs*cidx + ridx] = value;
2207: }
2208: a->nz++;
2209: A->nonzerostate++;
2210: noinsert1:;
2211: low = i;
2212: }
2213: ailen[brow] = nrow;
2214: }
2215: return(0);
2216: }
2218: PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,const MatFactorInfo *info)
2219: {
2220: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inA->data;
2221: Mat outA;
2223: PetscBool row_identity,col_identity;
2226: if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU");
2227: ISIdentity(row,&row_identity);
2228: ISIdentity(col,&col_identity);
2229: if (!row_identity || !col_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU");
2231: outA = inA;
2232: inA->factortype = MAT_FACTOR_LU;
2233: PetscFree(inA->solvertype);
2234: PetscStrallocpy(MATSOLVERPETSC,&inA->solvertype);
2236: MatMarkDiagonal_SeqBAIJ(inA);
2238: PetscObjectReference((PetscObject)row);
2239: ISDestroy(&a->row);
2240: a->row = row;
2241: PetscObjectReference((PetscObject)col);
2242: ISDestroy(&a->col);
2243: a->col = col;
2245: /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
2246: ISDestroy(&a->icol);
2247: ISInvertPermutation(col,PETSC_DECIDE,&a->icol);
2248: PetscLogObjectParent((PetscObject)inA,(PetscObject)a->icol);
2250: MatSeqBAIJSetNumericFactorization_inplace(inA,(PetscBool)(row_identity && col_identity));
2251: if (!a->solve_work) {
2252: PetscMalloc1(inA->rmap->N+inA->rmap->bs,&a->solve_work);
2253: PetscLogObjectMemory((PetscObject)inA,(inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar));
2254: }
2255: MatLUFactorNumeric(outA,inA,info);
2256: return(0);
2257: }
2259: PetscErrorCode MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices)
2260: {
2261: Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)mat->data;
2262: PetscInt i,nz,mbs;
2265: nz = baij->maxnz;
2266: mbs = baij->mbs;
2267: for (i=0; i<nz; i++) {
2268: baij->j[i] = indices[i];
2269: }
2270: baij->nz = nz;
2271: for (i=0; i<mbs; i++) {
2272: baij->ilen[i] = baij->imax[i];
2273: }
2274: return(0);
2275: }
2277: /*@
2278: MatSeqBAIJSetColumnIndices - Set the column indices for all the rows
2279: in the matrix.
2281: Input Parameters:
2282: + mat - the SeqBAIJ matrix
2283: - indices - the column indices
2285: Level: advanced
2287: Notes:
2288: This can be called if you have precomputed the nonzero structure of the
2289: matrix and want to provide it to the matrix object to improve the performance
2290: of the MatSetValues() operation.
2292: You MUST have set the correct numbers of nonzeros per row in the call to
2293: MatCreateSeqBAIJ(), and the columns indices MUST be sorted.
2295: MUST be called before any calls to MatSetValues();
2297: @*/
2298: PetscErrorCode MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices)
2299: {
2305: PetscUseMethod(mat,"MatSeqBAIJSetColumnIndices_C",(Mat,PetscInt*),(mat,indices));
2306: return(0);
2307: }
2309: PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[])
2310: {
2311: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2313: PetscInt i,j,n,row,bs,*ai,*aj,mbs;
2314: PetscReal atmp;
2315: PetscScalar *x,zero = 0.0;
2316: MatScalar *aa;
2317: PetscInt ncols,brow,krow,kcol;
2320: if (A->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
2321: bs = A->rmap->bs;
2322: aa = a->a;
2323: ai = a->i;
2324: aj = a->j;
2325: mbs = a->mbs;
2327: VecSet(v,zero);
2328: VecGetArray(v,&x);
2329: VecGetLocalSize(v,&n);
2330: if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
2331: for (i=0; i<mbs; i++) {
2332: ncols = ai[1] - ai[0]; ai++;
2333: brow = bs*i;
2334: for (j=0; j<ncols; j++) {
2335: for (kcol=0; kcol<bs; kcol++) {
2336: for (krow=0; krow<bs; krow++) {
2337: atmp = PetscAbsScalar(*aa);aa++;
2338: row = brow + krow; /* row index */
2339: if (PetscAbsScalar(x[row]) < atmp) {x[row] = atmp; if (idx) idx[row] = bs*(*aj) + kcol;}
2340: }
2341: }
2342: aj++;
2343: }
2344: }
2345: VecRestoreArray(v,&x);
2346: return(0);
2347: }
2349: PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str)
2350: {
2354: /* If the two matrices have the same copy implementation, use fast copy. */
2355: if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) {
2356: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2357: Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)B->data;
2358: PetscInt ambs=a->mbs,bmbs=b->mbs,abs=A->rmap->bs,bbs=B->rmap->bs,bs2=abs*abs;
2360: if (a->i[ambs] != b->i[bmbs]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Number of nonzero blocks in matrices A %D and B %D are different",a->i[ambs],b->i[bmbs]);
2361: if (abs != bbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Block size A %D and B %D are different",abs,bbs);
2362: PetscArraycpy(b->a,a->a,bs2*a->i[ambs]);
2363: PetscObjectStateIncrease((PetscObject)B);
2364: } else {
2365: MatCopy_Basic(A,B,str);
2366: }
2367: return(0);
2368: }
2370: PetscErrorCode MatSetUp_SeqBAIJ(Mat A)
2371: {
2375: MatSeqBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,NULL);
2376: return(0);
2377: }
2379: static PetscErrorCode MatSeqBAIJGetArray_SeqBAIJ(Mat A,PetscScalar *array[])
2380: {
2381: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2384: *array = a->a;
2385: return(0);
2386: }
2388: static PetscErrorCode MatSeqBAIJRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[])
2389: {
2391: *array = NULL;
2392: return(0);
2393: }
2395: PetscErrorCode MatAXPYGetPreallocation_SeqBAIJ(Mat Y,Mat X,PetscInt *nnz)
2396: {
2397: PetscInt bs = Y->rmap->bs,mbs = Y->rmap->N/bs;
2398: Mat_SeqBAIJ *x = (Mat_SeqBAIJ*)X->data;
2399: Mat_SeqBAIJ *y = (Mat_SeqBAIJ*)Y->data;
2403: /* Set the number of nonzeros in the new matrix */
2404: MatAXPYGetPreallocation_SeqX_private(mbs,x->i,x->j,y->i,y->j,nnz);
2405: return(0);
2406: }
2408: PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2409: {
2410: Mat_SeqBAIJ *x = (Mat_SeqBAIJ*)X->data,*y = (Mat_SeqBAIJ*)Y->data;
2412: PetscInt bs=Y->rmap->bs,bs2=bs*bs;
2413: PetscBLASInt one=1;
2416: if (str == SAME_NONZERO_PATTERN) {
2417: PetscScalar alpha = a;
2418: PetscBLASInt bnz;
2419: PetscBLASIntCast(x->nz*bs2,&bnz);
2420: PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2421: PetscObjectStateIncrease((PetscObject)Y);
2422: } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2423: MatAXPY_Basic(Y,a,X,str);
2424: } else {
2425: Mat B;
2426: PetscInt *nnz;
2427: if (bs != X->rmap->bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrices must have same block size");
2428: PetscMalloc1(Y->rmap->N,&nnz);
2429: MatCreate(PetscObjectComm((PetscObject)Y),&B);
2430: PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);
2431: MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);
2432: MatSetBlockSizesFromMats(B,Y,Y);
2433: MatSetType(B,(MatType) ((PetscObject)Y)->type_name);
2434: MatAXPYGetPreallocation_SeqBAIJ(Y,X,nnz);
2435: MatSeqBAIJSetPreallocation(B,bs,0,nnz);
2436: MatAXPY_BasicWithPreallocation(B,Y,a,X,str);
2437: MatHeaderReplace(Y,&B);
2438: PetscFree(nnz);
2439: }
2440: return(0);
2441: }
2443: PETSC_INTERN PetscErrorCode MatConjugate_SeqBAIJ(Mat A)
2444: {
2445: #if defined(PETSC_USE_COMPLEX)
2446: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2447: PetscInt i,nz = a->bs2*a->i[a->mbs];
2448: MatScalar *aa = a->a;
2451: for (i=0; i<nz; i++) aa[i] = PetscConj(aa[i]);
2452: #else
2454: #endif
2455: return(0);
2456: }
2458: PetscErrorCode MatRealPart_SeqBAIJ(Mat A)
2459: {
2460: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2461: PetscInt i,nz = a->bs2*a->i[a->mbs];
2462: MatScalar *aa = a->a;
2465: for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]);
2466: return(0);
2467: }
2469: PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A)
2470: {
2471: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2472: PetscInt i,nz = a->bs2*a->i[a->mbs];
2473: MatScalar *aa = a->a;
2476: for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]);
2477: return(0);
2478: }
2480: /*
2481: Code almost identical to MatGetColumnIJ_SeqAIJ() should share common code
2482: */
2483: PetscErrorCode MatGetColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool *done)
2484: {
2485: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2487: PetscInt bs = A->rmap->bs,i,*collengths,*cia,*cja,n = A->cmap->n/bs,m = A->rmap->n/bs;
2488: PetscInt nz = a->i[m],row,*jj,mr,col;
2491: *nn = n;
2492: if (!ia) return(0);
2493: if (symmetric) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not for BAIJ matrices");
2494: else {
2495: PetscCalloc1(n,&collengths);
2496: PetscMalloc1(n+1,&cia);
2497: PetscMalloc1(nz,&cja);
2498: jj = a->j;
2499: for (i=0; i<nz; i++) {
2500: collengths[jj[i]]++;
2501: }
2502: cia[0] = oshift;
2503: for (i=0; i<n; i++) {
2504: cia[i+1] = cia[i] + collengths[i];
2505: }
2506: PetscArrayzero(collengths,n);
2507: jj = a->j;
2508: for (row=0; row<m; row++) {
2509: mr = a->i[row+1] - a->i[row];
2510: for (i=0; i<mr; i++) {
2511: col = *jj++;
2513: cja[cia[col] + collengths[col]++ - oshift] = row + oshift;
2514: }
2515: }
2516: PetscFree(collengths);
2517: *ia = cia; *ja = cja;
2518: }
2519: return(0);
2520: }
2522: PetscErrorCode MatRestoreColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscBool *done)
2523: {
2527: if (!ia) return(0);
2528: PetscFree(*ia);
2529: PetscFree(*ja);
2530: return(0);
2531: }
2533: /*
2534: MatGetColumnIJ_SeqBAIJ_Color() and MatRestoreColumnIJ_SeqBAIJ_Color() are customized from
2535: MatGetColumnIJ_SeqBAIJ() and MatRestoreColumnIJ_SeqBAIJ() by adding an output
2536: spidx[], index of a->a, to be used in MatTransposeColoringCreate() and MatFDColoringCreate()
2537: */
2538: PetscErrorCode MatGetColumnIJ_SeqBAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscInt *spidx[],PetscBool *done)
2539: {
2540: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2542: PetscInt i,*collengths,*cia,*cja,n=a->nbs,m=a->mbs;
2543: PetscInt nz = a->i[m],row,*jj,mr,col;
2544: PetscInt *cspidx;
2547: *nn = n;
2548: if (!ia) return(0);
2550: PetscCalloc1(n,&collengths);
2551: PetscMalloc1(n+1,&cia);
2552: PetscMalloc1(nz,&cja);
2553: PetscMalloc1(nz,&cspidx);
2554: jj = a->j;
2555: for (i=0; i<nz; i++) {
2556: collengths[jj[i]]++;
2557: }
2558: cia[0] = oshift;
2559: for (i=0; i<n; i++) {
2560: cia[i+1] = cia[i] + collengths[i];
2561: }
2562: PetscArrayzero(collengths,n);
2563: jj = a->j;
2564: for (row=0; row<m; row++) {
2565: mr = a->i[row+1] - a->i[row];
2566: for (i=0; i<mr; i++) {
2567: col = *jj++;
2568: cspidx[cia[col] + collengths[col] - oshift] = a->i[row] + i; /* index of a->j */
2569: cja[cia[col] + collengths[col]++ - oshift] = row + oshift;
2570: }
2571: }
2572: PetscFree(collengths);
2573: *ia = cia;
2574: *ja = cja;
2575: *spidx = cspidx;
2576: return(0);
2577: }
2579: PetscErrorCode MatRestoreColumnIJ_SeqBAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscInt *spidx[],PetscBool *done)
2580: {
2584: MatRestoreColumnIJ_SeqBAIJ(A,oshift,symmetric,inodecompressed,n,ia,ja,done);
2585: PetscFree(*spidx);
2586: return(0);
2587: }
2589: PetscErrorCode MatShift_SeqBAIJ(Mat Y,PetscScalar a)
2590: {
2592: Mat_SeqBAIJ *aij = (Mat_SeqBAIJ*)Y->data;
2595: if (!Y->preallocated || !aij->nz) {
2596: MatSeqBAIJSetPreallocation(Y,Y->rmap->bs,1,NULL);
2597: }
2598: MatShift_Basic(Y,a);
2599: return(0);
2600: }
2602: /* -------------------------------------------------------------------*/
2603: static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ,
2604: MatGetRow_SeqBAIJ,
2605: MatRestoreRow_SeqBAIJ,
2606: MatMult_SeqBAIJ_N,
2607: /* 4*/ MatMultAdd_SeqBAIJ_N,
2608: MatMultTranspose_SeqBAIJ,
2609: MatMultTransposeAdd_SeqBAIJ,
2610: NULL,
2611: NULL,
2612: NULL,
2613: /* 10*/ NULL,
2614: MatLUFactor_SeqBAIJ,
2615: NULL,
2616: NULL,
2617: MatTranspose_SeqBAIJ,
2618: /* 15*/ MatGetInfo_SeqBAIJ,
2619: MatEqual_SeqBAIJ,
2620: MatGetDiagonal_SeqBAIJ,
2621: MatDiagonalScale_SeqBAIJ,
2622: MatNorm_SeqBAIJ,
2623: /* 20*/ NULL,
2624: MatAssemblyEnd_SeqBAIJ,
2625: MatSetOption_SeqBAIJ,
2626: MatZeroEntries_SeqBAIJ,
2627: /* 24*/ MatZeroRows_SeqBAIJ,
2628: NULL,
2629: NULL,
2630: NULL,
2631: NULL,
2632: /* 29*/ MatSetUp_SeqBAIJ,
2633: NULL,
2634: NULL,
2635: NULL,
2636: NULL,
2637: /* 34*/ MatDuplicate_SeqBAIJ,
2638: NULL,
2639: NULL,
2640: MatILUFactor_SeqBAIJ,
2641: NULL,
2642: /* 39*/ MatAXPY_SeqBAIJ,
2643: MatCreateSubMatrices_SeqBAIJ,
2644: MatIncreaseOverlap_SeqBAIJ,
2645: MatGetValues_SeqBAIJ,
2646: MatCopy_SeqBAIJ,
2647: /* 44*/ NULL,
2648: MatScale_SeqBAIJ,
2649: MatShift_SeqBAIJ,
2650: NULL,
2651: MatZeroRowsColumns_SeqBAIJ,
2652: /* 49*/ NULL,
2653: MatGetRowIJ_SeqBAIJ,
2654: MatRestoreRowIJ_SeqBAIJ,
2655: MatGetColumnIJ_SeqBAIJ,
2656: MatRestoreColumnIJ_SeqBAIJ,
2657: /* 54*/ MatFDColoringCreate_SeqXAIJ,
2658: NULL,
2659: NULL,
2660: NULL,
2661: MatSetValuesBlocked_SeqBAIJ,
2662: /* 59*/ MatCreateSubMatrix_SeqBAIJ,
2663: MatDestroy_SeqBAIJ,
2664: MatView_SeqBAIJ,
2665: NULL,
2666: NULL,
2667: /* 64*/ NULL,
2668: NULL,
2669: NULL,
2670: NULL,
2671: NULL,
2672: /* 69*/ MatGetRowMaxAbs_SeqBAIJ,
2673: NULL,
2674: MatConvert_Basic,
2675: NULL,
2676: NULL,
2677: /* 74*/ NULL,
2678: MatFDColoringApply_BAIJ,
2679: NULL,
2680: NULL,
2681: NULL,
2682: /* 79*/ NULL,
2683: NULL,
2684: NULL,
2685: NULL,
2686: MatLoad_SeqBAIJ,
2687: /* 84*/ NULL,
2688: NULL,
2689: NULL,
2690: NULL,
2691: NULL,
2692: /* 89*/ NULL,
2693: NULL,
2694: NULL,
2695: NULL,
2696: NULL,
2697: /* 94*/ NULL,
2698: NULL,
2699: NULL,
2700: NULL,
2701: NULL,
2702: /* 99*/ NULL,
2703: NULL,
2704: NULL,
2705: MatConjugate_SeqBAIJ,
2706: NULL,
2707: /*104*/ NULL,
2708: MatRealPart_SeqBAIJ,
2709: MatImaginaryPart_SeqBAIJ,
2710: NULL,
2711: NULL,
2712: /*109*/ NULL,
2713: NULL,
2714: NULL,
2715: NULL,
2716: MatMissingDiagonal_SeqBAIJ,
2717: /*114*/ NULL,
2718: NULL,
2719: NULL,
2720: NULL,
2721: NULL,
2722: /*119*/ NULL,
2723: NULL,
2724: MatMultHermitianTranspose_SeqBAIJ,
2725: MatMultHermitianTransposeAdd_SeqBAIJ,
2726: NULL,
2727: /*124*/ NULL,
2728: NULL,
2729: MatInvertBlockDiagonal_SeqBAIJ,
2730: NULL,
2731: NULL,
2732: /*129*/ NULL,
2733: NULL,
2734: NULL,
2735: NULL,
2736: NULL,
2737: /*134*/ NULL,
2738: NULL,
2739: NULL,
2740: NULL,
2741: NULL,
2742: /*139*/ MatSetBlockSizes_Default,
2743: NULL,
2744: NULL,
2745: MatFDColoringSetUp_SeqXAIJ,
2746: NULL,
2747: /*144*/MatCreateMPIMatConcatenateSeqMat_SeqBAIJ,
2748: MatDestroySubMatrices_SeqBAIJ
2749: };
2751: PetscErrorCode MatStoreValues_SeqBAIJ(Mat mat)
2752: {
2753: Mat_SeqBAIJ *aij = (Mat_SeqBAIJ*)mat->data;
2754: PetscInt nz = aij->i[aij->mbs]*aij->bs2;
2758: if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
2760: /* allocate space for values if not already there */
2761: if (!aij->saved_values) {
2762: PetscMalloc1(nz+1,&aij->saved_values);
2763: PetscLogObjectMemory((PetscObject)mat,(nz+1)*sizeof(PetscScalar));
2764: }
2766: /* copy values over */
2767: PetscArraycpy(aij->saved_values,aij->a,nz);
2768: return(0);
2769: }
2771: PetscErrorCode MatRetrieveValues_SeqBAIJ(Mat mat)
2772: {
2773: Mat_SeqBAIJ *aij = (Mat_SeqBAIJ*)mat->data;
2775: PetscInt nz = aij->i[aij->mbs]*aij->bs2;
2778: if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
2779: if (!aij->saved_values) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatStoreValues(A);first");
2781: /* copy values over */
2782: PetscArraycpy(aij->a,aij->saved_values,nz);
2783: return(0);
2784: }
2786: PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*);
2787: PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*);
2789: PetscErrorCode MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz)
2790: {
2791: Mat_SeqBAIJ *b;
2793: PetscInt i,mbs,nbs,bs2;
2794: PetscBool flg = PETSC_FALSE,skipallocation = PETSC_FALSE,realalloc = PETSC_FALSE;
2797: if (nz >= 0 || nnz) realalloc = PETSC_TRUE;
2798: if (nz == MAT_SKIP_ALLOCATION) {
2799: skipallocation = PETSC_TRUE;
2800: nz = 0;
2801: }
2803: MatSetBlockSize(B,PetscAbs(bs));
2804: PetscLayoutSetUp(B->rmap);
2805: PetscLayoutSetUp(B->cmap);
2806: PetscLayoutGetBlockSize(B->rmap,&bs);
2808: B->preallocated = PETSC_TRUE;
2810: mbs = B->rmap->n/bs;
2811: nbs = B->cmap->n/bs;
2812: bs2 = bs*bs;
2814: if (mbs*bs!=B->rmap->n || nbs*bs!=B->cmap->n) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap->N,B->cmap->n,bs);
2816: if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2817: if (nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz);
2818: if (nnz) {
2819: for (i=0; i<mbs; i++) {
2820: if (nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]);
2821: if (nnz[i] > nbs) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs);
2822: }
2823: }
2825: b = (Mat_SeqBAIJ*)B->data;
2826: PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat");
2827: PetscOptionsBool("-mat_no_unroll","Do not optimize for block size (slow)",NULL,flg,&flg,NULL);
2828: PetscOptionsEnd();
2830: if (!flg) {
2831: switch (bs) {
2832: case 1:
2833: B->ops->mult = MatMult_SeqBAIJ_1;
2834: B->ops->multadd = MatMultAdd_SeqBAIJ_1;
2835: break;
2836: case 2:
2837: B->ops->mult = MatMult_SeqBAIJ_2;
2838: B->ops->multadd = MatMultAdd_SeqBAIJ_2;
2839: break;
2840: case 3:
2841: B->ops->mult = MatMult_SeqBAIJ_3;
2842: B->ops->multadd = MatMultAdd_SeqBAIJ_3;
2843: break;
2844: case 4:
2845: B->ops->mult = MatMult_SeqBAIJ_4;
2846: B->ops->multadd = MatMultAdd_SeqBAIJ_4;
2847: break;
2848: case 5:
2849: B->ops->mult = MatMult_SeqBAIJ_5;
2850: B->ops->multadd = MatMultAdd_SeqBAIJ_5;
2851: break;
2852: case 6:
2853: B->ops->mult = MatMult_SeqBAIJ_6;
2854: B->ops->multadd = MatMultAdd_SeqBAIJ_6;
2855: break;
2856: case 7:
2857: B->ops->mult = MatMult_SeqBAIJ_7;
2858: B->ops->multadd = MatMultAdd_SeqBAIJ_7;
2859: break;
2860: case 9:
2861: {
2862: PetscInt version = 1;
2863: PetscOptionsGetInt(NULL,((PetscObject)B)->prefix,"-mat_baij_mult_version",&version,NULL);
2864: switch (version) {
2865: #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX2__) && defined(__FMA__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES)
2866: case 1:
2867: B->ops->mult = MatMult_SeqBAIJ_9_AVX2;
2868: B->ops->multadd = MatMultAdd_SeqBAIJ_9_AVX2;
2869: PetscInfo1((PetscObject)B,"Using AVX2 for MatMult for BAIJ for blocksize %D\n",bs);
2870: break;
2871: #endif
2872: default:
2873: B->ops->mult = MatMult_SeqBAIJ_N;
2874: B->ops->multadd = MatMultAdd_SeqBAIJ_N;
2875: PetscInfo1((PetscObject)B,"Using BLAS for MatMult for BAIJ for blocksize %D\n",bs);
2876: break;
2877: }
2878: break;
2879: }
2880: case 11:
2881: B->ops->mult = MatMult_SeqBAIJ_11;
2882: B->ops->multadd = MatMultAdd_SeqBAIJ_11;
2883: break;
2884: case 12:
2885: {
2886: PetscInt version = 1;
2887: PetscOptionsGetInt(NULL,((PetscObject)B)->prefix,"-mat_baij_mult_version",&version,NULL);
2888: switch (version) {
2889: case 1:
2890: B->ops->mult = MatMult_SeqBAIJ_12_ver1;
2891: B->ops->multadd = MatMultAdd_SeqBAIJ_12_ver1;
2892: PetscInfo2((PetscObject)B,"Using version %D of MatMult for BAIJ for blocksize %D\n",version,bs);
2893: break;
2894: case 2:
2895: B->ops->mult = MatMult_SeqBAIJ_12_ver2;
2896: B->ops->multadd = MatMultAdd_SeqBAIJ_12_ver2;
2897: PetscInfo2((PetscObject)B,"Using version %D of MatMult for BAIJ for blocksize %D\n",version,bs);
2898: break;
2899: #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX2__) && defined(__FMA__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES)
2900: case 3:
2901: B->ops->mult = MatMult_SeqBAIJ_12_AVX2;
2902: B->ops->multadd = MatMultAdd_SeqBAIJ_12_ver1;
2903: PetscInfo1((PetscObject)B,"Using AVX2 for MatMult for BAIJ for blocksize %D\n",bs);
2904: break;
2905: #endif
2906: default:
2907: B->ops->mult = MatMult_SeqBAIJ_N;
2908: B->ops->multadd = MatMultAdd_SeqBAIJ_N;
2909: PetscInfo1((PetscObject)B,"Using BLAS for MatMult for BAIJ for blocksize %D\n",bs);
2910: break;
2911: }
2912: break;
2913: }
2914: case 15:
2915: {
2916: PetscInt version = 1;
2917: PetscOptionsGetInt(NULL,((PetscObject)B)->prefix,"-mat_baij_mult_version",&version,NULL);
2918: switch (version) {
2919: case 1:
2920: B->ops->mult = MatMult_SeqBAIJ_15_ver1;
2921: PetscInfo2((PetscObject)B,"Using version %D of MatMult for BAIJ for blocksize %D\n",version,bs);
2922: break;
2923: case 2:
2924: B->ops->mult = MatMult_SeqBAIJ_15_ver2;
2925: PetscInfo2((PetscObject)B,"Using version %D of MatMult for BAIJ for blocksize %D\n",version,bs);
2926: break;
2927: case 3:
2928: B->ops->mult = MatMult_SeqBAIJ_15_ver3;
2929: PetscInfo2((PetscObject)B,"Using version %D of MatMult for BAIJ for blocksize %D\n",version,bs);
2930: break;
2931: case 4:
2932: B->ops->mult = MatMult_SeqBAIJ_15_ver4;
2933: PetscInfo2((PetscObject)B,"Using version %D of MatMult for BAIJ for blocksize %D\n",version,bs);
2934: break;
2935: default:
2936: B->ops->mult = MatMult_SeqBAIJ_N;
2937: PetscInfo1((PetscObject)B,"Using BLAS for MatMult for BAIJ for blocksize %D\n",bs);
2938: break;
2939: }
2940: B->ops->multadd = MatMultAdd_SeqBAIJ_N;
2941: break;
2942: }
2943: default:
2944: B->ops->mult = MatMult_SeqBAIJ_N;
2945: B->ops->multadd = MatMultAdd_SeqBAIJ_N;
2946: PetscInfo1((PetscObject)B,"Using BLAS for MatMult for BAIJ for blocksize %D\n",bs);
2947: break;
2948: }
2949: }
2950: B->ops->sor = MatSOR_SeqBAIJ;
2951: b->mbs = mbs;
2952: b->nbs = nbs;
2953: if (!skipallocation) {
2954: if (!b->imax) {
2955: PetscMalloc2(mbs,&b->imax,mbs,&b->ilen);
2956: PetscLogObjectMemory((PetscObject)B,2*mbs*sizeof(PetscInt));
2958: b->free_imax_ilen = PETSC_TRUE;
2959: }
2960: /* b->ilen will count nonzeros in each block row so far. */
2961: for (i=0; i<mbs; i++) b->ilen[i] = 0;
2962: if (!nnz) {
2963: if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2964: else if (nz < 0) nz = 1;
2965: nz = PetscMin(nz,nbs);
2966: for (i=0; i<mbs; i++) b->imax[i] = nz;
2967: PetscIntMultError(nz,mbs,&nz);
2968: } else {
2969: PetscInt64 nz64 = 0;
2970: for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz64 += nnz[i];}
2971: PetscIntCast(nz64,&nz);
2972: }
2974: /* allocate the matrix space */
2975: MatSeqXAIJFreeAIJ(B,&b->a,&b->j,&b->i);
2976: if (B->structure_only) {
2977: PetscMalloc1(nz,&b->j);
2978: PetscMalloc1(B->rmap->N+1,&b->i);
2979: PetscLogObjectMemory((PetscObject)B,(B->rmap->N+1)*sizeof(PetscInt)+nz*sizeof(PetscInt));
2980: } else {
2981: PetscInt nzbs2 = 0;
2982: PetscIntMultError(nz,bs2,&nzbs2);
2983: PetscMalloc3(nzbs2,&b->a,nz,&b->j,B->rmap->N+1,&b->i);
2984: PetscLogObjectMemory((PetscObject)B,(B->rmap->N+1)*sizeof(PetscInt)+nz*(bs2*sizeof(PetscScalar)+sizeof(PetscInt)));
2985: PetscArrayzero(b->a,nz*bs2);
2986: }
2987: PetscArrayzero(b->j,nz);
2989: if (B->structure_only) {
2990: b->singlemalloc = PETSC_FALSE;
2991: b->free_a = PETSC_FALSE;
2992: } else {
2993: b->singlemalloc = PETSC_TRUE;
2994: b->free_a = PETSC_TRUE;
2995: }
2996: b->free_ij = PETSC_TRUE;
2998: b->i[0] = 0;
2999: for (i=1; i<mbs+1; i++) {
3000: b->i[i] = b->i[i-1] + b->imax[i-1];
3001: }
3003: } else {
3004: b->free_a = PETSC_FALSE;
3005: b->free_ij = PETSC_FALSE;
3006: }
3008: b->bs2 = bs2;
3009: b->mbs = mbs;
3010: b->nz = 0;
3011: b->maxnz = nz;
3012: B->info.nz_unneeded = (PetscReal)b->maxnz*bs2;
3013: B->was_assembled = PETSC_FALSE;
3014: B->assembled = PETSC_FALSE;
3015: if (realalloc) {MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);}
3016: return(0);
3017: }
3019: PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
3020: {
3021: PetscInt i,m,nz,nz_max=0,*nnz;
3022: PetscScalar *values=NULL;
3023: PetscBool roworiented = ((Mat_SeqBAIJ*)B->data)->roworiented;
3027: if (bs < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs);
3028: PetscLayoutSetBlockSize(B->rmap,bs);
3029: PetscLayoutSetBlockSize(B->cmap,bs);
3030: PetscLayoutSetUp(B->rmap);
3031: PetscLayoutSetUp(B->cmap);
3032: PetscLayoutGetBlockSize(B->rmap,&bs);
3033: m = B->rmap->n/bs;
3035: if (ii[0] != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %D",ii[0]);
3036: PetscMalloc1(m+1, &nnz);
3037: for (i=0; i<m; i++) {
3038: nz = ii[i+1]- ii[i];
3039: if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "Local row %D has a negative number of columns %D",i,nz);
3040: nz_max = PetscMax(nz_max, nz);
3041: nnz[i] = nz;
3042: }
3043: MatSeqBAIJSetPreallocation(B,bs,0,nnz);
3044: PetscFree(nnz);
3046: values = (PetscScalar*)V;
3047: if (!values) {
3048: PetscCalloc1(bs*bs*(nz_max+1),&values);
3049: }
3050: for (i=0; i<m; i++) {
3051: PetscInt ncols = ii[i+1] - ii[i];
3052: const PetscInt *icols = jj + ii[i];
3053: if (bs == 1 || !roworiented) {
3054: const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
3055: MatSetValuesBlocked_SeqBAIJ(B,1,&i,ncols,icols,svals,INSERT_VALUES);
3056: } else {
3057: PetscInt j;
3058: for (j=0; j<ncols; j++) {
3059: const PetscScalar *svals = values + (V ? (bs*bs*(ii[i]+j)) : 0);
3060: MatSetValuesBlocked_SeqBAIJ(B,1,&i,1,&icols[j],svals,INSERT_VALUES);
3061: }
3062: }
3063: }
3064: if (!V) { PetscFree(values); }
3065: MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
3066: MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
3067: MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);
3068: return(0);
3069: }
3071: /*@C
3072: MatSeqBAIJGetArray - gives access to the array where the data for a MATSEQBAIJ matrix is stored
3074: Not Collective
3076: Input Parameter:
3077: . mat - a MATSEQBAIJ matrix
3079: Output Parameter:
3080: . array - pointer to the data
3082: Level: intermediate
3084: .seealso: MatSeqBAIJRestoreArray(), MatSeqAIJGetArray(), MatSeqAIJRestoreArray()
3085: @*/
3086: PetscErrorCode MatSeqBAIJGetArray(Mat A,PetscScalar **array)
3087: {
3091: PetscUseMethod(A,"MatSeqBAIJGetArray_C",(Mat,PetscScalar**),(A,array));
3092: return(0);
3093: }
3095: /*@C
3096: MatSeqBAIJRestoreArray - returns access to the array where the data for a MATSEQBAIJ matrix is stored obtained by MatSeqBAIJGetArray()
3098: Not Collective
3100: Input Parameters:
3101: + mat - a MATSEQBAIJ matrix
3102: - array - pointer to the data
3104: Level: intermediate
3106: .seealso: MatSeqBAIJGetArray(), MatSeqAIJGetArray(), MatSeqAIJRestoreArray()
3107: @*/
3108: PetscErrorCode MatSeqBAIJRestoreArray(Mat A,PetscScalar **array)
3109: {
3113: PetscUseMethod(A,"MatSeqBAIJRestoreArray_C",(Mat,PetscScalar**),(A,array));
3114: return(0);
3115: }
3117: /*MC
3118: MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on
3119: block sparse compressed row format.
3121: Options Database Keys:
3122: + -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions()
3123: - -mat_baij_mult_version version - indicate the version of the matrix-vector product to use (0 often indicates using BLAS)
3125: Level: beginner
3127: Notes:
3128: MatSetOptions(,MAT_STRUCTURE_ONLY,PETSC_TRUE) may be called for this matrix type. In this no
3129: space is allocated for the nonzero entries and any entries passed with MatSetValues() are ignored
3131: Run with -info to see what version of the matrix-vector product is being used
3133: .seealso: MatCreateSeqBAIJ()
3134: M*/
3136: PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBSTRM(Mat, MatType,MatReuse,Mat*);
3138: PETSC_EXTERN PetscErrorCode MatCreate_SeqBAIJ(Mat B)
3139: {
3141: PetscMPIInt size;
3142: Mat_SeqBAIJ *b;
3145: MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);
3146: if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Comm must be of size 1");
3148: PetscNewLog(B,&b);
3149: B->data = (void*)b;
3150: PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));
3152: b->row = NULL;
3153: b->col = NULL;
3154: b->icol = NULL;
3155: b->reallocs = 0;
3156: b->saved_values = NULL;
3158: b->roworiented = PETSC_TRUE;
3159: b->nonew = 0;
3160: b->diag = NULL;
3161: B->spptr = NULL;
3162: B->info.nz_unneeded = (PetscReal)b->maxnz*b->bs2;
3163: b->keepnonzeropattern = PETSC_FALSE;
3165: PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJGetArray_C",MatSeqBAIJGetArray_SeqBAIJ);
3166: PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJRestoreArray_C",MatSeqBAIJRestoreArray_SeqBAIJ);
3167: PetscObjectComposeFunction((PetscObject)B,"MatInvertBlockDiagonal_C",MatInvertBlockDiagonal_SeqBAIJ);
3168: PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_SeqBAIJ);
3169: PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_SeqBAIJ);
3170: PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",MatSeqBAIJSetColumnIndices_SeqBAIJ);
3171: PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqaij_C",MatConvert_SeqBAIJ_SeqAIJ);
3172: PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C",MatConvert_SeqBAIJ_SeqSBAIJ);
3173: PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",MatSeqBAIJSetPreallocation_SeqBAIJ);
3174: PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C",MatSeqBAIJSetPreallocationCSR_SeqBAIJ);
3175: PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_SeqBAIJ);
3176: #if defined(PETSC_HAVE_HYPRE)
3177: PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_hypre_C",MatConvert_AIJ_HYPRE);
3178: #endif
3179: PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_is_C",MatConvert_XAIJ_IS);
3180: PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);
3181: return(0);
3182: }
3184: PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C,Mat A,MatDuplicateOption cpvalues,PetscBool mallocmatspace)
3185: {
3186: Mat_SeqBAIJ *c = (Mat_SeqBAIJ*)C->data,*a = (Mat_SeqBAIJ*)A->data;
3188: PetscInt i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2;
3191: if (a->i[mbs] != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupt matrix");
3193: if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3194: c->imax = a->imax;
3195: c->ilen = a->ilen;
3196: c->free_imax_ilen = PETSC_FALSE;
3197: } else {
3198: PetscMalloc2(mbs,&c->imax,mbs,&c->ilen);
3199: PetscLogObjectMemory((PetscObject)C,2*mbs*sizeof(PetscInt));
3200: for (i=0; i<mbs; i++) {
3201: c->imax[i] = a->imax[i];
3202: c->ilen[i] = a->ilen[i];
3203: }
3204: c->free_imax_ilen = PETSC_TRUE;
3205: }
3207: /* allocate the matrix space */
3208: if (mallocmatspace) {
3209: if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3210: PetscCalloc1(bs2*nz,&c->a);
3211: PetscLogObjectMemory((PetscObject)C,a->i[mbs]*bs2*sizeof(PetscScalar));
3213: c->i = a->i;
3214: c->j = a->j;
3215: c->singlemalloc = PETSC_FALSE;
3216: c->free_a = PETSC_TRUE;
3217: c->free_ij = PETSC_FALSE;
3218: c->parent = A;
3219: C->preallocated = PETSC_TRUE;
3220: C->assembled = PETSC_TRUE;
3222: PetscObjectReference((PetscObject)A);
3223: MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);
3224: MatSetOption(C,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);
3225: } else {
3226: PetscMalloc3(bs2*nz,&c->a,nz,&c->j,mbs+1,&c->i);
3227: PetscLogObjectMemory((PetscObject)C,a->i[mbs]*(bs2*sizeof(PetscScalar)+sizeof(PetscInt))+(mbs+1)*sizeof(PetscInt));
3229: c->singlemalloc = PETSC_TRUE;
3230: c->free_a = PETSC_TRUE;
3231: c->free_ij = PETSC_TRUE;
3233: PetscArraycpy(c->i,a->i,mbs+1);
3234: if (mbs > 0) {
3235: PetscArraycpy(c->j,a->j,nz);
3236: if (cpvalues == MAT_COPY_VALUES) {
3237: PetscArraycpy(c->a,a->a,bs2*nz);
3238: } else {
3239: PetscArrayzero(c->a,bs2*nz);
3240: }
3241: }
3242: C->preallocated = PETSC_TRUE;
3243: C->assembled = PETSC_TRUE;
3244: }
3245: }
3247: c->roworiented = a->roworiented;
3248: c->nonew = a->nonew;
3250: PetscLayoutReference(A->rmap,&C->rmap);
3251: PetscLayoutReference(A->cmap,&C->cmap);
3253: c->bs2 = a->bs2;
3254: c->mbs = a->mbs;
3255: c->nbs = a->nbs;
3257: if (a->diag) {
3258: if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3259: c->diag = a->diag;
3260: c->free_diag = PETSC_FALSE;
3261: } else {
3262: PetscMalloc1(mbs+1,&c->diag);
3263: PetscLogObjectMemory((PetscObject)C,(mbs+1)*sizeof(PetscInt));
3264: for (i=0; i<mbs; i++) c->diag[i] = a->diag[i];
3265: c->free_diag = PETSC_TRUE;
3266: }
3267: } else c->diag = NULL;
3269: c->nz = a->nz;
3270: c->maxnz = a->nz; /* Since we allocate exactly the right amount */
3271: c->solve_work = NULL;
3272: c->mult_work = NULL;
3273: c->sor_workt = NULL;
3274: c->sor_work = NULL;
3276: c->compressedrow.use = a->compressedrow.use;
3277: c->compressedrow.nrows = a->compressedrow.nrows;
3278: if (a->compressedrow.use) {
3279: i = a->compressedrow.nrows;
3280: PetscMalloc2(i+1,&c->compressedrow.i,i+1,&c->compressedrow.rindex);
3281: PetscLogObjectMemory((PetscObject)C,(2*i+1)*sizeof(PetscInt));
3282: PetscArraycpy(c->compressedrow.i,a->compressedrow.i,i+1);
3283: PetscArraycpy(c->compressedrow.rindex,a->compressedrow.rindex,i);
3284: } else {
3285: c->compressedrow.use = PETSC_FALSE;
3286: c->compressedrow.i = NULL;
3287: c->compressedrow.rindex = NULL;
3288: }
3289: C->nonzerostate = A->nonzerostate;
3291: PetscFunctionListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);
3292: return(0);
3293: }
3295: PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
3296: {
3300: MatCreate(PetscObjectComm((PetscObject)A),B);
3301: MatSetSizes(*B,A->rmap->N,A->cmap->n,A->rmap->N,A->cmap->n);
3302: MatSetType(*B,MATSEQBAIJ);
3303: MatDuplicateNoCreate_SeqBAIJ(*B,A,cpvalues,PETSC_TRUE);
3304: return(0);
3305: }
3307: /* Used for both SeqBAIJ and SeqSBAIJ matrices */
3308: PetscErrorCode MatLoad_SeqBAIJ_Binary(Mat mat,PetscViewer viewer)
3309: {
3310: PetscInt header[4],M,N,nz,bs,m,n,mbs,nbs,rows,cols,sum,i,j,k;
3311: PetscInt *rowidxs,*colidxs;
3312: PetscScalar *matvals;
3316: PetscViewerSetUp(viewer);
3318: /* read matrix header */
3319: PetscViewerBinaryRead(viewer,header,4,NULL,PETSC_INT);
3320: if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Not a matrix object in file");
3321: M = header[1]; N = header[2]; nz = header[3];
3322: if (M < 0) SETERRQ1(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix row size (%D) in file is negative",M);
3323: if (N < 0) SETERRQ1(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix column size (%D) in file is negative",N);
3324: if (nz < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk, cannot load as SeqBAIJ");
3326: /* set block sizes from the viewer's .info file */
3327: MatLoad_Binary_BlockSizes(mat,viewer);
3328: /* set local and global sizes if not set already */
3329: if (mat->rmap->n < 0) mat->rmap->n = M;
3330: if (mat->cmap->n < 0) mat->cmap->n = N;
3331: if (mat->rmap->N < 0) mat->rmap->N = M;
3332: if (mat->cmap->N < 0) mat->cmap->N = N;
3333: PetscLayoutSetUp(mat->rmap);
3334: PetscLayoutSetUp(mat->cmap);
3336: /* check if the matrix sizes are correct */
3337: MatGetSize(mat,&rows,&cols);
3338: if (M != rows || N != cols) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%D, %D) than the input matrix (%D, %D)",M,N,rows,cols);
3339: MatGetBlockSize(mat,&bs);
3340: MatGetLocalSize(mat,&m,&n);
3341: mbs = m/bs; nbs = n/bs;
3343: /* read in row lengths, column indices and nonzero values */
3344: PetscMalloc1(m+1,&rowidxs);
3345: PetscViewerBinaryRead(viewer,rowidxs+1,m,NULL,PETSC_INT);
3346: rowidxs[0] = 0; for (i=0; i<m; i++) rowidxs[i+1] += rowidxs[i];
3347: sum = rowidxs[m];
3348: if (sum != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent matrix data in file: nonzeros = %D, sum-row-lengths = %D\n",nz,sum);
3350: /* read in column indices and nonzero values */
3351: PetscMalloc2(rowidxs[m],&colidxs,nz,&matvals);
3352: PetscViewerBinaryRead(viewer,colidxs,rowidxs[m],NULL,PETSC_INT);
3353: PetscViewerBinaryRead(viewer,matvals,rowidxs[m],NULL,PETSC_SCALAR);
3355: { /* preallocate matrix storage */
3356: PetscBT bt; /* helper bit set to count nonzeros */
3357: PetscInt *nnz;
3358: PetscBool sbaij;
3360: PetscBTCreate(nbs,&bt);
3361: PetscCalloc1(mbs,&nnz);
3362: PetscObjectTypeCompare((PetscObject)mat,MATSEQSBAIJ,&sbaij);
3363: for (i=0; i<mbs; i++) {
3364: PetscBTMemzero(nbs,bt);
3365: for (k=0; k<bs; k++) {
3366: PetscInt row = bs*i + k;
3367: for (j=rowidxs[row]; j<rowidxs[row+1]; j++) {
3368: PetscInt col = colidxs[j];
3369: if (!sbaij || col >= row)
3370: if (!PetscBTLookupSet(bt,col/bs)) nnz[i]++;
3371: }
3372: }
3373: }
3374: PetscBTDestroy(&bt);
3375: MatSeqBAIJSetPreallocation(mat,bs,0,nnz);
3376: MatSeqSBAIJSetPreallocation(mat,bs,0,nnz);
3377: PetscFree(nnz);
3378: }
3380: /* store matrix values */
3381: for (i=0; i<m; i++) {
3382: PetscInt row = i, s = rowidxs[i], e = rowidxs[i+1];
3383: (*mat->ops->setvalues)(mat,1,&row,e-s,colidxs+s,matvals+s,INSERT_VALUES);
3384: }
3386: PetscFree(rowidxs);
3387: PetscFree2(colidxs,matvals);
3388: MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);
3389: MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);
3390: return(0);
3391: }
3393: PetscErrorCode MatLoad_SeqBAIJ(Mat mat,PetscViewer viewer)
3394: {
3396: PetscBool isbinary;
3399: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);
3400: if (!isbinary) SETERRQ2(PetscObjectComm((PetscObject)viewer),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)mat)->type_name);
3401: MatLoad_SeqBAIJ_Binary(mat,viewer);
3402: return(0);
3403: }
3405: /*@C
3406: MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
3407: compressed row) format. For good matrix assembly performance the
3408: user should preallocate the matrix storage by setting the parameter nz
3409: (or the array nnz). By setting these parameters accurately, performance
3410: during matrix assembly can be increased by more than a factor of 50.
3412: Collective
3414: Input Parameters:
3415: + comm - MPI communicator, set to PETSC_COMM_SELF
3416: . bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
3417: blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
3418: . m - number of rows
3419: . n - number of columns
3420: . nz - number of nonzero blocks per block row (same for all rows)
3421: - nnz - array containing the number of nonzero blocks in the various block rows
3422: (possibly different for each block row) or NULL
3424: Output Parameter:
3425: . A - the matrix
3427: It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3428: MatXXXXSetPreallocation() paradigm instead of this routine directly.
3429: [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3431: Options Database Keys:
3432: + -mat_no_unroll - uses code that does not unroll the loops in the
3433: block calculations (much slower)
3434: - -mat_block_size - size of the blocks to use
3436: Level: intermediate
3438: Notes:
3439: The number of rows and columns must be divisible by blocksize.
3441: If the nnz parameter is given then the nz parameter is ignored
3443: A nonzero block is any block that as 1 or more nonzeros in it
3445: The block AIJ format is fully compatible with standard Fortran 77
3446: storage. That is, the stored row and column indices can begin at
3447: either one (as in Fortran) or zero. See the users' manual for details.
3449: Specify the preallocated storage with either nz or nnz (not both).
3450: Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory
3451: allocation. See Users-Manual: ch_mat for details.
3452: matrices.
3454: .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ()
3455: @*/
3456: PetscErrorCode MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
3457: {
3461: MatCreate(comm,A);
3462: MatSetSizes(*A,m,n,m,n);
3463: MatSetType(*A,MATSEQBAIJ);
3464: MatSeqBAIJSetPreallocation(*A,bs,nz,(PetscInt*)nnz);
3465: return(0);
3466: }
3468: /*@C
3469: MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros
3470: per row in the matrix. For good matrix assembly performance the
3471: user should preallocate the matrix storage by setting the parameter nz
3472: (or the array nnz). By setting these parameters accurately, performance
3473: during matrix assembly can be increased by more than a factor of 50.
3475: Collective
3477: Input Parameters:
3478: + B - the matrix
3479: . bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
3480: blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
3481: . nz - number of block nonzeros per block row (same for all rows)
3482: - nnz - array containing the number of block nonzeros in the various block rows
3483: (possibly different for each block row) or NULL
3485: Options Database Keys:
3486: + -mat_no_unroll - uses code that does not unroll the loops in the
3487: block calculations (much slower)
3488: - -mat_block_size - size of the blocks to use
3490: Level: intermediate
3492: Notes:
3493: If the nnz parameter is given then the nz parameter is ignored
3495: You can call MatGetInfo() to get information on how effective the preallocation was;
3496: for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3497: You can also run with the option -info and look for messages with the string
3498: malloc in them to see if additional memory allocation was needed.
3500: The block AIJ format is fully compatible with standard Fortran 77
3501: storage. That is, the stored row and column indices can begin at
3502: either one (as in Fortran) or zero. See the users' manual for details.
3504: Specify the preallocated storage with either nz or nnz (not both).
3505: Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory
3506: allocation. See Users-Manual: ch_mat for details.
3508: .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ(), MatGetInfo()
3509: @*/
3510: PetscErrorCode MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[])
3511: {
3518: PetscTryMethod(B,"MatSeqBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[]),(B,bs,nz,nnz));
3519: return(0);
3520: }
3522: /*@C
3523: MatSeqBAIJSetPreallocationCSR - Creates a sparse parallel matrix in BAIJ format using the given nonzero structure and (optional) numerical values
3525: Collective
3527: Input Parameters:
3528: + B - the matrix
3529: . i - the indices into j for the start of each local row (starts with zero)
3530: . j - the column indices for each local row (starts with zero) these must be sorted for each row
3531: - v - optional values in the matrix
3533: Level: advanced
3535: Notes:
3536: The order of the entries in values is specified by the MatOption MAT_ROW_ORIENTED. For example, C programs
3537: may want to use the default MAT_ROW_ORIENTED=PETSC_TRUE and use an array v[nnz][bs][bs] where the second index is
3538: over rows within a block and the last index is over columns within a block row. Fortran programs will likely set
3539: MAT_ROW_ORIENTED=PETSC_FALSE and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a
3540: block column and the second index is over columns within a block.
3542: Though this routine has Preallocation() in the name it also sets the exact nonzero locations of the matrix entries and usually the numerical values as well
3544: .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatSeqBAIJSetPreallocation(), MATSEQBAIJ
3545: @*/
3546: PetscErrorCode MatSeqBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3547: {
3554: PetscTryMethod(B,"MatSeqBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));
3555: return(0);
3556: }
3559: /*@
3560: MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements provided by the user.
3562: Collective
3564: Input Parameters:
3565: + comm - must be an MPI communicator of size 1
3566: . bs - size of block
3567: . m - number of rows
3568: . n - number of columns
3569: . i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row block row of the matrix
3570: . j - column indices
3571: - a - matrix values
3573: Output Parameter:
3574: . mat - the matrix
3576: Level: advanced
3578: Notes:
3579: The i, j, and a arrays are not copied by this routine, the user must free these arrays
3580: once the matrix is destroyed
3582: You cannot set new nonzero locations into this matrix, that will generate an error.
3584: The i and j indices are 0 based
3586: When block size is greater than 1 the matrix values must be stored using the BAIJ storage format (see the BAIJ code to determine this).
3588: The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is
3589: the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first
3590: block, followed by the second column of the first block etc etc. That is, the blocks are contiguous in memory
3591: with column-major ordering within blocks.
3593: .seealso: MatCreate(), MatCreateBAIJ(), MatCreateSeqBAIJ()
3595: @*/
3596: PetscErrorCode MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt i[],PetscInt j[],PetscScalar a[],Mat *mat)
3597: {
3599: PetscInt ii;
3600: Mat_SeqBAIJ *baij;
3603: if (bs != 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs);
3604: if (m > 0 && i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3606: MatCreate(comm,mat);
3607: MatSetSizes(*mat,m,n,m,n);
3608: MatSetType(*mat,MATSEQBAIJ);
3609: MatSeqBAIJSetPreallocation(*mat,bs,MAT_SKIP_ALLOCATION,NULL);
3610: baij = (Mat_SeqBAIJ*)(*mat)->data;
3611: PetscMalloc2(m,&baij->imax,m,&baij->ilen);
3612: PetscLogObjectMemory((PetscObject)*mat,2*m*sizeof(PetscInt));
3614: baij->i = i;
3615: baij->j = j;
3616: baij->a = a;
3618: baij->singlemalloc = PETSC_FALSE;
3619: baij->nonew = -1; /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/
3620: baij->free_a = PETSC_FALSE;
3621: baij->free_ij = PETSC_FALSE;
3623: for (ii=0; ii<m; ii++) {
3624: baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii];
3625: if (PetscUnlikelyDebug(i[ii+1] - i[ii] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]);
3626: }
3627: if (PetscDefined(USE_DEBUG)) {
3628: for (ii=0; ii<baij->i[m]; ii++) {
3629: if (j[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]);
3630: if (j[ii] > n - 1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]);
3631: }
3632: }
3634: MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);
3635: MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);
3636: return(0);
3637: }
3639: PetscErrorCode MatCreateMPIMatConcatenateSeqMat_SeqBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
3640: {
3642: PetscMPIInt size;
3645: MPI_Comm_size(comm,&size);
3646: if (size == 1 && scall == MAT_REUSE_MATRIX) {
3647: MatCopy(inmat,*outmat,SAME_NONZERO_PATTERN);
3648: } else {
3649: MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(comm,inmat,n,scall,outmat);
3650: }
3651: return(0);
3652: }