Actual source code: baij.c
petsc-3.12.5 2020-03-29
2: /*
3: Defines the basic matrix operations for the BAIJ (compressed row)
4: matrix storage format.
5: */
6: #include <../src/mat/impls/baij/seq/baij.h>
7: #include <petscblaslapack.h>
8: #include <petsc/private/kernels/blockinvert.h>
9: #include <petsc/private/kernels/blockmatmult.h>
11: #if defined(PETSC_HAVE_HYPRE)
12: PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*);
13: #endif
15: #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
16: PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBAIJMKL(Mat,MatType,MatReuse,Mat*);
17: #endif
18: PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*);
19: PETSC_INTERN PetscErrorCode MatPtAP_IS_XAIJ(Mat,Mat,MatReuse,PetscReal,Mat*);
21: PetscErrorCode MatInvertBlockDiagonal_SeqBAIJ(Mat A,const PetscScalar **values)
22: {
23: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*) A->data;
25: PetscInt *diag_offset,i,bs = A->rmap->bs,mbs = a->mbs,ipvt[5],bs2 = bs*bs,*v_pivots;
26: MatScalar *v = a->a,*odiag,*diag,work[25],*v_work;
27: PetscReal shift = 0.0;
28: PetscBool allowzeropivot,zeropivotdetected=PETSC_FALSE;
31: allowzeropivot = PetscNot(A->erroriffailure);
33: if (a->idiagvalid) {
34: if (values) *values = a->idiag;
35: return(0);
36: }
37: MatMarkDiagonal_SeqBAIJ(A);
38: diag_offset = a->diag;
39: if (!a->idiag) {
40: PetscMalloc1(bs2*mbs,&a->idiag);
41: PetscLogObjectMemory((PetscObject)A,bs2*mbs*sizeof(PetscScalar));
42: }
43: diag = a->idiag;
44: if (values) *values = a->idiag;
45: /* factor and invert each block */
46: switch (bs) {
47: case 1:
48: for (i=0; i<mbs; i++) {
49: odiag = v + 1*diag_offset[i];
50: diag[0] = odiag[0];
52: if (PetscAbsScalar(diag[0] + shift) < PETSC_MACHINE_EPSILON) {
53: if (allowzeropivot) {
54: A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
55: A->factorerror_zeropivot_value = PetscAbsScalar(diag[0]);
56: A->factorerror_zeropivot_row = i;
57: PetscInfo1(A,"Zero pivot, row %D\n",i);
58: } else SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot, row %D pivot value %g tolerance %g",i,(double)PetscAbsScalar(diag[0]),(double)PETSC_MACHINE_EPSILON);
59: }
61: diag[0] = (PetscScalar)1.0 / (diag[0] + shift);
62: diag += 1;
63: }
64: break;
65: case 2:
66: for (i=0; i<mbs; i++) {
67: odiag = v + 4*diag_offset[i];
68: diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
69: PetscKernel_A_gets_inverse_A_2(diag,shift,allowzeropivot,&zeropivotdetected);
70: if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
71: diag += 4;
72: }
73: break;
74: case 3:
75: for (i=0; i<mbs; i++) {
76: odiag = v + 9*diag_offset[i];
77: diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
78: diag[4] = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7];
79: diag[8] = odiag[8];
80: PetscKernel_A_gets_inverse_A_3(diag,shift,allowzeropivot,&zeropivotdetected);
81: if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
82: diag += 9;
83: }
84: break;
85: case 4:
86: for (i=0; i<mbs; i++) {
87: odiag = v + 16*diag_offset[i];
88: PetscArraycpy(diag,odiag,16);
89: PetscKernel_A_gets_inverse_A_4(diag,shift,allowzeropivot,&zeropivotdetected);
90: if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
91: diag += 16;
92: }
93: break;
94: case 5:
95: for (i=0; i<mbs; i++) {
96: odiag = v + 25*diag_offset[i];
97: PetscArraycpy(diag,odiag,25);
98: PetscKernel_A_gets_inverse_A_5(diag,ipvt,work,shift,allowzeropivot,&zeropivotdetected);
99: if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
100: diag += 25;
101: }
102: break;
103: case 6:
104: for (i=0; i<mbs; i++) {
105: odiag = v + 36*diag_offset[i];
106: PetscArraycpy(diag,odiag,36);
107: PetscKernel_A_gets_inverse_A_6(diag,shift,allowzeropivot,&zeropivotdetected);
108: if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
109: diag += 36;
110: }
111: break;
112: case 7:
113: for (i=0; i<mbs; i++) {
114: odiag = v + 49*diag_offset[i];
115: PetscArraycpy(diag,odiag,49);
116: PetscKernel_A_gets_inverse_A_7(diag,shift,allowzeropivot,&zeropivotdetected);
117: if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
118: diag += 49;
119: }
120: break;
121: default:
122: PetscMalloc2(bs,&v_work,bs,&v_pivots);
123: for (i=0; i<mbs; i++) {
124: odiag = v + bs2*diag_offset[i];
125: PetscArraycpy(diag,odiag,bs2);
126: PetscKernel_A_gets_inverse_A(bs,diag,v_pivots,v_work,allowzeropivot,&zeropivotdetected);
127: if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
128: diag += bs2;
129: }
130: PetscFree2(v_work,v_pivots);
131: }
132: a->idiagvalid = PETSC_TRUE;
133: return(0);
134: }
136: PetscErrorCode MatSOR_SeqBAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
137: {
138: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
139: PetscScalar *x,*work,*w,*workt,*t;
140: const MatScalar *v,*aa = a->a, *idiag;
141: const PetscScalar *b,*xb;
142: PetscScalar s[7], xw[7]={0}; /* avoid some compilers thinking xw is uninitialized */
143: PetscErrorCode ierr;
144: PetscInt m = a->mbs,i,i2,nz,bs = A->rmap->bs,bs2 = bs*bs,k,j,idx,it;
145: const PetscInt *diag,*ai = a->i,*aj = a->j,*vi;
148: its = its*lits;
149: if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
150: if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
151: if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
152: if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
153: if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
155: if (!a->idiagvalid) {MatInvertBlockDiagonal(A,NULL);}
157: if (!m) return(0);
158: diag = a->diag;
159: idiag = a->idiag;
160: k = PetscMax(A->rmap->n,A->cmap->n);
161: if (!a->mult_work) {
162: PetscMalloc1(k+1,&a->mult_work);
163: }
164: if (!a->sor_workt) {
165: PetscMalloc1(k,&a->sor_workt);
166: }
167: if (!a->sor_work) {
168: PetscMalloc1(bs,&a->sor_work);
169: }
170: work = a->mult_work;
171: t = a->sor_workt;
172: w = a->sor_work;
174: VecGetArray(xx,&x);
175: VecGetArrayRead(bb,&b);
177: if (flag & SOR_ZERO_INITIAL_GUESS) {
178: if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
179: switch (bs) {
180: case 1:
181: PetscKernel_v_gets_A_times_w_1(x,idiag,b);
182: t[0] = b[0];
183: i2 = 1;
184: idiag += 1;
185: for (i=1; i<m; i++) {
186: v = aa + ai[i];
187: vi = aj + ai[i];
188: nz = diag[i] - ai[i];
189: s[0] = b[i2];
190: for (j=0; j<nz; j++) {
191: xw[0] = x[vi[j]];
192: PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
193: }
194: t[i2] = s[0];
195: PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
196: x[i2] = xw[0];
197: idiag += 1;
198: i2 += 1;
199: }
200: break;
201: case 2:
202: PetscKernel_v_gets_A_times_w_2(x,idiag,b);
203: t[0] = b[0]; t[1] = b[1];
204: i2 = 2;
205: idiag += 4;
206: for (i=1; i<m; i++) {
207: v = aa + 4*ai[i];
208: vi = aj + ai[i];
209: nz = diag[i] - ai[i];
210: s[0] = b[i2]; s[1] = b[i2+1];
211: for (j=0; j<nz; j++) {
212: idx = 2*vi[j];
213: it = 4*j;
214: xw[0] = x[idx]; xw[1] = x[1+idx];
215: PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
216: }
217: t[i2] = s[0]; t[i2+1] = s[1];
218: PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
219: x[i2] = xw[0]; x[i2+1] = xw[1];
220: idiag += 4;
221: i2 += 2;
222: }
223: break;
224: case 3:
225: PetscKernel_v_gets_A_times_w_3(x,idiag,b);
226: t[0] = b[0]; t[1] = b[1]; t[2] = b[2];
227: i2 = 3;
228: idiag += 9;
229: for (i=1; i<m; i++) {
230: v = aa + 9*ai[i];
231: vi = aj + ai[i];
232: nz = diag[i] - ai[i];
233: s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2];
234: while (nz--) {
235: idx = 3*(*vi++);
236: xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
237: PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
238: v += 9;
239: }
240: t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2];
241: PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
242: x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
243: idiag += 9;
244: i2 += 3;
245: }
246: break;
247: case 4:
248: PetscKernel_v_gets_A_times_w_4(x,idiag,b);
249: t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3];
250: i2 = 4;
251: idiag += 16;
252: for (i=1; i<m; i++) {
253: v = aa + 16*ai[i];
254: vi = aj + ai[i];
255: nz = diag[i] - ai[i];
256: s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3];
257: while (nz--) {
258: idx = 4*(*vi++);
259: xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
260: PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
261: v += 16;
262: }
263: t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; t[i2 + 3] = s[3];
264: PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
265: x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3];
266: idiag += 16;
267: i2 += 4;
268: }
269: break;
270: case 5:
271: PetscKernel_v_gets_A_times_w_5(x,idiag,b);
272: t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; t[4] = b[4];
273: i2 = 5;
274: idiag += 25;
275: for (i=1; i<m; i++) {
276: v = aa + 25*ai[i];
277: vi = aj + ai[i];
278: nz = diag[i] - ai[i];
279: s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4];
280: while (nz--) {
281: idx = 5*(*vi++);
282: xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
283: PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
284: v += 25;
285: }
286: t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; t[i2+3] = s[3]; t[i2+4] = s[4];
287: PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
288: x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4];
289: idiag += 25;
290: i2 += 5;
291: }
292: break;
293: case 6:
294: PetscKernel_v_gets_A_times_w_6(x,idiag,b);
295: t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; t[4] = b[4]; t[5] = b[5];
296: i2 = 6;
297: idiag += 36;
298: for (i=1; i<m; i++) {
299: v = aa + 36*ai[i];
300: vi = aj + ai[i];
301: nz = diag[i] - ai[i];
302: s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5];
303: while (nz--) {
304: idx = 6*(*vi++);
305: xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
306: xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
307: PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
308: v += 36;
309: }
310: t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2];
311: t[i2+3] = s[3]; t[i2+4] = s[4]; t[i2+5] = s[5];
312: PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
313: x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5];
314: idiag += 36;
315: i2 += 6;
316: }
317: break;
318: case 7:
319: PetscKernel_v_gets_A_times_w_7(x,idiag,b);
320: t[0] = b[0]; t[1] = b[1]; t[2] = b[2];
321: t[3] = b[3]; t[4] = b[4]; t[5] = b[5]; t[6] = b[6];
322: i2 = 7;
323: idiag += 49;
324: for (i=1; i<m; i++) {
325: v = aa + 49*ai[i];
326: vi = aj + ai[i];
327: nz = diag[i] - ai[i];
328: s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2];
329: s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6];
330: while (nz--) {
331: idx = 7*(*vi++);
332: xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
333: xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
334: PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
335: v += 49;
336: }
337: t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2];
338: t[i2+3] = s[3]; t[i2+4] = s[4]; t[i2+5] = s[5]; t[i2+6] = s[6];
339: PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
340: x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
341: x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6];
342: idiag += 49;
343: i2 += 7;
344: }
345: break;
346: default:
347: PetscKernel_w_gets_Ar_times_v(bs,bs,b,idiag,x);
348: PetscArraycpy(t,b,bs);
349: i2 = bs;
350: idiag += bs2;
351: for (i=1; i<m; i++) {
352: v = aa + bs2*ai[i];
353: vi = aj + ai[i];
354: nz = diag[i] - ai[i];
356: PetscArraycpy(w,b+i2,bs);
357: /* copy all rows of x that are needed into contiguous space */
358: workt = work;
359: for (j=0; j<nz; j++) {
360: PetscArraycpy(workt,x + bs*(*vi++),bs);
361: workt += bs;
362: }
363: PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
364: PetscArraycpy(t+i2,w,bs);
365: PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
367: idiag += bs2;
368: i2 += bs;
369: }
370: break;
371: }
372: /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
373: PetscLogFlops(1.0*bs2*a->nz);
374: xb = t;
375: }
376: else xb = b;
377: if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
378: idiag = a->idiag+bs2*(a->mbs-1);
379: i2 = bs * (m-1);
380: switch (bs) {
381: case 1:
382: s[0] = xb[i2];
383: PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
384: x[i2] = xw[0];
385: i2 -= 1;
386: for (i=m-2; i>=0; i--) {
387: v = aa + (diag[i]+1);
388: vi = aj + diag[i] + 1;
389: nz = ai[i+1] - diag[i] - 1;
390: s[0] = xb[i2];
391: for (j=0; j<nz; j++) {
392: xw[0] = x[vi[j]];
393: PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
394: }
395: PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
396: x[i2] = xw[0];
397: idiag -= 1;
398: i2 -= 1;
399: }
400: break;
401: case 2:
402: s[0] = xb[i2]; s[1] = xb[i2+1];
403: PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
404: x[i2] = xw[0]; x[i2+1] = xw[1];
405: i2 -= 2;
406: idiag -= 4;
407: for (i=m-2; i>=0; i--) {
408: v = aa + 4*(diag[i] + 1);
409: vi = aj + diag[i] + 1;
410: nz = ai[i+1] - diag[i] - 1;
411: s[0] = xb[i2]; s[1] = xb[i2+1];
412: for (j=0; j<nz; j++) {
413: idx = 2*vi[j];
414: it = 4*j;
415: xw[0] = x[idx]; xw[1] = x[1+idx];
416: PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
417: }
418: PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
419: x[i2] = xw[0]; x[i2+1] = xw[1];
420: idiag -= 4;
421: i2 -= 2;
422: }
423: break;
424: case 3:
425: s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2];
426: PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
427: x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
428: i2 -= 3;
429: idiag -= 9;
430: for (i=m-2; i>=0; i--) {
431: v = aa + 9*(diag[i]+1);
432: vi = aj + diag[i] + 1;
433: nz = ai[i+1] - diag[i] - 1;
434: s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2];
435: while (nz--) {
436: idx = 3*(*vi++);
437: xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
438: PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
439: v += 9;
440: }
441: PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
442: x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
443: idiag -= 9;
444: i2 -= 3;
445: }
446: break;
447: case 4:
448: s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3];
449: PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
450: x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3];
451: i2 -= 4;
452: idiag -= 16;
453: for (i=m-2; i>=0; i--) {
454: v = aa + 16*(diag[i]+1);
455: vi = aj + diag[i] + 1;
456: nz = ai[i+1] - diag[i] - 1;
457: s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3];
458: while (nz--) {
459: idx = 4*(*vi++);
460: xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
461: PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
462: v += 16;
463: }
464: PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
465: x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3];
466: idiag -= 16;
467: i2 -= 4;
468: }
469: break;
470: case 5:
471: s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4];
472: PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
473: x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4];
474: i2 -= 5;
475: idiag -= 25;
476: for (i=m-2; i>=0; i--) {
477: v = aa + 25*(diag[i]+1);
478: vi = aj + diag[i] + 1;
479: nz = ai[i+1] - diag[i] - 1;
480: s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4];
481: while (nz--) {
482: idx = 5*(*vi++);
483: xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
484: PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
485: v += 25;
486: }
487: PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
488: x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4];
489: idiag -= 25;
490: i2 -= 5;
491: }
492: break;
493: case 6:
494: s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5];
495: PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
496: x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5];
497: i2 -= 6;
498: idiag -= 36;
499: for (i=m-2; i>=0; i--) {
500: v = aa + 36*(diag[i]+1);
501: vi = aj + diag[i] + 1;
502: nz = ai[i+1] - diag[i] - 1;
503: s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5];
504: while (nz--) {
505: idx = 6*(*vi++);
506: xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
507: xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
508: PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
509: v += 36;
510: }
511: PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
512: x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5];
513: idiag -= 36;
514: i2 -= 6;
515: }
516: break;
517: case 7:
518: s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2];
519: s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; s[6] = xb[i2+6];
520: PetscKernel_v_gets_A_times_w_7(x,idiag,b);
521: x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
522: x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6];
523: i2 -= 7;
524: idiag -= 49;
525: for (i=m-2; i>=0; i--) {
526: v = aa + 49*(diag[i]+1);
527: vi = aj + diag[i] + 1;
528: nz = ai[i+1] - diag[i] - 1;
529: s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2];
530: s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; s[6] = xb[i2+6];
531: while (nz--) {
532: idx = 7*(*vi++);
533: xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
534: xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
535: PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
536: v += 49;
537: }
538: PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
539: x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
540: x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6];
541: idiag -= 49;
542: i2 -= 7;
543: }
544: break;
545: default:
546: PetscArraycpy(w,xb+i2,bs);
547: PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
548: i2 -= bs;
549: idiag -= bs2;
550: for (i=m-2; i>=0; i--) {
551: v = aa + bs2*(diag[i]+1);
552: vi = aj + diag[i] + 1;
553: nz = ai[i+1] - diag[i] - 1;
555: PetscArraycpy(w,xb+i2,bs);
556: /* copy all rows of x that are needed into contiguous space */
557: workt = work;
558: for (j=0; j<nz; j++) {
559: PetscArraycpy(workt,x + bs*(*vi++),bs);
560: workt += bs;
561: }
562: PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
563: PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
565: idiag -= bs2;
566: i2 -= bs;
567: }
568: break;
569: }
570: PetscLogFlops(1.0*bs2*(a->nz));
571: }
572: its--;
573: }
574: while (its--) {
575: if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
576: idiag = a->idiag;
577: i2 = 0;
578: switch (bs) {
579: case 1:
580: for (i=0; i<m; i++) {
581: v = aa + ai[i];
582: vi = aj + ai[i];
583: nz = ai[i+1] - ai[i];
584: s[0] = b[i2];
585: for (j=0; j<nz; j++) {
586: xw[0] = x[vi[j]];
587: PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
588: }
589: PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
590: x[i2] += xw[0];
591: idiag += 1;
592: i2 += 1;
593: }
594: break;
595: case 2:
596: for (i=0; i<m; i++) {
597: v = aa + 4*ai[i];
598: vi = aj + ai[i];
599: nz = ai[i+1] - ai[i];
600: s[0] = b[i2]; s[1] = b[i2+1];
601: for (j=0; j<nz; j++) {
602: idx = 2*vi[j];
603: it = 4*j;
604: xw[0] = x[idx]; xw[1] = x[1+idx];
605: PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
606: }
607: PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
608: x[i2] += xw[0]; x[i2+1] += xw[1];
609: idiag += 4;
610: i2 += 2;
611: }
612: break;
613: case 3:
614: for (i=0; i<m; i++) {
615: v = aa + 9*ai[i];
616: vi = aj + ai[i];
617: nz = ai[i+1] - ai[i];
618: s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2];
619: while (nz--) {
620: idx = 3*(*vi++);
621: xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
622: PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
623: v += 9;
624: }
625: PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
626: x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
627: idiag += 9;
628: i2 += 3;
629: }
630: break;
631: case 4:
632: for (i=0; i<m; i++) {
633: v = aa + 16*ai[i];
634: vi = aj + ai[i];
635: nz = ai[i+1] - ai[i];
636: s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3];
637: while (nz--) {
638: idx = 4*(*vi++);
639: xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
640: PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
641: v += 16;
642: }
643: PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
644: x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3];
645: idiag += 16;
646: i2 += 4;
647: }
648: break;
649: case 5:
650: for (i=0; i<m; i++) {
651: v = aa + 25*ai[i];
652: vi = aj + ai[i];
653: nz = ai[i+1] - ai[i];
654: s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4];
655: while (nz--) {
656: idx = 5*(*vi++);
657: xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
658: PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
659: v += 25;
660: }
661: PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
662: x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; x[i2+4] += xw[4];
663: idiag += 25;
664: i2 += 5;
665: }
666: break;
667: case 6:
668: for (i=0; i<m; i++) {
669: v = aa + 36*ai[i];
670: vi = aj + ai[i];
671: nz = ai[i+1] - ai[i];
672: s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5];
673: while (nz--) {
674: idx = 6*(*vi++);
675: xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
676: xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
677: PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
678: v += 36;
679: }
680: PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
681: x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
682: x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5];
683: idiag += 36;
684: i2 += 6;
685: }
686: break;
687: case 7:
688: for (i=0; i<m; i++) {
689: v = aa + 49*ai[i];
690: vi = aj + ai[i];
691: nz = ai[i+1] - ai[i];
692: s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2];
693: s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6];
694: while (nz--) {
695: idx = 7*(*vi++);
696: xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
697: xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
698: PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
699: v += 49;
700: }
701: PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
702: x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
703: x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; x[i2+6] += xw[6];
704: idiag += 49;
705: i2 += 7;
706: }
707: break;
708: default:
709: for (i=0; i<m; i++) {
710: v = aa + bs2*ai[i];
711: vi = aj + ai[i];
712: nz = ai[i+1] - ai[i];
714: PetscArraycpy(w,b+i2,bs);
715: /* copy all rows of x that are needed into contiguous space */
716: workt = work;
717: for (j=0; j<nz; j++) {
718: PetscArraycpy(workt,x + bs*(*vi++),bs);
719: workt += bs;
720: }
721: PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
722: PetscKernel_w_gets_w_plus_Ar_times_v(bs,bs,w,idiag,x+i2);
724: idiag += bs2;
725: i2 += bs;
726: }
727: break;
728: }
729: PetscLogFlops(2.0*bs2*a->nz);
730: }
731: if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
732: idiag = a->idiag+bs2*(a->mbs-1);
733: i2 = bs * (m-1);
734: switch (bs) {
735: case 1:
736: for (i=m-1; i>=0; i--) {
737: v = aa + ai[i];
738: vi = aj + ai[i];
739: nz = ai[i+1] - ai[i];
740: s[0] = b[i2];
741: for (j=0; j<nz; j++) {
742: xw[0] = x[vi[j]];
743: PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
744: }
745: PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
746: x[i2] += xw[0];
747: idiag -= 1;
748: i2 -= 1;
749: }
750: break;
751: case 2:
752: for (i=m-1; i>=0; i--) {
753: v = aa + 4*ai[i];
754: vi = aj + ai[i];
755: nz = ai[i+1] - ai[i];
756: s[0] = b[i2]; s[1] = b[i2+1];
757: for (j=0; j<nz; j++) {
758: idx = 2*vi[j];
759: it = 4*j;
760: xw[0] = x[idx]; xw[1] = x[1+idx];
761: PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
762: }
763: PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
764: x[i2] += xw[0]; x[i2+1] += xw[1];
765: idiag -= 4;
766: i2 -= 2;
767: }
768: break;
769: case 3:
770: for (i=m-1; i>=0; i--) {
771: v = aa + 9*ai[i];
772: vi = aj + ai[i];
773: nz = ai[i+1] - ai[i];
774: s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2];
775: while (nz--) {
776: idx = 3*(*vi++);
777: xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
778: PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
779: v += 9;
780: }
781: PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
782: x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
783: idiag -= 9;
784: i2 -= 3;
785: }
786: break;
787: case 4:
788: for (i=m-1; i>=0; i--) {
789: v = aa + 16*ai[i];
790: vi = aj + ai[i];
791: nz = ai[i+1] - ai[i];
792: s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3];
793: while (nz--) {
794: idx = 4*(*vi++);
795: xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
796: PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
797: v += 16;
798: }
799: PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
800: x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3];
801: idiag -= 16;
802: i2 -= 4;
803: }
804: break;
805: case 5:
806: for (i=m-1; i>=0; i--) {
807: v = aa + 25*ai[i];
808: vi = aj + ai[i];
809: nz = ai[i+1] - ai[i];
810: s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4];
811: while (nz--) {
812: idx = 5*(*vi++);
813: xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
814: PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
815: v += 25;
816: }
817: PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
818: x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; x[i2+4] += xw[4];
819: idiag -= 25;
820: i2 -= 5;
821: }
822: break;
823: case 6:
824: for (i=m-1; i>=0; i--) {
825: v = aa + 36*ai[i];
826: vi = aj + ai[i];
827: nz = ai[i+1] - ai[i];
828: s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5];
829: while (nz--) {
830: idx = 6*(*vi++);
831: xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
832: xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
833: PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
834: v += 36;
835: }
836: PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
837: x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
838: x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5];
839: idiag -= 36;
840: i2 -= 6;
841: }
842: break;
843: case 7:
844: for (i=m-1; i>=0; i--) {
845: v = aa + 49*ai[i];
846: vi = aj + ai[i];
847: nz = ai[i+1] - ai[i];
848: s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2];
849: s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6];
850: while (nz--) {
851: idx = 7*(*vi++);
852: xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
853: xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
854: PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
855: v += 49;
856: }
857: PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
858: x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
859: x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; x[i2+6] += xw[6];
860: idiag -= 49;
861: i2 -= 7;
862: }
863: break;
864: default:
865: for (i=m-1; i>=0; i--) {
866: v = aa + bs2*ai[i];
867: vi = aj + ai[i];
868: nz = ai[i+1] - ai[i];
870: PetscArraycpy(w,b+i2,bs);
871: /* copy all rows of x that are needed into contiguous space */
872: workt = work;
873: for (j=0; j<nz; j++) {
874: PetscArraycpy(workt,x + bs*(*vi++),bs);
875: workt += bs;
876: }
877: PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
878: PetscKernel_w_gets_w_plus_Ar_times_v(bs,bs,w,idiag,x+i2);
880: idiag -= bs2;
881: i2 -= bs;
882: }
883: break;
884: }
885: PetscLogFlops(2.0*bs2*(a->nz));
886: }
887: }
888: VecRestoreArray(xx,&x);
889: VecRestoreArrayRead(bb,&b);
890: return(0);
891: }
894: /*
895: Special version for direct calls from Fortran (Used in PETSc-fun3d)
896: */
897: #if defined(PETSC_HAVE_FORTRAN_CAPS)
898: #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4
899: #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
900: #define matsetvaluesblocked4_ matsetvaluesblocked4
901: #endif
903: PETSC_EXTERN void matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[])
904: {
905: Mat A = *AA;
906: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
907: PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn;
908: PetscInt *ai =a->i,*ailen=a->ilen;
909: PetscInt *aj =a->j,stepval,lastcol = -1;
910: const PetscScalar *value = v;
911: MatScalar *ap,*aa = a->a,*bap;
912: PetscErrorCode ierr;
915: if (A->rmap->bs != 4) SETERRABORT(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4");
916: stepval = (n-1)*4;
917: for (k=0; k<m; k++) { /* loop over added rows */
918: row = im[k];
919: rp = aj + ai[row];
920: ap = aa + 16*ai[row];
921: nrow = ailen[row];
922: low = 0;
923: high = nrow;
924: for (l=0; l<n; l++) { /* loop over added columns */
925: col = in[l];
926: if (col <= lastcol) low = 0;
927: else high = nrow;
928: lastcol = col;
929: value = v + k*(stepval+4 + l)*4;
930: while (high-low > 7) {
931: t = (low+high)/2;
932: if (rp[t] > col) high = t;
933: else low = t;
934: }
935: for (i=low; i<high; i++) {
936: if (rp[i] > col) break;
937: if (rp[i] == col) {
938: bap = ap + 16*i;
939: for (ii=0; ii<4; ii++,value+=stepval) {
940: for (jj=ii; jj<16; jj+=4) {
941: bap[jj] += *value++;
942: }
943: }
944: goto noinsert2;
945: }
946: }
947: N = nrow++ - 1;
948: high++; /* added new column index thus must search to one higher than before */
949: /* shift up all the later entries in this row */
950: for (ii=N; ii>=i; ii--) {
951: rp[ii+1] = rp[ii];
952: PetscArraycpy(ap+16*(ii+1),ap+16*(ii),16);CHKERRV(ierr);
953: }
954: if (N >= i) {
955: PetscArrayzero(ap+16*i,16);CHKERRV(ierr);
956: }
957: rp[i] = col;
958: bap = ap + 16*i;
959: for (ii=0; ii<4; ii++,value+=stepval) {
960: for (jj=ii; jj<16; jj+=4) {
961: bap[jj] = *value++;
962: }
963: }
964: noinsert2:;
965: low = i;
966: }
967: ailen[row] = nrow;
968: }
969: PetscFunctionReturnVoid();
970: }
972: #if defined(PETSC_HAVE_FORTRAN_CAPS)
973: #define matsetvalues4_ MATSETVALUES4
974: #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
975: #define matsetvalues4_ matsetvalues4
976: #endif
978: PETSC_EXTERN void matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v)
979: {
980: Mat A = *AA;
981: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
982: PetscInt *rp,k,low,high,t,row,nrow,i,col,l,N,n = *nn,m = *mm;
983: PetscInt *ai=a->i,*ailen=a->ilen;
984: PetscInt *aj=a->j,brow,bcol;
985: PetscInt ridx,cidx,lastcol = -1;
986: MatScalar *ap,value,*aa=a->a,*bap;
990: for (k=0; k<m; k++) { /* loop over added rows */
991: row = im[k]; brow = row/4;
992: rp = aj + ai[brow];
993: ap = aa + 16*ai[brow];
994: nrow = ailen[brow];
995: low = 0;
996: high = nrow;
997: for (l=0; l<n; l++) { /* loop over added columns */
998: col = in[l]; bcol = col/4;
999: ridx = row % 4; cidx = col % 4;
1000: value = v[l + k*n];
1001: if (col <= lastcol) low = 0;
1002: else high = nrow;
1003: lastcol = col;
1004: while (high-low > 7) {
1005: t = (low+high)/2;
1006: if (rp[t] > bcol) high = t;
1007: else low = t;
1008: }
1009: for (i=low; i<high; i++) {
1010: if (rp[i] > bcol) break;
1011: if (rp[i] == bcol) {
1012: bap = ap + 16*i + 4*cidx + ridx;
1013: *bap += value;
1014: goto noinsert1;
1015: }
1016: }
1017: N = nrow++ - 1;
1018: high++; /* added new column thus must search to one higher than before */
1019: /* shift up all the later entries in this row */
1020: PetscArraymove(rp+i+1,rp+i,N-i+1);CHKERRV(ierr);
1021: PetscArraymove(ap+16*i+16,ap+16*i,16*(N-i+1));CHKERRV(ierr);
1022: PetscArrayzero(ap+16*i,16);CHKERRV(ierr);
1023: rp[i] = bcol;
1024: ap[16*i + 4*cidx + ridx] = value;
1025: noinsert1:;
1026: low = i;
1027: }
1028: ailen[brow] = nrow;
1029: }
1030: PetscFunctionReturnVoid();
1031: }
1033: /*
1034: Checks for missing diagonals
1035: */
1036: PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A,PetscBool *missing,PetscInt *d)
1037: {
1038: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1040: PetscInt *diag,*ii = a->i,i;
1043: MatMarkDiagonal_SeqBAIJ(A);
1044: *missing = PETSC_FALSE;
1045: if (A->rmap->n > 0 && !ii) {
1046: *missing = PETSC_TRUE;
1047: if (d) *d = 0;
1048: PetscInfo(A,"Matrix has no entries therefore is missing diagonal\n");
1049: } else {
1050: PetscInt n;
1051: n = PetscMin(a->mbs, a->nbs);
1052: diag = a->diag;
1053: for (i=0; i<n; i++) {
1054: if (diag[i] >= ii[i+1]) {
1055: *missing = PETSC_TRUE;
1056: if (d) *d = i;
1057: PetscInfo1(A,"Matrix is missing block diagonal number %D\n",i);
1058: break;
1059: }
1060: }
1061: }
1062: return(0);
1063: }
1065: PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A)
1066: {
1067: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1069: PetscInt i,j,m = a->mbs;
1072: if (!a->diag) {
1073: PetscMalloc1(m,&a->diag);
1074: PetscLogObjectMemory((PetscObject)A,m*sizeof(PetscInt));
1075: a->free_diag = PETSC_TRUE;
1076: }
1077: for (i=0; i<m; i++) {
1078: a->diag[i] = a->i[i+1];
1079: for (j=a->i[i]; j<a->i[i+1]; j++) {
1080: if (a->j[j] == i) {
1081: a->diag[i] = j;
1082: break;
1083: }
1084: }
1085: }
1086: return(0);
1087: }
1090: static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *inia[],const PetscInt *inja[],PetscBool *done)
1091: {
1092: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1094: PetscInt i,j,n = a->mbs,nz = a->i[n],*tia,*tja,bs = A->rmap->bs,k,l,cnt;
1095: PetscInt **ia = (PetscInt**)inia,**ja = (PetscInt**)inja;
1098: *nn = n;
1099: if (!ia) return(0);
1100: if (symmetric) {
1101: MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,PETSC_TRUE,0,0,&tia,&tja);
1102: nz = tia[n];
1103: } else {
1104: tia = a->i; tja = a->j;
1105: }
1107: if (!blockcompressed && bs > 1) {
1108: (*nn) *= bs;
1109: /* malloc & create the natural set of indices */
1110: PetscMalloc1((n+1)*bs,ia);
1111: if (n) {
1112: (*ia)[0] = oshift;
1113: for (j=1; j<bs; j++) {
1114: (*ia)[j] = (tia[1]-tia[0])*bs+(*ia)[j-1];
1115: }
1116: }
1118: for (i=1; i<n; i++) {
1119: (*ia)[i*bs] = (tia[i]-tia[i-1])*bs + (*ia)[i*bs-1];
1120: for (j=1; j<bs; j++) {
1121: (*ia)[i*bs+j] = (tia[i+1]-tia[i])*bs + (*ia)[i*bs+j-1];
1122: }
1123: }
1124: if (n) {
1125: (*ia)[n*bs] = (tia[n]-tia[n-1])*bs + (*ia)[n*bs-1];
1126: }
1128: if (inja) {
1129: PetscMalloc1(nz*bs*bs,ja);
1130: cnt = 0;
1131: for (i=0; i<n; i++) {
1132: for (j=0; j<bs; j++) {
1133: for (k=tia[i]; k<tia[i+1]; k++) {
1134: for (l=0; l<bs; l++) {
1135: (*ja)[cnt++] = bs*tja[k] + l;
1136: }
1137: }
1138: }
1139: }
1140: }
1142: if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */
1143: PetscFree(tia);
1144: PetscFree(tja);
1145: }
1146: } else if (oshift == 1) {
1147: if (symmetric) {
1148: nz = tia[A->rmap->n/bs];
1149: /* add 1 to i and j indices */
1150: for (i=0; i<A->rmap->n/bs+1; i++) tia[i] = tia[i] + 1;
1151: *ia = tia;
1152: if (ja) {
1153: for (i=0; i<nz; i++) tja[i] = tja[i] + 1;
1154: *ja = tja;
1155: }
1156: } else {
1157: nz = a->i[A->rmap->n/bs];
1158: /* malloc space and add 1 to i and j indices */
1159: PetscMalloc1(A->rmap->n/bs+1,ia);
1160: for (i=0; i<A->rmap->n/bs+1; i++) (*ia)[i] = a->i[i] + 1;
1161: if (ja) {
1162: PetscMalloc1(nz,ja);
1163: for (i=0; i<nz; i++) (*ja)[i] = a->j[i] + 1;
1164: }
1165: }
1166: } else {
1167: *ia = tia;
1168: if (ja) *ja = tja;
1169: }
1170: return(0);
1171: }
1173: static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool *done)
1174: {
1178: if (!ia) return(0);
1179: if ((!blockcompressed && A->rmap->bs > 1) || (symmetric || oshift == 1)) {
1180: PetscFree(*ia);
1181: if (ja) {PetscFree(*ja);}
1182: }
1183: return(0);
1184: }
1186: PetscErrorCode MatDestroy_SeqBAIJ(Mat A)
1187: {
1188: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1192: #if defined(PETSC_USE_LOG)
1193: PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap->N,A->cmap->n,a->nz);
1194: #endif
1195: MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);
1196: ISDestroy(&a->row);
1197: ISDestroy(&a->col);
1198: if (a->free_diag) {PetscFree(a->diag);}
1199: PetscFree(a->idiag);
1200: if (a->free_imax_ilen) {PetscFree2(a->imax,a->ilen);}
1201: PetscFree(a->solve_work);
1202: PetscFree(a->mult_work);
1203: PetscFree(a->sor_workt);
1204: PetscFree(a->sor_work);
1205: ISDestroy(&a->icol);
1206: PetscFree(a->saved_values);
1207: PetscFree2(a->compressedrow.i,a->compressedrow.rindex);
1209: MatDestroy(&a->sbaijMat);
1210: MatDestroy(&a->parent);
1211: PetscFree(A->data);
1213: PetscObjectChangeTypeName((PetscObject)A,0);
1214: PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJGetArray_C",NULL);
1215: PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJRestoreArray_C",NULL);
1216: PetscObjectComposeFunction((PetscObject)A,"MatInvertBlockDiagonal_C",NULL);
1217: PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C",NULL);
1218: PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C",NULL);
1219: PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C",NULL);
1220: PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C",NULL);
1221: PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C",NULL);
1222: PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C",NULL);
1223: PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocationCSR_C",NULL);
1224: PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqbstrm_C",NULL);
1225: PetscObjectComposeFunction((PetscObject)A,"MatIsTranspose_C",NULL);
1226: #if defined(PETSC_HAVE_HYPRE)
1227: PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_hypre_C",NULL);
1228: #endif
1229: PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_is_C",NULL);
1230: PetscObjectComposeFunction((PetscObject)A,"MatPtAP_is_seqaij_C",NULL);
1231: return(0);
1232: }
1234: PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op,PetscBool flg)
1235: {
1236: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1240: switch (op) {
1241: case MAT_ROW_ORIENTED:
1242: a->roworiented = flg;
1243: break;
1244: case MAT_KEEP_NONZERO_PATTERN:
1245: a->keepnonzeropattern = flg;
1246: break;
1247: case MAT_NEW_NONZERO_LOCATIONS:
1248: a->nonew = (flg ? 0 : 1);
1249: break;
1250: case MAT_NEW_NONZERO_LOCATION_ERR:
1251: a->nonew = (flg ? -1 : 0);
1252: break;
1253: case MAT_NEW_NONZERO_ALLOCATION_ERR:
1254: a->nonew = (flg ? -2 : 0);
1255: break;
1256: case MAT_UNUSED_NONZERO_LOCATION_ERR:
1257: a->nounused = (flg ? -1 : 0);
1258: break;
1259: case MAT_NEW_DIAGONALS:
1260: case MAT_IGNORE_OFF_PROC_ENTRIES:
1261: case MAT_USE_HASH_TABLE:
1262: case MAT_SORTED_FULL:
1263: PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);
1264: break;
1265: case MAT_SPD:
1266: case MAT_SYMMETRIC:
1267: case MAT_STRUCTURALLY_SYMMETRIC:
1268: case MAT_HERMITIAN:
1269: case MAT_SYMMETRY_ETERNAL:
1270: case MAT_SUBMAT_SINGLEIS:
1271: case MAT_STRUCTURE_ONLY:
1272: /* These options are handled directly by MatSetOption() */
1273: break;
1274: default:
1275: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1276: }
1277: return(0);
1278: }
1280: /* used for both SeqBAIJ and SeqSBAIJ matrices */
1281: PetscErrorCode MatGetRow_SeqBAIJ_private(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v,PetscInt *ai,PetscInt *aj,PetscScalar *aa)
1282: {
1284: PetscInt itmp,i,j,k,M,bn,bp,*idx_i,bs,bs2;
1285: MatScalar *aa_i;
1286: PetscScalar *v_i;
1289: bs = A->rmap->bs;
1290: bs2 = bs*bs;
1291: if (row < 0 || row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row);
1293: bn = row/bs; /* Block number */
1294: bp = row % bs; /* Block Position */
1295: M = ai[bn+1] - ai[bn];
1296: *nz = bs*M;
1298: if (v) {
1299: *v = 0;
1300: if (*nz) {
1301: PetscMalloc1(*nz,v);
1302: for (i=0; i<M; i++) { /* for each block in the block row */
1303: v_i = *v + i*bs;
1304: aa_i = aa + bs2*(ai[bn] + i);
1305: for (j=bp,k=0; j<bs2; j+=bs,k++) v_i[k] = aa_i[j];
1306: }
1307: }
1308: }
1310: if (idx) {
1311: *idx = 0;
1312: if (*nz) {
1313: PetscMalloc1(*nz,idx);
1314: for (i=0; i<M; i++) { /* for each block in the block row */
1315: idx_i = *idx + i*bs;
1316: itmp = bs*aj[ai[bn] + i];
1317: for (j=0; j<bs; j++) idx_i[j] = itmp++;
1318: }
1319: }
1320: }
1321: return(0);
1322: }
1324: PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1325: {
1326: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1330: MatGetRow_SeqBAIJ_private(A,row,nz,idx,v,a->i,a->j,a->a);
1331: return(0);
1332: }
1334: PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1335: {
1339: if (idx) {PetscFree(*idx);}
1340: if (v) {PetscFree(*v);}
1341: return(0);
1342: }
1344: PetscErrorCode MatTranspose_SeqBAIJ(Mat A,MatReuse reuse,Mat *B)
1345: {
1346: Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data,*at;
1347: Mat C;
1349: PetscInt i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap->bs,mbs=a->mbs,nbs=a->nbs,*atfill;
1350: PetscInt bs2=a->bs2,*ati,*atj,anzj,kr;
1351: MatScalar *ata,*aa=a->a;
1354: PetscCalloc1(1+nbs,&atfill);
1355: if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
1356: for (i=0; i<ai[mbs]; i++) atfill[aj[i]] += 1; /* count num of non-zeros in row aj[i] */
1358: MatCreate(PetscObjectComm((PetscObject)A),&C);
1359: MatSetSizes(C,A->cmap->n,A->rmap->N,A->cmap->n,A->rmap->N);
1360: MatSetType(C,((PetscObject)A)->type_name);
1361: MatSeqBAIJSetPreallocation(C,bs,0,atfill);
1363: at = (Mat_SeqBAIJ*)C->data;
1364: ati = at->i;
1365: for (i=0; i<nbs; i++) at->ilen[i] = at->imax[i] = ati[i+1] - ati[i];
1366: } else {
1367: C = *B;
1368: at = (Mat_SeqBAIJ*)C->data;
1369: ati = at->i;
1370: }
1372: atj = at->j;
1373: ata = at->a;
1375: /* Copy ati into atfill so we have locations of the next free space in atj */
1376: PetscArraycpy(atfill,ati,nbs);
1378: /* Walk through A row-wise and mark nonzero entries of A^T. */
1379: for (i=0; i<mbs; i++) {
1380: anzj = ai[i+1] - ai[i];
1381: for (j=0; j<anzj; j++) {
1382: atj[atfill[*aj]] = i;
1383: for (kr=0; kr<bs; kr++) {
1384: for (k=0; k<bs; k++) {
1385: ata[bs2*atfill[*aj]+k*bs+kr] = *aa++;
1386: }
1387: }
1388: atfill[*aj++] += 1;
1389: }
1390: }
1391: MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);
1392: MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);
1394: /* Clean up temporary space and complete requests. */
1395: PetscFree(atfill);
1397: if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
1398: MatSetBlockSizes(C,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));
1399: *B = C;
1400: } else {
1401: MatHeaderMerge(A,&C);
1402: }
1403: return(0);
1404: }
1406: PetscErrorCode MatIsTranspose_SeqBAIJ(Mat A,Mat B,PetscReal tol,PetscBool *f)
1407: {
1409: Mat Btrans;
1412: *f = PETSC_FALSE;
1413: MatTranspose_SeqBAIJ(A,MAT_INITIAL_MATRIX,&Btrans);
1414: MatEqual_SeqBAIJ(B,Btrans,f);
1415: MatDestroy(&Btrans);
1416: return(0);
1417: }
1419: static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer)
1420: {
1421: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1423: PetscInt i,*col_lens,bs = A->rmap->bs,count,*jj,j,k,l,bs2=a->bs2;
1424: int fd;
1425: PetscScalar *aa;
1426: FILE *file;
1429: PetscViewerBinaryGetDescriptor(viewer,&fd);
1430: PetscMalloc1(4+A->rmap->N,&col_lens);
1431: col_lens[0] = MAT_FILE_CLASSID;
1433: col_lens[1] = A->rmap->N;
1434: col_lens[2] = A->cmap->n;
1435: col_lens[3] = a->nz*bs2;
1437: /* store lengths of each row and write (including header) to file */
1438: count = 0;
1439: for (i=0; i<a->mbs; i++) {
1440: for (j=0; j<bs; j++) {
1441: col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
1442: }
1443: }
1444: PetscBinaryWrite(fd,col_lens,4+A->rmap->N,PETSC_INT,PETSC_TRUE);
1445: PetscFree(col_lens);
1447: /* store column indices (zero start index) */
1448: PetscMalloc1((a->nz+1)*bs2,&jj);
1449: count = 0;
1450: for (i=0; i<a->mbs; i++) {
1451: for (j=0; j<bs; j++) {
1452: for (k=a->i[i]; k<a->i[i+1]; k++) {
1453: for (l=0; l<bs; l++) {
1454: jj[count++] = bs*a->j[k] + l;
1455: }
1456: }
1457: }
1458: }
1459: PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);
1460: PetscFree(jj);
1462: /* store nonzero values */
1463: PetscMalloc1((a->nz+1)*bs2,&aa);
1464: count = 0;
1465: for (i=0; i<a->mbs; i++) {
1466: for (j=0; j<bs; j++) {
1467: for (k=a->i[i]; k<a->i[i+1]; k++) {
1468: for (l=0; l<bs; l++) {
1469: aa[count++] = a->a[bs2*k + l*bs + j];
1470: }
1471: }
1472: }
1473: }
1474: PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);
1475: PetscFree(aa);
1477: PetscViewerBinaryGetInfoPointer(viewer,&file);
1478: if (file) {
1479: fprintf(file,"-matload_block_size %d\n",(int)A->rmap->bs);
1480: }
1481: return(0);
1482: }
1484: static PetscErrorCode MatView_SeqBAIJ_ASCII_structonly(Mat A,PetscViewer viewer)
1485: {
1487: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1488: PetscInt i,bs = A->rmap->bs,k;
1491: PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);
1492: for (i=0; i<a->mbs; i++) {
1493: PetscViewerASCIIPrintf(viewer,"row %D-%D:",i*bs,i*bs+bs-1);
1494: for (k=a->i[i]; k<a->i[i+1]; k++) {
1495: PetscViewerASCIIPrintf(viewer," (%D-%D) ",bs*a->j[k],bs*a->j[k]+bs-1);
1496: }
1497: PetscViewerASCIIPrintf(viewer,"\n");
1498: }
1499: PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);
1500: return(0);
1501: }
1503: static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer)
1504: {
1505: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1506: PetscErrorCode ierr;
1507: PetscInt i,j,bs = A->rmap->bs,k,l,bs2=a->bs2;
1508: PetscViewerFormat format;
1511: if (A->structure_only) {
1512: MatView_SeqBAIJ_ASCII_structonly(A,viewer);
1513: return(0);
1514: }
1516: PetscViewerGetFormat(viewer,&format);
1517: if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1518: PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);
1519: } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
1520: const char *matname;
1521: Mat aij;
1522: MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);
1523: PetscObjectGetName((PetscObject)A,&matname);
1524: PetscObjectSetName((PetscObject)aij,matname);
1525: MatView(aij,viewer);
1526: MatDestroy(&aij);
1527: } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1528: return(0);
1529: } else if (format == PETSC_VIEWER_ASCII_COMMON) {
1530: PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);
1531: for (i=0; i<a->mbs; i++) {
1532: for (j=0; j<bs; j++) {
1533: PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);
1534: for (k=a->i[i]; k<a->i[i+1]; k++) {
1535: for (l=0; l<bs; l++) {
1536: #if defined(PETSC_USE_COMPLEX)
1537: if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1538: PetscViewerASCIIPrintf(viewer," (%D, %g + %gi) ",bs*a->j[k]+l,
1539: (double)PetscRealPart(a->a[bs2*k + l*bs + j]),(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));
1540: } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1541: PetscViewerASCIIPrintf(viewer," (%D, %g - %gi) ",bs*a->j[k]+l,
1542: (double)PetscRealPart(a->a[bs2*k + l*bs + j]),-(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));
1543: } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1544: PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)PetscRealPart(a->a[bs2*k + l*bs + j]));
1545: }
1546: #else
1547: if (a->a[bs2*k + l*bs + j] != 0.0) {
1548: PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)a->a[bs2*k + l*bs + j]);
1549: }
1550: #endif
1551: }
1552: }
1553: PetscViewerASCIIPrintf(viewer,"\n");
1554: }
1555: }
1556: PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);
1557: } else {
1558: PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);
1559: for (i=0; i<a->mbs; i++) {
1560: for (j=0; j<bs; j++) {
1561: PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);
1562: for (k=a->i[i]; k<a->i[i+1]; k++) {
1563: for (l=0; l<bs; l++) {
1564: #if defined(PETSC_USE_COMPLEX)
1565: if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
1566: PetscViewerASCIIPrintf(viewer," (%D, %g + %g i) ",bs*a->j[k]+l,
1567: (double)PetscRealPart(a->a[bs2*k + l*bs + j]),(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));
1568: } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
1569: PetscViewerASCIIPrintf(viewer," (%D, %g - %g i) ",bs*a->j[k]+l,
1570: (double)PetscRealPart(a->a[bs2*k + l*bs + j]),-(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));
1571: } else {
1572: PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)PetscRealPart(a->a[bs2*k + l*bs + j]));
1573: }
1574: #else
1575: PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)a->a[bs2*k + l*bs + j]);
1576: #endif
1577: }
1578: }
1579: PetscViewerASCIIPrintf(viewer,"\n");
1580: }
1581: }
1582: PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);
1583: }
1584: PetscViewerFlush(viewer);
1585: return(0);
1586: }
1588: #include <petscdraw.h>
1589: static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
1590: {
1591: Mat A = (Mat) Aa;
1592: Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data;
1593: PetscErrorCode ierr;
1594: PetscInt row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap->bs,bs2=a->bs2;
1595: PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r;
1596: MatScalar *aa;
1597: PetscViewer viewer;
1598: PetscViewerFormat format;
1601: PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);
1602: PetscViewerGetFormat(viewer,&format);
1603: PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);
1605: /* loop over matrix elements drawing boxes */
1607: if (format != PETSC_VIEWER_DRAW_CONTOUR) {
1608: PetscDrawCollectiveBegin(draw);
1609: /* Blue for negative, Cyan for zero and Red for positive */
1610: color = PETSC_DRAW_BLUE;
1611: for (i=0,row=0; i<mbs; i++,row+=bs) {
1612: for (j=a->i[i]; j<a->i[i+1]; j++) {
1613: y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1614: x_l = a->j[j]*bs; x_r = x_l + 1.0;
1615: aa = a->a + j*bs2;
1616: for (k=0; k<bs; k++) {
1617: for (l=0; l<bs; l++) {
1618: if (PetscRealPart(*aa++) >= 0.) continue;
1619: PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
1620: }
1621: }
1622: }
1623: }
1624: color = PETSC_DRAW_CYAN;
1625: for (i=0,row=0; i<mbs; i++,row+=bs) {
1626: for (j=a->i[i]; j<a->i[i+1]; j++) {
1627: y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1628: x_l = a->j[j]*bs; x_r = x_l + 1.0;
1629: aa = a->a + j*bs2;
1630: for (k=0; k<bs; k++) {
1631: for (l=0; l<bs; l++) {
1632: if (PetscRealPart(*aa++) != 0.) continue;
1633: PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
1634: }
1635: }
1636: }
1637: }
1638: color = PETSC_DRAW_RED;
1639: for (i=0,row=0; i<mbs; i++,row+=bs) {
1640: for (j=a->i[i]; j<a->i[i+1]; j++) {
1641: y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1642: x_l = a->j[j]*bs; x_r = x_l + 1.0;
1643: aa = a->a + j*bs2;
1644: for (k=0; k<bs; k++) {
1645: for (l=0; l<bs; l++) {
1646: if (PetscRealPart(*aa++) <= 0.) continue;
1647: PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
1648: }
1649: }
1650: }
1651: }
1652: PetscDrawCollectiveEnd(draw);
1653: } else {
1654: /* use contour shading to indicate magnitude of values */
1655: /* first determine max of all nonzero values */
1656: PetscReal minv = 0.0, maxv = 0.0;
1657: PetscDraw popup;
1659: for (i=0; i<a->nz*a->bs2; i++) {
1660: if (PetscAbsScalar(a->a[i]) > maxv) maxv = PetscAbsScalar(a->a[i]);
1661: }
1662: if (minv >= maxv) maxv = minv + PETSC_SMALL;
1663: PetscDrawGetPopup(draw,&popup);
1664: PetscDrawScalePopup(popup,0.0,maxv);
1666: PetscDrawCollectiveBegin(draw);
1667: for (i=0,row=0; i<mbs; i++,row+=bs) {
1668: for (j=a->i[i]; j<a->i[i+1]; j++) {
1669: y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1670: x_l = a->j[j]*bs; x_r = x_l + 1.0;
1671: aa = a->a + j*bs2;
1672: for (k=0; k<bs; k++) {
1673: for (l=0; l<bs; l++) {
1674: MatScalar v = *aa++;
1675: color = PetscDrawRealToColor(PetscAbsScalar(v),minv,maxv);
1676: PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
1677: }
1678: }
1679: }
1680: }
1681: PetscDrawCollectiveEnd(draw);
1682: }
1683: return(0);
1684: }
1686: static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer)
1687: {
1689: PetscReal xl,yl,xr,yr,w,h;
1690: PetscDraw draw;
1691: PetscBool isnull;
1694: PetscViewerDrawGetDraw(viewer,0,&draw);
1695: PetscDrawIsNull(draw,&isnull);
1696: if (isnull) return(0);
1698: xr = A->cmap->n; yr = A->rmap->N; h = yr/10.0; w = xr/10.0;
1699: xr += w; yr += h; xl = -w; yl = -h;
1700: PetscDrawSetCoordinates(draw,xl,yl,xr,yr);
1701: PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);
1702: PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);
1703: PetscObjectCompose((PetscObject)A,"Zoomviewer",NULL);
1704: PetscDrawSave(draw);
1705: return(0);
1706: }
1708: PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer)
1709: {
1711: PetscBool iascii,isbinary,isdraw;
1714: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);
1715: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);
1716: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);
1717: if (iascii) {
1718: MatView_SeqBAIJ_ASCII(A,viewer);
1719: } else if (isbinary) {
1720: MatView_SeqBAIJ_Binary(A,viewer);
1721: } else if (isdraw) {
1722: MatView_SeqBAIJ_Draw(A,viewer);
1723: } else {
1724: Mat B;
1725: MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);
1726: MatView(B,viewer);
1727: MatDestroy(&B);
1728: }
1729: return(0);
1730: }
1733: PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[])
1734: {
1735: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1736: PetscInt *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
1737: PetscInt *ai = a->i,*ailen = a->ilen;
1738: PetscInt brow,bcol,ridx,cidx,bs=A->rmap->bs,bs2=a->bs2;
1739: MatScalar *ap,*aa = a->a;
1742: for (k=0; k<m; k++) { /* loop over rows */
1743: row = im[k]; brow = row/bs;
1744: if (row < 0) {v += n; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
1745: if (row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row);
1746: rp = aj + ai[brow]; ap = aa + bs2*ai[brow];
1747: nrow = ailen[brow];
1748: for (l=0; l<n; l++) { /* loop over columns */
1749: if (in[l] < 0) {v++; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
1750: if (in[l] >= A->cmap->n) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]);
1751: col = in[l];
1752: bcol = col/bs;
1753: cidx = col%bs;
1754: ridx = row%bs;
1755: high = nrow;
1756: low = 0; /* assume unsorted */
1757: while (high-low > 5) {
1758: t = (low+high)/2;
1759: if (rp[t] > bcol) high = t;
1760: else low = t;
1761: }
1762: for (i=low; i<high; i++) {
1763: if (rp[i] > bcol) break;
1764: if (rp[i] == bcol) {
1765: *v++ = ap[bs2*i+bs*cidx+ridx];
1766: goto finished;
1767: }
1768: }
1769: *v++ = 0.0;
1770: finished:;
1771: }
1772: }
1773: return(0);
1774: }
1776: PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
1777: {
1778: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1779: PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1;
1780: PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen;
1781: PetscErrorCode ierr;
1782: PetscInt *aj =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs,stepval;
1783: PetscBool roworiented=a->roworiented;
1784: const PetscScalar *value = v;
1785: MatScalar *ap=NULL,*aa = a->a,*bap;
1788: if (roworiented) {
1789: stepval = (n-1)*bs;
1790: } else {
1791: stepval = (m-1)*bs;
1792: }
1793: for (k=0; k<m; k++) { /* loop over added rows */
1794: row = im[k];
1795: if (row < 0) continue;
1796: #if defined(PETSC_USE_DEBUG)
1797: if (row >= a->mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block row index too large %D max %D",row,a->mbs-1);
1798: #endif
1799: rp = aj + ai[row];
1800: if (!A->structure_only) ap = aa + bs2*ai[row];
1801: rmax = imax[row];
1802: nrow = ailen[row];
1803: low = 0;
1804: high = nrow;
1805: for (l=0; l<n; l++) { /* loop over added columns */
1806: if (in[l] < 0) continue;
1807: #if defined(PETSC_USE_DEBUG)
1808: if (in[l] >= a->nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block column index too large %D max %D",in[l],a->nbs-1);
1809: #endif
1810: col = in[l];
1811: if (!A->structure_only) {
1812: if (roworiented) {
1813: value = v + (k*(stepval+bs) + l)*bs;
1814: } else {
1815: value = v + (l*(stepval+bs) + k)*bs;
1816: }
1817: }
1818: if (col <= lastcol) low = 0;
1819: else high = nrow;
1820: lastcol = col;
1821: while (high-low > 7) {
1822: t = (low+high)/2;
1823: if (rp[t] > col) high = t;
1824: else low = t;
1825: }
1826: for (i=low; i<high; i++) {
1827: if (rp[i] > col) break;
1828: if (rp[i] == col) {
1829: if (A->structure_only) goto noinsert2;
1830: bap = ap + bs2*i;
1831: if (roworiented) {
1832: if (is == ADD_VALUES) {
1833: for (ii=0; ii<bs; ii++,value+=stepval) {
1834: for (jj=ii; jj<bs2; jj+=bs) {
1835: bap[jj] += *value++;
1836: }
1837: }
1838: } else {
1839: for (ii=0; ii<bs; ii++,value+=stepval) {
1840: for (jj=ii; jj<bs2; jj+=bs) {
1841: bap[jj] = *value++;
1842: }
1843: }
1844: }
1845: } else {
1846: if (is == ADD_VALUES) {
1847: for (ii=0; ii<bs; ii++,value+=bs+stepval) {
1848: for (jj=0; jj<bs; jj++) {
1849: bap[jj] += value[jj];
1850: }
1851: bap += bs;
1852: }
1853: } else {
1854: for (ii=0; ii<bs; ii++,value+=bs+stepval) {
1855: for (jj=0; jj<bs; jj++) {
1856: bap[jj] = value[jj];
1857: }
1858: bap += bs;
1859: }
1860: }
1861: }
1862: goto noinsert2;
1863: }
1864: }
1865: if (nonew == 1) goto noinsert2;
1866: if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new blocked index new nonzero block (%D, %D) in the matrix", row, col);
1867: if (A->structure_only) {
1868: MatSeqXAIJReallocateAIJ_structure_only(A,a->mbs,bs2,nrow,row,col,rmax,ai,aj,rp,imax,nonew,MatScalar);
1869: } else {
1870: MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
1871: }
1872: N = nrow++ - 1; high++;
1873: /* shift up all the later entries in this row */
1874: PetscArraymove(rp+i+1,rp+i,N-i+1);
1875: rp[i] = col;
1876: if (!A->structure_only) {
1877: PetscArraymove(ap+bs2*(i+1),ap+bs2*i,bs2*(N-i+1));
1878: bap = ap + bs2*i;
1879: if (roworiented) {
1880: for (ii=0; ii<bs; ii++,value+=stepval) {
1881: for (jj=ii; jj<bs2; jj+=bs) {
1882: bap[jj] = *value++;
1883: }
1884: }
1885: } else {
1886: for (ii=0; ii<bs; ii++,value+=stepval) {
1887: for (jj=0; jj<bs; jj++) {
1888: *bap++ = *value++;
1889: }
1890: }
1891: }
1892: }
1893: noinsert2:;
1894: low = i;
1895: }
1896: ailen[row] = nrow;
1897: }
1898: return(0);
1899: }
1901: PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
1902: {
1903: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1904: PetscInt fshift = 0,i,*ai = a->i,*aj = a->j,*imax = a->imax;
1905: PetscInt m = A->rmap->N,*ip,N,*ailen = a->ilen;
1907: PetscInt mbs = a->mbs,bs2 = a->bs2,rmax = 0;
1908: MatScalar *aa = a->a,*ap;
1909: PetscReal ratio=0.6;
1912: if (mode == MAT_FLUSH_ASSEMBLY) return(0);
1914: if (m) rmax = ailen[0];
1915: for (i=1; i<mbs; i++) {
1916: /* move each row back by the amount of empty slots (fshift) before it*/
1917: fshift += imax[i-1] - ailen[i-1];
1918: rmax = PetscMax(rmax,ailen[i]);
1919: if (fshift) {
1920: ip = aj + ai[i];
1921: ap = aa + bs2*ai[i];
1922: N = ailen[i];
1923: PetscArraymove(ip-fshift,ip,N);
1924: if (!A->structure_only) {
1925: PetscArraymove(ap-bs2*fshift,ap,bs2*N);
1926: }
1927: }
1928: ai[i] = ai[i-1] + ailen[i-1];
1929: }
1930: if (mbs) {
1931: fshift += imax[mbs-1] - ailen[mbs-1];
1932: ai[mbs] = ai[mbs-1] + ailen[mbs-1];
1933: }
1935: /* reset ilen and imax for each row */
1936: a->nonzerorowcnt = 0;
1937: if (A->structure_only) {
1938: PetscFree2(a->imax,a->ilen);
1939: } else { /* !A->structure_only */
1940: for (i=0; i<mbs; i++) {
1941: ailen[i] = imax[i] = ai[i+1] - ai[i];
1942: a->nonzerorowcnt += ((ai[i+1] - ai[i]) > 0);
1943: }
1944: }
1945: a->nz = ai[mbs];
1947: /* diagonals may have moved, so kill the diagonal pointers */
1948: a->idiagvalid = PETSC_FALSE;
1949: if (fshift && a->diag) {
1950: PetscFree(a->diag);
1951: PetscLogObjectMemory((PetscObject)A,-(mbs+1)*sizeof(PetscInt));
1952: a->diag = 0;
1953: }
1954: if (fshift && a->nounused == -1) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "Unused space detected in matrix: %D X %D block size %D, %D unneeded", m, A->cmap->n, A->rmap->bs, fshift*bs2);
1955: PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap->n,A->rmap->bs,fshift*bs2,a->nz*bs2);
1956: PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);
1957: PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);
1959: A->info.mallocs += a->reallocs;
1960: a->reallocs = 0;
1961: A->info.nz_unneeded = (PetscReal)fshift*bs2;
1962: a->rmax = rmax;
1964: if (!A->structure_only) {
1965: MatCheckCompressedRow(A,a->nonzerorowcnt,&a->compressedrow,a->i,mbs,ratio);
1966: }
1967: return(0);
1968: }
1970: /*
1971: This function returns an array of flags which indicate the locations of contiguous
1972: blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9]
1973: then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
1974: Assume: sizes should be long enough to hold all the values.
1975: */
1976: static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max)
1977: {
1978: PetscInt i,j,k,row;
1979: PetscBool flg;
1982: for (i=0,j=0; i<n; j++) {
1983: row = idx[i];
1984: if (row%bs!=0) { /* Not the begining of a block */
1985: sizes[j] = 1;
1986: i++;
1987: } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */
1988: sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */
1989: i++;
1990: } else { /* Begining of the block, so check if the complete block exists */
1991: flg = PETSC_TRUE;
1992: for (k=1; k<bs; k++) {
1993: if (row+k != idx[i+k]) { /* break in the block */
1994: flg = PETSC_FALSE;
1995: break;
1996: }
1997: }
1998: if (flg) { /* No break in the bs */
1999: sizes[j] = bs;
2000: i += bs;
2001: } else {
2002: sizes[j] = 1;
2003: i++;
2004: }
2005: }
2006: }
2007: *bs_max = j;
2008: return(0);
2009: }
2011: PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b)
2012: {
2013: Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data;
2014: PetscErrorCode ierr;
2015: PetscInt i,j,k,count,*rows;
2016: PetscInt bs=A->rmap->bs,bs2=baij->bs2,*sizes,row,bs_max;
2017: PetscScalar zero = 0.0;
2018: MatScalar *aa;
2019: const PetscScalar *xx;
2020: PetscScalar *bb;
2023: /* fix right hand side if needed */
2024: if (x && b) {
2025: VecGetArrayRead(x,&xx);
2026: VecGetArray(b,&bb);
2027: for (i=0; i<is_n; i++) {
2028: bb[is_idx[i]] = diag*xx[is_idx[i]];
2029: }
2030: VecRestoreArrayRead(x,&xx);
2031: VecRestoreArray(b,&bb);
2032: }
2034: /* Make a copy of the IS and sort it */
2035: /* allocate memory for rows,sizes */
2036: PetscMalloc2(is_n,&rows,2*is_n,&sizes);
2038: /* copy IS values to rows, and sort them */
2039: for (i=0; i<is_n; i++) rows[i] = is_idx[i];
2040: PetscSortInt(is_n,rows);
2042: if (baij->keepnonzeropattern) {
2043: for (i=0; i<is_n; i++) sizes[i] = 1;
2044: bs_max = is_n;
2045: } else {
2046: MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);
2047: A->nonzerostate++;
2048: }
2050: for (i=0,j=0; i<bs_max; j+=sizes[i],i++) {
2051: row = rows[j];
2052: if (row < 0 || row > A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row);
2053: count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
2054: aa = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
2055: if (sizes[i] == bs && !baij->keepnonzeropattern) {
2056: if (diag != (PetscScalar)0.0) {
2057: if (baij->ilen[row/bs] > 0) {
2058: baij->ilen[row/bs] = 1;
2059: baij->j[baij->i[row/bs]] = row/bs;
2061: PetscArrayzero(aa,count*bs);
2062: }
2063: /* Now insert all the diagonal values for this bs */
2064: for (k=0; k<bs; k++) {
2065: (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);
2066: }
2067: } else { /* (diag == 0.0) */
2068: baij->ilen[row/bs] = 0;
2069: } /* end (diag == 0.0) */
2070: } else { /* (sizes[i] != bs) */
2071: #if defined(PETSC_USE_DEBUG)
2072: if (sizes[i] != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal Error. Value should be 1");
2073: #endif
2074: for (k=0; k<count; k++) {
2075: aa[0] = zero;
2076: aa += bs;
2077: }
2078: if (diag != (PetscScalar)0.0) {
2079: (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);
2080: }
2081: }
2082: }
2084: PetscFree2(rows,sizes);
2085: MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);
2086: return(0);
2087: }
2089: PetscErrorCode MatZeroRowsColumns_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b)
2090: {
2091: Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data;
2092: PetscErrorCode ierr;
2093: PetscInt i,j,k,count;
2094: PetscInt bs =A->rmap->bs,bs2=baij->bs2,row,col;
2095: PetscScalar zero = 0.0;
2096: MatScalar *aa;
2097: const PetscScalar *xx;
2098: PetscScalar *bb;
2099: PetscBool *zeroed,vecs = PETSC_FALSE;
2102: /* fix right hand side if needed */
2103: if (x && b) {
2104: VecGetArrayRead(x,&xx);
2105: VecGetArray(b,&bb);
2106: vecs = PETSC_TRUE;
2107: }
2109: /* zero the columns */
2110: PetscCalloc1(A->rmap->n,&zeroed);
2111: for (i=0; i<is_n; i++) {
2112: if (is_idx[i] < 0 || is_idx[i] >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",is_idx[i]);
2113: zeroed[is_idx[i]] = PETSC_TRUE;
2114: }
2115: for (i=0; i<A->rmap->N; i++) {
2116: if (!zeroed[i]) {
2117: row = i/bs;
2118: for (j=baij->i[row]; j<baij->i[row+1]; j++) {
2119: for (k=0; k<bs; k++) {
2120: col = bs*baij->j[j] + k;
2121: if (zeroed[col]) {
2122: aa = ((MatScalar*)(baij->a)) + j*bs2 + (i%bs) + bs*k;
2123: if (vecs) bb[i] -= aa[0]*xx[col];
2124: aa[0] = 0.0;
2125: }
2126: }
2127: }
2128: } else if (vecs) bb[i] = diag*xx[i];
2129: }
2130: PetscFree(zeroed);
2131: if (vecs) {
2132: VecRestoreArrayRead(x,&xx);
2133: VecRestoreArray(b,&bb);
2134: }
2136: /* zero the rows */
2137: for (i=0; i<is_n; i++) {
2138: row = is_idx[i];
2139: count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
2140: aa = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
2141: for (k=0; k<count; k++) {
2142: aa[0] = zero;
2143: aa += bs;
2144: }
2145: if (diag != (PetscScalar)0.0) {
2146: (*A->ops->setvalues)(A,1,&row,1,&row,&diag,INSERT_VALUES);
2147: }
2148: }
2149: MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);
2150: return(0);
2151: }
2153: PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
2154: {
2155: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2156: PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1;
2157: PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen;
2158: PetscInt *aj =a->j,nonew=a->nonew,bs=A->rmap->bs,brow,bcol;
2160: PetscInt ridx,cidx,bs2=a->bs2;
2161: PetscBool roworiented=a->roworiented;
2162: MatScalar *ap=NULL,value=0.0,*aa=a->a,*bap;
2165: for (k=0; k<m; k++) { /* loop over added rows */
2166: row = im[k];
2167: brow = row/bs;
2168: if (row < 0) continue;
2169: #if defined(PETSC_USE_DEBUG)
2170: if (row >= A->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->N-1);
2171: #endif
2172: rp = aj + ai[brow];
2173: if (!A->structure_only) ap = aa + bs2*ai[brow];
2174: rmax = imax[brow];
2175: nrow = ailen[brow];
2176: low = 0;
2177: high = nrow;
2178: for (l=0; l<n; l++) { /* loop over added columns */
2179: if (in[l] < 0) continue;
2180: #if defined(PETSC_USE_DEBUG)
2181: if (in[l] >= A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap->n-1);
2182: #endif
2183: col = in[l]; bcol = col/bs;
2184: ridx = row % bs; cidx = col % bs;
2185: if (!A->structure_only) {
2186: if (roworiented) {
2187: value = v[l + k*n];
2188: } else {
2189: value = v[k + l*m];
2190: }
2191: }
2192: if (col <= lastcol) low = 0; else high = nrow;
2193: lastcol = col;
2194: while (high-low > 7) {
2195: t = (low+high)/2;
2196: if (rp[t] > bcol) high = t;
2197: else low = t;
2198: }
2199: for (i=low; i<high; i++) {
2200: if (rp[i] > bcol) break;
2201: if (rp[i] == bcol) {
2202: bap = ap + bs2*i + bs*cidx + ridx;
2203: if (!A->structure_only) {
2204: if (is == ADD_VALUES) *bap += value;
2205: else *bap = value;
2206: }
2207: goto noinsert1;
2208: }
2209: }
2210: if (nonew == 1) goto noinsert1;
2211: if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
2212: if (A->structure_only) {
2213: MatSeqXAIJReallocateAIJ_structure_only(A,a->mbs,bs2,nrow,brow,bcol,rmax,ai,aj,rp,imax,nonew,MatScalar);
2214: } else {
2215: MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
2216: }
2217: N = nrow++ - 1; high++;
2218: /* shift up all the later entries in this row */
2219: PetscArraymove(rp+i+1,rp+i,N-i+1);
2220: rp[i] = bcol;
2221: if (!A->structure_only) {
2222: PetscArraymove(ap+bs2*(i+1),ap+bs2*i,bs2*(N-i+1));
2223: PetscArrayzero(ap+bs2*i,bs2);
2224: ap[bs2*i + bs*cidx + ridx] = value;
2225: }
2226: a->nz++;
2227: A->nonzerostate++;
2228: noinsert1:;
2229: low = i;
2230: }
2231: ailen[brow] = nrow;
2232: }
2233: return(0);
2234: }
2236: PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,const MatFactorInfo *info)
2237: {
2238: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inA->data;
2239: Mat outA;
2241: PetscBool row_identity,col_identity;
2244: if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU");
2245: ISIdentity(row,&row_identity);
2246: ISIdentity(col,&col_identity);
2247: if (!row_identity || !col_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU");
2249: outA = inA;
2250: inA->factortype = MAT_FACTOR_LU;
2251: PetscFree(inA->solvertype);
2252: PetscStrallocpy(MATSOLVERPETSC,&inA->solvertype);
2254: MatMarkDiagonal_SeqBAIJ(inA);
2256: PetscObjectReference((PetscObject)row);
2257: ISDestroy(&a->row);
2258: a->row = row;
2259: PetscObjectReference((PetscObject)col);
2260: ISDestroy(&a->col);
2261: a->col = col;
2263: /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
2264: ISDestroy(&a->icol);
2265: ISInvertPermutation(col,PETSC_DECIDE,&a->icol);
2266: PetscLogObjectParent((PetscObject)inA,(PetscObject)a->icol);
2268: MatSeqBAIJSetNumericFactorization_inplace(inA,(PetscBool)(row_identity && col_identity));
2269: if (!a->solve_work) {
2270: PetscMalloc1(inA->rmap->N+inA->rmap->bs,&a->solve_work);
2271: PetscLogObjectMemory((PetscObject)inA,(inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar));
2272: }
2273: MatLUFactorNumeric(outA,inA,info);
2274: return(0);
2275: }
2277: PetscErrorCode MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices)
2278: {
2279: Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)mat->data;
2280: PetscInt i,nz,mbs;
2283: nz = baij->maxnz;
2284: mbs = baij->mbs;
2285: for (i=0; i<nz; i++) {
2286: baij->j[i] = indices[i];
2287: }
2288: baij->nz = nz;
2289: for (i=0; i<mbs; i++) {
2290: baij->ilen[i] = baij->imax[i];
2291: }
2292: return(0);
2293: }
2295: /*@
2296: MatSeqBAIJSetColumnIndices - Set the column indices for all the rows
2297: in the matrix.
2299: Input Parameters:
2300: + mat - the SeqBAIJ matrix
2301: - indices - the column indices
2303: Level: advanced
2305: Notes:
2306: This can be called if you have precomputed the nonzero structure of the
2307: matrix and want to provide it to the matrix object to improve the performance
2308: of the MatSetValues() operation.
2310: You MUST have set the correct numbers of nonzeros per row in the call to
2311: MatCreateSeqBAIJ(), and the columns indices MUST be sorted.
2313: MUST be called before any calls to MatSetValues();
2315: @*/
2316: PetscErrorCode MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices)
2317: {
2323: PetscUseMethod(mat,"MatSeqBAIJSetColumnIndices_C",(Mat,PetscInt*),(mat,indices));
2324: return(0);
2325: }
2327: PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[])
2328: {
2329: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2331: PetscInt i,j,n,row,bs,*ai,*aj,mbs;
2332: PetscReal atmp;
2333: PetscScalar *x,zero = 0.0;
2334: MatScalar *aa;
2335: PetscInt ncols,brow,krow,kcol;
2338: if (A->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
2339: bs = A->rmap->bs;
2340: aa = a->a;
2341: ai = a->i;
2342: aj = a->j;
2343: mbs = a->mbs;
2345: VecSet(v,zero);
2346: VecGetArray(v,&x);
2347: VecGetLocalSize(v,&n);
2348: if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
2349: for (i=0; i<mbs; i++) {
2350: ncols = ai[1] - ai[0]; ai++;
2351: brow = bs*i;
2352: for (j=0; j<ncols; j++) {
2353: for (kcol=0; kcol<bs; kcol++) {
2354: for (krow=0; krow<bs; krow++) {
2355: atmp = PetscAbsScalar(*aa);aa++;
2356: row = brow + krow; /* row index */
2357: if (PetscAbsScalar(x[row]) < atmp) {x[row] = atmp; if (idx) idx[row] = bs*(*aj) + kcol;}
2358: }
2359: }
2360: aj++;
2361: }
2362: }
2363: VecRestoreArray(v,&x);
2364: return(0);
2365: }
2367: PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str)
2368: {
2372: /* If the two matrices have the same copy implementation, use fast copy. */
2373: if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) {
2374: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2375: Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)B->data;
2376: PetscInt ambs=a->mbs,bmbs=b->mbs,abs=A->rmap->bs,bbs=B->rmap->bs,bs2=abs*abs;
2378: if (a->i[ambs] != b->i[bmbs]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Number of nonzero blocks in matrices A %D and B %D are different",a->i[ambs],b->i[bmbs]);
2379: if (abs != bbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Block size A %D and B %D are different",abs,bbs);
2380: PetscArraycpy(b->a,a->a,bs2*a->i[ambs]);
2381: PetscObjectStateIncrease((PetscObject)B);
2382: } else {
2383: MatCopy_Basic(A,B,str);
2384: }
2385: return(0);
2386: }
2388: PetscErrorCode MatSetUp_SeqBAIJ(Mat A)
2389: {
2393: MatSeqBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,0);
2394: return(0);
2395: }
2397: static PetscErrorCode MatSeqBAIJGetArray_SeqBAIJ(Mat A,PetscScalar *array[])
2398: {
2399: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2402: *array = a->a;
2403: return(0);
2404: }
2406: static PetscErrorCode MatSeqBAIJRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[])
2407: {
2409: *array = NULL;
2410: return(0);
2411: }
2413: PetscErrorCode MatAXPYGetPreallocation_SeqBAIJ(Mat Y,Mat X,PetscInt *nnz)
2414: {
2415: PetscInt bs = Y->rmap->bs,mbs = Y->rmap->N/bs;
2416: Mat_SeqBAIJ *x = (Mat_SeqBAIJ*)X->data;
2417: Mat_SeqBAIJ *y = (Mat_SeqBAIJ*)Y->data;
2421: /* Set the number of nonzeros in the new matrix */
2422: MatAXPYGetPreallocation_SeqX_private(mbs,x->i,x->j,y->i,y->j,nnz);
2423: return(0);
2424: }
2426: PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2427: {
2428: Mat_SeqBAIJ *x = (Mat_SeqBAIJ*)X->data,*y = (Mat_SeqBAIJ*)Y->data;
2430: PetscInt bs=Y->rmap->bs,bs2=bs*bs;
2431: PetscBLASInt one=1;
2434: if (str == SAME_NONZERO_PATTERN) {
2435: PetscScalar alpha = a;
2436: PetscBLASInt bnz;
2437: PetscBLASIntCast(x->nz*bs2,&bnz);
2438: PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2439: PetscObjectStateIncrease((PetscObject)Y);
2440: } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2441: MatAXPY_Basic(Y,a,X,str);
2442: } else {
2443: Mat B;
2444: PetscInt *nnz;
2445: if (bs != X->rmap->bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrices must have same block size");
2446: PetscMalloc1(Y->rmap->N,&nnz);
2447: MatCreate(PetscObjectComm((PetscObject)Y),&B);
2448: PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);
2449: MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);
2450: MatSetBlockSizesFromMats(B,Y,Y);
2451: MatSetType(B,(MatType) ((PetscObject)Y)->type_name);
2452: MatAXPYGetPreallocation_SeqBAIJ(Y,X,nnz);
2453: MatSeqBAIJSetPreallocation(B,bs,0,nnz);
2454: MatAXPY_BasicWithPreallocation(B,Y,a,X,str);
2455: MatHeaderReplace(Y,&B);
2456: PetscFree(nnz);
2457: }
2458: return(0);
2459: }
2461: PetscErrorCode MatRealPart_SeqBAIJ(Mat A)
2462: {
2463: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2464: PetscInt i,nz = a->bs2*a->i[a->mbs];
2465: MatScalar *aa = a->a;
2468: for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]);
2469: return(0);
2470: }
2472: PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A)
2473: {
2474: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2475: PetscInt i,nz = a->bs2*a->i[a->mbs];
2476: MatScalar *aa = a->a;
2479: for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]);
2480: return(0);
2481: }
2483: /*
2484: Code almost idential to MatGetColumnIJ_SeqAIJ() should share common code
2485: */
2486: PetscErrorCode MatGetColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool *done)
2487: {
2488: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2490: PetscInt bs = A->rmap->bs,i,*collengths,*cia,*cja,n = A->cmap->n/bs,m = A->rmap->n/bs;
2491: PetscInt nz = a->i[m],row,*jj,mr,col;
2494: *nn = n;
2495: if (!ia) return(0);
2496: if (symmetric) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not for BAIJ matrices");
2497: else {
2498: PetscCalloc1(n,&collengths);
2499: PetscMalloc1(n+1,&cia);
2500: PetscMalloc1(nz,&cja);
2501: jj = a->j;
2502: for (i=0; i<nz; i++) {
2503: collengths[jj[i]]++;
2504: }
2505: cia[0] = oshift;
2506: for (i=0; i<n; i++) {
2507: cia[i+1] = cia[i] + collengths[i];
2508: }
2509: PetscArrayzero(collengths,n);
2510: jj = a->j;
2511: for (row=0; row<m; row++) {
2512: mr = a->i[row+1] - a->i[row];
2513: for (i=0; i<mr; i++) {
2514: col = *jj++;
2516: cja[cia[col] + collengths[col]++ - oshift] = row + oshift;
2517: }
2518: }
2519: PetscFree(collengths);
2520: *ia = cia; *ja = cja;
2521: }
2522: return(0);
2523: }
2525: PetscErrorCode MatRestoreColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscBool *done)
2526: {
2530: if (!ia) return(0);
2531: PetscFree(*ia);
2532: PetscFree(*ja);
2533: return(0);
2534: }
2536: /*
2537: MatGetColumnIJ_SeqBAIJ_Color() and MatRestoreColumnIJ_SeqBAIJ_Color() are customized from
2538: MatGetColumnIJ_SeqBAIJ() and MatRestoreColumnIJ_SeqBAIJ() by adding an output
2539: spidx[], index of a->a, to be used in MatTransposeColoringCreate() and MatFDColoringCreate()
2540: */
2541: PetscErrorCode MatGetColumnIJ_SeqBAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscInt *spidx[],PetscBool *done)
2542: {
2543: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2545: PetscInt i,*collengths,*cia,*cja,n=a->nbs,m=a->mbs;
2546: PetscInt nz = a->i[m],row,*jj,mr,col;
2547: PetscInt *cspidx;
2550: *nn = n;
2551: if (!ia) return(0);
2553: PetscCalloc1(n,&collengths);
2554: PetscMalloc1(n+1,&cia);
2555: PetscMalloc1(nz,&cja);
2556: PetscMalloc1(nz,&cspidx);
2557: jj = a->j;
2558: for (i=0; i<nz; i++) {
2559: collengths[jj[i]]++;
2560: }
2561: cia[0] = oshift;
2562: for (i=0; i<n; i++) {
2563: cia[i+1] = cia[i] + collengths[i];
2564: }
2565: PetscArrayzero(collengths,n);
2566: jj = a->j;
2567: for (row=0; row<m; row++) {
2568: mr = a->i[row+1] - a->i[row];
2569: for (i=0; i<mr; i++) {
2570: col = *jj++;
2571: cspidx[cia[col] + collengths[col] - oshift] = a->i[row] + i; /* index of a->j */
2572: cja[cia[col] + collengths[col]++ - oshift] = row + oshift;
2573: }
2574: }
2575: PetscFree(collengths);
2576: *ia = cia;
2577: *ja = cja;
2578: *spidx = cspidx;
2579: return(0);
2580: }
2582: PetscErrorCode MatRestoreColumnIJ_SeqBAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscInt *spidx[],PetscBool *done)
2583: {
2587: MatRestoreColumnIJ_SeqBAIJ(A,oshift,symmetric,inodecompressed,n,ia,ja,done);
2588: PetscFree(*spidx);
2589: return(0);
2590: }
2592: PetscErrorCode MatShift_SeqBAIJ(Mat Y,PetscScalar a)
2593: {
2595: Mat_SeqBAIJ *aij = (Mat_SeqBAIJ*)Y->data;
2598: if (!Y->preallocated || !aij->nz) {
2599: MatSeqBAIJSetPreallocation(Y,Y->rmap->bs,1,NULL);
2600: }
2601: MatShift_Basic(Y,a);
2602: return(0);
2603: }
2605: /* -------------------------------------------------------------------*/
2606: static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ,
2607: MatGetRow_SeqBAIJ,
2608: MatRestoreRow_SeqBAIJ,
2609: MatMult_SeqBAIJ_N,
2610: /* 4*/ MatMultAdd_SeqBAIJ_N,
2611: MatMultTranspose_SeqBAIJ,
2612: MatMultTransposeAdd_SeqBAIJ,
2613: 0,
2614: 0,
2615: 0,
2616: /* 10*/ 0,
2617: MatLUFactor_SeqBAIJ,
2618: 0,
2619: 0,
2620: MatTranspose_SeqBAIJ,
2621: /* 15*/ MatGetInfo_SeqBAIJ,
2622: MatEqual_SeqBAIJ,
2623: MatGetDiagonal_SeqBAIJ,
2624: MatDiagonalScale_SeqBAIJ,
2625: MatNorm_SeqBAIJ,
2626: /* 20*/ 0,
2627: MatAssemblyEnd_SeqBAIJ,
2628: MatSetOption_SeqBAIJ,
2629: MatZeroEntries_SeqBAIJ,
2630: /* 24*/ MatZeroRows_SeqBAIJ,
2631: 0,
2632: 0,
2633: 0,
2634: 0,
2635: /* 29*/ MatSetUp_SeqBAIJ,
2636: 0,
2637: 0,
2638: 0,
2639: 0,
2640: /* 34*/ MatDuplicate_SeqBAIJ,
2641: 0,
2642: 0,
2643: MatILUFactor_SeqBAIJ,
2644: 0,
2645: /* 39*/ MatAXPY_SeqBAIJ,
2646: MatCreateSubMatrices_SeqBAIJ,
2647: MatIncreaseOverlap_SeqBAIJ,
2648: MatGetValues_SeqBAIJ,
2649: MatCopy_SeqBAIJ,
2650: /* 44*/ 0,
2651: MatScale_SeqBAIJ,
2652: MatShift_SeqBAIJ,
2653: 0,
2654: MatZeroRowsColumns_SeqBAIJ,
2655: /* 49*/ 0,
2656: MatGetRowIJ_SeqBAIJ,
2657: MatRestoreRowIJ_SeqBAIJ,
2658: MatGetColumnIJ_SeqBAIJ,
2659: MatRestoreColumnIJ_SeqBAIJ,
2660: /* 54*/ MatFDColoringCreate_SeqXAIJ,
2661: 0,
2662: 0,
2663: 0,
2664: MatSetValuesBlocked_SeqBAIJ,
2665: /* 59*/ MatCreateSubMatrix_SeqBAIJ,
2666: MatDestroy_SeqBAIJ,
2667: MatView_SeqBAIJ,
2668: 0,
2669: 0,
2670: /* 64*/ 0,
2671: 0,
2672: 0,
2673: 0,
2674: 0,
2675: /* 69*/ MatGetRowMaxAbs_SeqBAIJ,
2676: 0,
2677: MatConvert_Basic,
2678: 0,
2679: 0,
2680: /* 74*/ 0,
2681: MatFDColoringApply_BAIJ,
2682: 0,
2683: 0,
2684: 0,
2685: /* 79*/ 0,
2686: 0,
2687: 0,
2688: 0,
2689: MatLoad_SeqBAIJ,
2690: /* 84*/ 0,
2691: 0,
2692: 0,
2693: 0,
2694: 0,
2695: /* 89*/ 0,
2696: 0,
2697: 0,
2698: 0,
2699: 0,
2700: /* 94*/ 0,
2701: 0,
2702: 0,
2703: 0,
2704: 0,
2705: /* 99*/ 0,
2706: 0,
2707: 0,
2708: 0,
2709: 0,
2710: /*104*/ 0,
2711: MatRealPart_SeqBAIJ,
2712: MatImaginaryPart_SeqBAIJ,
2713: 0,
2714: 0,
2715: /*109*/ 0,
2716: 0,
2717: 0,
2718: 0,
2719: MatMissingDiagonal_SeqBAIJ,
2720: /*114*/ 0,
2721: 0,
2722: 0,
2723: 0,
2724: 0,
2725: /*119*/ 0,
2726: 0,
2727: MatMultHermitianTranspose_SeqBAIJ,
2728: MatMultHermitianTransposeAdd_SeqBAIJ,
2729: 0,
2730: /*124*/ 0,
2731: 0,
2732: MatInvertBlockDiagonal_SeqBAIJ,
2733: 0,
2734: 0,
2735: /*129*/ 0,
2736: 0,
2737: 0,
2738: 0,
2739: 0,
2740: /*134*/ 0,
2741: 0,
2742: 0,
2743: 0,
2744: 0,
2745: /*139*/ MatSetBlockSizes_Default,
2746: 0,
2747: 0,
2748: MatFDColoringSetUp_SeqXAIJ,
2749: 0,
2750: /*144*/MatCreateMPIMatConcatenateSeqMat_SeqBAIJ,
2751: MatDestroySubMatrices_SeqBAIJ
2752: };
2754: PetscErrorCode MatStoreValues_SeqBAIJ(Mat mat)
2755: {
2756: Mat_SeqBAIJ *aij = (Mat_SeqBAIJ*)mat->data;
2757: PetscInt nz = aij->i[aij->mbs]*aij->bs2;
2761: if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
2763: /* allocate space for values if not already there */
2764: if (!aij->saved_values) {
2765: PetscMalloc1(nz+1,&aij->saved_values);
2766: PetscLogObjectMemory((PetscObject)mat,(nz+1)*sizeof(PetscScalar));
2767: }
2769: /* copy values over */
2770: PetscArraycpy(aij->saved_values,aij->a,nz);
2771: return(0);
2772: }
2774: PetscErrorCode MatRetrieveValues_SeqBAIJ(Mat mat)
2775: {
2776: Mat_SeqBAIJ *aij = (Mat_SeqBAIJ*)mat->data;
2778: PetscInt nz = aij->i[aij->mbs]*aij->bs2;
2781: if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
2782: if (!aij->saved_values) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatStoreValues(A);first");
2784: /* copy values over */
2785: PetscArraycpy(aij->a,aij->saved_values,nz);
2786: return(0);
2787: }
2789: PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*);
2790: PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*);
2792: PetscErrorCode MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz)
2793: {
2794: Mat_SeqBAIJ *b;
2796: PetscInt i,mbs,nbs,bs2;
2797: PetscBool flg = PETSC_FALSE,skipallocation = PETSC_FALSE,realalloc = PETSC_FALSE;
2800: if (nz >= 0 || nnz) realalloc = PETSC_TRUE;
2801: if (nz == MAT_SKIP_ALLOCATION) {
2802: skipallocation = PETSC_TRUE;
2803: nz = 0;
2804: }
2806: MatSetBlockSize(B,PetscAbs(bs));
2807: PetscLayoutSetUp(B->rmap);
2808: PetscLayoutSetUp(B->cmap);
2809: PetscLayoutGetBlockSize(B->rmap,&bs);
2811: B->preallocated = PETSC_TRUE;
2813: mbs = B->rmap->n/bs;
2814: nbs = B->cmap->n/bs;
2815: bs2 = bs*bs;
2817: if (mbs*bs!=B->rmap->n || nbs*bs!=B->cmap->n) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap->N,B->cmap->n,bs);
2819: if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2820: if (nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz);
2821: if (nnz) {
2822: for (i=0; i<mbs; i++) {
2823: if (nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]);
2824: if (nnz[i] > nbs) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs);
2825: }
2826: }
2828: b = (Mat_SeqBAIJ*)B->data;
2829: PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat");
2830: PetscOptionsBool("-mat_no_unroll","Do not optimize for block size (slow)",NULL,flg,&flg,NULL);
2831: PetscOptionsEnd();
2833: if (!flg) {
2834: switch (bs) {
2835: case 1:
2836: B->ops->mult = MatMult_SeqBAIJ_1;
2837: B->ops->multadd = MatMultAdd_SeqBAIJ_1;
2838: break;
2839: case 2:
2840: B->ops->mult = MatMult_SeqBAIJ_2;
2841: B->ops->multadd = MatMultAdd_SeqBAIJ_2;
2842: break;
2843: case 3:
2844: B->ops->mult = MatMult_SeqBAIJ_3;
2845: B->ops->multadd = MatMultAdd_SeqBAIJ_3;
2846: break;
2847: case 4:
2848: B->ops->mult = MatMult_SeqBAIJ_4;
2849: B->ops->multadd = MatMultAdd_SeqBAIJ_4;
2850: break;
2851: case 5:
2852: B->ops->mult = MatMult_SeqBAIJ_5;
2853: B->ops->multadd = MatMultAdd_SeqBAIJ_5;
2854: break;
2855: case 6:
2856: B->ops->mult = MatMult_SeqBAIJ_6;
2857: B->ops->multadd = MatMultAdd_SeqBAIJ_6;
2858: break;
2859: case 7:
2860: B->ops->mult = MatMult_SeqBAIJ_7;
2861: B->ops->multadd = MatMultAdd_SeqBAIJ_7;
2862: break;
2863: case 9:
2864: #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX2__) && defined(__FMA__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES)
2865: B->ops->mult = MatMult_SeqBAIJ_9_AVX2;
2866: B->ops->multadd = MatMultAdd_SeqBAIJ_9_AVX2;
2867: #else
2868: B->ops->mult = MatMult_SeqBAIJ_N;
2869: B->ops->multadd = MatMultAdd_SeqBAIJ_N;
2870: #endif
2871: break;
2872: case 11:
2873: B->ops->mult = MatMult_SeqBAIJ_11;
2874: B->ops->multadd = MatMultAdd_SeqBAIJ_11;
2875: break;
2876: case 15:
2877: B->ops->mult = MatMult_SeqBAIJ_15_ver1;
2878: B->ops->multadd = MatMultAdd_SeqBAIJ_N;
2879: break;
2880: default:
2881: B->ops->mult = MatMult_SeqBAIJ_N;
2882: B->ops->multadd = MatMultAdd_SeqBAIJ_N;
2883: break;
2884: }
2885: }
2886: B->ops->sor = MatSOR_SeqBAIJ;
2887: b->mbs = mbs;
2888: b->nbs = nbs;
2889: if (!skipallocation) {
2890: if (!b->imax) {
2891: PetscMalloc2(mbs,&b->imax,mbs,&b->ilen);
2892: PetscLogObjectMemory((PetscObject)B,2*mbs*sizeof(PetscInt));
2894: b->free_imax_ilen = PETSC_TRUE;
2895: }
2896: /* b->ilen will count nonzeros in each block row so far. */
2897: for (i=0; i<mbs; i++) b->ilen[i] = 0;
2898: if (!nnz) {
2899: if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2900: else if (nz < 0) nz = 1;
2901: nz = PetscMin(nz,nbs);
2902: for (i=0; i<mbs; i++) b->imax[i] = nz;
2903: nz = nz*mbs;
2904: } else {
2905: PetscInt64 nz64 = 0;
2906: for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz64 += nnz[i];}
2907: PetscIntCast(nz64,&nz);
2908: }
2910: /* allocate the matrix space */
2911: MatSeqXAIJFreeAIJ(B,&b->a,&b->j,&b->i);
2912: if (B->structure_only) {
2913: PetscMalloc1(nz,&b->j);
2914: PetscMalloc1(B->rmap->N+1,&b->i);
2915: PetscLogObjectMemory((PetscObject)B,(B->rmap->N+1)*sizeof(PetscInt)+nz*sizeof(PetscInt));
2916: } else {
2917: PetscMalloc3(bs2*nz,&b->a,nz,&b->j,B->rmap->N+1,&b->i);
2918: PetscLogObjectMemory((PetscObject)B,(B->rmap->N+1)*sizeof(PetscInt)+nz*(bs2*sizeof(PetscScalar)+sizeof(PetscInt)));
2919: PetscArrayzero(b->a,nz*bs2);
2920: }
2921: PetscArrayzero(b->j,nz);
2923: if (B->structure_only) {
2924: b->singlemalloc = PETSC_FALSE;
2925: b->free_a = PETSC_FALSE;
2926: } else {
2927: b->singlemalloc = PETSC_TRUE;
2928: b->free_a = PETSC_TRUE;
2929: }
2930: b->free_ij = PETSC_TRUE;
2932: b->i[0] = 0;
2933: for (i=1; i<mbs+1; i++) {
2934: b->i[i] = b->i[i-1] + b->imax[i-1];
2935: }
2937: } else {
2938: b->free_a = PETSC_FALSE;
2939: b->free_ij = PETSC_FALSE;
2940: }
2942: b->bs2 = bs2;
2943: b->mbs = mbs;
2944: b->nz = 0;
2945: b->maxnz = nz;
2946: B->info.nz_unneeded = (PetscReal)b->maxnz*bs2;
2947: B->was_assembled = PETSC_FALSE;
2948: B->assembled = PETSC_FALSE;
2949: if (realalloc) {MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);}
2950: return(0);
2951: }
2953: PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
2954: {
2955: PetscInt i,m,nz,nz_max=0,*nnz;
2956: PetscScalar *values=0;
2957: PetscBool roworiented = ((Mat_SeqBAIJ*)B->data)->roworiented;
2961: if (bs < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs);
2962: PetscLayoutSetBlockSize(B->rmap,bs);
2963: PetscLayoutSetBlockSize(B->cmap,bs);
2964: PetscLayoutSetUp(B->rmap);
2965: PetscLayoutSetUp(B->cmap);
2966: PetscLayoutGetBlockSize(B->rmap,&bs);
2967: m = B->rmap->n/bs;
2969: if (ii[0] != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %D",ii[0]);
2970: PetscMalloc1(m+1, &nnz);
2971: for (i=0; i<m; i++) {
2972: nz = ii[i+1]- ii[i];
2973: if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "Local row %D has a negative number of columns %D",i,nz);
2974: nz_max = PetscMax(nz_max, nz);
2975: nnz[i] = nz;
2976: }
2977: MatSeqBAIJSetPreallocation(B,bs,0,nnz);
2978: PetscFree(nnz);
2980: values = (PetscScalar*)V;
2981: if (!values) {
2982: PetscCalloc1(bs*bs*(nz_max+1),&values);
2983: }
2984: for (i=0; i<m; i++) {
2985: PetscInt ncols = ii[i+1] - ii[i];
2986: const PetscInt *icols = jj + ii[i];
2987: if (bs == 1 || !roworiented) {
2988: const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
2989: MatSetValuesBlocked_SeqBAIJ(B,1,&i,ncols,icols,svals,INSERT_VALUES);
2990: } else {
2991: PetscInt j;
2992: for (j=0; j<ncols; j++) {
2993: const PetscScalar *svals = values + (V ? (bs*bs*(ii[i]+j)) : 0);
2994: MatSetValuesBlocked_SeqBAIJ(B,1,&i,1,&icols[j],svals,INSERT_VALUES);
2995: }
2996: }
2997: }
2998: if (!V) { PetscFree(values); }
2999: MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
3000: MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
3001: MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);
3002: return(0);
3003: }
3005: /*@C
3006: MatSeqBAIJGetArray - gives access to the array where the data for a MATSEQBAIJ matrix is stored
3008: Not Collective
3010: Input Parameter:
3011: . mat - a MATSEQBAIJ matrix
3013: Output Parameter:
3014: . array - pointer to the data
3016: Level: intermediate
3018: .seealso: MatSeqBAIJRestoreArray(), MatSeqAIJGetArray(), MatSeqAIJRestoreArray()
3019: @*/
3020: PetscErrorCode MatSeqBAIJGetArray(Mat A,PetscScalar **array)
3021: {
3025: PetscUseMethod(A,"MatSeqBAIJGetArray_C",(Mat,PetscScalar**),(A,array));
3026: return(0);
3027: }
3029: /*@C
3030: MatSeqBAIJRestoreArray - returns access to the array where the data for a MATSEQBAIJ matrix is stored obtained by MatSeqBAIJGetArray()
3032: Not Collective
3034: Input Parameters:
3035: + mat - a MATSEQBAIJ matrix
3036: - array - pointer to the data
3038: Level: intermediate
3040: .seealso: MatSeqBAIJGetArray(), MatSeqAIJGetArray(), MatSeqAIJRestoreArray()
3041: @*/
3042: PetscErrorCode MatSeqBAIJRestoreArray(Mat A,PetscScalar **array)
3043: {
3047: PetscUseMethod(A,"MatSeqBAIJRestoreArray_C",(Mat,PetscScalar**),(A,array));
3048: return(0);
3049: }
3051: /*MC
3052: MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on
3053: block sparse compressed row format.
3055: Options Database Keys:
3056: . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions()
3058: Level: beginner
3060: Notes:
3061: MatSetOptions(,MAT_STRUCTURE_ONLY,PETSC_TRUE) may be called for this matrix type. In this no
3062: space is allocated for the nonzero entries and any entries passed with MatSetValues() are ignored
3064: .seealso: MatCreateSeqBAIJ()
3065: M*/
3067: PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBSTRM(Mat, MatType,MatReuse,Mat*);
3069: PETSC_EXTERN PetscErrorCode MatCreate_SeqBAIJ(Mat B)
3070: {
3072: PetscMPIInt size;
3073: Mat_SeqBAIJ *b;
3076: MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);
3077: if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Comm must be of size 1");
3079: PetscNewLog(B,&b);
3080: B->data = (void*)b;
3081: PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));
3083: b->row = 0;
3084: b->col = 0;
3085: b->icol = 0;
3086: b->reallocs = 0;
3087: b->saved_values = 0;
3089: b->roworiented = PETSC_TRUE;
3090: b->nonew = 0;
3091: b->diag = 0;
3092: B->spptr = 0;
3093: B->info.nz_unneeded = (PetscReal)b->maxnz*b->bs2;
3094: b->keepnonzeropattern = PETSC_FALSE;
3096: PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJGetArray_C",MatSeqBAIJGetArray_SeqBAIJ);
3097: PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJRestoreArray_C",MatSeqBAIJRestoreArray_SeqBAIJ);
3098: PetscObjectComposeFunction((PetscObject)B,"MatInvertBlockDiagonal_C",MatInvertBlockDiagonal_SeqBAIJ);
3099: PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_SeqBAIJ);
3100: PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_SeqBAIJ);
3101: PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",MatSeqBAIJSetColumnIndices_SeqBAIJ);
3102: PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqaij_C",MatConvert_SeqBAIJ_SeqAIJ);
3103: PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C",MatConvert_SeqBAIJ_SeqSBAIJ);
3104: PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",MatSeqBAIJSetPreallocation_SeqBAIJ);
3105: PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C",MatSeqBAIJSetPreallocationCSR_SeqBAIJ);
3106: PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_SeqBAIJ);
3107: #if defined(PETSC_HAVE_HYPRE)
3108: PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_hypre_C",MatConvert_AIJ_HYPRE);
3109: #endif
3110: PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_is_C",MatConvert_XAIJ_IS);
3111: PetscObjectComposeFunction((PetscObject)B,"MatPtAP_is_seqbaij_C",MatPtAP_IS_XAIJ);
3112: PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);
3113: return(0);
3114: }
3116: PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C,Mat A,MatDuplicateOption cpvalues,PetscBool mallocmatspace)
3117: {
3118: Mat_SeqBAIJ *c = (Mat_SeqBAIJ*)C->data,*a = (Mat_SeqBAIJ*)A->data;
3120: PetscInt i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2;
3123: if (a->i[mbs] != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupt matrix");
3125: if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3126: c->imax = a->imax;
3127: c->ilen = a->ilen;
3128: c->free_imax_ilen = PETSC_FALSE;
3129: } else {
3130: PetscMalloc2(mbs,&c->imax,mbs,&c->ilen);
3131: PetscLogObjectMemory((PetscObject)C,2*mbs*sizeof(PetscInt));
3132: for (i=0; i<mbs; i++) {
3133: c->imax[i] = a->imax[i];
3134: c->ilen[i] = a->ilen[i];
3135: }
3136: c->free_imax_ilen = PETSC_TRUE;
3137: }
3139: /* allocate the matrix space */
3140: if (mallocmatspace) {
3141: if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3142: PetscCalloc1(bs2*nz,&c->a);
3143: PetscLogObjectMemory((PetscObject)C,a->i[mbs]*bs2*sizeof(PetscScalar));
3145: c->i = a->i;
3146: c->j = a->j;
3147: c->singlemalloc = PETSC_FALSE;
3148: c->free_a = PETSC_TRUE;
3149: c->free_ij = PETSC_FALSE;
3150: c->parent = A;
3151: C->preallocated = PETSC_TRUE;
3152: C->assembled = PETSC_TRUE;
3154: PetscObjectReference((PetscObject)A);
3155: MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);
3156: MatSetOption(C,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);
3157: } else {
3158: PetscMalloc3(bs2*nz,&c->a,nz,&c->j,mbs+1,&c->i);
3159: PetscLogObjectMemory((PetscObject)C,a->i[mbs]*(bs2*sizeof(PetscScalar)+sizeof(PetscInt))+(mbs+1)*sizeof(PetscInt));
3161: c->singlemalloc = PETSC_TRUE;
3162: c->free_a = PETSC_TRUE;
3163: c->free_ij = PETSC_TRUE;
3165: PetscArraycpy(c->i,a->i,mbs+1);
3166: if (mbs > 0) {
3167: PetscArraycpy(c->j,a->j,nz);
3168: if (cpvalues == MAT_COPY_VALUES) {
3169: PetscArraycpy(c->a,a->a,bs2*nz);
3170: } else {
3171: PetscArrayzero(c->a,bs2*nz);
3172: }
3173: }
3174: C->preallocated = PETSC_TRUE;
3175: C->assembled = PETSC_TRUE;
3176: }
3177: }
3179: c->roworiented = a->roworiented;
3180: c->nonew = a->nonew;
3182: PetscLayoutReference(A->rmap,&C->rmap);
3183: PetscLayoutReference(A->cmap,&C->cmap);
3185: c->bs2 = a->bs2;
3186: c->mbs = a->mbs;
3187: c->nbs = a->nbs;
3189: if (a->diag) {
3190: if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3191: c->diag = a->diag;
3192: c->free_diag = PETSC_FALSE;
3193: } else {
3194: PetscMalloc1(mbs+1,&c->diag);
3195: PetscLogObjectMemory((PetscObject)C,(mbs+1)*sizeof(PetscInt));
3196: for (i=0; i<mbs; i++) c->diag[i] = a->diag[i];
3197: c->free_diag = PETSC_TRUE;
3198: }
3199: } else c->diag = 0;
3201: c->nz = a->nz;
3202: c->maxnz = a->nz; /* Since we allocate exactly the right amount */
3203: c->solve_work = NULL;
3204: c->mult_work = NULL;
3205: c->sor_workt = NULL;
3206: c->sor_work = NULL;
3208: c->compressedrow.use = a->compressedrow.use;
3209: c->compressedrow.nrows = a->compressedrow.nrows;
3210: if (a->compressedrow.use) {
3211: i = a->compressedrow.nrows;
3212: PetscMalloc2(i+1,&c->compressedrow.i,i+1,&c->compressedrow.rindex);
3213: PetscLogObjectMemory((PetscObject)C,(2*i+1)*sizeof(PetscInt));
3214: PetscArraycpy(c->compressedrow.i,a->compressedrow.i,i+1);
3215: PetscArraycpy(c->compressedrow.rindex,a->compressedrow.rindex,i);
3216: } else {
3217: c->compressedrow.use = PETSC_FALSE;
3218: c->compressedrow.i = NULL;
3219: c->compressedrow.rindex = NULL;
3220: }
3221: C->nonzerostate = A->nonzerostate;
3223: PetscFunctionListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);
3224: return(0);
3225: }
3227: PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
3228: {
3232: MatCreate(PetscObjectComm((PetscObject)A),B);
3233: MatSetSizes(*B,A->rmap->N,A->cmap->n,A->rmap->N,A->cmap->n);
3234: MatSetType(*B,MATSEQBAIJ);
3235: MatDuplicateNoCreate_SeqBAIJ(*B,A,cpvalues,PETSC_TRUE);
3236: return(0);
3237: }
3239: PetscErrorCode MatLoad_SeqBAIJ(Mat newmat,PetscViewer viewer)
3240: {
3241: Mat_SeqBAIJ *a;
3243: PetscInt i,nz,header[4],*rowlengths=0,M,N,bs = newmat->rmap->bs;
3244: PetscInt *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
3245: PetscInt kmax,jcount,block,idx,point,nzcountb,extra_rows,rows,cols;
3246: PetscInt *masked,nmask,tmp,bs2,ishift;
3247: PetscMPIInt size;
3248: int fd;
3249: PetscScalar *aa;
3250: MPI_Comm comm;
3251: PetscBool isbinary;
3254: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);
3255: if (!isbinary) SETERRQ2(PetscObjectComm((PetscObject)newmat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newmat)->type_name);
3257: /* force binary viewer to load .info file if it has not yet done so */
3258: PetscViewerSetUp(viewer);
3259: PetscObjectGetComm((PetscObject)viewer,&comm);
3260: PetscOptionsBegin(comm,NULL,"Options for loading SEQBAIJ matrix","Mat");
3261: PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);
3262: PetscOptionsEnd();
3263: if (bs < 0) bs = 1;
3264: bs2 = bs*bs;
3266: MPI_Comm_size(comm,&size);
3267: if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"view must have one processor");
3268: PetscViewerBinaryGetDescriptor(viewer,&fd);
3269: PetscBinaryRead(fd,header,4,NULL,PETSC_INT);
3270: if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not Mat object");
3271: M = header[1]; N = header[2]; nz = header[3];
3273: if (header[3] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ");
3274: if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices");
3276: /*
3277: This code adds extra rows to make sure the number of rows is
3278: divisible by the blocksize
3279: */
3280: mbs = M/bs;
3281: extra_rows = bs - M + bs*(mbs);
3282: if (extra_rows == bs) extra_rows = 0;
3283: else mbs++;
3284: if (extra_rows) {
3285: PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");
3286: }
3288: /* Set global sizes if not already set */
3289: if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) {
3290: MatSetSizes(newmat,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows);
3291: } else { /* Check if the matrix global sizes are correct */
3292: MatGetSize(newmat,&rows,&cols);
3293: if (rows < 0 && cols < 0) { /* user might provide local size instead of global size */
3294: MatGetLocalSize(newmat,&rows,&cols);
3295: }
3296: if (M != rows || N != cols) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix in file of different length (%d, %d) than the input matrix (%d, %d)",M,N,rows,cols);
3297: }
3299: /* read in row lengths */
3300: PetscMalloc1(M+extra_rows,&rowlengths);
3301: PetscBinaryRead(fd,rowlengths,M,NULL,PETSC_INT);
3302: for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
3304: /* read in column indices */
3305: PetscMalloc1(nz+extra_rows,&jj);
3306: PetscBinaryRead(fd,jj,nz,NULL,PETSC_INT);
3307: for (i=0; i<extra_rows; i++) jj[nz+i] = M+i;
3309: /* loop over row lengths determining block row lengths */
3310: PetscCalloc1(mbs,&browlengths);
3311: PetscMalloc2(mbs,&mask,mbs,&masked);
3312: PetscArrayzero(mask,mbs);
3313: rowcount = 0;
3314: nzcount = 0;
3315: for (i=0; i<mbs; i++) {
3316: nmask = 0;
3317: for (j=0; j<bs; j++) {
3318: kmax = rowlengths[rowcount];
3319: for (k=0; k<kmax; k++) {
3320: tmp = jj[nzcount++]/bs;
3321: if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
3322: }
3323: rowcount++;
3324: }
3325: browlengths[i] += nmask;
3326: /* zero out the mask elements we set */
3327: for (j=0; j<nmask; j++) mask[masked[j]] = 0;
3328: }
3330: /* Do preallocation */
3331: MatSeqBAIJSetPreallocation(newmat,bs,0,browlengths);
3332: a = (Mat_SeqBAIJ*)newmat->data;
3334: /* set matrix "i" values */
3335: a->i[0] = 0;
3336: for (i=1; i<= mbs; i++) {
3337: a->i[i] = a->i[i-1] + browlengths[i-1];
3338: a->ilen[i-1] = browlengths[i-1];
3339: }
3340: a->nz = 0;
3341: for (i=0; i<mbs; i++) a->nz += browlengths[i];
3343: /* read in nonzero values */
3344: PetscMalloc1(nz+extra_rows,&aa);
3345: PetscBinaryRead(fd,aa,nz,NULL,PETSC_SCALAR);
3346: for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0;
3348: /* set "a" and "j" values into matrix */
3349: nzcount = 0; jcount = 0;
3350: for (i=0; i<mbs; i++) {
3351: nzcountb = nzcount;
3352: nmask = 0;
3353: for (j=0; j<bs; j++) {
3354: kmax = rowlengths[i*bs+j];
3355: for (k=0; k<kmax; k++) {
3356: tmp = jj[nzcount++]/bs;
3357: if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
3358: }
3359: }
3360: /* sort the masked values */
3361: PetscSortInt(nmask,masked);
3363: /* set "j" values into matrix */
3364: maskcount = 1;
3365: for (j=0; j<nmask; j++) {
3366: a->j[jcount++] = masked[j];
3367: mask[masked[j]] = maskcount++;
3368: }
3369: /* set "a" values into matrix */
3370: ishift = bs2*a->i[i];
3371: for (j=0; j<bs; j++) {
3372: kmax = rowlengths[i*bs+j];
3373: for (k=0; k<kmax; k++) {
3374: tmp = jj[nzcountb]/bs;
3375: block = mask[tmp] - 1;
3376: point = jj[nzcountb] - bs*tmp;
3377: idx = ishift + bs2*block + j + bs*point;
3378: a->a[idx] = (MatScalar)aa[nzcountb++];
3379: }
3380: }
3381: /* zero out the mask elements we set */
3382: for (j=0; j<nmask; j++) mask[masked[j]] = 0;
3383: }
3384: if (jcount != a->nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix");
3386: PetscFree(rowlengths);
3387: PetscFree(browlengths);
3388: PetscFree(aa);
3389: PetscFree(jj);
3390: PetscFree2(mask,masked);
3392: MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);
3393: MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);
3394: return(0);
3395: }
3397: /*@C
3398: MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
3399: compressed row) format. For good matrix assembly performance the
3400: user should preallocate the matrix storage by setting the parameter nz
3401: (or the array nnz). By setting these parameters accurately, performance
3402: during matrix assembly can be increased by more than a factor of 50.
3404: Collective
3406: Input Parameters:
3407: + comm - MPI communicator, set to PETSC_COMM_SELF
3408: . bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
3409: blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
3410: . m - number of rows
3411: . n - number of columns
3412: . nz - number of nonzero blocks per block row (same for all rows)
3413: - nnz - array containing the number of nonzero blocks in the various block rows
3414: (possibly different for each block row) or NULL
3416: Output Parameter:
3417: . A - the matrix
3419: It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3420: MatXXXXSetPreallocation() paradigm instead of this routine directly.
3421: [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3423: Options Database Keys:
3424: + -mat_no_unroll - uses code that does not unroll the loops in the
3425: block calculations (much slower)
3426: - -mat_block_size - size of the blocks to use
3428: Level: intermediate
3430: Notes:
3431: The number of rows and columns must be divisible by blocksize.
3433: If the nnz parameter is given then the nz parameter is ignored
3435: A nonzero block is any block that as 1 or more nonzeros in it
3437: The block AIJ format is fully compatible with standard Fortran 77
3438: storage. That is, the stored row and column indices can begin at
3439: either one (as in Fortran) or zero. See the users' manual for details.
3441: Specify the preallocated storage with either nz or nnz (not both).
3442: Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory
3443: allocation. See Users-Manual: ch_mat for details.
3444: matrices.
3446: .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ()
3447: @*/
3448: PetscErrorCode MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
3449: {
3453: MatCreate(comm,A);
3454: MatSetSizes(*A,m,n,m,n);
3455: MatSetType(*A,MATSEQBAIJ);
3456: MatSeqBAIJSetPreallocation(*A,bs,nz,(PetscInt*)nnz);
3457: return(0);
3458: }
3460: /*@C
3461: MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros
3462: per row in the matrix. For good matrix assembly performance the
3463: user should preallocate the matrix storage by setting the parameter nz
3464: (or the array nnz). By setting these parameters accurately, performance
3465: during matrix assembly can be increased by more than a factor of 50.
3467: Collective
3469: Input Parameters:
3470: + B - the matrix
3471: . bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
3472: blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
3473: . nz - number of block nonzeros per block row (same for all rows)
3474: - nnz - array containing the number of block nonzeros in the various block rows
3475: (possibly different for each block row) or NULL
3477: Options Database Keys:
3478: + -mat_no_unroll - uses code that does not unroll the loops in the
3479: block calculations (much slower)
3480: - -mat_block_size - size of the blocks to use
3482: Level: intermediate
3484: Notes:
3485: If the nnz parameter is given then the nz parameter is ignored
3487: You can call MatGetInfo() to get information on how effective the preallocation was;
3488: for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3489: You can also run with the option -info and look for messages with the string
3490: malloc in them to see if additional memory allocation was needed.
3492: The block AIJ format is fully compatible with standard Fortran 77
3493: storage. That is, the stored row and column indices can begin at
3494: either one (as in Fortran) or zero. See the users' manual for details.
3496: Specify the preallocated storage with either nz or nnz (not both).
3497: Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory
3498: allocation. See Users-Manual: ch_mat for details.
3500: .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ(), MatGetInfo()
3501: @*/
3502: PetscErrorCode MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[])
3503: {
3510: PetscTryMethod(B,"MatSeqBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[]),(B,bs,nz,nnz));
3511: return(0);
3512: }
3514: /*@C
3515: MatSeqBAIJSetPreallocationCSR - Creates a sparse parallel matrix in BAIJ format using the given nonzero structure and (optional) numerical values
3517: Collective
3519: Input Parameters:
3520: + B - the matrix
3521: . i - the indices into j for the start of each local row (starts with zero)
3522: . j - the column indices for each local row (starts with zero) these must be sorted for each row
3523: - v - optional values in the matrix
3525: Level: advanced
3527: Notes:
3528: The order of the entries in values is specified by the MatOption MAT_ROW_ORIENTED. For example, C programs
3529: may want to use the default MAT_ROW_ORIENTED=PETSC_TRUE and use an array v[nnz][bs][bs] where the second index is
3530: over rows within a block and the last index is over columns within a block row. Fortran programs will likely set
3531: MAT_ROW_ORIENTED=PETSC_FALSE and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a
3532: block column and the second index is over columns within a block.
3534: Though this routine has Preallocation() in the name it also sets the exact nonzero locations of the matrix entries and usually the numerical values as well
3536: .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatSeqBAIJSetPreallocation(), MATSEQBAIJ
3537: @*/
3538: PetscErrorCode MatSeqBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3539: {
3546: PetscTryMethod(B,"MatSeqBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));
3547: return(0);
3548: }
3551: /*@
3552: MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements provided by the user.
3554: Collective
3556: Input Parameters:
3557: + comm - must be an MPI communicator of size 1
3558: . bs - size of block
3559: . m - number of rows
3560: . n - number of columns
3561: . i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row block row of the matrix
3562: . j - column indices
3563: - a - matrix values
3565: Output Parameter:
3566: . mat - the matrix
3568: Level: advanced
3570: Notes:
3571: The i, j, and a arrays are not copied by this routine, the user must free these arrays
3572: once the matrix is destroyed
3574: You cannot set new nonzero locations into this matrix, that will generate an error.
3576: The i and j indices are 0 based
3578: When block size is greater than 1 the matrix values must be stored using the BAIJ storage format (see the BAIJ code to determine this).
3580: The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is
3581: the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first
3582: block, followed by the second column of the first block etc etc. That is, the blocks are contiguous in memory
3583: with column-major ordering within blocks.
3585: .seealso: MatCreate(), MatCreateBAIJ(), MatCreateSeqBAIJ()
3587: @*/
3588: PetscErrorCode MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt i[],PetscInt j[],PetscScalar a[],Mat *mat)
3589: {
3591: PetscInt ii;
3592: Mat_SeqBAIJ *baij;
3595: if (bs != 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs);
3596: if (m > 0 && i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3598: MatCreate(comm,mat);
3599: MatSetSizes(*mat,m,n,m,n);
3600: MatSetType(*mat,MATSEQBAIJ);
3601: MatSeqBAIJSetPreallocation(*mat,bs,MAT_SKIP_ALLOCATION,0);
3602: baij = (Mat_SeqBAIJ*)(*mat)->data;
3603: PetscMalloc2(m,&baij->imax,m,&baij->ilen);
3604: PetscLogObjectMemory((PetscObject)*mat,2*m*sizeof(PetscInt));
3606: baij->i = i;
3607: baij->j = j;
3608: baij->a = a;
3610: baij->singlemalloc = PETSC_FALSE;
3611: baij->nonew = -1; /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/
3612: baij->free_a = PETSC_FALSE;
3613: baij->free_ij = PETSC_FALSE;
3615: for (ii=0; ii<m; ii++) {
3616: baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii];
3617: #if defined(PETSC_USE_DEBUG)
3618: if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]);
3619: #endif
3620: }
3621: #if defined(PETSC_USE_DEBUG)
3622: for (ii=0; ii<baij->i[m]; ii++) {
3623: if (j[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]);
3624: if (j[ii] > n - 1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]);
3625: }
3626: #endif
3628: MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);
3629: MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);
3630: return(0);
3631: }
3633: PetscErrorCode MatCreateMPIMatConcatenateSeqMat_SeqBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
3634: {
3636: PetscMPIInt size;
3639: MPI_Comm_size(comm,&size);
3640: if (size == 1 && scall == MAT_REUSE_MATRIX) {
3641: MatCopy(inmat,*outmat,SAME_NONZERO_PATTERN);
3642: } else {
3643: MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(comm,inmat,n,scall,outmat);
3644: }
3645: return(0);
3646: }