Actual source code: baij.c
petsc-3.8.4 2018-03-24
2: /*
3: Defines the basic matrix operations for the BAIJ (compressed row)
4: matrix storage format.
5: */
6: #include <../src/mat/impls/baij/seq/baij.h>
7: #include <petscblaslapack.h>
8: #include <petsc/private/kernels/blockinvert.h>
9: #include <petsc/private/kernels/blockmatmult.h>
11: #if defined(PETSC_HAVE_HYPRE)
12: PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*);
13: #endif
15: #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
16: PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBAIJMKL(Mat,const MatType,MatReuse,Mat*);
17: #endif
19: PetscErrorCode MatInvertBlockDiagonal_SeqBAIJ(Mat A,const PetscScalar **values)
20: {
21: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*) A->data;
23: PetscInt *diag_offset,i,bs = A->rmap->bs,mbs = a->mbs,ipvt[5],bs2 = bs*bs,*v_pivots;
24: MatScalar *v = a->a,*odiag,*diag,*mdiag,work[25],*v_work;
25: PetscReal shift = 0.0;
26: PetscBool allowzeropivot,zeropivotdetected=PETSC_FALSE;
29: allowzeropivot = PetscNot(A->erroriffailure);
31: if (a->idiagvalid) {
32: if (values) *values = a->idiag;
33: return(0);
34: }
35: MatMarkDiagonal_SeqBAIJ(A);
36: diag_offset = a->diag;
37: if (!a->idiag) {
38: PetscMalloc1(2*bs2*mbs,&a->idiag);
39: PetscLogObjectMemory((PetscObject)A,2*bs2*mbs*sizeof(PetscScalar));
40: }
41: diag = a->idiag;
42: mdiag = a->idiag+bs2*mbs;
43: if (values) *values = a->idiag;
44: /* factor and invert each block */
45: switch (bs) {
46: case 1:
47: for (i=0; i<mbs; i++) {
48: odiag = v + 1*diag_offset[i];
49: diag[0] = odiag[0];
50: mdiag[0] = odiag[0];
52: if (PetscAbsScalar(diag[0] + shift) < PETSC_MACHINE_EPSILON) {
53: if (allowzeropivot) {
54: A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
55: A->factorerror_zeropivot_value = PetscAbsScalar(diag[0]);
56: A->factorerror_zeropivot_row = i;
57: PetscInfo1(A,"Zero pivot, row %D\n",i);
58: } else SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot, row %D pivot value %g tolerance %g",i,(double)PetscAbsScalar(diag[0]),(double)PETSC_MACHINE_EPSILON);
59: }
61: diag[0] = (PetscScalar)1.0 / (diag[0] + shift);
62: diag += 1;
63: mdiag += 1;
64: }
65: break;
66: case 2:
67: for (i=0; i<mbs; i++) {
68: odiag = v + 4*diag_offset[i];
69: diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
70: mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
71: PetscKernel_A_gets_inverse_A_2(diag,shift,allowzeropivot,&zeropivotdetected);
72: if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
73: diag += 4;
74: mdiag += 4;
75: }
76: break;
77: case 3:
78: for (i=0; i<mbs; i++) {
79: odiag = v + 9*diag_offset[i];
80: diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
81: diag[4] = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7];
82: diag[8] = odiag[8];
83: mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
84: mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7];
85: mdiag[8] = odiag[8];
86: PetscKernel_A_gets_inverse_A_3(diag,shift,allowzeropivot,&zeropivotdetected);
87: if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
88: diag += 9;
89: mdiag += 9;
90: }
91: break;
92: case 4:
93: for (i=0; i<mbs; i++) {
94: odiag = v + 16*diag_offset[i];
95: PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));
96: PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));
97: PetscKernel_A_gets_inverse_A_4(diag,shift,allowzeropivot,&zeropivotdetected);
98: if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
99: diag += 16;
100: mdiag += 16;
101: }
102: break;
103: case 5:
104: for (i=0; i<mbs; i++) {
105: odiag = v + 25*diag_offset[i];
106: PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));
107: PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));
108: PetscKernel_A_gets_inverse_A_5(diag,ipvt,work,shift,allowzeropivot,&zeropivotdetected);
109: if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
110: diag += 25;
111: mdiag += 25;
112: }
113: break;
114: case 6:
115: for (i=0; i<mbs; i++) {
116: odiag = v + 36*diag_offset[i];
117: PetscMemcpy(diag,odiag,36*sizeof(PetscScalar));
118: PetscMemcpy(mdiag,odiag,36*sizeof(PetscScalar));
119: PetscKernel_A_gets_inverse_A_6(diag,shift,allowzeropivot,&zeropivotdetected);
120: if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
121: diag += 36;
122: mdiag += 36;
123: }
124: break;
125: case 7:
126: for (i=0; i<mbs; i++) {
127: odiag = v + 49*diag_offset[i];
128: PetscMemcpy(diag,odiag,49*sizeof(PetscScalar));
129: PetscMemcpy(mdiag,odiag,49*sizeof(PetscScalar));
130: PetscKernel_A_gets_inverse_A_7(diag,shift,allowzeropivot,&zeropivotdetected);
131: if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
132: diag += 49;
133: mdiag += 49;
134: }
135: break;
136: default:
137: PetscMalloc2(bs,&v_work,bs,&v_pivots);
138: for (i=0; i<mbs; i++) {
139: odiag = v + bs2*diag_offset[i];
140: PetscMemcpy(diag,odiag,bs2*sizeof(PetscScalar));
141: PetscMemcpy(mdiag,odiag,bs2*sizeof(PetscScalar));
142: PetscKernel_A_gets_inverse_A(bs,diag,v_pivots,v_work,allowzeropivot,&zeropivotdetected);
143: if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
144: diag += bs2;
145: mdiag += bs2;
146: }
147: PetscFree2(v_work,v_pivots);
148: }
149: a->idiagvalid = PETSC_TRUE;
150: return(0);
151: }
153: PetscErrorCode MatSOR_SeqBAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
154: {
155: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
156: PetscScalar *x,*work,*w,*workt,*t;
157: const MatScalar *v,*aa = a->a, *idiag;
158: const PetscScalar *b,*xb;
159: PetscScalar s[7], xw[7]={0}; /* avoid some compilers thinking xw is uninitialized */
160: PetscErrorCode ierr;
161: PetscInt m = a->mbs,i,i2,nz,bs = A->rmap->bs,bs2 = bs*bs,k,j,idx,it;
162: const PetscInt *diag,*ai = a->i,*aj = a->j,*vi;
165: if (fshift == -1.0) fshift = 0.0; /* negative fshift indicates do not error on zero diagonal; this code never errors on zero diagonal */
166: its = its*lits;
167: if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
168: if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
169: if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
170: if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
171: if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
173: if (!a->idiagvalid) {MatInvertBlockDiagonal(A,NULL);}
175: if (!m) return(0);
176: diag = a->diag;
177: idiag = a->idiag;
178: k = PetscMax(A->rmap->n,A->cmap->n);
179: if (!a->mult_work) {
180: PetscMalloc1(k+1,&a->mult_work);
181: }
182: if (!a->sor_workt) {
183: PetscMalloc1(k,&a->sor_workt);
184: }
185: if (!a->sor_work) {
186: PetscMalloc1(bs,&a->sor_work);
187: }
188: work = a->mult_work;
189: t = a->sor_workt;
190: w = a->sor_work;
192: VecGetArray(xx,&x);
193: VecGetArrayRead(bb,&b);
195: if (flag & SOR_ZERO_INITIAL_GUESS) {
196: if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
197: switch (bs) {
198: case 1:
199: PetscKernel_v_gets_A_times_w_1(x,idiag,b);
200: t[0] = b[0];
201: i2 = 1;
202: idiag += 1;
203: for (i=1; i<m; i++) {
204: v = aa + ai[i];
205: vi = aj + ai[i];
206: nz = diag[i] - ai[i];
207: s[0] = b[i2];
208: for (j=0; j<nz; j++) {
209: xw[0] = x[vi[j]];
210: PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
211: }
212: t[i2] = s[0];
213: PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
214: x[i2] = xw[0];
215: idiag += 1;
216: i2 += 1;
217: }
218: break;
219: case 2:
220: PetscKernel_v_gets_A_times_w_2(x,idiag,b);
221: t[0] = b[0]; t[1] = b[1];
222: i2 = 2;
223: idiag += 4;
224: for (i=1; i<m; i++) {
225: v = aa + 4*ai[i];
226: vi = aj + ai[i];
227: nz = diag[i] - ai[i];
228: s[0] = b[i2]; s[1] = b[i2+1];
229: for (j=0; j<nz; j++) {
230: idx = 2*vi[j];
231: it = 4*j;
232: xw[0] = x[idx]; xw[1] = x[1+idx];
233: PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
234: }
235: t[i2] = s[0]; t[i2+1] = s[1];
236: PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
237: x[i2] = xw[0]; x[i2+1] = xw[1];
238: idiag += 4;
239: i2 += 2;
240: }
241: break;
242: case 3:
243: PetscKernel_v_gets_A_times_w_3(x,idiag,b);
244: t[0] = b[0]; t[1] = b[1]; t[2] = b[2];
245: i2 = 3;
246: idiag += 9;
247: for (i=1; i<m; i++) {
248: v = aa + 9*ai[i];
249: vi = aj + ai[i];
250: nz = diag[i] - ai[i];
251: s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2];
252: while (nz--) {
253: idx = 3*(*vi++);
254: xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
255: PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
256: v += 9;
257: }
258: t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2];
259: PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
260: x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
261: idiag += 9;
262: i2 += 3;
263: }
264: break;
265: case 4:
266: PetscKernel_v_gets_A_times_w_4(x,idiag,b);
267: t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3];
268: i2 = 4;
269: idiag += 16;
270: for (i=1; i<m; i++) {
271: v = aa + 16*ai[i];
272: vi = aj + ai[i];
273: nz = diag[i] - ai[i];
274: s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3];
275: while (nz--) {
276: idx = 4*(*vi++);
277: xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
278: PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
279: v += 16;
280: }
281: t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; t[i2 + 3] = s[3];
282: PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
283: x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3];
284: idiag += 16;
285: i2 += 4;
286: }
287: break;
288: case 5:
289: PetscKernel_v_gets_A_times_w_5(x,idiag,b);
290: t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; t[4] = b[4];
291: i2 = 5;
292: idiag += 25;
293: for (i=1; i<m; i++) {
294: v = aa + 25*ai[i];
295: vi = aj + ai[i];
296: nz = diag[i] - ai[i];
297: s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4];
298: while (nz--) {
299: idx = 5*(*vi++);
300: xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
301: PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
302: v += 25;
303: }
304: t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; t[i2+3] = s[3]; t[i2+4] = s[4];
305: PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
306: x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4];
307: idiag += 25;
308: i2 += 5;
309: }
310: break;
311: case 6:
312: PetscKernel_v_gets_A_times_w_6(x,idiag,b);
313: t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; t[4] = b[4]; t[5] = b[5];
314: i2 = 6;
315: idiag += 36;
316: for (i=1; i<m; i++) {
317: v = aa + 36*ai[i];
318: vi = aj + ai[i];
319: nz = diag[i] - ai[i];
320: s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5];
321: while (nz--) {
322: idx = 6*(*vi++);
323: xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
324: xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
325: PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
326: v += 36;
327: }
328: t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2];
329: t[i2+3] = s[3]; t[i2+4] = s[4]; t[i2+5] = s[5];
330: PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
331: x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5];
332: idiag += 36;
333: i2 += 6;
334: }
335: break;
336: case 7:
337: PetscKernel_v_gets_A_times_w_7(x,idiag,b);
338: t[0] = b[0]; t[1] = b[1]; t[2] = b[2];
339: t[3] = b[3]; t[4] = b[4]; t[5] = b[5]; t[6] = b[6];
340: i2 = 7;
341: idiag += 49;
342: for (i=1; i<m; i++) {
343: v = aa + 49*ai[i];
344: vi = aj + ai[i];
345: nz = diag[i] - ai[i];
346: s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2];
347: s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6];
348: while (nz--) {
349: idx = 7*(*vi++);
350: xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
351: xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
352: PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
353: v += 49;
354: }
355: t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2];
356: t[i2+3] = s[3]; t[i2+4] = s[4]; t[i2+5] = s[5]; t[i2+6] = s[6];
357: PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
358: x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
359: x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6];
360: idiag += 49;
361: i2 += 7;
362: }
363: break;
364: default:
365: PetscKernel_w_gets_Ar_times_v(bs,bs,b,idiag,x);
366: PetscMemcpy(t,b,bs*sizeof(PetscScalar));
367: i2 = bs;
368: idiag += bs2;
369: for (i=1; i<m; i++) {
370: v = aa + bs2*ai[i];
371: vi = aj + ai[i];
372: nz = diag[i] - ai[i];
374: PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));
375: /* copy all rows of x that are needed into contiguous space */
376: workt = work;
377: for (j=0; j<nz; j++) {
378: PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));
379: workt += bs;
380: }
381: PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
382: PetscMemcpy(t+i2,w,bs*sizeof(PetscScalar));
383: PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
385: idiag += bs2;
386: i2 += bs;
387: }
388: break;
389: }
390: /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
391: PetscLogFlops(1.0*bs2*a->nz);
392: xb = t;
393: }
394: else xb = b;
395: if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
396: idiag = a->idiag+bs2*(a->mbs-1);
397: i2 = bs * (m-1);
398: switch (bs) {
399: case 1:
400: s[0] = xb[i2];
401: PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
402: x[i2] = xw[0];
403: i2 -= 1;
404: for (i=m-2; i>=0; i--) {
405: v = aa + (diag[i]+1);
406: vi = aj + diag[i] + 1;
407: nz = ai[i+1] - diag[i] - 1;
408: s[0] = xb[i2];
409: for (j=0; j<nz; j++) {
410: xw[0] = x[vi[j]];
411: PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
412: }
413: PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
414: x[i2] = xw[0];
415: idiag -= 1;
416: i2 -= 1;
417: }
418: break;
419: case 2:
420: s[0] = xb[i2]; s[1] = xb[i2+1];
421: PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
422: x[i2] = xw[0]; x[i2+1] = xw[1];
423: i2 -= 2;
424: idiag -= 4;
425: for (i=m-2; i>=0; i--) {
426: v = aa + 4*(diag[i] + 1);
427: vi = aj + diag[i] + 1;
428: nz = ai[i+1] - diag[i] - 1;
429: s[0] = xb[i2]; s[1] = xb[i2+1];
430: for (j=0; j<nz; j++) {
431: idx = 2*vi[j];
432: it = 4*j;
433: xw[0] = x[idx]; xw[1] = x[1+idx];
434: PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
435: }
436: PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
437: x[i2] = xw[0]; x[i2+1] = xw[1];
438: idiag -= 4;
439: i2 -= 2;
440: }
441: break;
442: case 3:
443: s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2];
444: PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
445: x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
446: i2 -= 3;
447: idiag -= 9;
448: for (i=m-2; i>=0; i--) {
449: v = aa + 9*(diag[i]+1);
450: vi = aj + diag[i] + 1;
451: nz = ai[i+1] - diag[i] - 1;
452: s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2];
453: while (nz--) {
454: idx = 3*(*vi++);
455: xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
456: PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
457: v += 9;
458: }
459: PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
460: x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
461: idiag -= 9;
462: i2 -= 3;
463: }
464: break;
465: case 4:
466: s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3];
467: PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
468: x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3];
469: i2 -= 4;
470: idiag -= 16;
471: for (i=m-2; i>=0; i--) {
472: v = aa + 16*(diag[i]+1);
473: vi = aj + diag[i] + 1;
474: nz = ai[i+1] - diag[i] - 1;
475: s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3];
476: while (nz--) {
477: idx = 4*(*vi++);
478: xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
479: PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
480: v += 16;
481: }
482: PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
483: x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3];
484: idiag -= 16;
485: i2 -= 4;
486: }
487: break;
488: case 5:
489: s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4];
490: PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
491: x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4];
492: i2 -= 5;
493: idiag -= 25;
494: for (i=m-2; i>=0; i--) {
495: v = aa + 25*(diag[i]+1);
496: vi = aj + diag[i] + 1;
497: nz = ai[i+1] - diag[i] - 1;
498: s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4];
499: while (nz--) {
500: idx = 5*(*vi++);
501: xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
502: PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
503: v += 25;
504: }
505: PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
506: x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4];
507: idiag -= 25;
508: i2 -= 5;
509: }
510: break;
511: case 6:
512: s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5];
513: PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
514: x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5];
515: i2 -= 6;
516: idiag -= 36;
517: for (i=m-2; i>=0; i--) {
518: v = aa + 36*(diag[i]+1);
519: vi = aj + diag[i] + 1;
520: nz = ai[i+1] - diag[i] - 1;
521: s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5];
522: while (nz--) {
523: idx = 6*(*vi++);
524: xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
525: xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
526: PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
527: v += 36;
528: }
529: PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
530: x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5];
531: idiag -= 36;
532: i2 -= 6;
533: }
534: break;
535: case 7:
536: s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2];
537: s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; s[6] = xb[i2+6];
538: PetscKernel_v_gets_A_times_w_7(x,idiag,b);
539: x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
540: x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6];
541: i2 -= 7;
542: idiag -= 49;
543: for (i=m-2; i>=0; i--) {
544: v = aa + 49*(diag[i]+1);
545: vi = aj + diag[i] + 1;
546: nz = ai[i+1] - diag[i] - 1;
547: s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2];
548: s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; s[6] = xb[i2+6];
549: while (nz--) {
550: idx = 7*(*vi++);
551: xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
552: xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
553: PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
554: v += 49;
555: }
556: PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
557: x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
558: x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6];
559: idiag -= 49;
560: i2 -= 7;
561: }
562: break;
563: default:
564: PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));
565: PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
566: i2 -= bs;
567: idiag -= bs2;
568: for (i=m-2; i>=0; i--) {
569: v = aa + bs2*(diag[i]+1);
570: vi = aj + diag[i] + 1;
571: nz = ai[i+1] - diag[i] - 1;
573: PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));
574: /* copy all rows of x that are needed into contiguous space */
575: workt = work;
576: for (j=0; j<nz; j++) {
577: PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));
578: workt += bs;
579: }
580: PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
581: PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
583: idiag -= bs2;
584: i2 -= bs;
585: }
586: break;
587: }
588: PetscLogFlops(1.0*bs2*(a->nz));
589: }
590: its--;
591: }
592: while (its--) {
593: if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
594: idiag = a->idiag;
595: i2 = 0;
596: switch (bs) {
597: case 1:
598: for (i=0; i<m; i++) {
599: v = aa + ai[i];
600: vi = aj + ai[i];
601: nz = ai[i+1] - ai[i];
602: s[0] = b[i2];
603: for (j=0; j<nz; j++) {
604: xw[0] = x[vi[j]];
605: PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
606: }
607: PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
608: x[i2] += xw[0];
609: idiag += 1;
610: i2 += 1;
611: }
612: break;
613: case 2:
614: for (i=0; i<m; i++) {
615: v = aa + 4*ai[i];
616: vi = aj + ai[i];
617: nz = ai[i+1] - ai[i];
618: s[0] = b[i2]; s[1] = b[i2+1];
619: for (j=0; j<nz; j++) {
620: idx = 2*vi[j];
621: it = 4*j;
622: xw[0] = x[idx]; xw[1] = x[1+idx];
623: PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
624: }
625: PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
626: x[i2] += xw[0]; x[i2+1] += xw[1];
627: idiag += 4;
628: i2 += 2;
629: }
630: break;
631: case 3:
632: for (i=0; i<m; i++) {
633: v = aa + 9*ai[i];
634: vi = aj + ai[i];
635: nz = ai[i+1] - ai[i];
636: s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2];
637: while (nz--) {
638: idx = 3*(*vi++);
639: xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
640: PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
641: v += 9;
642: }
643: PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
644: x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
645: idiag += 9;
646: i2 += 3;
647: }
648: break;
649: case 4:
650: for (i=0; i<m; i++) {
651: v = aa + 16*ai[i];
652: vi = aj + ai[i];
653: nz = ai[i+1] - ai[i];
654: s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3];
655: while (nz--) {
656: idx = 4*(*vi++);
657: xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
658: PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
659: v += 16;
660: }
661: PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
662: x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3];
663: idiag += 16;
664: i2 += 4;
665: }
666: break;
667: case 5:
668: for (i=0; i<m; i++) {
669: v = aa + 25*ai[i];
670: vi = aj + ai[i];
671: nz = ai[i+1] - ai[i];
672: s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4];
673: while (nz--) {
674: idx = 5*(*vi++);
675: xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
676: PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
677: v += 25;
678: }
679: PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
680: x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; x[i2+4] += xw[4];
681: idiag += 25;
682: i2 += 5;
683: }
684: break;
685: case 6:
686: for (i=0; i<m; i++) {
687: v = aa + 36*ai[i];
688: vi = aj + ai[i];
689: nz = ai[i+1] - ai[i];
690: s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5];
691: while (nz--) {
692: idx = 6*(*vi++);
693: xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
694: xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
695: PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
696: v += 36;
697: }
698: PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
699: x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
700: x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5];
701: idiag += 36;
702: i2 += 6;
703: }
704: break;
705: case 7:
706: for (i=0; i<m; i++) {
707: v = aa + 49*ai[i];
708: vi = aj + ai[i];
709: nz = ai[i+1] - ai[i];
710: s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2];
711: s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6];
712: while (nz--) {
713: idx = 7*(*vi++);
714: xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
715: xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
716: PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
717: v += 49;
718: }
719: PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
720: x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
721: x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; x[i2+6] += xw[6];
722: idiag += 49;
723: i2 += 7;
724: }
725: break;
726: default:
727: for (i=0; i<m; i++) {
728: v = aa + bs2*ai[i];
729: vi = aj + ai[i];
730: nz = ai[i+1] - ai[i];
732: PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));
733: /* copy all rows of x that are needed into contiguous space */
734: workt = work;
735: for (j=0; j<nz; j++) {
736: PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));
737: workt += bs;
738: }
739: PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
740: PetscKernel_w_gets_w_plus_Ar_times_v(bs,bs,w,idiag,x+i2);
742: idiag += bs2;
743: i2 += bs;
744: }
745: break;
746: }
747: PetscLogFlops(2.0*bs2*a->nz);
748: }
749: if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
750: idiag = a->idiag+bs2*(a->mbs-1);
751: i2 = bs * (m-1);
752: switch (bs) {
753: case 1:
754: for (i=m-1; i>=0; i--) {
755: v = aa + ai[i];
756: vi = aj + ai[i];
757: nz = ai[i+1] - ai[i];
758: s[0] = b[i2];
759: for (j=0; j<nz; j++) {
760: xw[0] = x[vi[j]];
761: PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
762: }
763: PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
764: x[i2] += xw[0];
765: idiag -= 1;
766: i2 -= 1;
767: }
768: break;
769: case 2:
770: for (i=m-1; i>=0; i--) {
771: v = aa + 4*ai[i];
772: vi = aj + ai[i];
773: nz = ai[i+1] - ai[i];
774: s[0] = b[i2]; s[1] = b[i2+1];
775: for (j=0; j<nz; j++) {
776: idx = 2*vi[j];
777: it = 4*j;
778: xw[0] = x[idx]; xw[1] = x[1+idx];
779: PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
780: }
781: PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
782: x[i2] += xw[0]; x[i2+1] += xw[1];
783: idiag -= 4;
784: i2 -= 2;
785: }
786: break;
787: case 3:
788: for (i=m-1; i>=0; i--) {
789: v = aa + 9*ai[i];
790: vi = aj + ai[i];
791: nz = ai[i+1] - ai[i];
792: s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2];
793: while (nz--) {
794: idx = 3*(*vi++);
795: xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
796: PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
797: v += 9;
798: }
799: PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
800: x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
801: idiag -= 9;
802: i2 -= 3;
803: }
804: break;
805: case 4:
806: for (i=m-1; i>=0; i--) {
807: v = aa + 16*ai[i];
808: vi = aj + ai[i];
809: nz = ai[i+1] - ai[i];
810: s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3];
811: while (nz--) {
812: idx = 4*(*vi++);
813: xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
814: PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
815: v += 16;
816: }
817: PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
818: x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3];
819: idiag -= 16;
820: i2 -= 4;
821: }
822: break;
823: case 5:
824: for (i=m-1; i>=0; i--) {
825: v = aa + 25*ai[i];
826: vi = aj + ai[i];
827: nz = ai[i+1] - ai[i];
828: s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4];
829: while (nz--) {
830: idx = 5*(*vi++);
831: xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
832: PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
833: v += 25;
834: }
835: PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
836: x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; x[i2+4] += xw[4];
837: idiag -= 25;
838: i2 -= 5;
839: }
840: break;
841: case 6:
842: for (i=m-1; i>=0; i--) {
843: v = aa + 36*ai[i];
844: vi = aj + ai[i];
845: nz = ai[i+1] - ai[i];
846: s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5];
847: while (nz--) {
848: idx = 6*(*vi++);
849: xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
850: xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
851: PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
852: v += 36;
853: }
854: PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
855: x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
856: x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5];
857: idiag -= 36;
858: i2 -= 6;
859: }
860: break;
861: case 7:
862: for (i=m-1; i>=0; i--) {
863: v = aa + 49*ai[i];
864: vi = aj + ai[i];
865: nz = ai[i+1] - ai[i];
866: s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2];
867: s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6];
868: while (nz--) {
869: idx = 7*(*vi++);
870: xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
871: xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
872: PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
873: v += 49;
874: }
875: PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
876: x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
877: x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; x[i2+6] += xw[6];
878: idiag -= 49;
879: i2 -= 7;
880: }
881: break;
882: default:
883: for (i=m-1; i>=0; i--) {
884: v = aa + bs2*ai[i];
885: vi = aj + ai[i];
886: nz = ai[i+1] - ai[i];
888: PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));
889: /* copy all rows of x that are needed into contiguous space */
890: workt = work;
891: for (j=0; j<nz; j++) {
892: PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));
893: workt += bs;
894: }
895: PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
896: PetscKernel_w_gets_w_plus_Ar_times_v(bs,bs,w,idiag,x+i2);
898: idiag -= bs2;
899: i2 -= bs;
900: }
901: break;
902: }
903: PetscLogFlops(2.0*bs2*(a->nz));
904: }
905: }
906: VecRestoreArray(xx,&x);
907: VecRestoreArrayRead(bb,&b);
908: return(0);
909: }
912: /*
913: Special version for direct calls from Fortran (Used in PETSc-fun3d)
914: */
915: #if defined(PETSC_HAVE_FORTRAN_CAPS)
916: #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4
917: #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
918: #define matsetvaluesblocked4_ matsetvaluesblocked4
919: #endif
921: PETSC_EXTERN void matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[])
922: {
923: Mat A = *AA;
924: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
925: PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn;
926: PetscInt *ai =a->i,*ailen=a->ilen;
927: PetscInt *aj =a->j,stepval,lastcol = -1;
928: const PetscScalar *value = v;
929: MatScalar *ap,*aa = a->a,*bap;
932: if (A->rmap->bs != 4) SETERRABORT(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4");
933: stepval = (n-1)*4;
934: for (k=0; k<m; k++) { /* loop over added rows */
935: row = im[k];
936: rp = aj + ai[row];
937: ap = aa + 16*ai[row];
938: nrow = ailen[row];
939: low = 0;
940: high = nrow;
941: for (l=0; l<n; l++) { /* loop over added columns */
942: col = in[l];
943: if (col <= lastcol) low = 0;
944: else high = nrow;
945: lastcol = col;
946: value = v + k*(stepval+4 + l)*4;
947: while (high-low > 7) {
948: t = (low+high)/2;
949: if (rp[t] > col) high = t;
950: else low = t;
951: }
952: for (i=low; i<high; i++) {
953: if (rp[i] > col) break;
954: if (rp[i] == col) {
955: bap = ap + 16*i;
956: for (ii=0; ii<4; ii++,value+=stepval) {
957: for (jj=ii; jj<16; jj+=4) {
958: bap[jj] += *value++;
959: }
960: }
961: goto noinsert2;
962: }
963: }
964: N = nrow++ - 1;
965: high++; /* added new column index thus must search to one higher than before */
966: /* shift up all the later entries in this row */
967: for (ii=N; ii>=i; ii--) {
968: rp[ii+1] = rp[ii];
969: PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
970: }
971: if (N >= i) {
972: PetscMemzero(ap+16*i,16*sizeof(MatScalar));
973: }
974: rp[i] = col;
975: bap = ap + 16*i;
976: for (ii=0; ii<4; ii++,value+=stepval) {
977: for (jj=ii; jj<16; jj+=4) {
978: bap[jj] = *value++;
979: }
980: }
981: noinsert2:;
982: low = i;
983: }
984: ailen[row] = nrow;
985: }
986: PetscFunctionReturnVoid();
987: }
989: #if defined(PETSC_HAVE_FORTRAN_CAPS)
990: #define matsetvalues4_ MATSETVALUES4
991: #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
992: #define matsetvalues4_ matsetvalues4
993: #endif
995: PETSC_EXTERN void matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v)
996: {
997: Mat A = *AA;
998: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
999: PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm;
1000: PetscInt *ai=a->i,*ailen=a->ilen;
1001: PetscInt *aj=a->j,brow,bcol;
1002: PetscInt ridx,cidx,lastcol = -1;
1003: MatScalar *ap,value,*aa=a->a,*bap;
1006: for (k=0; k<m; k++) { /* loop over added rows */
1007: row = im[k]; brow = row/4;
1008: rp = aj + ai[brow];
1009: ap = aa + 16*ai[brow];
1010: nrow = ailen[brow];
1011: low = 0;
1012: high = nrow;
1013: for (l=0; l<n; l++) { /* loop over added columns */
1014: col = in[l]; bcol = col/4;
1015: ridx = row % 4; cidx = col % 4;
1016: value = v[l + k*n];
1017: if (col <= lastcol) low = 0;
1018: else high = nrow;
1019: lastcol = col;
1020: while (high-low > 7) {
1021: t = (low+high)/2;
1022: if (rp[t] > bcol) high = t;
1023: else low = t;
1024: }
1025: for (i=low; i<high; i++) {
1026: if (rp[i] > bcol) break;
1027: if (rp[i] == bcol) {
1028: bap = ap + 16*i + 4*cidx + ridx;
1029: *bap += value;
1030: goto noinsert1;
1031: }
1032: }
1033: N = nrow++ - 1;
1034: high++; /* added new column thus must search to one higher than before */
1035: /* shift up all the later entries in this row */
1036: for (ii=N; ii>=i; ii--) {
1037: rp[ii+1] = rp[ii];
1038: PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
1039: }
1040: if (N>=i) {
1041: PetscMemzero(ap+16*i,16*sizeof(MatScalar));
1042: }
1043: rp[i] = bcol;
1044: ap[16*i + 4*cidx + ridx] = value;
1045: noinsert1:;
1046: low = i;
1047: }
1048: ailen[brow] = nrow;
1049: }
1050: PetscFunctionReturnVoid();
1051: }
1053: /*
1054: Checks for missing diagonals
1055: */
1056: PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A,PetscBool *missing,PetscInt *d)
1057: {
1058: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1060: PetscInt *diag,*ii = a->i,i;
1063: MatMarkDiagonal_SeqBAIJ(A);
1064: *missing = PETSC_FALSE;
1065: if (A->rmap->n > 0 && !ii) {
1066: *missing = PETSC_TRUE;
1067: if (d) *d = 0;
1068: PetscInfo(A,"Matrix has no entries therefore is missing diagonal\n");
1069: } else {
1070: diag = a->diag;
1071: for (i=0; i<a->mbs; i++) {
1072: if (diag[i] >= ii[i+1]) {
1073: *missing = PETSC_TRUE;
1074: if (d) *d = i;
1075: PetscInfo1(A,"Matrix is missing block diagonal number %D\n",i);
1076: break;
1077: }
1078: }
1079: }
1080: return(0);
1081: }
1083: PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A)
1084: {
1085: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1087: PetscInt i,j,m = a->mbs;
1090: if (!a->diag) {
1091: PetscMalloc1(m,&a->diag);
1092: PetscLogObjectMemory((PetscObject)A,m*sizeof(PetscInt));
1093: a->free_diag = PETSC_TRUE;
1094: }
1095: for (i=0; i<m; i++) {
1096: a->diag[i] = a->i[i+1];
1097: for (j=a->i[i]; j<a->i[i+1]; j++) {
1098: if (a->j[j] == i) {
1099: a->diag[i] = j;
1100: break;
1101: }
1102: }
1103: }
1104: return(0);
1105: }
1108: static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *inia[],const PetscInt *inja[],PetscBool *done)
1109: {
1110: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1112: PetscInt i,j,n = a->mbs,nz = a->i[n],*tia,*tja,bs = A->rmap->bs,k,l,cnt;
1113: PetscInt **ia = (PetscInt**)inia,**ja = (PetscInt**)inja;
1116: *nn = n;
1117: if (!ia) return(0);
1118: if (symmetric) {
1119: MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,PETSC_TRUE,0,0,&tia,&tja);
1120: nz = tia[n];
1121: } else {
1122: tia = a->i; tja = a->j;
1123: }
1125: if (!blockcompressed && bs > 1) {
1126: (*nn) *= bs;
1127: /* malloc & create the natural set of indices */
1128: PetscMalloc1((n+1)*bs,ia);
1129: if (n) {
1130: (*ia)[0] = oshift;
1131: for (j=1; j<bs; j++) {
1132: (*ia)[j] = (tia[1]-tia[0])*bs+(*ia)[j-1];
1133: }
1134: }
1136: for (i=1; i<n; i++) {
1137: (*ia)[i*bs] = (tia[i]-tia[i-1])*bs + (*ia)[i*bs-1];
1138: for (j=1; j<bs; j++) {
1139: (*ia)[i*bs+j] = (tia[i+1]-tia[i])*bs + (*ia)[i*bs+j-1];
1140: }
1141: }
1142: if (n) {
1143: (*ia)[n*bs] = (tia[n]-tia[n-1])*bs + (*ia)[n*bs-1];
1144: }
1146: if (inja) {
1147: PetscMalloc1(nz*bs*bs,ja);
1148: cnt = 0;
1149: for (i=0; i<n; i++) {
1150: for (j=0; j<bs; j++) {
1151: for (k=tia[i]; k<tia[i+1]; k++) {
1152: for (l=0; l<bs; l++) {
1153: (*ja)[cnt++] = bs*tja[k] + l;
1154: }
1155: }
1156: }
1157: }
1158: }
1160: if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */
1161: PetscFree(tia);
1162: PetscFree(tja);
1163: }
1164: } else if (oshift == 1) {
1165: if (symmetric) {
1166: nz = tia[A->rmap->n/bs];
1167: /* add 1 to i and j indices */
1168: for (i=0; i<A->rmap->n/bs+1; i++) tia[i] = tia[i] + 1;
1169: *ia = tia;
1170: if (ja) {
1171: for (i=0; i<nz; i++) tja[i] = tja[i] + 1;
1172: *ja = tja;
1173: }
1174: } else {
1175: nz = a->i[A->rmap->n/bs];
1176: /* malloc space and add 1 to i and j indices */
1177: PetscMalloc1(A->rmap->n/bs+1,ia);
1178: for (i=0; i<A->rmap->n/bs+1; i++) (*ia)[i] = a->i[i] + 1;
1179: if (ja) {
1180: PetscMalloc1(nz,ja);
1181: for (i=0; i<nz; i++) (*ja)[i] = a->j[i] + 1;
1182: }
1183: }
1184: } else {
1185: *ia = tia;
1186: if (ja) *ja = tja;
1187: }
1188: return(0);
1189: }
1191: static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool *done)
1192: {
1196: if (!ia) return(0);
1197: if ((!blockcompressed && A->rmap->bs > 1) || (symmetric || oshift == 1)) {
1198: PetscFree(*ia);
1199: if (ja) {PetscFree(*ja);}
1200: }
1201: return(0);
1202: }
1204: PetscErrorCode MatDestroy_SeqBAIJ(Mat A)
1205: {
1206: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1210: #if defined(PETSC_USE_LOG)
1211: PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap->N,A->cmap->n,a->nz);
1212: #endif
1213: MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);
1214: ISDestroy(&a->row);
1215: ISDestroy(&a->col);
1216: if (a->free_diag) {PetscFree(a->diag);}
1217: PetscFree(a->idiag);
1218: if (a->free_imax_ilen) {PetscFree2(a->imax,a->ilen);}
1219: PetscFree(a->solve_work);
1220: PetscFree(a->mult_work);
1221: PetscFree(a->sor_workt);
1222: PetscFree(a->sor_work);
1223: ISDestroy(&a->icol);
1224: PetscFree(a->saved_values);
1225: PetscFree2(a->compressedrow.i,a->compressedrow.rindex);
1227: MatDestroy(&a->sbaijMat);
1228: MatDestroy(&a->parent);
1229: PetscFree(A->data);
1231: PetscObjectChangeTypeName((PetscObject)A,0);
1232: PetscObjectComposeFunction((PetscObject)A,"MatInvertBlockDiagonal_C",NULL);
1233: PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C",NULL);
1234: PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C",NULL);
1235: PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C",NULL);
1236: PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C",NULL);
1237: PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C",NULL);
1238: PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C",NULL);
1239: PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocationCSR_C",NULL);
1240: PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqbstrm_C",NULL);
1241: PetscObjectComposeFunction((PetscObject)A,"MatIsTranspose_C",NULL);
1242: #if defined(PETSC_HAVE_HYPRE)
1243: PetscObjectComposeFunction((PetscObject)A,"MatConvert_mpiaij_hypre_C",NULL);
1244: #endif
1245: return(0);
1246: }
1248: PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op,PetscBool flg)
1249: {
1250: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1254: switch (op) {
1255: case MAT_ROW_ORIENTED:
1256: a->roworiented = flg;
1257: break;
1258: case MAT_KEEP_NONZERO_PATTERN:
1259: a->keepnonzeropattern = flg;
1260: break;
1261: case MAT_NEW_NONZERO_LOCATIONS:
1262: a->nonew = (flg ? 0 : 1);
1263: break;
1264: case MAT_NEW_NONZERO_LOCATION_ERR:
1265: a->nonew = (flg ? -1 : 0);
1266: break;
1267: case MAT_NEW_NONZERO_ALLOCATION_ERR:
1268: a->nonew = (flg ? -2 : 0);
1269: break;
1270: case MAT_UNUSED_NONZERO_LOCATION_ERR:
1271: a->nounused = (flg ? -1 : 0);
1272: break;
1273: case MAT_NEW_DIAGONALS:
1274: case MAT_IGNORE_OFF_PROC_ENTRIES:
1275: case MAT_USE_HASH_TABLE:
1276: PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);
1277: break;
1278: case MAT_SPD:
1279: case MAT_SYMMETRIC:
1280: case MAT_STRUCTURALLY_SYMMETRIC:
1281: case MAT_HERMITIAN:
1282: case MAT_SYMMETRY_ETERNAL:
1283: case MAT_SUBMAT_SINGLEIS:
1284: case MAT_STRUCTURE_ONLY:
1285: /* These options are handled directly by MatSetOption() */
1286: break;
1287: default:
1288: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1289: }
1290: return(0);
1291: }
1293: /* used for both SeqBAIJ and SeqSBAIJ matrices */
1294: PetscErrorCode MatGetRow_SeqBAIJ_private(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v,PetscInt *ai,PetscInt *aj,PetscScalar *aa)
1295: {
1297: PetscInt itmp,i,j,k,M,bn,bp,*idx_i,bs,bs2;
1298: MatScalar *aa_i;
1299: PetscScalar *v_i;
1302: bs = A->rmap->bs;
1303: bs2 = bs*bs;
1304: if (row < 0 || row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row);
1306: bn = row/bs; /* Block number */
1307: bp = row % bs; /* Block Position */
1308: M = ai[bn+1] - ai[bn];
1309: *nz = bs*M;
1311: if (v) {
1312: *v = 0;
1313: if (*nz) {
1314: PetscMalloc1(*nz,v);
1315: for (i=0; i<M; i++) { /* for each block in the block row */
1316: v_i = *v + i*bs;
1317: aa_i = aa + bs2*(ai[bn] + i);
1318: for (j=bp,k=0; j<bs2; j+=bs,k++) v_i[k] = aa_i[j];
1319: }
1320: }
1321: }
1323: if (idx) {
1324: *idx = 0;
1325: if (*nz) {
1326: PetscMalloc1(*nz,idx);
1327: for (i=0; i<M; i++) { /* for each block in the block row */
1328: idx_i = *idx + i*bs;
1329: itmp = bs*aj[ai[bn] + i];
1330: for (j=0; j<bs; j++) idx_i[j] = itmp++;
1331: }
1332: }
1333: }
1334: return(0);
1335: }
1337: PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1338: {
1339: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1343: MatGetRow_SeqBAIJ_private(A,row,nz,idx,v,a->i,a->j,a->a);
1344: return(0);
1345: }
1347: PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1348: {
1352: if (idx) {PetscFree(*idx);}
1353: if (v) {PetscFree(*v);}
1354: return(0);
1355: }
1357: PetscErrorCode MatTranspose_SeqBAIJ(Mat A,MatReuse reuse,Mat *B)
1358: {
1359: Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data;
1360: Mat C;
1362: PetscInt i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap->bs,mbs=a->mbs,nbs=a->nbs,len,*col;
1363: PetscInt *rows,*cols,bs2=a->bs2;
1364: MatScalar *array;
1367: if (reuse == MAT_INPLACE_MATRIX && mbs != nbs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1368: if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
1369: PetscCalloc1(1+nbs,&col);
1371: for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1;
1372: MatCreate(PetscObjectComm((PetscObject)A),&C);
1373: MatSetSizes(C,A->cmap->n,A->rmap->N,A->cmap->n,A->rmap->N);
1374: MatSetType(C,((PetscObject)A)->type_name);
1375: MatSeqBAIJSetPreallocation(C,bs,0,col);
1376: PetscFree(col);
1377: } else {
1378: C = *B;
1379: }
1381: array = a->a;
1382: PetscMalloc2(bs,&rows,bs,&cols);
1383: for (i=0; i<mbs; i++) {
1384: cols[0] = i*bs;
1385: for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1;
1386: len = ai[i+1] - ai[i];
1387: for (j=0; j<len; j++) {
1388: rows[0] = (*aj++)*bs;
1389: for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1;
1390: MatSetValues_SeqBAIJ(C,bs,rows,bs,cols,array,INSERT_VALUES);
1391: array += bs2;
1392: }
1393: }
1394: PetscFree2(rows,cols);
1396: MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);
1397: MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);
1399: if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
1400: *B = C;
1401: } else {
1402: MatHeaderMerge(A,&C);
1403: }
1404: return(0);
1405: }
1407: PetscErrorCode MatIsTranspose_SeqBAIJ(Mat A,Mat B,PetscReal tol,PetscBool *f)
1408: {
1410: Mat Btrans;
1413: *f = PETSC_FALSE;
1414: MatTranspose_SeqBAIJ(A,MAT_INITIAL_MATRIX,&Btrans);
1415: MatEqual_SeqBAIJ(B,Btrans,f);
1416: MatDestroy(&Btrans);
1417: return(0);
1418: }
1420: static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer)
1421: {
1422: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1424: PetscInt i,*col_lens,bs = A->rmap->bs,count,*jj,j,k,l,bs2=a->bs2;
1425: int fd;
1426: PetscScalar *aa;
1427: FILE *file;
1430: PetscViewerBinaryGetDescriptor(viewer,&fd);
1431: PetscMalloc1(4+A->rmap->N,&col_lens);
1432: col_lens[0] = MAT_FILE_CLASSID;
1434: col_lens[1] = A->rmap->N;
1435: col_lens[2] = A->cmap->n;
1436: col_lens[3] = a->nz*bs2;
1438: /* store lengths of each row and write (including header) to file */
1439: count = 0;
1440: for (i=0; i<a->mbs; i++) {
1441: for (j=0; j<bs; j++) {
1442: col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
1443: }
1444: }
1445: PetscBinaryWrite(fd,col_lens,4+A->rmap->N,PETSC_INT,PETSC_TRUE);
1446: PetscFree(col_lens);
1448: /* store column indices (zero start index) */
1449: PetscMalloc1((a->nz+1)*bs2,&jj);
1450: count = 0;
1451: for (i=0; i<a->mbs; i++) {
1452: for (j=0; j<bs; j++) {
1453: for (k=a->i[i]; k<a->i[i+1]; k++) {
1454: for (l=0; l<bs; l++) {
1455: jj[count++] = bs*a->j[k] + l;
1456: }
1457: }
1458: }
1459: }
1460: PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);
1461: PetscFree(jj);
1463: /* store nonzero values */
1464: PetscMalloc1((a->nz+1)*bs2,&aa);
1465: count = 0;
1466: for (i=0; i<a->mbs; i++) {
1467: for (j=0; j<bs; j++) {
1468: for (k=a->i[i]; k<a->i[i+1]; k++) {
1469: for (l=0; l<bs; l++) {
1470: aa[count++] = a->a[bs2*k + l*bs + j];
1471: }
1472: }
1473: }
1474: }
1475: PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);
1476: PetscFree(aa);
1478: PetscViewerBinaryGetInfoPointer(viewer,&file);
1479: if (file) {
1480: fprintf(file,"-matload_block_size %d\n",(int)A->rmap->bs);
1481: }
1482: return(0);
1483: }
1485: static PetscErrorCode MatView_SeqBAIJ_ASCII_structonly(Mat A,PetscViewer viewer)
1486: {
1488: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1489: PetscInt i,bs = A->rmap->bs,k;
1492: PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);
1493: for (i=0; i<a->mbs; i++) {
1494: PetscViewerASCIIPrintf(viewer,"row %D-%D:",i*bs,i*bs+bs-1);
1495: for (k=a->i[i]; k<a->i[i+1]; k++) {
1496: PetscViewerASCIIPrintf(viewer," (%D-%D) ",bs*a->j[k],bs*a->j[k]+bs-1);
1497: }
1498: PetscViewerASCIIPrintf(viewer,"\n");
1499: }
1500: PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);
1501: return(0);
1502: }
1504: static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer)
1505: {
1506: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1507: PetscErrorCode ierr;
1508: PetscInt i,j,bs = A->rmap->bs,k,l,bs2=a->bs2;
1509: PetscViewerFormat format;
1512: if (A->structure_only) {
1513: MatView_SeqBAIJ_ASCII_structonly(A,viewer);
1514: return(0);
1515: }
1517: PetscViewerGetFormat(viewer,&format);
1518: if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1519: PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);
1520: } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
1521: const char *matname;
1522: Mat aij;
1523: MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);
1524: PetscObjectGetName((PetscObject)A,&matname);
1525: PetscObjectSetName((PetscObject)aij,matname);
1526: MatView(aij,viewer);
1527: MatDestroy(&aij);
1528: } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1529: return(0);
1530: } else if (format == PETSC_VIEWER_ASCII_COMMON) {
1531: PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);
1532: for (i=0; i<a->mbs; i++) {
1533: for (j=0; j<bs; j++) {
1534: PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);
1535: for (k=a->i[i]; k<a->i[i+1]; k++) {
1536: for (l=0; l<bs; l++) {
1537: #if defined(PETSC_USE_COMPLEX)
1538: if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1539: PetscViewerASCIIPrintf(viewer," (%D, %g + %gi) ",bs*a->j[k]+l,
1540: (double)PetscRealPart(a->a[bs2*k + l*bs + j]),(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));
1541: } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1542: PetscViewerASCIIPrintf(viewer," (%D, %g - %gi) ",bs*a->j[k]+l,
1543: (double)PetscRealPart(a->a[bs2*k + l*bs + j]),-(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));
1544: } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1545: PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)PetscRealPart(a->a[bs2*k + l*bs + j]));
1546: }
1547: #else
1548: if (a->a[bs2*k + l*bs + j] != 0.0) {
1549: PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)a->a[bs2*k + l*bs + j]);
1550: }
1551: #endif
1552: }
1553: }
1554: PetscViewerASCIIPrintf(viewer,"\n");
1555: }
1556: }
1557: PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);
1558: } else {
1559: PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);
1560: for (i=0; i<a->mbs; i++) {
1561: for (j=0; j<bs; j++) {
1562: PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);
1563: for (k=a->i[i]; k<a->i[i+1]; k++) {
1564: for (l=0; l<bs; l++) {
1565: #if defined(PETSC_USE_COMPLEX)
1566: if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
1567: PetscViewerASCIIPrintf(viewer," (%D, %g + %g i) ",bs*a->j[k]+l,
1568: (double)PetscRealPart(a->a[bs2*k + l*bs + j]),(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));
1569: } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
1570: PetscViewerASCIIPrintf(viewer," (%D, %g - %g i) ",bs*a->j[k]+l,
1571: (double)PetscRealPart(a->a[bs2*k + l*bs + j]),-(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));
1572: } else {
1573: PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)PetscRealPart(a->a[bs2*k + l*bs + j]));
1574: }
1575: #else
1576: PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)a->a[bs2*k + l*bs + j]);
1577: #endif
1578: }
1579: }
1580: PetscViewerASCIIPrintf(viewer,"\n");
1581: }
1582: }
1583: PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);
1584: }
1585: PetscViewerFlush(viewer);
1586: return(0);
1587: }
1589: #include <petscdraw.h>
1590: static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
1591: {
1592: Mat A = (Mat) Aa;
1593: Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data;
1594: PetscErrorCode ierr;
1595: PetscInt row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap->bs,bs2=a->bs2;
1596: PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r;
1597: MatScalar *aa;
1598: PetscViewer viewer;
1599: PetscViewerFormat format;
1602: PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);
1603: PetscViewerGetFormat(viewer,&format);
1604: PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);
1606: /* loop over matrix elements drawing boxes */
1608: if (format != PETSC_VIEWER_DRAW_CONTOUR) {
1609: PetscDrawCollectiveBegin(draw);
1610: /* Blue for negative, Cyan for zero and Red for positive */
1611: color = PETSC_DRAW_BLUE;
1612: for (i=0,row=0; i<mbs; i++,row+=bs) {
1613: for (j=a->i[i]; j<a->i[i+1]; j++) {
1614: y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1615: x_l = a->j[j]*bs; x_r = x_l + 1.0;
1616: aa = a->a + j*bs2;
1617: for (k=0; k<bs; k++) {
1618: for (l=0; l<bs; l++) {
1619: if (PetscRealPart(*aa++) >= 0.) continue;
1620: PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
1621: }
1622: }
1623: }
1624: }
1625: color = PETSC_DRAW_CYAN;
1626: for (i=0,row=0; i<mbs; i++,row+=bs) {
1627: for (j=a->i[i]; j<a->i[i+1]; j++) {
1628: y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1629: x_l = a->j[j]*bs; x_r = x_l + 1.0;
1630: aa = a->a + j*bs2;
1631: for (k=0; k<bs; k++) {
1632: for (l=0; l<bs; l++) {
1633: if (PetscRealPart(*aa++) != 0.) continue;
1634: PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
1635: }
1636: }
1637: }
1638: }
1639: color = PETSC_DRAW_RED;
1640: for (i=0,row=0; i<mbs; i++,row+=bs) {
1641: for (j=a->i[i]; j<a->i[i+1]; j++) {
1642: y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1643: x_l = a->j[j]*bs; x_r = x_l + 1.0;
1644: aa = a->a + j*bs2;
1645: for (k=0; k<bs; k++) {
1646: for (l=0; l<bs; l++) {
1647: if (PetscRealPart(*aa++) <= 0.) continue;
1648: PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
1649: }
1650: }
1651: }
1652: }
1653: PetscDrawCollectiveEnd(draw);
1654: } else {
1655: /* use contour shading to indicate magnitude of values */
1656: /* first determine max of all nonzero values */
1657: PetscReal minv = 0.0, maxv = 0.0;
1658: PetscDraw popup;
1660: for (i=0; i<a->nz*a->bs2; i++) {
1661: if (PetscAbsScalar(a->a[i]) > maxv) maxv = PetscAbsScalar(a->a[i]);
1662: }
1663: if (minv >= maxv) maxv = minv + PETSC_SMALL;
1664: PetscDrawGetPopup(draw,&popup);
1665: PetscDrawScalePopup(popup,0.0,maxv);
1667: PetscDrawCollectiveBegin(draw);
1668: for (i=0,row=0; i<mbs; i++,row+=bs) {
1669: for (j=a->i[i]; j<a->i[i+1]; j++) {
1670: y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1671: x_l = a->j[j]*bs; x_r = x_l + 1.0;
1672: aa = a->a + j*bs2;
1673: for (k=0; k<bs; k++) {
1674: for (l=0; l<bs; l++) {
1675: MatScalar v = *aa++;
1676: color = PetscDrawRealToColor(PetscAbsScalar(v),minv,maxv);
1677: PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
1678: }
1679: }
1680: }
1681: }
1682: PetscDrawCollectiveEnd(draw);
1683: }
1684: return(0);
1685: }
1687: static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer)
1688: {
1690: PetscReal xl,yl,xr,yr,w,h;
1691: PetscDraw draw;
1692: PetscBool isnull;
1695: PetscViewerDrawGetDraw(viewer,0,&draw);
1696: PetscDrawIsNull(draw,&isnull);
1697: if (isnull) return(0);
1699: xr = A->cmap->n; yr = A->rmap->N; h = yr/10.0; w = xr/10.0;
1700: xr += w; yr += h; xl = -w; yl = -h;
1701: PetscDrawSetCoordinates(draw,xl,yl,xr,yr);
1702: PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);
1703: PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);
1704: PetscObjectCompose((PetscObject)A,"Zoomviewer",NULL);
1705: PetscDrawSave(draw);
1706: return(0);
1707: }
1709: PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer)
1710: {
1712: PetscBool iascii,isbinary,isdraw;
1715: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);
1716: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);
1717: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);
1718: if (iascii) {
1719: MatView_SeqBAIJ_ASCII(A,viewer);
1720: } else if (isbinary) {
1721: MatView_SeqBAIJ_Binary(A,viewer);
1722: } else if (isdraw) {
1723: MatView_SeqBAIJ_Draw(A,viewer);
1724: } else {
1725: Mat B;
1726: MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);
1727: MatView(B,viewer);
1728: MatDestroy(&B);
1729: }
1730: return(0);
1731: }
1734: PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[])
1735: {
1736: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1737: PetscInt *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
1738: PetscInt *ai = a->i,*ailen = a->ilen;
1739: PetscInt brow,bcol,ridx,cidx,bs=A->rmap->bs,bs2=a->bs2;
1740: MatScalar *ap,*aa = a->a;
1743: for (k=0; k<m; k++) { /* loop over rows */
1744: row = im[k]; brow = row/bs;
1745: if (row < 0) {v += n; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
1746: if (row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row);
1747: rp = aj + ai[brow]; ap = aa + bs2*ai[brow];
1748: nrow = ailen[brow];
1749: for (l=0; l<n; l++) { /* loop over columns */
1750: if (in[l] < 0) {v++; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
1751: if (in[l] >= A->cmap->n) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]);
1752: col = in[l];
1753: bcol = col/bs;
1754: cidx = col%bs;
1755: ridx = row%bs;
1756: high = nrow;
1757: low = 0; /* assume unsorted */
1758: while (high-low > 5) {
1759: t = (low+high)/2;
1760: if (rp[t] > bcol) high = t;
1761: else low = t;
1762: }
1763: for (i=low; i<high; i++) {
1764: if (rp[i] > bcol) break;
1765: if (rp[i] == bcol) {
1766: *v++ = ap[bs2*i+bs*cidx+ridx];
1767: goto finished;
1768: }
1769: }
1770: *v++ = 0.0;
1771: finished:;
1772: }
1773: }
1774: return(0);
1775: }
1777: PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
1778: {
1779: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1780: PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1;
1781: PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen;
1782: PetscErrorCode ierr;
1783: PetscInt *aj =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs,stepval;
1784: PetscBool roworiented=a->roworiented;
1785: const PetscScalar *value = v;
1786: MatScalar *ap=NULL,*aa = a->a,*bap;
1789: if (roworiented) {
1790: stepval = (n-1)*bs;
1791: } else {
1792: stepval = (m-1)*bs;
1793: }
1794: for (k=0; k<m; k++) { /* loop over added rows */
1795: row = im[k];
1796: if (row < 0) continue;
1797: #if defined(PETSC_USE_DEBUG)
1798: if (row >= a->mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block row index too large %D max %D",row,a->mbs-1);
1799: #endif
1800: rp = aj + ai[row];
1801: if (!A->structure_only) ap = aa + bs2*ai[row];
1802: rmax = imax[row];
1803: nrow = ailen[row];
1804: low = 0;
1805: high = nrow;
1806: for (l=0; l<n; l++) { /* loop over added columns */
1807: if (in[l] < 0) continue;
1808: #if defined(PETSC_USE_DEBUG)
1809: if (in[l] >= a->nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block column index too large %D max %D",in[l],a->nbs-1);
1810: #endif
1811: col = in[l];
1812: if (!A->structure_only) {
1813: if (roworiented) {
1814: value = v + (k*(stepval+bs) + l)*bs;
1815: } else {
1816: value = v + (l*(stepval+bs) + k)*bs;
1817: }
1818: }
1819: if (col <= lastcol) low = 0;
1820: else high = nrow;
1821: lastcol = col;
1822: while (high-low > 7) {
1823: t = (low+high)/2;
1824: if (rp[t] > col) high = t;
1825: else low = t;
1826: }
1827: for (i=low; i<high; i++) {
1828: if (rp[i] > col) break;
1829: if (rp[i] == col) {
1830: if (A->structure_only) goto noinsert2;
1831: bap = ap + bs2*i;
1832: if (roworiented) {
1833: if (is == ADD_VALUES) {
1834: for (ii=0; ii<bs; ii++,value+=stepval) {
1835: for (jj=ii; jj<bs2; jj+=bs) {
1836: bap[jj] += *value++;
1837: }
1838: }
1839: } else {
1840: for (ii=0; ii<bs; ii++,value+=stepval) {
1841: for (jj=ii; jj<bs2; jj+=bs) {
1842: bap[jj] = *value++;
1843: }
1844: }
1845: }
1846: } else {
1847: if (is == ADD_VALUES) {
1848: for (ii=0; ii<bs; ii++,value+=bs+stepval) {
1849: for (jj=0; jj<bs; jj++) {
1850: bap[jj] += value[jj];
1851: }
1852: bap += bs;
1853: }
1854: } else {
1855: for (ii=0; ii<bs; ii++,value+=bs+stepval) {
1856: for (jj=0; jj<bs; jj++) {
1857: bap[jj] = value[jj];
1858: }
1859: bap += bs;
1860: }
1861: }
1862: }
1863: goto noinsert2;
1864: }
1865: }
1866: if (nonew == 1) goto noinsert2;
1867: if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new blocked index new nonzero block (%D, %D) in the matrix", row, col);
1868: if (A->structure_only) {
1869: MatSeqXAIJReallocateAIJ_structure_only(A,a->mbs,bs2,nrow,row,col,rmax,ai,aj,rp,imax,nonew,MatScalar);
1870: } else {
1871: MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
1872: }
1873: N = nrow++ - 1; high++;
1874: /* shift up all the later entries in this row */
1875: for (ii=N; ii>=i; ii--) {
1876: rp[ii+1] = rp[ii];
1877: if (!A->structure_only) {
1878: PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));
1879: }
1880: }
1881: if (N >= i && !A->structure_only) {
1882: PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));
1883: }
1885: rp[i] = col;
1886: if (!A->structure_only) {
1887: bap = ap + bs2*i;
1888: if (roworiented) {
1889: for (ii=0; ii<bs; ii++,value+=stepval) {
1890: for (jj=ii; jj<bs2; jj+=bs) {
1891: bap[jj] = *value++;
1892: }
1893: }
1894: } else {
1895: for (ii=0; ii<bs; ii++,value+=stepval) {
1896: for (jj=0; jj<bs; jj++) {
1897: *bap++ = *value++;
1898: }
1899: }
1900: }
1901: }
1902: noinsert2:;
1903: low = i;
1904: }
1905: ailen[row] = nrow;
1906: }
1907: return(0);
1908: }
1910: PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
1911: {
1912: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1913: PetscInt fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax;
1914: PetscInt m = A->rmap->N,*ip,N,*ailen = a->ilen;
1916: PetscInt mbs = a->mbs,bs2 = a->bs2,rmax = 0;
1917: MatScalar *aa = a->a,*ap;
1918: PetscReal ratio=0.6;
1921: if (mode == MAT_FLUSH_ASSEMBLY) return(0);
1923: if (m) rmax = ailen[0];
1924: for (i=1; i<mbs; i++) {
1925: /* move each row back by the amount of empty slots (fshift) before it*/
1926: fshift += imax[i-1] - ailen[i-1];
1927: rmax = PetscMax(rmax,ailen[i]);
1928: if (fshift) {
1929: ip = aj + ai[i]; ap = aa + bs2*ai[i];
1930: N = ailen[i];
1931: for (j=0; j<N; j++) {
1932: ip[j-fshift] = ip[j];
1933: if (!A->structure_only) {
1934: PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));
1935: }
1936: }
1937: }
1938: ai[i] = ai[i-1] + ailen[i-1];
1939: }
1940: if (mbs) {
1941: fshift += imax[mbs-1] - ailen[mbs-1];
1942: ai[mbs] = ai[mbs-1] + ailen[mbs-1];
1943: }
1945: /* reset ilen and imax for each row */
1946: a->nonzerorowcnt = 0;
1947: if (A->structure_only) {
1948: PetscFree2(a->imax,a->ilen);
1949: } else { /* !A->structure_only */
1950: for (i=0; i<mbs; i++) {
1951: ailen[i] = imax[i] = ai[i+1] - ai[i];
1952: a->nonzerorowcnt += ((ai[i+1] - ai[i]) > 0);
1953: }
1954: }
1955: a->nz = ai[mbs];
1957: /* diagonals may have moved, so kill the diagonal pointers */
1958: a->idiagvalid = PETSC_FALSE;
1959: if (fshift && a->diag) {
1960: PetscFree(a->diag);
1961: PetscLogObjectMemory((PetscObject)A,-(mbs+1)*sizeof(PetscInt));
1962: a->diag = 0;
1963: }
1964: if (fshift && a->nounused == -1) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "Unused space detected in matrix: %D X %D block size %D, %D unneeded", m, A->cmap->n, A->rmap->bs, fshift*bs2);
1965: PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap->n,A->rmap->bs,fshift*bs2,a->nz*bs2);
1966: PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);
1967: PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);
1969: A->info.mallocs += a->reallocs;
1970: a->reallocs = 0;
1971: A->info.nz_unneeded = (PetscReal)fshift*bs2;
1972: a->rmax = rmax;
1974: if (!A->structure_only) {
1975: MatCheckCompressedRow(A,a->nonzerorowcnt,&a->compressedrow,a->i,mbs,ratio);
1976: }
1977: return(0);
1978: }
1980: /*
1981: This function returns an array of flags which indicate the locations of contiguous
1982: blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9]
1983: then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
1984: Assume: sizes should be long enough to hold all the values.
1985: */
1986: static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max)
1987: {
1988: PetscInt i,j,k,row;
1989: PetscBool flg;
1992: for (i=0,j=0; i<n; j++) {
1993: row = idx[i];
1994: if (row%bs!=0) { /* Not the begining of a block */
1995: sizes[j] = 1;
1996: i++;
1997: } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */
1998: sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */
1999: i++;
2000: } else { /* Begining of the block, so check if the complete block exists */
2001: flg = PETSC_TRUE;
2002: for (k=1; k<bs; k++) {
2003: if (row+k != idx[i+k]) { /* break in the block */
2004: flg = PETSC_FALSE;
2005: break;
2006: }
2007: }
2008: if (flg) { /* No break in the bs */
2009: sizes[j] = bs;
2010: i += bs;
2011: } else {
2012: sizes[j] = 1;
2013: i++;
2014: }
2015: }
2016: }
2017: *bs_max = j;
2018: return(0);
2019: }
2021: PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b)
2022: {
2023: Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data;
2024: PetscErrorCode ierr;
2025: PetscInt i,j,k,count,*rows;
2026: PetscInt bs=A->rmap->bs,bs2=baij->bs2,*sizes,row,bs_max;
2027: PetscScalar zero = 0.0;
2028: MatScalar *aa;
2029: const PetscScalar *xx;
2030: PetscScalar *bb;
2033: /* fix right hand side if needed */
2034: if (x && b) {
2035: VecGetArrayRead(x,&xx);
2036: VecGetArray(b,&bb);
2037: for (i=0; i<is_n; i++) {
2038: bb[is_idx[i]] = diag*xx[is_idx[i]];
2039: }
2040: VecRestoreArrayRead(x,&xx);
2041: VecRestoreArray(b,&bb);
2042: }
2044: /* Make a copy of the IS and sort it */
2045: /* allocate memory for rows,sizes */
2046: PetscMalloc2(is_n,&rows,2*is_n,&sizes);
2048: /* copy IS values to rows, and sort them */
2049: for (i=0; i<is_n; i++) rows[i] = is_idx[i];
2050: PetscSortInt(is_n,rows);
2052: if (baij->keepnonzeropattern) {
2053: for (i=0; i<is_n; i++) sizes[i] = 1;
2054: bs_max = is_n;
2055: } else {
2056: MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);
2057: A->nonzerostate++;
2058: }
2060: for (i=0,j=0; i<bs_max; j+=sizes[i],i++) {
2061: row = rows[j];
2062: if (row < 0 || row > A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row);
2063: count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
2064: aa = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
2065: if (sizes[i] == bs && !baij->keepnonzeropattern) {
2066: if (diag != (PetscScalar)0.0) {
2067: if (baij->ilen[row/bs] > 0) {
2068: baij->ilen[row/bs] = 1;
2069: baij->j[baij->i[row/bs]] = row/bs;
2071: PetscMemzero(aa,count*bs*sizeof(MatScalar));
2072: }
2073: /* Now insert all the diagonal values for this bs */
2074: for (k=0; k<bs; k++) {
2075: (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);
2076: }
2077: } else { /* (diag == 0.0) */
2078: baij->ilen[row/bs] = 0;
2079: } /* end (diag == 0.0) */
2080: } else { /* (sizes[i] != bs) */
2081: #if defined(PETSC_USE_DEBUG)
2082: if (sizes[i] != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal Error. Value should be 1");
2083: #endif
2084: for (k=0; k<count; k++) {
2085: aa[0] = zero;
2086: aa += bs;
2087: }
2088: if (diag != (PetscScalar)0.0) {
2089: (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);
2090: }
2091: }
2092: }
2094: PetscFree2(rows,sizes);
2095: MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);
2096: return(0);
2097: }
2099: PetscErrorCode MatZeroRowsColumns_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b)
2100: {
2101: Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data;
2102: PetscErrorCode ierr;
2103: PetscInt i,j,k,count;
2104: PetscInt bs =A->rmap->bs,bs2=baij->bs2,row,col;
2105: PetscScalar zero = 0.0;
2106: MatScalar *aa;
2107: const PetscScalar *xx;
2108: PetscScalar *bb;
2109: PetscBool *zeroed,vecs = PETSC_FALSE;
2112: /* fix right hand side if needed */
2113: if (x && b) {
2114: VecGetArrayRead(x,&xx);
2115: VecGetArray(b,&bb);
2116: vecs = PETSC_TRUE;
2117: }
2119: /* zero the columns */
2120: PetscCalloc1(A->rmap->n,&zeroed);
2121: for (i=0; i<is_n; i++) {
2122: if (is_idx[i] < 0 || is_idx[i] >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",is_idx[i]);
2123: zeroed[is_idx[i]] = PETSC_TRUE;
2124: }
2125: for (i=0; i<A->rmap->N; i++) {
2126: if (!zeroed[i]) {
2127: row = i/bs;
2128: for (j=baij->i[row]; j<baij->i[row+1]; j++) {
2129: for (k=0; k<bs; k++) {
2130: col = bs*baij->j[j] + k;
2131: if (zeroed[col]) {
2132: aa = ((MatScalar*)(baij->a)) + j*bs2 + (i%bs) + bs*k;
2133: if (vecs) bb[i] -= aa[0]*xx[col];
2134: aa[0] = 0.0;
2135: }
2136: }
2137: }
2138: } else if (vecs) bb[i] = diag*xx[i];
2139: }
2140: PetscFree(zeroed);
2141: if (vecs) {
2142: VecRestoreArrayRead(x,&xx);
2143: VecRestoreArray(b,&bb);
2144: }
2146: /* zero the rows */
2147: for (i=0; i<is_n; i++) {
2148: row = is_idx[i];
2149: count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
2150: aa = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
2151: for (k=0; k<count; k++) {
2152: aa[0] = zero;
2153: aa += bs;
2154: }
2155: if (diag != (PetscScalar)0.0) {
2156: (*A->ops->setvalues)(A,1,&row,1,&row,&diag,INSERT_VALUES);
2157: }
2158: }
2159: MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);
2160: return(0);
2161: }
2163: PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
2164: {
2165: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2166: PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1;
2167: PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen;
2168: PetscInt *aj =a->j,nonew=a->nonew,bs=A->rmap->bs,brow,bcol;
2170: PetscInt ridx,cidx,bs2=a->bs2;
2171: PetscBool roworiented=a->roworiented;
2172: MatScalar *ap=NULL,value=0.0,*aa=a->a,*bap;
2175: for (k=0; k<m; k++) { /* loop over added rows */
2176: row = im[k];
2177: brow = row/bs;
2178: if (row < 0) continue;
2179: #if defined(PETSC_USE_DEBUG)
2180: if (row >= A->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->N-1);
2181: #endif
2182: rp = aj + ai[brow];
2183: if (!A->structure_only) ap = aa + bs2*ai[brow];
2184: rmax = imax[brow];
2185: nrow = ailen[brow];
2186: low = 0;
2187: high = nrow;
2188: for (l=0; l<n; l++) { /* loop over added columns */
2189: if (in[l] < 0) continue;
2190: #if defined(PETSC_USE_DEBUG)
2191: if (in[l] >= A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap->n-1);
2192: #endif
2193: col = in[l]; bcol = col/bs;
2194: ridx = row % bs; cidx = col % bs;
2195: if (!A->structure_only) {
2196: if (roworiented) {
2197: value = v[l + k*n];
2198: } else {
2199: value = v[k + l*m];
2200: }
2201: }
2202: if (col <= lastcol) low = 0; else high = nrow;
2203: lastcol = col;
2204: while (high-low > 7) {
2205: t = (low+high)/2;
2206: if (rp[t] > bcol) high = t;
2207: else low = t;
2208: }
2209: for (i=low; i<high; i++) {
2210: if (rp[i] > bcol) break;
2211: if (rp[i] == bcol) {
2212: bap = ap + bs2*i + bs*cidx + ridx;
2213: if (!A->structure_only) {
2214: if (is == ADD_VALUES) *bap += value;
2215: else *bap = value;
2216: }
2217: goto noinsert1;
2218: }
2219: }
2220: if (nonew == 1) goto noinsert1;
2221: if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
2222: if (A->structure_only) {
2223: MatSeqXAIJReallocateAIJ_structure_only(A,a->mbs,bs2,nrow,brow,bcol,rmax,ai,aj,rp,imax,nonew,MatScalar);
2224: } else {
2225: MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
2226: }
2227: N = nrow++ - 1; high++;
2228: /* shift up all the later entries in this row */
2229: for (ii=N; ii>=i; ii--) {
2230: rp[ii+1] = rp[ii];
2231: if (!A->structure_only) {
2232: PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));
2233: }
2234: }
2235: if (N>=i && !A->structure_only) {
2236: PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));
2237: }
2238: rp[i] = bcol;
2239: if (!A->structure_only) ap[bs2*i + bs*cidx + ridx] = value;
2240: a->nz++;
2241: A->nonzerostate++;
2242: noinsert1:;
2243: low = i;
2244: }
2245: ailen[brow] = nrow;
2246: }
2247: return(0);
2248: }
2250: PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,const MatFactorInfo *info)
2251: {
2252: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inA->data;
2253: Mat outA;
2255: PetscBool row_identity,col_identity;
2258: if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU");
2259: ISIdentity(row,&row_identity);
2260: ISIdentity(col,&col_identity);
2261: if (!row_identity || !col_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU");
2263: outA = inA;
2264: inA->factortype = MAT_FACTOR_LU;
2265: PetscFree(inA->solvertype);
2266: PetscStrallocpy(MATSOLVERPETSC,&inA->solvertype);
2268: MatMarkDiagonal_SeqBAIJ(inA);
2270: PetscObjectReference((PetscObject)row);
2271: ISDestroy(&a->row);
2272: a->row = row;
2273: PetscObjectReference((PetscObject)col);
2274: ISDestroy(&a->col);
2275: a->col = col;
2277: /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
2278: ISDestroy(&a->icol);
2279: ISInvertPermutation(col,PETSC_DECIDE,&a->icol);
2280: PetscLogObjectParent((PetscObject)inA,(PetscObject)a->icol);
2282: MatSeqBAIJSetNumericFactorization_inplace(inA,(PetscBool)(row_identity && col_identity));
2283: if (!a->solve_work) {
2284: PetscMalloc1(inA->rmap->N+inA->rmap->bs,&a->solve_work);
2285: PetscLogObjectMemory((PetscObject)inA,(inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar));
2286: }
2287: MatLUFactorNumeric(outA,inA,info);
2288: return(0);
2289: }
2291: PetscErrorCode MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices)
2292: {
2293: Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)mat->data;
2294: PetscInt i,nz,mbs;
2297: nz = baij->maxnz;
2298: mbs = baij->mbs;
2299: for (i=0; i<nz; i++) {
2300: baij->j[i] = indices[i];
2301: }
2302: baij->nz = nz;
2303: for (i=0; i<mbs; i++) {
2304: baij->ilen[i] = baij->imax[i];
2305: }
2306: return(0);
2307: }
2309: /*@
2310: MatSeqBAIJSetColumnIndices - Set the column indices for all the rows
2311: in the matrix.
2313: Input Parameters:
2314: + mat - the SeqBAIJ matrix
2315: - indices - the column indices
2317: Level: advanced
2319: Notes:
2320: This can be called if you have precomputed the nonzero structure of the
2321: matrix and want to provide it to the matrix object to improve the performance
2322: of the MatSetValues() operation.
2324: You MUST have set the correct numbers of nonzeros per row in the call to
2325: MatCreateSeqBAIJ(), and the columns indices MUST be sorted.
2327: MUST be called before any calls to MatSetValues();
2329: @*/
2330: PetscErrorCode MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices)
2331: {
2337: PetscUseMethod(mat,"MatSeqBAIJSetColumnIndices_C",(Mat,PetscInt*),(mat,indices));
2338: return(0);
2339: }
2341: PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[])
2342: {
2343: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2345: PetscInt i,j,n,row,bs,*ai,*aj,mbs;
2346: PetscReal atmp;
2347: PetscScalar *x,zero = 0.0;
2348: MatScalar *aa;
2349: PetscInt ncols,brow,krow,kcol;
2352: if (A->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
2353: bs = A->rmap->bs;
2354: aa = a->a;
2355: ai = a->i;
2356: aj = a->j;
2357: mbs = a->mbs;
2359: VecSet(v,zero);
2360: VecGetArray(v,&x);
2361: VecGetLocalSize(v,&n);
2362: if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
2363: for (i=0; i<mbs; i++) {
2364: ncols = ai[1] - ai[0]; ai++;
2365: brow = bs*i;
2366: for (j=0; j<ncols; j++) {
2367: for (kcol=0; kcol<bs; kcol++) {
2368: for (krow=0; krow<bs; krow++) {
2369: atmp = PetscAbsScalar(*aa);aa++;
2370: row = brow + krow; /* row index */
2371: if (PetscAbsScalar(x[row]) < atmp) {x[row] = atmp; if (idx) idx[row] = bs*(*aj) + kcol;}
2372: }
2373: }
2374: aj++;
2375: }
2376: }
2377: VecRestoreArray(v,&x);
2378: return(0);
2379: }
2381: PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str)
2382: {
2386: /* If the two matrices have the same copy implementation, use fast copy. */
2387: if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) {
2388: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2389: Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)B->data;
2390: PetscInt ambs=a->mbs,bmbs=b->mbs,abs=A->rmap->bs,bbs=B->rmap->bs,bs2=abs*abs;
2392: if (a->i[ambs] != b->i[bmbs]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Number of nonzero blocks in matrices A %D and B %D are different",a->i[ambs],b->i[bmbs]);
2393: if (abs != bbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Block size A %D and B %D are different",abs,bbs);
2394: PetscMemcpy(b->a,a->a,(bs2*a->i[ambs])*sizeof(PetscScalar));
2395: PetscObjectStateIncrease((PetscObject)B);
2396: } else {
2397: MatCopy_Basic(A,B,str);
2398: }
2399: return(0);
2400: }
2402: PetscErrorCode MatSetUp_SeqBAIJ(Mat A)
2403: {
2407: MatSeqBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,0);
2408: return(0);
2409: }
2411: PetscErrorCode MatSeqBAIJGetArray_SeqBAIJ(Mat A,PetscScalar *array[])
2412: {
2413: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2416: *array = a->a;
2417: return(0);
2418: }
2420: PetscErrorCode MatSeqBAIJRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[])
2421: {
2423: return(0);
2424: }
2426: PetscErrorCode MatAXPYGetPreallocation_SeqBAIJ(Mat Y,Mat X,PetscInt *nnz)
2427: {
2428: PetscInt bs = Y->rmap->bs,mbs = Y->rmap->N/bs;
2429: Mat_SeqBAIJ *x = (Mat_SeqBAIJ*)X->data;
2430: Mat_SeqBAIJ *y = (Mat_SeqBAIJ*)Y->data;
2434: /* Set the number of nonzeros in the new matrix */
2435: MatAXPYGetPreallocation_SeqX_private(mbs,x->i,x->j,y->i,y->j,nnz);
2436: return(0);
2437: }
2439: PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2440: {
2441: Mat_SeqBAIJ *x = (Mat_SeqBAIJ*)X->data,*y = (Mat_SeqBAIJ*)Y->data;
2443: PetscInt bs=Y->rmap->bs,bs2=bs*bs;
2444: PetscBLASInt one=1;
2447: if (str == SAME_NONZERO_PATTERN) {
2448: PetscScalar alpha = a;
2449: PetscBLASInt bnz;
2450: PetscBLASIntCast(x->nz*bs2,&bnz);
2451: PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2452: PetscObjectStateIncrease((PetscObject)Y);
2453: } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2454: MatAXPY_Basic(Y,a,X,str);
2455: } else {
2456: Mat B;
2457: PetscInt *nnz;
2458: if (bs != X->rmap->bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrices must have same block size");
2459: PetscMalloc1(Y->rmap->N,&nnz);
2460: MatCreate(PetscObjectComm((PetscObject)Y),&B);
2461: PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);
2462: MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);
2463: MatSetBlockSizesFromMats(B,Y,Y);
2464: MatSetType(B,(MatType) ((PetscObject)Y)->type_name);
2465: MatAXPYGetPreallocation_SeqBAIJ(Y,X,nnz);
2466: MatSeqBAIJSetPreallocation(B,bs,0,nnz);
2467: MatAXPY_BasicWithPreallocation(B,Y,a,X,str);
2468: MatHeaderReplace(Y,&B);
2469: PetscFree(nnz);
2470: }
2471: return(0);
2472: }
2474: PetscErrorCode MatRealPart_SeqBAIJ(Mat A)
2475: {
2476: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2477: PetscInt i,nz = a->bs2*a->i[a->mbs];
2478: MatScalar *aa = a->a;
2481: for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]);
2482: return(0);
2483: }
2485: PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A)
2486: {
2487: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2488: PetscInt i,nz = a->bs2*a->i[a->mbs];
2489: MatScalar *aa = a->a;
2492: for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]);
2493: return(0);
2494: }
2496: /*
2497: Code almost idential to MatGetColumnIJ_SeqAIJ() should share common code
2498: */
2499: PetscErrorCode MatGetColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool *done)
2500: {
2501: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2503: PetscInt bs = A->rmap->bs,i,*collengths,*cia,*cja,n = A->cmap->n/bs,m = A->rmap->n/bs;
2504: PetscInt nz = a->i[m],row,*jj,mr,col;
2507: *nn = n;
2508: if (!ia) return(0);
2509: if (symmetric) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not for BAIJ matrices");
2510: else {
2511: PetscCalloc1(n+1,&collengths);
2512: PetscMalloc1(n+1,&cia);
2513: PetscMalloc1(nz+1,&cja);
2514: jj = a->j;
2515: for (i=0; i<nz; i++) {
2516: collengths[jj[i]]++;
2517: }
2518: cia[0] = oshift;
2519: for (i=0; i<n; i++) {
2520: cia[i+1] = cia[i] + collengths[i];
2521: }
2522: PetscMemzero(collengths,n*sizeof(PetscInt));
2523: jj = a->j;
2524: for (row=0; row<m; row++) {
2525: mr = a->i[row+1] - a->i[row];
2526: for (i=0; i<mr; i++) {
2527: col = *jj++;
2529: cja[cia[col] + collengths[col]++ - oshift] = row + oshift;
2530: }
2531: }
2532: PetscFree(collengths);
2533: *ia = cia; *ja = cja;
2534: }
2535: return(0);
2536: }
2538: PetscErrorCode MatRestoreColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscBool *done)
2539: {
2543: if (!ia) return(0);
2544: PetscFree(*ia);
2545: PetscFree(*ja);
2546: return(0);
2547: }
2549: /*
2550: MatGetColumnIJ_SeqBAIJ_Color() and MatRestoreColumnIJ_SeqBAIJ_Color() are customized from
2551: MatGetColumnIJ_SeqBAIJ() and MatRestoreColumnIJ_SeqBAIJ() by adding an output
2552: spidx[], index of a->a, to be used in MatTransposeColoringCreate() and MatFDColoringCreate()
2553: */
2554: PetscErrorCode MatGetColumnIJ_SeqBAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscInt *spidx[],PetscBool *done)
2555: {
2556: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2558: PetscInt i,*collengths,*cia,*cja,n=a->nbs,m=a->mbs;
2559: PetscInt nz = a->i[m],row,*jj,mr,col;
2560: PetscInt *cspidx;
2563: *nn = n;
2564: if (!ia) return(0);
2566: PetscCalloc1(n+1,&collengths);
2567: PetscMalloc1(n+1,&cia);
2568: PetscMalloc1(nz+1,&cja);
2569: PetscMalloc1(nz+1,&cspidx);
2570: jj = a->j;
2571: for (i=0; i<nz; i++) {
2572: collengths[jj[i]]++;
2573: }
2574: cia[0] = oshift;
2575: for (i=0; i<n; i++) {
2576: cia[i+1] = cia[i] + collengths[i];
2577: }
2578: PetscMemzero(collengths,n*sizeof(PetscInt));
2579: jj = a->j;
2580: for (row=0; row<m; row++) {
2581: mr = a->i[row+1] - a->i[row];
2582: for (i=0; i<mr; i++) {
2583: col = *jj++;
2584: cspidx[cia[col] + collengths[col] - oshift] = a->i[row] + i; /* index of a->j */
2585: cja[cia[col] + collengths[col]++ - oshift] = row + oshift;
2586: }
2587: }
2588: PetscFree(collengths);
2589: *ia = cia; *ja = cja;
2590: *spidx = cspidx;
2591: return(0);
2592: }
2594: PetscErrorCode MatRestoreColumnIJ_SeqBAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscInt *spidx[],PetscBool *done)
2595: {
2599: MatRestoreColumnIJ_SeqBAIJ(A,oshift,symmetric,inodecompressed,n,ia,ja,done);
2600: PetscFree(*spidx);
2601: return(0);
2602: }
2604: PetscErrorCode MatShift_SeqBAIJ(Mat Y,PetscScalar a)
2605: {
2607: Mat_SeqBAIJ *aij = (Mat_SeqBAIJ*)Y->data;
2610: if (!Y->preallocated || !aij->nz) {
2611: MatSeqBAIJSetPreallocation(Y,Y->rmap->bs,1,NULL);
2612: }
2613: MatShift_Basic(Y,a);
2614: return(0);
2615: }
2617: /* -------------------------------------------------------------------*/
2618: static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ,
2619: MatGetRow_SeqBAIJ,
2620: MatRestoreRow_SeqBAIJ,
2621: MatMult_SeqBAIJ_N,
2622: /* 4*/ MatMultAdd_SeqBAIJ_N,
2623: MatMultTranspose_SeqBAIJ,
2624: MatMultTransposeAdd_SeqBAIJ,
2625: 0,
2626: 0,
2627: 0,
2628: /* 10*/ 0,
2629: MatLUFactor_SeqBAIJ,
2630: 0,
2631: 0,
2632: MatTranspose_SeqBAIJ,
2633: /* 15*/ MatGetInfo_SeqBAIJ,
2634: MatEqual_SeqBAIJ,
2635: MatGetDiagonal_SeqBAIJ,
2636: MatDiagonalScale_SeqBAIJ,
2637: MatNorm_SeqBAIJ,
2638: /* 20*/ 0,
2639: MatAssemblyEnd_SeqBAIJ,
2640: MatSetOption_SeqBAIJ,
2641: MatZeroEntries_SeqBAIJ,
2642: /* 24*/ MatZeroRows_SeqBAIJ,
2643: 0,
2644: 0,
2645: 0,
2646: 0,
2647: /* 29*/ MatSetUp_SeqBAIJ,
2648: 0,
2649: 0,
2650: 0,
2651: 0,
2652: /* 34*/ MatDuplicate_SeqBAIJ,
2653: 0,
2654: 0,
2655: MatILUFactor_SeqBAIJ,
2656: 0,
2657: /* 39*/ MatAXPY_SeqBAIJ,
2658: MatCreateSubMatrices_SeqBAIJ,
2659: MatIncreaseOverlap_SeqBAIJ,
2660: MatGetValues_SeqBAIJ,
2661: MatCopy_SeqBAIJ,
2662: /* 44*/ 0,
2663: MatScale_SeqBAIJ,
2664: MatShift_SeqBAIJ,
2665: 0,
2666: MatZeroRowsColumns_SeqBAIJ,
2667: /* 49*/ 0,
2668: MatGetRowIJ_SeqBAIJ,
2669: MatRestoreRowIJ_SeqBAIJ,
2670: MatGetColumnIJ_SeqBAIJ,
2671: MatRestoreColumnIJ_SeqBAIJ,
2672: /* 54*/ MatFDColoringCreate_SeqXAIJ,
2673: 0,
2674: 0,
2675: 0,
2676: MatSetValuesBlocked_SeqBAIJ,
2677: /* 59*/ MatCreateSubMatrix_SeqBAIJ,
2678: MatDestroy_SeqBAIJ,
2679: MatView_SeqBAIJ,
2680: 0,
2681: 0,
2682: /* 64*/ 0,
2683: 0,
2684: 0,
2685: 0,
2686: 0,
2687: /* 69*/ MatGetRowMaxAbs_SeqBAIJ,
2688: 0,
2689: MatConvert_Basic,
2690: 0,
2691: 0,
2692: /* 74*/ 0,
2693: MatFDColoringApply_BAIJ,
2694: 0,
2695: 0,
2696: 0,
2697: /* 79*/ 0,
2698: 0,
2699: 0,
2700: 0,
2701: MatLoad_SeqBAIJ,
2702: /* 84*/ 0,
2703: 0,
2704: 0,
2705: 0,
2706: 0,
2707: /* 89*/ 0,
2708: 0,
2709: 0,
2710: 0,
2711: 0,
2712: /* 94*/ 0,
2713: 0,
2714: 0,
2715: 0,
2716: 0,
2717: /* 99*/ 0,
2718: 0,
2719: 0,
2720: 0,
2721: 0,
2722: /*104*/ 0,
2723: MatRealPart_SeqBAIJ,
2724: MatImaginaryPart_SeqBAIJ,
2725: 0,
2726: 0,
2727: /*109*/ 0,
2728: 0,
2729: 0,
2730: 0,
2731: MatMissingDiagonal_SeqBAIJ,
2732: /*114*/ 0,
2733: 0,
2734: 0,
2735: 0,
2736: 0,
2737: /*119*/ 0,
2738: 0,
2739: MatMultHermitianTranspose_SeqBAIJ,
2740: MatMultHermitianTransposeAdd_SeqBAIJ,
2741: 0,
2742: /*124*/ 0,
2743: 0,
2744: MatInvertBlockDiagonal_SeqBAIJ,
2745: 0,
2746: 0,
2747: /*129*/ 0,
2748: 0,
2749: 0,
2750: 0,
2751: 0,
2752: /*134*/ 0,
2753: 0,
2754: 0,
2755: 0,
2756: 0,
2757: /*139*/ MatSetBlockSizes_Default,
2758: 0,
2759: 0,
2760: MatFDColoringSetUp_SeqXAIJ,
2761: 0,
2762: /*144*/MatCreateMPIMatConcatenateSeqMat_SeqBAIJ,
2763: MatDestroySubMatrices_SeqBAIJ
2764: };
2766: PetscErrorCode MatStoreValues_SeqBAIJ(Mat mat)
2767: {
2768: Mat_SeqBAIJ *aij = (Mat_SeqBAIJ*)mat->data;
2769: PetscInt nz = aij->i[aij->mbs]*aij->bs2;
2773: if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
2775: /* allocate space for values if not already there */
2776: if (!aij->saved_values) {
2777: PetscMalloc1(nz+1,&aij->saved_values);
2778: PetscLogObjectMemory((PetscObject)mat,(nz+1)*sizeof(PetscScalar));
2779: }
2781: /* copy values over */
2782: PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));
2783: return(0);
2784: }
2786: PetscErrorCode MatRetrieveValues_SeqBAIJ(Mat mat)
2787: {
2788: Mat_SeqBAIJ *aij = (Mat_SeqBAIJ*)mat->data;
2790: PetscInt nz = aij->i[aij->mbs]*aij->bs2;
2793: if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
2794: if (!aij->saved_values) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatStoreValues(A);first");
2796: /* copy values over */
2797: PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));
2798: return(0);
2799: }
2801: PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*);
2802: PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*);
2804: PetscErrorCode MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz)
2805: {
2806: Mat_SeqBAIJ *b;
2808: PetscInt i,mbs,nbs,bs2;
2809: PetscBool flg = PETSC_FALSE,skipallocation = PETSC_FALSE,realalloc = PETSC_FALSE;
2812: if (nz >= 0 || nnz) realalloc = PETSC_TRUE;
2813: if (nz == MAT_SKIP_ALLOCATION) {
2814: skipallocation = PETSC_TRUE;
2815: nz = 0;
2816: }
2818: MatSetBlockSize(B,PetscAbs(bs));
2819: PetscLayoutSetUp(B->rmap);
2820: PetscLayoutSetUp(B->cmap);
2821: PetscLayoutGetBlockSize(B->rmap,&bs);
2823: B->preallocated = PETSC_TRUE;
2825: mbs = B->rmap->n/bs;
2826: nbs = B->cmap->n/bs;
2827: bs2 = bs*bs;
2829: if (mbs*bs!=B->rmap->n || nbs*bs!=B->cmap->n) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap->N,B->cmap->n,bs);
2831: if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2832: if (nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz);
2833: if (nnz) {
2834: for (i=0; i<mbs; i++) {
2835: if (nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]);
2836: if (nnz[i] > nbs) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs);
2837: }
2838: }
2840: b = (Mat_SeqBAIJ*)B->data;
2841: PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat");
2842: PetscOptionsBool("-mat_no_unroll","Do not optimize for block size (slow)",NULL,flg,&flg,NULL);
2843: PetscOptionsEnd();
2845: if (!flg) {
2846: switch (bs) {
2847: case 1:
2848: B->ops->mult = MatMult_SeqBAIJ_1;
2849: B->ops->multadd = MatMultAdd_SeqBAIJ_1;
2850: break;
2851: case 2:
2852: B->ops->mult = MatMult_SeqBAIJ_2;
2853: B->ops->multadd = MatMultAdd_SeqBAIJ_2;
2854: break;
2855: case 3:
2856: B->ops->mult = MatMult_SeqBAIJ_3;
2857: B->ops->multadd = MatMultAdd_SeqBAIJ_3;
2858: break;
2859: case 4:
2860: B->ops->mult = MatMult_SeqBAIJ_4;
2861: B->ops->multadd = MatMultAdd_SeqBAIJ_4;
2862: break;
2863: case 5:
2864: B->ops->mult = MatMult_SeqBAIJ_5;
2865: B->ops->multadd = MatMultAdd_SeqBAIJ_5;
2866: break;
2867: case 6:
2868: B->ops->mult = MatMult_SeqBAIJ_6;
2869: B->ops->multadd = MatMultAdd_SeqBAIJ_6;
2870: break;
2871: case 7:
2872: B->ops->mult = MatMult_SeqBAIJ_7;
2873: B->ops->multadd = MatMultAdd_SeqBAIJ_7;
2874: break;
2875: case 11:
2876: B->ops->mult = MatMult_SeqBAIJ_11;
2877: B->ops->multadd = MatMultAdd_SeqBAIJ_11;
2878: break;
2879: case 15:
2880: B->ops->mult = MatMult_SeqBAIJ_15_ver1;
2881: B->ops->multadd = MatMultAdd_SeqBAIJ_N;
2882: break;
2883: default:
2884: B->ops->mult = MatMult_SeqBAIJ_N;
2885: B->ops->multadd = MatMultAdd_SeqBAIJ_N;
2886: break;
2887: }
2888: }
2889: B->ops->sor = MatSOR_SeqBAIJ;
2890: b->mbs = mbs;
2891: b->nbs = nbs;
2892: if (!skipallocation) {
2893: if (!b->imax) {
2894: PetscMalloc2(mbs,&b->imax,mbs,&b->ilen);
2895: PetscLogObjectMemory((PetscObject)B,2*mbs*sizeof(PetscInt));
2897: b->free_imax_ilen = PETSC_TRUE;
2898: }
2899: /* b->ilen will count nonzeros in each block row so far. */
2900: for (i=0; i<mbs; i++) b->ilen[i] = 0;
2901: if (!nnz) {
2902: if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2903: else if (nz < 0) nz = 1;
2904: for (i=0; i<mbs; i++) b->imax[i] = nz;
2905: nz = nz*mbs;
2906: } else {
2907: nz = 0;
2908: for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];}
2909: }
2911: /* allocate the matrix space */
2912: MatSeqXAIJFreeAIJ(B,&b->a,&b->j,&b->i);
2913: if (B->structure_only) {
2914: PetscMalloc1(nz,&b->j);
2915: PetscMalloc1(B->rmap->N+1,&b->i);
2916: PetscLogObjectMemory((PetscObject)B,(B->rmap->N+1)*sizeof(PetscInt)+nz*sizeof(PetscInt));
2917: } else {
2918: PetscMalloc3(bs2*nz,&b->a,nz,&b->j,B->rmap->N+1,&b->i);
2919: PetscLogObjectMemory((PetscObject)B,(B->rmap->N+1)*sizeof(PetscInt)+nz*(bs2*sizeof(PetscScalar)+sizeof(PetscInt)));
2920: PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));
2921: }
2922: PetscMemzero(b->j,nz*sizeof(PetscInt));
2924: if (B->structure_only) {
2925: b->singlemalloc = PETSC_FALSE;
2926: b->free_a = PETSC_FALSE;
2927: } else {
2928: b->singlemalloc = PETSC_TRUE;
2929: b->free_a = PETSC_TRUE;
2930: }
2931: b->free_ij = PETSC_TRUE;
2933: b->i[0] = 0;
2934: for (i=1; i<mbs+1; i++) {
2935: b->i[i] = b->i[i-1] + b->imax[i-1];
2936: }
2938: } else {
2939: b->free_a = PETSC_FALSE;
2940: b->free_ij = PETSC_FALSE;
2941: }
2943: b->bs2 = bs2;
2944: b->mbs = mbs;
2945: b->nz = 0;
2946: b->maxnz = nz;
2947: B->info.nz_unneeded = (PetscReal)b->maxnz*bs2;
2948: B->was_assembled = PETSC_FALSE;
2949: B->assembled = PETSC_FALSE;
2950: if (realalloc) {MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);}
2951: return(0);
2952: }
2954: PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
2955: {
2956: PetscInt i,m,nz,nz_max=0,*nnz;
2957: PetscScalar *values=0;
2958: PetscBool roworiented = ((Mat_SeqBAIJ*)B->data)->roworiented;
2962: if (bs < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs);
2963: PetscLayoutSetBlockSize(B->rmap,bs);
2964: PetscLayoutSetBlockSize(B->cmap,bs);
2965: PetscLayoutSetUp(B->rmap);
2966: PetscLayoutSetUp(B->cmap);
2967: PetscLayoutGetBlockSize(B->rmap,&bs);
2968: m = B->rmap->n/bs;
2970: if (ii[0] != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %D",ii[0]);
2971: PetscMalloc1(m+1, &nnz);
2972: for (i=0; i<m; i++) {
2973: nz = ii[i+1]- ii[i];
2974: if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "Local row %D has a negative number of columns %D",i,nz);
2975: nz_max = PetscMax(nz_max, nz);
2976: nnz[i] = nz;
2977: }
2978: MatSeqBAIJSetPreallocation(B,bs,0,nnz);
2979: PetscFree(nnz);
2981: values = (PetscScalar*)V;
2982: if (!values) {
2983: PetscCalloc1(bs*bs*(nz_max+1),&values);
2984: }
2985: for (i=0; i<m; i++) {
2986: PetscInt ncols = ii[i+1] - ii[i];
2987: const PetscInt *icols = jj + ii[i];
2988: const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
2989: if (!roworiented) {
2990: MatSetValuesBlocked_SeqBAIJ(B,1,&i,ncols,icols,svals,INSERT_VALUES);
2991: } else {
2992: PetscInt j;
2993: for (j=0; j<ncols; j++) {
2994: const PetscScalar *svals = values + (V ? (bs*bs*(ii[i]+j)) : 0);
2995: MatSetValuesBlocked_SeqBAIJ(B,1,&i,1,&icols[j],svals,INSERT_VALUES);
2996: }
2997: }
2998: }
2999: if (!V) { PetscFree(values); }
3000: MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
3001: MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
3002: MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);
3003: return(0);
3004: }
3006: /*MC
3007: MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on
3008: block sparse compressed row format.
3010: Options Database Keys:
3011: . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions()
3013: Level: beginner
3015: .seealso: MatCreateSeqBAIJ()
3016: M*/
3018: PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBSTRM(Mat, MatType,MatReuse,Mat*);
3020: PETSC_EXTERN PetscErrorCode MatCreate_SeqBAIJ(Mat B)
3021: {
3023: PetscMPIInt size;
3024: Mat_SeqBAIJ *b;
3027: MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);
3028: if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Comm must be of size 1");
3030: PetscNewLog(B,&b);
3031: B->data = (void*)b;
3032: PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));
3034: b->row = 0;
3035: b->col = 0;
3036: b->icol = 0;
3037: b->reallocs = 0;
3038: b->saved_values = 0;
3040: b->roworiented = PETSC_TRUE;
3041: b->nonew = 0;
3042: b->diag = 0;
3043: B->spptr = 0;
3044: B->info.nz_unneeded = (PetscReal)b->maxnz*b->bs2;
3045: b->keepnonzeropattern = PETSC_FALSE;
3047: PetscObjectComposeFunction((PetscObject)B,"MatInvertBlockDiagonal_C",MatInvertBlockDiagonal_SeqBAIJ);
3048: PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_SeqBAIJ);
3049: PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_SeqBAIJ);
3050: PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",MatSeqBAIJSetColumnIndices_SeqBAIJ);
3051: PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqaij_C",MatConvert_SeqBAIJ_SeqAIJ);
3052: PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C",MatConvert_SeqBAIJ_SeqSBAIJ);
3053: PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",MatSeqBAIJSetPreallocation_SeqBAIJ);
3054: PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C",MatSeqBAIJSetPreallocationCSR_SeqBAIJ);
3055: PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_SeqBAIJ);
3056: #if defined(PETSC_HAVE_HYPRE)
3057: PetscObjectComposeFunction((PetscObject)B,"MatConvert_baij_hypre_C",MatConvert_AIJ_HYPRE);
3058: #endif
3059: PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);
3060: return(0);
3061: }
3063: PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C,Mat A,MatDuplicateOption cpvalues,PetscBool mallocmatspace)
3064: {
3065: Mat_SeqBAIJ *c = (Mat_SeqBAIJ*)C->data,*a = (Mat_SeqBAIJ*)A->data;
3067: PetscInt i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2;
3070: if (a->i[mbs] != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupt matrix");
3072: if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3073: c->imax = a->imax;
3074: c->ilen = a->ilen;
3075: c->free_imax_ilen = PETSC_FALSE;
3076: } else {
3077: PetscMalloc2(mbs,&c->imax,mbs,&c->ilen);
3078: PetscLogObjectMemory((PetscObject)C,2*mbs*sizeof(PetscInt));
3079: for (i=0; i<mbs; i++) {
3080: c->imax[i] = a->imax[i];
3081: c->ilen[i] = a->ilen[i];
3082: }
3083: c->free_imax_ilen = PETSC_TRUE;
3084: }
3086: /* allocate the matrix space */
3087: if (mallocmatspace) {
3088: if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3089: PetscCalloc1(bs2*nz,&c->a);
3090: PetscLogObjectMemory((PetscObject)C,a->i[mbs]*bs2*sizeof(PetscScalar));
3092: c->i = a->i;
3093: c->j = a->j;
3094: c->singlemalloc = PETSC_FALSE;
3095: c->free_a = PETSC_TRUE;
3096: c->free_ij = PETSC_FALSE;
3097: c->parent = A;
3098: C->preallocated = PETSC_TRUE;
3099: C->assembled = PETSC_TRUE;
3101: PetscObjectReference((PetscObject)A);
3102: MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);
3103: MatSetOption(C,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);
3104: } else {
3105: PetscMalloc3(bs2*nz,&c->a,nz,&c->j,mbs+1,&c->i);
3106: PetscLogObjectMemory((PetscObject)C,a->i[mbs]*(bs2*sizeof(PetscScalar)+sizeof(PetscInt))+(mbs+1)*sizeof(PetscInt));
3108: c->singlemalloc = PETSC_TRUE;
3109: c->free_a = PETSC_TRUE;
3110: c->free_ij = PETSC_TRUE;
3112: PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));
3113: if (mbs > 0) {
3114: PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));
3115: if (cpvalues == MAT_COPY_VALUES) {
3116: PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));
3117: } else {
3118: PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));
3119: }
3120: }
3121: C->preallocated = PETSC_TRUE;
3122: C->assembled = PETSC_TRUE;
3123: }
3124: }
3126: c->roworiented = a->roworiented;
3127: c->nonew = a->nonew;
3129: PetscLayoutReference(A->rmap,&C->rmap);
3130: PetscLayoutReference(A->cmap,&C->cmap);
3132: c->bs2 = a->bs2;
3133: c->mbs = a->mbs;
3134: c->nbs = a->nbs;
3136: if (a->diag) {
3137: if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3138: c->diag = a->diag;
3139: c->free_diag = PETSC_FALSE;
3140: } else {
3141: PetscMalloc1(mbs+1,&c->diag);
3142: PetscLogObjectMemory((PetscObject)C,(mbs+1)*sizeof(PetscInt));
3143: for (i=0; i<mbs; i++) c->diag[i] = a->diag[i];
3144: c->free_diag = PETSC_TRUE;
3145: }
3146: } else c->diag = 0;
3148: c->nz = a->nz;
3149: c->maxnz = a->nz; /* Since we allocate exactly the right amount */
3150: c->solve_work = NULL;
3151: c->mult_work = NULL;
3152: c->sor_workt = NULL;
3153: c->sor_work = NULL;
3155: c->compressedrow.use = a->compressedrow.use;
3156: c->compressedrow.nrows = a->compressedrow.nrows;
3157: if (a->compressedrow.use) {
3158: i = a->compressedrow.nrows;
3159: PetscMalloc2(i+1,&c->compressedrow.i,i+1,&c->compressedrow.rindex);
3160: PetscLogObjectMemory((PetscObject)C,(2*i+1)*sizeof(PetscInt));
3161: PetscMemcpy(c->compressedrow.i,a->compressedrow.i,(i+1)*sizeof(PetscInt));
3162: PetscMemcpy(c->compressedrow.rindex,a->compressedrow.rindex,i*sizeof(PetscInt));
3163: } else {
3164: c->compressedrow.use = PETSC_FALSE;
3165: c->compressedrow.i = NULL;
3166: c->compressedrow.rindex = NULL;
3167: }
3168: C->nonzerostate = A->nonzerostate;
3170: PetscFunctionListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);
3171: return(0);
3172: }
3174: PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
3175: {
3179: MatCreate(PetscObjectComm((PetscObject)A),B);
3180: MatSetSizes(*B,A->rmap->N,A->cmap->n,A->rmap->N,A->cmap->n);
3181: MatSetType(*B,MATSEQBAIJ);
3182: MatDuplicateNoCreate_SeqBAIJ(*B,A,cpvalues,PETSC_TRUE);
3183: return(0);
3184: }
3186: PetscErrorCode MatLoad_SeqBAIJ(Mat newmat,PetscViewer viewer)
3187: {
3188: Mat_SeqBAIJ *a;
3190: PetscInt i,nz,header[4],*rowlengths=0,M,N,bs = newmat->rmap->bs;
3191: PetscInt *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
3192: PetscInt kmax,jcount,block,idx,point,nzcountb,extra_rows,rows,cols;
3193: PetscInt *masked,nmask,tmp,bs2,ishift;
3194: PetscMPIInt size;
3195: int fd;
3196: PetscScalar *aa;
3197: MPI_Comm comm;
3200: /* force binary viewer to load .info file if it has not yet done so */
3201: PetscViewerSetUp(viewer);
3202: PetscObjectGetComm((PetscObject)viewer,&comm);
3203: PetscOptionsBegin(comm,NULL,"Options for loading SEQBAIJ matrix","Mat");
3204: PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);
3205: PetscOptionsEnd();
3206: if (bs < 0) bs = 1;
3207: bs2 = bs*bs;
3209: MPI_Comm_size(comm,&size);
3210: if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"view must have one processor");
3211: PetscViewerBinaryGetDescriptor(viewer,&fd);
3212: PetscBinaryRead(fd,header,4,PETSC_INT);
3213: if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not Mat object");
3214: M = header[1]; N = header[2]; nz = header[3];
3216: if (header[3] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ");
3217: if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices");
3219: /*
3220: This code adds extra rows to make sure the number of rows is
3221: divisible by the blocksize
3222: */
3223: mbs = M/bs;
3224: extra_rows = bs - M + bs*(mbs);
3225: if (extra_rows == bs) extra_rows = 0;
3226: else mbs++;
3227: if (extra_rows) {
3228: PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");
3229: }
3231: /* Set global sizes if not already set */
3232: if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) {
3233: MatSetSizes(newmat,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows);
3234: } else { /* Check if the matrix global sizes are correct */
3235: MatGetSize(newmat,&rows,&cols);
3236: if (rows < 0 && cols < 0) { /* user might provide local size instead of global size */
3237: MatGetLocalSize(newmat,&rows,&cols);
3238: }
3239: if (M != rows || N != cols) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix in file of different length (%d, %d) than the input matrix (%d, %d)",M,N,rows,cols);
3240: }
3242: /* read in row lengths */
3243: PetscMalloc1(M+extra_rows,&rowlengths);
3244: PetscBinaryRead(fd,rowlengths,M,PETSC_INT);
3245: for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
3247: /* read in column indices */
3248: PetscMalloc1(nz+extra_rows,&jj);
3249: PetscBinaryRead(fd,jj,nz,PETSC_INT);
3250: for (i=0; i<extra_rows; i++) jj[nz+i] = M+i;
3252: /* loop over row lengths determining block row lengths */
3253: PetscCalloc1(mbs,&browlengths);
3254: PetscMalloc2(mbs,&mask,mbs,&masked);
3255: PetscMemzero(mask,mbs*sizeof(PetscInt));
3256: rowcount = 0;
3257: nzcount = 0;
3258: for (i=0; i<mbs; i++) {
3259: nmask = 0;
3260: for (j=0; j<bs; j++) {
3261: kmax = rowlengths[rowcount];
3262: for (k=0; k<kmax; k++) {
3263: tmp = jj[nzcount++]/bs;
3264: if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
3265: }
3266: rowcount++;
3267: }
3268: browlengths[i] += nmask;
3269: /* zero out the mask elements we set */
3270: for (j=0; j<nmask; j++) mask[masked[j]] = 0;
3271: }
3273: /* Do preallocation */
3274: MatSeqBAIJSetPreallocation(newmat,bs,0,browlengths);
3275: a = (Mat_SeqBAIJ*)newmat->data;
3277: /* set matrix "i" values */
3278: a->i[0] = 0;
3279: for (i=1; i<= mbs; i++) {
3280: a->i[i] = a->i[i-1] + browlengths[i-1];
3281: a->ilen[i-1] = browlengths[i-1];
3282: }
3283: a->nz = 0;
3284: for (i=0; i<mbs; i++) a->nz += browlengths[i];
3286: /* read in nonzero values */
3287: PetscMalloc1(nz+extra_rows,&aa);
3288: PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);
3289: for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0;
3291: /* set "a" and "j" values into matrix */
3292: nzcount = 0; jcount = 0;
3293: for (i=0; i<mbs; i++) {
3294: nzcountb = nzcount;
3295: nmask = 0;
3296: for (j=0; j<bs; j++) {
3297: kmax = rowlengths[i*bs+j];
3298: for (k=0; k<kmax; k++) {
3299: tmp = jj[nzcount++]/bs;
3300: if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
3301: }
3302: }
3303: /* sort the masked values */
3304: PetscSortInt(nmask,masked);
3306: /* set "j" values into matrix */
3307: maskcount = 1;
3308: for (j=0; j<nmask; j++) {
3309: a->j[jcount++] = masked[j];
3310: mask[masked[j]] = maskcount++;
3311: }
3312: /* set "a" values into matrix */
3313: ishift = bs2*a->i[i];
3314: for (j=0; j<bs; j++) {
3315: kmax = rowlengths[i*bs+j];
3316: for (k=0; k<kmax; k++) {
3317: tmp = jj[nzcountb]/bs;
3318: block = mask[tmp] - 1;
3319: point = jj[nzcountb] - bs*tmp;
3320: idx = ishift + bs2*block + j + bs*point;
3321: a->a[idx] = (MatScalar)aa[nzcountb++];
3322: }
3323: }
3324: /* zero out the mask elements we set */
3325: for (j=0; j<nmask; j++) mask[masked[j]] = 0;
3326: }
3327: if (jcount != a->nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix");
3329: PetscFree(rowlengths);
3330: PetscFree(browlengths);
3331: PetscFree(aa);
3332: PetscFree(jj);
3333: PetscFree2(mask,masked);
3335: MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);
3336: MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);
3337: return(0);
3338: }
3340: /*@C
3341: MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
3342: compressed row) format. For good matrix assembly performance the
3343: user should preallocate the matrix storage by setting the parameter nz
3344: (or the array nnz). By setting these parameters accurately, performance
3345: during matrix assembly can be increased by more than a factor of 50.
3347: Collective on MPI_Comm
3349: Input Parameters:
3350: + comm - MPI communicator, set to PETSC_COMM_SELF
3351: . bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
3352: blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
3353: . m - number of rows
3354: . n - number of columns
3355: . nz - number of nonzero blocks per block row (same for all rows)
3356: - nnz - array containing the number of nonzero blocks in the various block rows
3357: (possibly different for each block row) or NULL
3359: Output Parameter:
3360: . A - the matrix
3362: It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3363: MatXXXXSetPreallocation() paradgm instead of this routine directly.
3364: [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3366: Options Database Keys:
3367: . -mat_no_unroll - uses code that does not unroll the loops in the
3368: block calculations (much slower)
3369: . -mat_block_size - size of the blocks to use
3371: Level: intermediate
3373: Notes:
3374: The number of rows and columns must be divisible by blocksize.
3376: If the nnz parameter is given then the nz parameter is ignored
3378: A nonzero block is any block that as 1 or more nonzeros in it
3380: The block AIJ format is fully compatible with standard Fortran 77
3381: storage. That is, the stored row and column indices can begin at
3382: either one (as in Fortran) or zero. See the users' manual for details.
3384: Specify the preallocated storage with either nz or nnz (not both).
3385: Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory
3386: allocation. See Users-Manual: ch_mat for details.
3387: matrices.
3389: .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ()
3390: @*/
3391: PetscErrorCode MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
3392: {
3396: MatCreate(comm,A);
3397: MatSetSizes(*A,m,n,m,n);
3398: MatSetType(*A,MATSEQBAIJ);
3399: MatSeqBAIJSetPreallocation(*A,bs,nz,(PetscInt*)nnz);
3400: return(0);
3401: }
3403: /*@C
3404: MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros
3405: per row in the matrix. For good matrix assembly performance the
3406: user should preallocate the matrix storage by setting the parameter nz
3407: (or the array nnz). By setting these parameters accurately, performance
3408: during matrix assembly can be increased by more than a factor of 50.
3410: Collective on MPI_Comm
3412: Input Parameters:
3413: + B - the matrix
3414: . bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
3415: blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
3416: . nz - number of block nonzeros per block row (same for all rows)
3417: - nnz - array containing the number of block nonzeros in the various block rows
3418: (possibly different for each block row) or NULL
3420: Options Database Keys:
3421: . -mat_no_unroll - uses code that does not unroll the loops in the
3422: block calculations (much slower)
3423: . -mat_block_size - size of the blocks to use
3425: Level: intermediate
3427: Notes:
3428: If the nnz parameter is given then the nz parameter is ignored
3430: You can call MatGetInfo() to get information on how effective the preallocation was;
3431: for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3432: You can also run with the option -info and look for messages with the string
3433: malloc in them to see if additional memory allocation was needed.
3435: The block AIJ format is fully compatible with standard Fortran 77
3436: storage. That is, the stored row and column indices can begin at
3437: either one (as in Fortran) or zero. See the users' manual for details.
3439: Specify the preallocated storage with either nz or nnz (not both).
3440: Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory
3441: allocation. See Users-Manual: ch_mat for details.
3443: .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ(), MatGetInfo()
3444: @*/
3445: PetscErrorCode MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[])
3446: {
3453: PetscTryMethod(B,"MatSeqBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[]),(B,bs,nz,nnz));
3454: return(0);
3455: }
3457: /*@C
3458: MatSeqBAIJSetPreallocationCSR - Allocates memory for a sparse sequential matrix in AIJ format
3459: (the default sequential PETSc format).
3461: Collective on MPI_Comm
3463: Input Parameters:
3464: + B - the matrix
3465: . i - the indices into j for the start of each local row (starts with zero)
3466: . j - the column indices for each local row (starts with zero) these must be sorted for each row
3467: - v - optional values in the matrix
3469: Level: developer
3471: Notes:
3472: The order of the entries in values is specified by the MatOption MAT_ROW_ORIENTED. For example, C programs
3473: may want to use the default MAT_ROW_ORIENTED=PETSC_TRUE and use an array v[nnz][bs][bs] where the second index is
3474: over rows within a block and the last index is over columns within a block row. Fortran programs will likely set
3475: MAT_ROW_ORIENTED=PETSC_FALSE and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a
3476: block column and the second index is over columns within a block.
3478: .keywords: matrix, aij, compressed row, sparse
3480: .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatSeqBAIJSetPreallocation(), MATSEQBAIJ
3481: @*/
3482: PetscErrorCode MatSeqBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3483: {
3490: PetscTryMethod(B,"MatSeqBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));
3491: return(0);
3492: }
3495: /*@
3496: MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements provided by the user.
3498: Collective on MPI_Comm
3500: Input Parameters:
3501: + comm - must be an MPI communicator of size 1
3502: . bs - size of block
3503: . m - number of rows
3504: . n - number of columns
3505: . i - row indices
3506: . j - column indices
3507: - a - matrix values
3509: Output Parameter:
3510: . mat - the matrix
3512: Level: advanced
3514: Notes:
3515: The i, j, and a arrays are not copied by this routine, the user must free these arrays
3516: once the matrix is destroyed
3518: You cannot set new nonzero locations into this matrix, that will generate an error.
3520: The i and j indices are 0 based
3522: When block size is greater than 1 the matrix values must be stored using the BAIJ storage format (see the BAIJ code to determine this).
3524: The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is
3525: the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first
3526: block, followed by the second column of the first block etc etc. That is, the blocks are contiguous in memory
3527: with column-major ordering within blocks.
3529: .seealso: MatCreate(), MatCreateBAIJ(), MatCreateSeqBAIJ()
3531: @*/
3532: PetscErrorCode MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt i[],PetscInt j[],PetscScalar a[],Mat *mat)
3533: {
3535: PetscInt ii;
3536: Mat_SeqBAIJ *baij;
3539: if (bs != 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs);
3540: if (m > 0 && i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3542: MatCreate(comm,mat);
3543: MatSetSizes(*mat,m,n,m,n);
3544: MatSetType(*mat,MATSEQBAIJ);
3545: MatSeqBAIJSetPreallocation(*mat,bs,MAT_SKIP_ALLOCATION,0);
3546: baij = (Mat_SeqBAIJ*)(*mat)->data;
3547: PetscMalloc2(m,&baij->imax,m,&baij->ilen);
3548: PetscLogObjectMemory((PetscObject)*mat,2*m*sizeof(PetscInt));
3550: baij->i = i;
3551: baij->j = j;
3552: baij->a = a;
3554: baij->singlemalloc = PETSC_FALSE;
3555: baij->nonew = -1; /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/
3556: baij->free_a = PETSC_FALSE;
3557: baij->free_ij = PETSC_FALSE;
3559: for (ii=0; ii<m; ii++) {
3560: baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii];
3561: #if defined(PETSC_USE_DEBUG)
3562: if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]);
3563: #endif
3564: }
3565: #if defined(PETSC_USE_DEBUG)
3566: for (ii=0; ii<baij->i[m]; ii++) {
3567: if (j[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]);
3568: if (j[ii] > n - 1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]);
3569: }
3570: #endif
3572: MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);
3573: MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);
3574: return(0);
3575: }
3577: PetscErrorCode MatCreateMPIMatConcatenateSeqMat_SeqBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
3578: {
3580: PetscMPIInt size;
3583: MPI_Comm_size(comm,&size);
3584: if (size == 1 && scall == MAT_REUSE_MATRIX) {
3585: MatCopy(inmat,*outmat,SAME_NONZERO_PATTERN);
3586: } else {
3587: MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(comm,inmat,n,scall,outmat);
3588: }
3589: return(0);
3590: }