Actual source code: baij.c
petsc-3.7.7 2017-09-25
2: /*
3: Defines the basic matrix operations for the BAIJ (compressed row)
4: matrix storage format.
5: */
6: #include <../src/mat/impls/baij/seq/baij.h> /*I "petscmat.h" I*/
7: #include <petscblaslapack.h>
8: #include <petsc/private/kernels/blockinvert.h>
9: #include <petsc/private/kernels/blockmatmult.h>
13: PetscErrorCode MatInvertBlockDiagonal_SeqBAIJ(Mat A,const PetscScalar **values)
14: {
15: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*) A->data;
17: PetscInt *diag_offset,i,bs = A->rmap->bs,mbs = a->mbs,ipvt[5],bs2 = bs*bs,*v_pivots;
18: MatScalar *v = a->a,*odiag,*diag,*mdiag,work[25],*v_work;
19: PetscReal shift = 0.0;
20: PetscBool allowzeropivot,zeropivotdetected=PETSC_FALSE;
23: allowzeropivot = PetscNot(A->erroriffailure);
25: if (a->idiagvalid) {
26: if (values) *values = a->idiag;
27: return(0);
28: }
29: MatMarkDiagonal_SeqBAIJ(A);
30: diag_offset = a->diag;
31: if (!a->idiag) {
32: PetscMalloc1(2*bs2*mbs,&a->idiag);
33: PetscLogObjectMemory((PetscObject)A,2*bs2*mbs*sizeof(PetscScalar));
34: }
35: diag = a->idiag;
36: mdiag = a->idiag+bs2*mbs;
37: if (values) *values = a->idiag;
38: /* factor and invert each block */
39: switch (bs) {
40: case 1:
41: for (i=0; i<mbs; i++) {
42: odiag = v + 1*diag_offset[i];
43: diag[0] = odiag[0];
44: mdiag[0] = odiag[0];
45:
46: if (PetscAbsScalar(diag[0] + shift) < PETSC_MACHINE_EPSILON) {
47: if (allowzeropivot) {
48: A->errortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
49: PetscInfo1(A,"Zero pivot, row %D\n",i);
50: } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot, row %D",i);
51: }
52:
53: diag[0] = (PetscScalar)1.0 / (diag[0] + shift);
54: diag += 1;
55: mdiag += 1;
56: }
57: break;
58: case 2:
59: for (i=0; i<mbs; i++) {
60: odiag = v + 4*diag_offset[i];
61: diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
62: mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
63: PetscKernel_A_gets_inverse_A_2(diag,shift,allowzeropivot,&zeropivotdetected);
64: if (zeropivotdetected) A->errortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
65: diag += 4;
66: mdiag += 4;
67: }
68: break;
69: case 3:
70: for (i=0; i<mbs; i++) {
71: odiag = v + 9*diag_offset[i];
72: diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
73: diag[4] = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7];
74: diag[8] = odiag[8];
75: mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
76: mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7];
77: mdiag[8] = odiag[8];
78: PetscKernel_A_gets_inverse_A_3(diag,shift,allowzeropivot,&zeropivotdetected);
79: if (zeropivotdetected) A->errortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
80: diag += 9;
81: mdiag += 9;
82: }
83: break;
84: case 4:
85: for (i=0; i<mbs; i++) {
86: odiag = v + 16*diag_offset[i];
87: PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));
88: PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));
89: PetscKernel_A_gets_inverse_A_4(diag,shift,allowzeropivot,&zeropivotdetected);
90: if (zeropivotdetected) A->errortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
91: diag += 16;
92: mdiag += 16;
93: }
94: break;
95: case 5:
96: for (i=0; i<mbs; i++) {
97: odiag = v + 25*diag_offset[i];
98: PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));
99: PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));
100: PetscKernel_A_gets_inverse_A_5(diag,ipvt,work,shift,allowzeropivot,&zeropivotdetected);
101: if (zeropivotdetected) A->errortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
102: diag += 25;
103: mdiag += 25;
104: }
105: break;
106: case 6:
107: for (i=0; i<mbs; i++) {
108: odiag = v + 36*diag_offset[i];
109: PetscMemcpy(diag,odiag,36*sizeof(PetscScalar));
110: PetscMemcpy(mdiag,odiag,36*sizeof(PetscScalar));
111: PetscKernel_A_gets_inverse_A_6(diag,shift,allowzeropivot,&zeropivotdetected);
112: if (zeropivotdetected) A->errortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
113: diag += 36;
114: mdiag += 36;
115: }
116: break;
117: case 7:
118: for (i=0; i<mbs; i++) {
119: odiag = v + 49*diag_offset[i];
120: PetscMemcpy(diag,odiag,49*sizeof(PetscScalar));
121: PetscMemcpy(mdiag,odiag,49*sizeof(PetscScalar));
122: PetscKernel_A_gets_inverse_A_7(diag,shift,allowzeropivot,&zeropivotdetected);
123: if (zeropivotdetected) A->errortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
124: diag += 49;
125: mdiag += 49;
126: }
127: break;
128: default:
129: PetscMalloc2(bs,&v_work,bs,&v_pivots);
130: for (i=0; i<mbs; i++) {
131: odiag = v + bs2*diag_offset[i];
132: PetscMemcpy(diag,odiag,bs2*sizeof(PetscScalar));
133: PetscMemcpy(mdiag,odiag,bs2*sizeof(PetscScalar));
134: PetscKernel_A_gets_inverse_A(bs,diag,v_pivots,v_work,allowzeropivot,&zeropivotdetected);
135: if (zeropivotdetected) A->errortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
136: diag += bs2;
137: mdiag += bs2;
138: }
139: PetscFree2(v_work,v_pivots);
140: }
141: a->idiagvalid = PETSC_TRUE;
142: return(0);
143: }
147: PetscErrorCode MatSOR_SeqBAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
148: {
149: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
150: PetscScalar *x,*work,*w,*workt,*t;
151: const MatScalar *v,*aa = a->a, *idiag;
152: const PetscScalar *b,*xb;
153: PetscScalar s[7], xw[7]={0}; /* avoid some compilers thinking xw is uninitialized */
154: PetscErrorCode ierr;
155: PetscInt m = a->mbs,i,i2,nz,bs = A->rmap->bs,bs2 = bs*bs,k,j,idx,it;
156: const PetscInt *diag,*ai = a->i,*aj = a->j,*vi;
159: if (fshift == -1.0) fshift = 0.0; /* negative fshift indicates do not error on zero diagonal; this code never errors on zero diagonal */
160: its = its*lits;
161: if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
162: if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
163: if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
164: if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
165: if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
167: if (!a->idiagvalid) {MatInvertBlockDiagonal(A,NULL);}
169: if (!m) return(0);
170: diag = a->diag;
171: idiag = a->idiag;
172: k = PetscMax(A->rmap->n,A->cmap->n);
173: if (!a->mult_work) {
174: PetscMalloc1(k+1,&a->mult_work);
175: }
176: if (!a->sor_workt) {
177: PetscMalloc1(k,&a->sor_workt);
178: }
179: if (!a->sor_work) {
180: PetscMalloc1(bs,&a->sor_work);
181: }
182: work = a->mult_work;
183: t = a->sor_workt;
184: w = a->sor_work;
186: VecGetArray(xx,&x);
187: VecGetArrayRead(bb,&b);
189: if (flag & SOR_ZERO_INITIAL_GUESS) {
190: if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
191: switch (bs) {
192: case 1:
193: PetscKernel_v_gets_A_times_w_1(x,idiag,b);
194: t[0] = b[0];
195: i2 = 1;
196: idiag += 1;
197: for (i=1; i<m; i++) {
198: v = aa + ai[i];
199: vi = aj + ai[i];
200: nz = diag[i] - ai[i];
201: s[0] = b[i2];
202: for (j=0; j<nz; j++) {
203: xw[0] = x[vi[j]];
204: PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
205: }
206: t[i2] = s[0];
207: PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
208: x[i2] = xw[0];
209: idiag += 1;
210: i2 += 1;
211: }
212: break;
213: case 2:
214: PetscKernel_v_gets_A_times_w_2(x,idiag,b);
215: t[0] = b[0]; t[1] = b[1];
216: i2 = 2;
217: idiag += 4;
218: for (i=1; i<m; i++) {
219: v = aa + 4*ai[i];
220: vi = aj + ai[i];
221: nz = diag[i] - ai[i];
222: s[0] = b[i2]; s[1] = b[i2+1];
223: for (j=0; j<nz; j++) {
224: idx = 2*vi[j];
225: it = 4*j;
226: xw[0] = x[idx]; xw[1] = x[1+idx];
227: PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
228: }
229: t[i2] = s[0]; t[i2+1] = s[1];
230: PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
231: x[i2] = xw[0]; x[i2+1] = xw[1];
232: idiag += 4;
233: i2 += 2;
234: }
235: break;
236: case 3:
237: PetscKernel_v_gets_A_times_w_3(x,idiag,b);
238: t[0] = b[0]; t[1] = b[1]; t[2] = b[2];
239: i2 = 3;
240: idiag += 9;
241: for (i=1; i<m; i++) {
242: v = aa + 9*ai[i];
243: vi = aj + ai[i];
244: nz = diag[i] - ai[i];
245: s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2];
246: while (nz--) {
247: idx = 3*(*vi++);
248: xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
249: PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
250: v += 9;
251: }
252: t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2];
253: PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
254: x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
255: idiag += 9;
256: i2 += 3;
257: }
258: break;
259: case 4:
260: PetscKernel_v_gets_A_times_w_4(x,idiag,b);
261: t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3];
262: i2 = 4;
263: idiag += 16;
264: for (i=1; i<m; i++) {
265: v = aa + 16*ai[i];
266: vi = aj + ai[i];
267: nz = diag[i] - ai[i];
268: s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3];
269: while (nz--) {
270: idx = 4*(*vi++);
271: xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
272: PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
273: v += 16;
274: }
275: t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; t[i2 + 3] = s[3];
276: PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
277: x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3];
278: idiag += 16;
279: i2 += 4;
280: }
281: break;
282: case 5:
283: PetscKernel_v_gets_A_times_w_5(x,idiag,b);
284: t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; t[4] = b[4];
285: i2 = 5;
286: idiag += 25;
287: for (i=1; i<m; i++) {
288: v = aa + 25*ai[i];
289: vi = aj + ai[i];
290: nz = diag[i] - ai[i];
291: s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4];
292: while (nz--) {
293: idx = 5*(*vi++);
294: xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
295: PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
296: v += 25;
297: }
298: t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; t[i2+3] = s[3]; t[i2+4] = s[4];
299: PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
300: x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4];
301: idiag += 25;
302: i2 += 5;
303: }
304: break;
305: case 6:
306: PetscKernel_v_gets_A_times_w_6(x,idiag,b);
307: t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; t[4] = b[4]; t[5] = b[5];
308: i2 = 6;
309: idiag += 36;
310: for (i=1; i<m; i++) {
311: v = aa + 36*ai[i];
312: vi = aj + ai[i];
313: nz = diag[i] - ai[i];
314: s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5];
315: while (nz--) {
316: idx = 6*(*vi++);
317: xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
318: xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
319: PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
320: v += 36;
321: }
322: t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2];
323: t[i2+3] = s[3]; t[i2+4] = s[4]; t[i2+5] = s[5];
324: PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
325: x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5];
326: idiag += 36;
327: i2 += 6;
328: }
329: break;
330: case 7:
331: PetscKernel_v_gets_A_times_w_7(x,idiag,b);
332: t[0] = b[0]; t[1] = b[1]; t[2] = b[2];
333: t[3] = b[3]; t[4] = b[4]; t[5] = b[5]; t[6] = b[6];
334: i2 = 7;
335: idiag += 49;
336: for (i=1; i<m; i++) {
337: v = aa + 49*ai[i];
338: vi = aj + ai[i];
339: nz = diag[i] - ai[i];
340: s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2];
341: s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6];
342: while (nz--) {
343: idx = 7*(*vi++);
344: xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
345: xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
346: PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
347: v += 49;
348: }
349: t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2];
350: t[i2+3] = s[3]; t[i2+4] = s[4]; t[i2+5] = s[5]; t[i2+6] = s[6];
351: PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
352: x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
353: x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6];
354: idiag += 49;
355: i2 += 7;
356: }
357: break;
358: default:
359: PetscKernel_w_gets_Ar_times_v(bs,bs,b,idiag,x);
360: PetscMemcpy(t,b,bs*sizeof(PetscScalar));
361: i2 = bs;
362: idiag += bs2;
363: for (i=1; i<m; i++) {
364: v = aa + bs2*ai[i];
365: vi = aj + ai[i];
366: nz = diag[i] - ai[i];
368: PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));
369: /* copy all rows of x that are needed into contiguous space */
370: workt = work;
371: for (j=0; j<nz; j++) {
372: PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));
373: workt += bs;
374: }
375: PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
376: PetscMemcpy(t+i2,w,bs*sizeof(PetscScalar));
377: PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
379: idiag += bs2;
380: i2 += bs;
381: }
382: break;
383: }
384: /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
385: PetscLogFlops(1.0*bs2*a->nz);
386: xb = t;
387: }
388: else xb = b;
389: if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
390: idiag = a->idiag+bs2*(a->mbs-1);
391: i2 = bs * (m-1);
392: switch (bs) {
393: case 1:
394: s[0] = xb[i2];
395: PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
396: x[i2] = xw[0];
397: i2 -= 1;
398: for (i=m-2; i>=0; i--) {
399: v = aa + (diag[i]+1);
400: vi = aj + diag[i] + 1;
401: nz = ai[i+1] - diag[i] - 1;
402: s[0] = xb[i2];
403: for (j=0; j<nz; j++) {
404: xw[0] = x[vi[j]];
405: PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
406: }
407: PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
408: x[i2] = xw[0];
409: idiag -= 1;
410: i2 -= 1;
411: }
412: break;
413: case 2:
414: s[0] = xb[i2]; s[1] = xb[i2+1];
415: PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
416: x[i2] = xw[0]; x[i2+1] = xw[1];
417: i2 -= 2;
418: idiag -= 4;
419: for (i=m-2; i>=0; i--) {
420: v = aa + 4*(diag[i] + 1);
421: vi = aj + diag[i] + 1;
422: nz = ai[i+1] - diag[i] - 1;
423: s[0] = xb[i2]; s[1] = xb[i2+1];
424: for (j=0; j<nz; j++) {
425: idx = 2*vi[j];
426: it = 4*j;
427: xw[0] = x[idx]; xw[1] = x[1+idx];
428: PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
429: }
430: PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
431: x[i2] = xw[0]; x[i2+1] = xw[1];
432: idiag -= 4;
433: i2 -= 2;
434: }
435: break;
436: case 3:
437: s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2];
438: PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
439: x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
440: i2 -= 3;
441: idiag -= 9;
442: for (i=m-2; i>=0; i--) {
443: v = aa + 9*(diag[i]+1);
444: vi = aj + diag[i] + 1;
445: nz = ai[i+1] - diag[i] - 1;
446: s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2];
447: while (nz--) {
448: idx = 3*(*vi++);
449: xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
450: PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
451: v += 9;
452: }
453: PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
454: x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
455: idiag -= 9;
456: i2 -= 3;
457: }
458: break;
459: case 4:
460: s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3];
461: PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
462: x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3];
463: i2 -= 4;
464: idiag -= 16;
465: for (i=m-2; i>=0; i--) {
466: v = aa + 16*(diag[i]+1);
467: vi = aj + diag[i] + 1;
468: nz = ai[i+1] - diag[i] - 1;
469: s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3];
470: while (nz--) {
471: idx = 4*(*vi++);
472: xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
473: PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
474: v += 16;
475: }
476: PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
477: x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3];
478: idiag -= 16;
479: i2 -= 4;
480: }
481: break;
482: case 5:
483: s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4];
484: PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
485: x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4];
486: i2 -= 5;
487: idiag -= 25;
488: for (i=m-2; i>=0; i--) {
489: v = aa + 25*(diag[i]+1);
490: vi = aj + diag[i] + 1;
491: nz = ai[i+1] - diag[i] - 1;
492: s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4];
493: while (nz--) {
494: idx = 5*(*vi++);
495: xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
496: PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
497: v += 25;
498: }
499: PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
500: x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4];
501: idiag -= 25;
502: i2 -= 5;
503: }
504: break;
505: case 6:
506: s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5];
507: PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
508: x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5];
509: i2 -= 6;
510: idiag -= 36;
511: for (i=m-2; i>=0; i--) {
512: v = aa + 36*(diag[i]+1);
513: vi = aj + diag[i] + 1;
514: nz = ai[i+1] - diag[i] - 1;
515: s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5];
516: while (nz--) {
517: idx = 6*(*vi++);
518: xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
519: xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
520: PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
521: v += 36;
522: }
523: PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
524: x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5];
525: idiag -= 36;
526: i2 -= 6;
527: }
528: break;
529: case 7:
530: s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2];
531: s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; s[6] = xb[i2+6];
532: PetscKernel_v_gets_A_times_w_7(x,idiag,b);
533: x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
534: x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6];
535: i2 -= 7;
536: idiag -= 49;
537: for (i=m-2; i>=0; i--) {
538: v = aa + 49*(diag[i]+1);
539: vi = aj + diag[i] + 1;
540: nz = ai[i+1] - diag[i] - 1;
541: s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2];
542: s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; s[6] = xb[i2+6];
543: while (nz--) {
544: idx = 7*(*vi++);
545: xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
546: xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
547: PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
548: v += 49;
549: }
550: PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
551: x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
552: x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6];
553: idiag -= 49;
554: i2 -= 7;
555: }
556: break;
557: default:
558: PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));
559: PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
560: i2 -= bs;
561: idiag -= bs2;
562: for (i=m-2; i>=0; i--) {
563: v = aa + bs2*(diag[i]+1);
564: vi = aj + diag[i] + 1;
565: nz = ai[i+1] - diag[i] - 1;
567: PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));
568: /* copy all rows of x that are needed into contiguous space */
569: workt = work;
570: for (j=0; j<nz; j++) {
571: PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));
572: workt += bs;
573: }
574: PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
575: PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
577: idiag -= bs2;
578: i2 -= bs;
579: }
580: break;
581: }
582: PetscLogFlops(1.0*bs2*(a->nz));
583: }
584: its--;
585: }
586: while (its--) {
587: if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
588: idiag = a->idiag;
589: i2 = 0;
590: switch (bs) {
591: case 1:
592: for (i=0; i<m; i++) {
593: v = aa + ai[i];
594: vi = aj + ai[i];
595: nz = ai[i+1] - ai[i];
596: s[0] = b[i2];
597: for (j=0; j<nz; j++) {
598: xw[0] = x[vi[j]];
599: PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
600: }
601: PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
602: x[i2] += xw[0];
603: idiag += 1;
604: i2 += 1;
605: }
606: break;
607: case 2:
608: for (i=0; i<m; i++) {
609: v = aa + 4*ai[i];
610: vi = aj + ai[i];
611: nz = ai[i+1] - ai[i];
612: s[0] = b[i2]; s[1] = b[i2+1];
613: for (j=0; j<nz; j++) {
614: idx = 2*vi[j];
615: it = 4*j;
616: xw[0] = x[idx]; xw[1] = x[1+idx];
617: PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
618: }
619: PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
620: x[i2] += xw[0]; x[i2+1] += xw[1];
621: idiag += 4;
622: i2 += 2;
623: }
624: break;
625: case 3:
626: for (i=0; i<m; i++) {
627: v = aa + 9*ai[i];
628: vi = aj + ai[i];
629: nz = ai[i+1] - ai[i];
630: s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2];
631: while (nz--) {
632: idx = 3*(*vi++);
633: xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
634: PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
635: v += 9;
636: }
637: PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
638: x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
639: idiag += 9;
640: i2 += 3;
641: }
642: break;
643: case 4:
644: for (i=0; i<m; i++) {
645: v = aa + 16*ai[i];
646: vi = aj + ai[i];
647: nz = ai[i+1] - ai[i];
648: s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3];
649: while (nz--) {
650: idx = 4*(*vi++);
651: xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
652: PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
653: v += 16;
654: }
655: PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
656: x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3];
657: idiag += 16;
658: i2 += 4;
659: }
660: break;
661: case 5:
662: for (i=0; i<m; i++) {
663: v = aa + 25*ai[i];
664: vi = aj + ai[i];
665: nz = ai[i+1] - ai[i];
666: s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4];
667: while (nz--) {
668: idx = 5*(*vi++);
669: xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
670: PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
671: v += 25;
672: }
673: PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
674: x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; x[i2+4] += xw[4];
675: idiag += 25;
676: i2 += 5;
677: }
678: break;
679: case 6:
680: for (i=0; i<m; i++) {
681: v = aa + 36*ai[i];
682: vi = aj + ai[i];
683: nz = ai[i+1] - ai[i];
684: s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5];
685: while (nz--) {
686: idx = 6*(*vi++);
687: xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
688: xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
689: PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
690: v += 36;
691: }
692: PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
693: x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
694: x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5];
695: idiag += 36;
696: i2 += 6;
697: }
698: break;
699: case 7:
700: for (i=0; i<m; i++) {
701: v = aa + 49*ai[i];
702: vi = aj + ai[i];
703: nz = ai[i+1] - ai[i];
704: s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2];
705: s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6];
706: while (nz--) {
707: idx = 7*(*vi++);
708: xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
709: xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
710: PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
711: v += 49;
712: }
713: PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
714: x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
715: x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; x[i2+6] += xw[6];
716: idiag += 49;
717: i2 += 7;
718: }
719: break;
720: default:
721: for (i=0; i<m; i++) {
722: v = aa + bs2*ai[i];
723: vi = aj + ai[i];
724: nz = ai[i+1] - ai[i];
726: PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));
727: /* copy all rows of x that are needed into contiguous space */
728: workt = work;
729: for (j=0; j<nz; j++) {
730: PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));
731: workt += bs;
732: }
733: PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
734: PetscKernel_w_gets_w_plus_Ar_times_v(bs,bs,w,idiag,x+i2);
736: idiag += bs2;
737: i2 += bs;
738: }
739: break;
740: }
741: PetscLogFlops(2.0*bs2*a->nz);
742: }
743: if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
744: idiag = a->idiag+bs2*(a->mbs-1);
745: i2 = bs * (m-1);
746: switch (bs) {
747: case 1:
748: for (i=m-1; i>=0; i--) {
749: v = aa + ai[i];
750: vi = aj + ai[i];
751: nz = ai[i+1] - ai[i];
752: s[0] = b[i2];
753: for (j=0; j<nz; j++) {
754: xw[0] = x[vi[j]];
755: PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
756: }
757: PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
758: x[i2] += xw[0];
759: idiag -= 1;
760: i2 -= 1;
761: }
762: break;
763: case 2:
764: for (i=m-1; i>=0; i--) {
765: v = aa + 4*ai[i];
766: vi = aj + ai[i];
767: nz = ai[i+1] - ai[i];
768: s[0] = b[i2]; s[1] = b[i2+1];
769: for (j=0; j<nz; j++) {
770: idx = 2*vi[j];
771: it = 4*j;
772: xw[0] = x[idx]; xw[1] = x[1+idx];
773: PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
774: }
775: PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
776: x[i2] += xw[0]; x[i2+1] += xw[1];
777: idiag -= 4;
778: i2 -= 2;
779: }
780: break;
781: case 3:
782: for (i=m-1; i>=0; i--) {
783: v = aa + 9*ai[i];
784: vi = aj + ai[i];
785: nz = ai[i+1] - ai[i];
786: s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2];
787: while (nz--) {
788: idx = 3*(*vi++);
789: xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
790: PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
791: v += 9;
792: }
793: PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
794: x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
795: idiag -= 9;
796: i2 -= 3;
797: }
798: break;
799: case 4:
800: for (i=m-1; i>=0; i--) {
801: v = aa + 16*ai[i];
802: vi = aj + ai[i];
803: nz = ai[i+1] - ai[i];
804: s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3];
805: while (nz--) {
806: idx = 4*(*vi++);
807: xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
808: PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
809: v += 16;
810: }
811: PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
812: x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3];
813: idiag -= 16;
814: i2 -= 4;
815: }
816: break;
817: case 5:
818: for (i=m-1; i>=0; i--) {
819: v = aa + 25*ai[i];
820: vi = aj + ai[i];
821: nz = ai[i+1] - ai[i];
822: s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4];
823: while (nz--) {
824: idx = 5*(*vi++);
825: xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
826: PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
827: v += 25;
828: }
829: PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
830: x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; x[i2+4] += xw[4];
831: idiag -= 25;
832: i2 -= 5;
833: }
834: break;
835: case 6:
836: for (i=m-1; i>=0; i--) {
837: v = aa + 36*ai[i];
838: vi = aj + ai[i];
839: nz = ai[i+1] - ai[i];
840: s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5];
841: while (nz--) {
842: idx = 6*(*vi++);
843: xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
844: xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
845: PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
846: v += 36;
847: }
848: PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
849: x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
850: x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5];
851: idiag -= 36;
852: i2 -= 6;
853: }
854: break;
855: case 7:
856: for (i=m-1; i>=0; i--) {
857: v = aa + 49*ai[i];
858: vi = aj + ai[i];
859: nz = ai[i+1] - ai[i];
860: s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2];
861: s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6];
862: while (nz--) {
863: idx = 7*(*vi++);
864: xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
865: xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
866: PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
867: v += 49;
868: }
869: PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
870: x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
871: x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; x[i2+6] += xw[6];
872: idiag -= 49;
873: i2 -= 7;
874: }
875: break;
876: default:
877: for (i=m-1; i>=0; i--) {
878: v = aa + bs2*ai[i];
879: vi = aj + ai[i];
880: nz = ai[i+1] - ai[i];
882: PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));
883: /* copy all rows of x that are needed into contiguous space */
884: workt = work;
885: for (j=0; j<nz; j++) {
886: PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));
887: workt += bs;
888: }
889: PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
890: PetscKernel_w_gets_w_plus_Ar_times_v(bs,bs,w,idiag,x+i2);
892: idiag -= bs2;
893: i2 -= bs;
894: }
895: break;
896: }
897: PetscLogFlops(2.0*bs2*(a->nz));
898: }
899: }
900: VecRestoreArray(xx,&x);
901: VecRestoreArrayRead(bb,&b);
902: return(0);
903: }
906: /*
907: Special version for direct calls from Fortran (Used in PETSc-fun3d)
908: */
909: #if defined(PETSC_HAVE_FORTRAN_CAPS)
910: #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4
911: #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
912: #define matsetvaluesblocked4_ matsetvaluesblocked4
913: #endif
917: PETSC_EXTERN void matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[])
918: {
919: Mat A = *AA;
920: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
921: PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn;
922: PetscInt *ai =a->i,*ailen=a->ilen;
923: PetscInt *aj =a->j,stepval,lastcol = -1;
924: const PetscScalar *value = v;
925: MatScalar *ap,*aa = a->a,*bap;
928: if (A->rmap->bs != 4) SETERRABORT(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4");
929: stepval = (n-1)*4;
930: for (k=0; k<m; k++) { /* loop over added rows */
931: row = im[k];
932: rp = aj + ai[row];
933: ap = aa + 16*ai[row];
934: nrow = ailen[row];
935: low = 0;
936: high = nrow;
937: for (l=0; l<n; l++) { /* loop over added columns */
938: col = in[l];
939: if (col <= lastcol) low = 0;
940: else high = nrow;
941: lastcol = col;
942: value = v + k*(stepval+4 + l)*4;
943: while (high-low > 7) {
944: t = (low+high)/2;
945: if (rp[t] > col) high = t;
946: else low = t;
947: }
948: for (i=low; i<high; i++) {
949: if (rp[i] > col) break;
950: if (rp[i] == col) {
951: bap = ap + 16*i;
952: for (ii=0; ii<4; ii++,value+=stepval) {
953: for (jj=ii; jj<16; jj+=4) {
954: bap[jj] += *value++;
955: }
956: }
957: goto noinsert2;
958: }
959: }
960: N = nrow++ - 1;
961: high++; /* added new column index thus must search to one higher than before */
962: /* shift up all the later entries in this row */
963: for (ii=N; ii>=i; ii--) {
964: rp[ii+1] = rp[ii];
965: PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
966: }
967: if (N >= i) {
968: PetscMemzero(ap+16*i,16*sizeof(MatScalar));
969: }
970: rp[i] = col;
971: bap = ap + 16*i;
972: for (ii=0; ii<4; ii++,value+=stepval) {
973: for (jj=ii; jj<16; jj+=4) {
974: bap[jj] = *value++;
975: }
976: }
977: noinsert2:;
978: low = i;
979: }
980: ailen[row] = nrow;
981: }
982: PetscFunctionReturnVoid();
983: }
985: #if defined(PETSC_HAVE_FORTRAN_CAPS)
986: #define matsetvalues4_ MATSETVALUES4
987: #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
988: #define matsetvalues4_ matsetvalues4
989: #endif
993: PETSC_EXTERN void matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v)
994: {
995: Mat A = *AA;
996: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
997: PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm;
998: PetscInt *ai=a->i,*ailen=a->ilen;
999: PetscInt *aj=a->j,brow,bcol;
1000: PetscInt ridx,cidx,lastcol = -1;
1001: MatScalar *ap,value,*aa=a->a,*bap;
1004: for (k=0; k<m; k++) { /* loop over added rows */
1005: row = im[k]; brow = row/4;
1006: rp = aj + ai[brow];
1007: ap = aa + 16*ai[brow];
1008: nrow = ailen[brow];
1009: low = 0;
1010: high = nrow;
1011: for (l=0; l<n; l++) { /* loop over added columns */
1012: col = in[l]; bcol = col/4;
1013: ridx = row % 4; cidx = col % 4;
1014: value = v[l + k*n];
1015: if (col <= lastcol) low = 0;
1016: else high = nrow;
1017: lastcol = col;
1018: while (high-low > 7) {
1019: t = (low+high)/2;
1020: if (rp[t] > bcol) high = t;
1021: else low = t;
1022: }
1023: for (i=low; i<high; i++) {
1024: if (rp[i] > bcol) break;
1025: if (rp[i] == bcol) {
1026: bap = ap + 16*i + 4*cidx + ridx;
1027: *bap += value;
1028: goto noinsert1;
1029: }
1030: }
1031: N = nrow++ - 1;
1032: high++; /* added new column thus must search to one higher than before */
1033: /* shift up all the later entries in this row */
1034: for (ii=N; ii>=i; ii--) {
1035: rp[ii+1] = rp[ii];
1036: PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
1037: }
1038: if (N>=i) {
1039: PetscMemzero(ap+16*i,16*sizeof(MatScalar));
1040: }
1041: rp[i] = bcol;
1042: ap[16*i + 4*cidx + ridx] = value;
1043: noinsert1:;
1044: low = i;
1045: }
1046: ailen[brow] = nrow;
1047: }
1048: PetscFunctionReturnVoid();
1049: }
1051: /*
1052: Checks for missing diagonals
1053: */
1056: PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A,PetscBool *missing,PetscInt *d)
1057: {
1058: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1060: PetscInt *diag,*ii = a->i,i;
1063: MatMarkDiagonal_SeqBAIJ(A);
1064: *missing = PETSC_FALSE;
1065: if (A->rmap->n > 0 && !ii) {
1066: *missing = PETSC_TRUE;
1067: if (d) *d = 0;
1068: PetscInfo(A,"Matrix has no entries therefore is missing diagonal\n");
1069: } else {
1070: diag = a->diag;
1071: for (i=0; i<a->mbs; i++) {
1072: if (diag[i] >= ii[i+1]) {
1073: *missing = PETSC_TRUE;
1074: if (d) *d = i;
1075: PetscInfo1(A,"Matrix is missing block diagonal number %D\n",i);
1076: break;
1077: }
1078: }
1079: }
1080: return(0);
1081: }
1085: PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A)
1086: {
1087: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1089: PetscInt i,j,m = a->mbs;
1092: if (!a->diag) {
1093: PetscMalloc1(m,&a->diag);
1094: PetscLogObjectMemory((PetscObject)A,m*sizeof(PetscInt));
1095: a->free_diag = PETSC_TRUE;
1096: }
1097: for (i=0; i<m; i++) {
1098: a->diag[i] = a->i[i+1];
1099: for (j=a->i[i]; j<a->i[i+1]; j++) {
1100: if (a->j[j] == i) {
1101: a->diag[i] = j;
1102: break;
1103: }
1104: }
1105: }
1106: return(0);
1107: }
1112: static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *inia[],const PetscInt *inja[],PetscBool *done)
1113: {
1114: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1116: PetscInt i,j,n = a->mbs,nz = a->i[n],*tia,*tja,bs = A->rmap->bs,k,l,cnt;
1117: PetscInt **ia = (PetscInt**)inia,**ja = (PetscInt**)inja;
1120: *nn = n;
1121: if (!ia) return(0);
1122: if (symmetric) {
1123: MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,PETSC_TRUE,0,0,&tia,&tja);
1124: nz = tia[n];
1125: } else {
1126: tia = a->i; tja = a->j;
1127: }
1129: if (!blockcompressed && bs > 1) {
1130: (*nn) *= bs;
1131: /* malloc & create the natural set of indices */
1132: PetscMalloc1((n+1)*bs,ia);
1133: if (n) {
1134: (*ia)[0] = oshift;
1135: for (j=1; j<bs; j++) {
1136: (*ia)[j] = (tia[1]-tia[0])*bs+(*ia)[j-1];
1137: }
1138: }
1140: for (i=1; i<n; i++) {
1141: (*ia)[i*bs] = (tia[i]-tia[i-1])*bs + (*ia)[i*bs-1];
1142: for (j=1; j<bs; j++) {
1143: (*ia)[i*bs+j] = (tia[i+1]-tia[i])*bs + (*ia)[i*bs+j-1];
1144: }
1145: }
1146: if (n) {
1147: (*ia)[n*bs] = (tia[n]-tia[n-1])*bs + (*ia)[n*bs-1];
1148: }
1150: if (inja) {
1151: PetscMalloc1(nz*bs*bs,ja);
1152: cnt = 0;
1153: for (i=0; i<n; i++) {
1154: for (j=0; j<bs; j++) {
1155: for (k=tia[i]; k<tia[i+1]; k++) {
1156: for (l=0; l<bs; l++) {
1157: (*ja)[cnt++] = bs*tja[k] + l;
1158: }
1159: }
1160: }
1161: }
1162: }
1164: if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */
1165: PetscFree(tia);
1166: PetscFree(tja);
1167: }
1168: } else if (oshift == 1) {
1169: if (symmetric) {
1170: nz = tia[A->rmap->n/bs];
1171: /* add 1 to i and j indices */
1172: for (i=0; i<A->rmap->n/bs+1; i++) tia[i] = tia[i] + 1;
1173: *ia = tia;
1174: if (ja) {
1175: for (i=0; i<nz; i++) tja[i] = tja[i] + 1;
1176: *ja = tja;
1177: }
1178: } else {
1179: nz = a->i[A->rmap->n/bs];
1180: /* malloc space and add 1 to i and j indices */
1181: PetscMalloc1(A->rmap->n/bs+1,ia);
1182: for (i=0; i<A->rmap->n/bs+1; i++) (*ia)[i] = a->i[i] + 1;
1183: if (ja) {
1184: PetscMalloc1(nz,ja);
1185: for (i=0; i<nz; i++) (*ja)[i] = a->j[i] + 1;
1186: }
1187: }
1188: } else {
1189: *ia = tia;
1190: if (ja) *ja = tja;
1191: }
1192: return(0);
1193: }
1197: static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool *done)
1198: {
1202: if (!ia) return(0);
1203: if ((!blockcompressed && A->rmap->bs > 1) || (symmetric || oshift == 1)) {
1204: PetscFree(*ia);
1205: if (ja) {PetscFree(*ja);}
1206: }
1207: return(0);
1208: }
1212: PetscErrorCode MatDestroy_SeqBAIJ(Mat A)
1213: {
1214: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1218: #if defined(PETSC_USE_LOG)
1219: PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap->N,A->cmap->n,a->nz);
1220: #endif
1221: MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);
1222: ISDestroy(&a->row);
1223: ISDestroy(&a->col);
1224: if (a->free_diag) {PetscFree(a->diag);}
1225: PetscFree(a->idiag);
1226: if (a->free_imax_ilen) {PetscFree2(a->imax,a->ilen);}
1227: PetscFree(a->solve_work);
1228: PetscFree(a->mult_work);
1229: PetscFree(a->sor_workt);
1230: PetscFree(a->sor_work);
1231: ISDestroy(&a->icol);
1232: PetscFree(a->saved_values);
1233: PetscFree2(a->compressedrow.i,a->compressedrow.rindex);
1235: MatDestroy(&a->sbaijMat);
1236: MatDestroy(&a->parent);
1237: PetscFree(A->data);
1239: PetscObjectChangeTypeName((PetscObject)A,0);
1240: PetscObjectComposeFunction((PetscObject)A,"MatInvertBlockDiagonal_C",NULL);
1241: PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C",NULL);
1242: PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C",NULL);
1243: PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C",NULL);
1244: PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C",NULL);
1245: PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C",NULL);
1246: PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C",NULL);
1247: PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocationCSR_C",NULL);
1248: PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqbstrm_C",NULL);
1249: PetscObjectComposeFunction((PetscObject)A,"MatIsTranspose_C",NULL);
1250: return(0);
1251: }
1255: PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op,PetscBool flg)
1256: {
1257: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1261: switch (op) {
1262: case MAT_ROW_ORIENTED:
1263: a->roworiented = flg;
1264: break;
1265: case MAT_KEEP_NONZERO_PATTERN:
1266: a->keepnonzeropattern = flg;
1267: break;
1268: case MAT_NEW_NONZERO_LOCATIONS:
1269: a->nonew = (flg ? 0 : 1);
1270: break;
1271: case MAT_NEW_NONZERO_LOCATION_ERR:
1272: a->nonew = (flg ? -1 : 0);
1273: break;
1274: case MAT_NEW_NONZERO_ALLOCATION_ERR:
1275: a->nonew = (flg ? -2 : 0);
1276: break;
1277: case MAT_UNUSED_NONZERO_LOCATION_ERR:
1278: a->nounused = (flg ? -1 : 0);
1279: break;
1280: case MAT_NEW_DIAGONALS:
1281: case MAT_IGNORE_OFF_PROC_ENTRIES:
1282: case MAT_USE_HASH_TABLE:
1283: PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);
1284: break;
1285: case MAT_SPD:
1286: case MAT_SYMMETRIC:
1287: case MAT_STRUCTURALLY_SYMMETRIC:
1288: case MAT_HERMITIAN:
1289: case MAT_SYMMETRY_ETERNAL:
1290: /* These options are handled directly by MatSetOption() */
1291: break;
1292: default:
1293: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1294: }
1295: return(0);
1296: }
1298: /* used for both SeqBAIJ and SeqSBAIJ matrices */
1301: PetscErrorCode MatGetRow_SeqBAIJ_private(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v,PetscInt *ai,PetscInt *aj,PetscScalar *aa)
1302: {
1304: PetscInt itmp,i,j,k,M,bn,bp,*idx_i,bs,bs2;
1305: MatScalar *aa_i;
1306: PetscScalar *v_i;
1309: bs = A->rmap->bs;
1310: bs2 = bs*bs;
1311: if (row < 0 || row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row);
1313: bn = row/bs; /* Block number */
1314: bp = row % bs; /* Block Position */
1315: M = ai[bn+1] - ai[bn];
1316: *nz = bs*M;
1318: if (v) {
1319: *v = 0;
1320: if (*nz) {
1321: PetscMalloc1(*nz,v);
1322: for (i=0; i<M; i++) { /* for each block in the block row */
1323: v_i = *v + i*bs;
1324: aa_i = aa + bs2*(ai[bn] + i);
1325: for (j=bp,k=0; j<bs2; j+=bs,k++) v_i[k] = aa_i[j];
1326: }
1327: }
1328: }
1330: if (idx) {
1331: *idx = 0;
1332: if (*nz) {
1333: PetscMalloc1(*nz,idx);
1334: for (i=0; i<M; i++) { /* for each block in the block row */
1335: idx_i = *idx + i*bs;
1336: itmp = bs*aj[ai[bn] + i];
1337: for (j=0; j<bs; j++) idx_i[j] = itmp++;
1338: }
1339: }
1340: }
1341: return(0);
1342: }
1346: PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1347: {
1348: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1352: MatGetRow_SeqBAIJ_private(A,row,nz,idx,v,a->i,a->j,a->a);
1353: return(0);
1354: }
1358: PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1359: {
1363: if (idx) {PetscFree(*idx);}
1364: if (v) {PetscFree(*v);}
1365: return(0);
1366: }
1368: extern PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
1372: PetscErrorCode MatTranspose_SeqBAIJ(Mat A,MatReuse reuse,Mat *B)
1373: {
1374: Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data;
1375: Mat C;
1377: PetscInt i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap->bs,mbs=a->mbs,nbs=a->nbs,len,*col;
1378: PetscInt *rows,*cols,bs2=a->bs2;
1379: MatScalar *array;
1382: if (reuse == MAT_REUSE_MATRIX && A == *B && mbs != nbs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1383: if (reuse == MAT_INITIAL_MATRIX || A == *B) {
1384: PetscCalloc1(1+nbs,&col);
1386: for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1;
1387: MatCreate(PetscObjectComm((PetscObject)A),&C);
1388: MatSetSizes(C,A->cmap->n,A->rmap->N,A->cmap->n,A->rmap->N);
1389: MatSetType(C,((PetscObject)A)->type_name);
1390: MatSeqBAIJSetPreallocation_SeqBAIJ(C,bs,0,col);
1391: PetscFree(col);
1392: } else {
1393: C = *B;
1394: }
1396: array = a->a;
1397: PetscMalloc2(bs,&rows,bs,&cols);
1398: for (i=0; i<mbs; i++) {
1399: cols[0] = i*bs;
1400: for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1;
1401: len = ai[i+1] - ai[i];
1402: for (j=0; j<len; j++) {
1403: rows[0] = (*aj++)*bs;
1404: for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1;
1405: MatSetValues_SeqBAIJ(C,bs,rows,bs,cols,array,INSERT_VALUES);
1406: array += bs2;
1407: }
1408: }
1409: PetscFree2(rows,cols);
1411: MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);
1412: MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);
1414: if (reuse == MAT_INITIAL_MATRIX || *B != A) {
1415: *B = C;
1416: } else {
1417: MatHeaderMerge(A,&C);
1418: }
1419: return(0);
1420: }
1424: PetscErrorCode MatIsTranspose_SeqBAIJ(Mat A,Mat B,PetscReal tol,PetscBool *f)
1425: {
1427: Mat Btrans;
1430: *f = PETSC_FALSE;
1431: MatTranspose_SeqBAIJ(A,MAT_INITIAL_MATRIX,&Btrans);
1432: MatEqual_SeqBAIJ(B,Btrans,f);
1433: MatDestroy(&Btrans);
1434: return(0);
1435: }
1439: static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer)
1440: {
1441: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1443: PetscInt i,*col_lens,bs = A->rmap->bs,count,*jj,j,k,l,bs2=a->bs2;
1444: int fd;
1445: PetscScalar *aa;
1446: FILE *file;
1449: PetscViewerBinaryGetDescriptor(viewer,&fd);
1450: PetscMalloc1(4+A->rmap->N,&col_lens);
1451: col_lens[0] = MAT_FILE_CLASSID;
1453: col_lens[1] = A->rmap->N;
1454: col_lens[2] = A->cmap->n;
1455: col_lens[3] = a->nz*bs2;
1457: /* store lengths of each row and write (including header) to file */
1458: count = 0;
1459: for (i=0; i<a->mbs; i++) {
1460: for (j=0; j<bs; j++) {
1461: col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
1462: }
1463: }
1464: PetscBinaryWrite(fd,col_lens,4+A->rmap->N,PETSC_INT,PETSC_TRUE);
1465: PetscFree(col_lens);
1467: /* store column indices (zero start index) */
1468: PetscMalloc1((a->nz+1)*bs2,&jj);
1469: count = 0;
1470: for (i=0; i<a->mbs; i++) {
1471: for (j=0; j<bs; j++) {
1472: for (k=a->i[i]; k<a->i[i+1]; k++) {
1473: for (l=0; l<bs; l++) {
1474: jj[count++] = bs*a->j[k] + l;
1475: }
1476: }
1477: }
1478: }
1479: PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);
1480: PetscFree(jj);
1482: /* store nonzero values */
1483: PetscMalloc1((a->nz+1)*bs2,&aa);
1484: count = 0;
1485: for (i=0; i<a->mbs; i++) {
1486: for (j=0; j<bs; j++) {
1487: for (k=a->i[i]; k<a->i[i+1]; k++) {
1488: for (l=0; l<bs; l++) {
1489: aa[count++] = a->a[bs2*k + l*bs + j];
1490: }
1491: }
1492: }
1493: }
1494: PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);
1495: PetscFree(aa);
1497: PetscViewerBinaryGetInfoPointer(viewer,&file);
1498: if (file) {
1499: fprintf(file,"-matload_block_size %d\n",(int)A->rmap->bs);
1500: }
1501: return(0);
1502: }
1506: static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer)
1507: {
1508: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1509: PetscErrorCode ierr;
1510: PetscInt i,j,bs = A->rmap->bs,k,l,bs2=a->bs2;
1511: PetscViewerFormat format;
1514: PetscViewerGetFormat(viewer,&format);
1515: if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1516: PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);
1517: } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
1518: const char *matname;
1519: Mat aij;
1520: MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);
1521: PetscObjectGetName((PetscObject)A,&matname);
1522: PetscObjectSetName((PetscObject)aij,matname);
1523: MatView(aij,viewer);
1524: MatDestroy(&aij);
1525: } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1526: return(0);
1527: } else if (format == PETSC_VIEWER_ASCII_COMMON) {
1528: PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);
1529: for (i=0; i<a->mbs; i++) {
1530: for (j=0; j<bs; j++) {
1531: PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);
1532: for (k=a->i[i]; k<a->i[i+1]; k++) {
1533: for (l=0; l<bs; l++) {
1534: #if defined(PETSC_USE_COMPLEX)
1535: if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1536: PetscViewerASCIIPrintf(viewer," (%D, %g + %gi) ",bs*a->j[k]+l,
1537: (double)PetscRealPart(a->a[bs2*k + l*bs + j]),(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));
1538: } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1539: PetscViewerASCIIPrintf(viewer," (%D, %g - %gi) ",bs*a->j[k]+l,
1540: (double)PetscRealPart(a->a[bs2*k + l*bs + j]),-(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));
1541: } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1542: PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)PetscRealPart(a->a[bs2*k + l*bs + j]));
1543: }
1544: #else
1545: if (a->a[bs2*k + l*bs + j] != 0.0) {
1546: PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)a->a[bs2*k + l*bs + j]);
1547: }
1548: #endif
1549: }
1550: }
1551: PetscViewerASCIIPrintf(viewer,"\n");
1552: }
1553: }
1554: PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);
1555: } else {
1556: PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);
1557: for (i=0; i<a->mbs; i++) {
1558: for (j=0; j<bs; j++) {
1559: PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);
1560: for (k=a->i[i]; k<a->i[i+1]; k++) {
1561: for (l=0; l<bs; l++) {
1562: #if defined(PETSC_USE_COMPLEX)
1563: if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
1564: PetscViewerASCIIPrintf(viewer," (%D, %g + %g i) ",bs*a->j[k]+l,
1565: (double)PetscRealPart(a->a[bs2*k + l*bs + j]),(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));
1566: } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
1567: PetscViewerASCIIPrintf(viewer," (%D, %g - %g i) ",bs*a->j[k]+l,
1568: (double)PetscRealPart(a->a[bs2*k + l*bs + j]),-(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));
1569: } else {
1570: PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)PetscRealPart(a->a[bs2*k + l*bs + j]));
1571: }
1572: #else
1573: PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)a->a[bs2*k + l*bs + j]);
1574: #endif
1575: }
1576: }
1577: PetscViewerASCIIPrintf(viewer,"\n");
1578: }
1579: }
1580: PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);
1581: }
1582: PetscViewerFlush(viewer);
1583: return(0);
1584: }
1586: #include <petscdraw.h>
1589: static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
1590: {
1591: Mat A = (Mat) Aa;
1592: Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data;
1593: PetscErrorCode ierr;
1594: PetscInt row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap->bs,bs2=a->bs2;
1595: PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r;
1596: MatScalar *aa;
1597: PetscViewer viewer;
1598: PetscViewerFormat format;
1601: PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);
1602: PetscViewerGetFormat(viewer,&format);
1603: PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);
1605: /* loop over matrix elements drawing boxes */
1607: if (format != PETSC_VIEWER_DRAW_CONTOUR) {
1608: PetscDrawCollectiveBegin(draw);
1609: /* Blue for negative, Cyan for zero and Red for positive */
1610: color = PETSC_DRAW_BLUE;
1611: for (i=0,row=0; i<mbs; i++,row+=bs) {
1612: for (j=a->i[i]; j<a->i[i+1]; j++) {
1613: y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1614: x_l = a->j[j]*bs; x_r = x_l + 1.0;
1615: aa = a->a + j*bs2;
1616: for (k=0; k<bs; k++) {
1617: for (l=0; l<bs; l++) {
1618: if (PetscRealPart(*aa++) >= 0.) continue;
1619: PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
1620: }
1621: }
1622: }
1623: }
1624: color = PETSC_DRAW_CYAN;
1625: for (i=0,row=0; i<mbs; i++,row+=bs) {
1626: for (j=a->i[i]; j<a->i[i+1]; j++) {
1627: y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1628: x_l = a->j[j]*bs; x_r = x_l + 1.0;
1629: aa = a->a + j*bs2;
1630: for (k=0; k<bs; k++) {
1631: for (l=0; l<bs; l++) {
1632: if (PetscRealPart(*aa++) != 0.) continue;
1633: PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
1634: }
1635: }
1636: }
1637: }
1638: color = PETSC_DRAW_RED;
1639: for (i=0,row=0; i<mbs; i++,row+=bs) {
1640: for (j=a->i[i]; j<a->i[i+1]; j++) {
1641: y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1642: x_l = a->j[j]*bs; x_r = x_l + 1.0;
1643: aa = a->a + j*bs2;
1644: for (k=0; k<bs; k++) {
1645: for (l=0; l<bs; l++) {
1646: if (PetscRealPart(*aa++) <= 0.) continue;
1647: PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
1648: }
1649: }
1650: }
1651: }
1652: PetscDrawCollectiveEnd(draw);
1653: } else {
1654: /* use contour shading to indicate magnitude of values */
1655: /* first determine max of all nonzero values */
1656: PetscReal minv = 0.0, maxv = 0.0;
1657: PetscDraw popup;
1659: for (i=0; i<a->nz*a->bs2; i++) {
1660: if (PetscAbsScalar(a->a[i]) > maxv) maxv = PetscAbsScalar(a->a[i]);
1661: }
1662: if (minv >= maxv) maxv = minv + PETSC_SMALL;
1663: PetscDrawGetPopup(draw,&popup);
1664: PetscDrawScalePopup(popup,0.0,maxv);
1666: PetscDrawCollectiveBegin(draw);
1667: for (i=0,row=0; i<mbs; i++,row+=bs) {
1668: for (j=a->i[i]; j<a->i[i+1]; j++) {
1669: y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1670: x_l = a->j[j]*bs; x_r = x_l + 1.0;
1671: aa = a->a + j*bs2;
1672: for (k=0; k<bs; k++) {
1673: for (l=0; l<bs; l++) {
1674: MatScalar v = *aa++;
1675: color = PetscDrawRealToColor(PetscAbsScalar(v),minv,maxv);
1676: PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
1677: }
1678: }
1679: }
1680: }
1681: PetscDrawCollectiveEnd(draw);
1682: }
1683: return(0);
1684: }
1688: static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer)
1689: {
1691: PetscReal xl,yl,xr,yr,w,h;
1692: PetscDraw draw;
1693: PetscBool isnull;
1696: PetscViewerDrawGetDraw(viewer,0,&draw);
1697: PetscDrawIsNull(draw,&isnull);
1698: if (isnull) return(0);
1700: xr = A->cmap->n; yr = A->rmap->N; h = yr/10.0; w = xr/10.0;
1701: xr += w; yr += h; xl = -w; yl = -h;
1702: PetscDrawSetCoordinates(draw,xl,yl,xr,yr);
1703: PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);
1704: PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);
1705: PetscObjectCompose((PetscObject)A,"Zoomviewer",NULL);
1706: PetscDrawSave(draw);
1707: return(0);
1708: }
1712: PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer)
1713: {
1715: PetscBool iascii,isbinary,isdraw;
1718: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);
1719: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);
1720: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);
1721: if (iascii) {
1722: MatView_SeqBAIJ_ASCII(A,viewer);
1723: } else if (isbinary) {
1724: MatView_SeqBAIJ_Binary(A,viewer);
1725: } else if (isdraw) {
1726: MatView_SeqBAIJ_Draw(A,viewer);
1727: } else {
1728: Mat B;
1729: MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);
1730: MatView(B,viewer);
1731: MatDestroy(&B);
1732: }
1733: return(0);
1734: }
1739: PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[])
1740: {
1741: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1742: PetscInt *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
1743: PetscInt *ai = a->i,*ailen = a->ilen;
1744: PetscInt brow,bcol,ridx,cidx,bs=A->rmap->bs,bs2=a->bs2;
1745: MatScalar *ap,*aa = a->a;
1748: for (k=0; k<m; k++) { /* loop over rows */
1749: row = im[k]; brow = row/bs;
1750: if (row < 0) {v += n; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
1751: if (row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row);
1752: rp = aj + ai[brow]; ap = aa + bs2*ai[brow];
1753: nrow = ailen[brow];
1754: for (l=0; l<n; l++) { /* loop over columns */
1755: if (in[l] < 0) {v++; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
1756: if (in[l] >= A->cmap->n) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]);
1757: col = in[l];
1758: bcol = col/bs;
1759: cidx = col%bs;
1760: ridx = row%bs;
1761: high = nrow;
1762: low = 0; /* assume unsorted */
1763: while (high-low > 5) {
1764: t = (low+high)/2;
1765: if (rp[t] > bcol) high = t;
1766: else low = t;
1767: }
1768: for (i=low; i<high; i++) {
1769: if (rp[i] > bcol) break;
1770: if (rp[i] == bcol) {
1771: *v++ = ap[bs2*i+bs*cidx+ridx];
1772: goto finished;
1773: }
1774: }
1775: *v++ = 0.0;
1776: finished:;
1777: }
1778: }
1779: return(0);
1780: }
1784: PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
1785: {
1786: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1787: PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1;
1788: PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen;
1789: PetscErrorCode ierr;
1790: PetscInt *aj =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs,stepval;
1791: PetscBool roworiented=a->roworiented;
1792: const PetscScalar *value = v;
1793: MatScalar *ap,*aa = a->a,*bap;
1796: if (roworiented) {
1797: stepval = (n-1)*bs;
1798: } else {
1799: stepval = (m-1)*bs;
1800: }
1801: for (k=0; k<m; k++) { /* loop over added rows */
1802: row = im[k];
1803: if (row < 0) continue;
1804: #if defined(PETSC_USE_DEBUG)
1805: if (row >= a->mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block row index too large %D max %D",row,a->mbs-1);
1806: #endif
1807: rp = aj + ai[row];
1808: ap = aa + bs2*ai[row];
1809: rmax = imax[row];
1810: nrow = ailen[row];
1811: low = 0;
1812: high = nrow;
1813: for (l=0; l<n; l++) { /* loop over added columns */
1814: if (in[l] < 0) continue;
1815: #if defined(PETSC_USE_DEBUG)
1816: if (in[l] >= a->nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block column index too large %D max %D",in[l],a->nbs-1);
1817: #endif
1818: col = in[l];
1819: if (roworiented) {
1820: value = v + (k*(stepval+bs) + l)*bs;
1821: } else {
1822: value = v + (l*(stepval+bs) + k)*bs;
1823: }
1824: if (col <= lastcol) low = 0;
1825: else high = nrow;
1826: lastcol = col;
1827: while (high-low > 7) {
1828: t = (low+high)/2;
1829: if (rp[t] > col) high = t;
1830: else low = t;
1831: }
1832: for (i=low; i<high; i++) {
1833: if (rp[i] > col) break;
1834: if (rp[i] == col) {
1835: bap = ap + bs2*i;
1836: if (roworiented) {
1837: if (is == ADD_VALUES) {
1838: for (ii=0; ii<bs; ii++,value+=stepval) {
1839: for (jj=ii; jj<bs2; jj+=bs) {
1840: bap[jj] += *value++;
1841: }
1842: }
1843: } else {
1844: for (ii=0; ii<bs; ii++,value+=stepval) {
1845: for (jj=ii; jj<bs2; jj+=bs) {
1846: bap[jj] = *value++;
1847: }
1848: }
1849: }
1850: } else {
1851: if (is == ADD_VALUES) {
1852: for (ii=0; ii<bs; ii++,value+=bs+stepval) {
1853: for (jj=0; jj<bs; jj++) {
1854: bap[jj] += value[jj];
1855: }
1856: bap += bs;
1857: }
1858: } else {
1859: for (ii=0; ii<bs; ii++,value+=bs+stepval) {
1860: for (jj=0; jj<bs; jj++) {
1861: bap[jj] = value[jj];
1862: }
1863: bap += bs;
1864: }
1865: }
1866: }
1867: goto noinsert2;
1868: }
1869: }
1870: if (nonew == 1) goto noinsert2;
1871: if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new blocked index new nonzero block (%D, %D) in the matrix", row, col);
1872: MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
1873: N = nrow++ - 1; high++;
1874: /* shift up all the later entries in this row */
1875: for (ii=N; ii>=i; ii--) {
1876: rp[ii+1] = rp[ii];
1877: PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));
1878: }
1879: if (N >= i) {
1880: PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));
1881: }
1882: rp[i] = col;
1883: bap = ap + bs2*i;
1884: if (roworiented) {
1885: for (ii=0; ii<bs; ii++,value+=stepval) {
1886: for (jj=ii; jj<bs2; jj+=bs) {
1887: bap[jj] = *value++;
1888: }
1889: }
1890: } else {
1891: for (ii=0; ii<bs; ii++,value+=stepval) {
1892: for (jj=0; jj<bs; jj++) {
1893: *bap++ = *value++;
1894: }
1895: }
1896: }
1897: noinsert2:;
1898: low = i;
1899: }
1900: ailen[row] = nrow;
1901: }
1902: return(0);
1903: }
1907: PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
1908: {
1909: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1910: PetscInt fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax;
1911: PetscInt m = A->rmap->N,*ip,N,*ailen = a->ilen;
1913: PetscInt mbs = a->mbs,bs2 = a->bs2,rmax = 0;
1914: MatScalar *aa = a->a,*ap;
1915: PetscReal ratio=0.6;
1918: if (mode == MAT_FLUSH_ASSEMBLY) return(0);
1920: if (m) rmax = ailen[0];
1921: for (i=1; i<mbs; i++) {
1922: /* move each row back by the amount of empty slots (fshift) before it*/
1923: fshift += imax[i-1] - ailen[i-1];
1924: rmax = PetscMax(rmax,ailen[i]);
1925: if (fshift) {
1926: ip = aj + ai[i]; ap = aa + bs2*ai[i];
1927: N = ailen[i];
1928: for (j=0; j<N; j++) {
1929: ip[j-fshift] = ip[j];
1931: PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));
1932: }
1933: }
1934: ai[i] = ai[i-1] + ailen[i-1];
1935: }
1936: if (mbs) {
1937: fshift += imax[mbs-1] - ailen[mbs-1];
1938: ai[mbs] = ai[mbs-1] + ailen[mbs-1];
1939: }
1941: /* reset ilen and imax for each row */
1942: a->nonzerorowcnt = 0;
1943: for (i=0; i<mbs; i++) {
1944: ailen[i] = imax[i] = ai[i+1] - ai[i];
1945: a->nonzerorowcnt += ((ai[i+1] - ai[i]) > 0);
1946: }
1947: a->nz = ai[mbs];
1949: /* diagonals may have moved, so kill the diagonal pointers */
1950: a->idiagvalid = PETSC_FALSE;
1951: if (fshift && a->diag) {
1952: PetscFree(a->diag);
1953: PetscLogObjectMemory((PetscObject)A,-(mbs+1)*sizeof(PetscInt));
1954: a->diag = 0;
1955: }
1956: if (fshift && a->nounused == -1) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "Unused space detected in matrix: %D X %D block size %D, %D unneeded", m, A->cmap->n, A->rmap->bs, fshift*bs2);
1957: PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap->n,A->rmap->bs,fshift*bs2,a->nz*bs2);
1958: PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);
1959: PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);
1961: A->info.mallocs += a->reallocs;
1962: a->reallocs = 0;
1963: A->info.nz_unneeded = (PetscReal)fshift*bs2;
1964: a->rmax = rmax;
1966: MatCheckCompressedRow(A,a->nonzerorowcnt,&a->compressedrow,a->i,mbs,ratio);
1967: return(0);
1968: }
1970: /*
1971: This function returns an array of flags which indicate the locations of contiguous
1972: blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9]
1973: then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
1974: Assume: sizes should be long enough to hold all the values.
1975: */
1978: static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max)
1979: {
1980: PetscInt i,j,k,row;
1981: PetscBool flg;
1984: for (i=0,j=0; i<n; j++) {
1985: row = idx[i];
1986: if (row%bs!=0) { /* Not the begining of a block */
1987: sizes[j] = 1;
1988: i++;
1989: } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */
1990: sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */
1991: i++;
1992: } else { /* Begining of the block, so check if the complete block exists */
1993: flg = PETSC_TRUE;
1994: for (k=1; k<bs; k++) {
1995: if (row+k != idx[i+k]) { /* break in the block */
1996: flg = PETSC_FALSE;
1997: break;
1998: }
1999: }
2000: if (flg) { /* No break in the bs */
2001: sizes[j] = bs;
2002: i += bs;
2003: } else {
2004: sizes[j] = 1;
2005: i++;
2006: }
2007: }
2008: }
2009: *bs_max = j;
2010: return(0);
2011: }
2015: PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b)
2016: {
2017: Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data;
2018: PetscErrorCode ierr;
2019: PetscInt i,j,k,count,*rows;
2020: PetscInt bs=A->rmap->bs,bs2=baij->bs2,*sizes,row,bs_max;
2021: PetscScalar zero = 0.0;
2022: MatScalar *aa;
2023: const PetscScalar *xx;
2024: PetscScalar *bb;
2027: /* fix right hand side if needed */
2028: if (x && b) {
2029: VecGetArrayRead(x,&xx);
2030: VecGetArray(b,&bb);
2031: for (i=0; i<is_n; i++) {
2032: bb[is_idx[i]] = diag*xx[is_idx[i]];
2033: }
2034: VecRestoreArrayRead(x,&xx);
2035: VecRestoreArray(b,&bb);
2036: }
2038: /* Make a copy of the IS and sort it */
2039: /* allocate memory for rows,sizes */
2040: PetscMalloc2(is_n,&rows,2*is_n,&sizes);
2042: /* copy IS values to rows, and sort them */
2043: for (i=0; i<is_n; i++) rows[i] = is_idx[i];
2044: PetscSortInt(is_n,rows);
2046: if (baij->keepnonzeropattern) {
2047: for (i=0; i<is_n; i++) sizes[i] = 1;
2048: bs_max = is_n;
2049: } else {
2050: MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);
2051: A->nonzerostate++;
2052: }
2054: for (i=0,j=0; i<bs_max; j+=sizes[i],i++) {
2055: row = rows[j];
2056: if (row < 0 || row > A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row);
2057: count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
2058: aa = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
2059: if (sizes[i] == bs && !baij->keepnonzeropattern) {
2060: if (diag != (PetscScalar)0.0) {
2061: if (baij->ilen[row/bs] > 0) {
2062: baij->ilen[row/bs] = 1;
2063: baij->j[baij->i[row/bs]] = row/bs;
2065: PetscMemzero(aa,count*bs*sizeof(MatScalar));
2066: }
2067: /* Now insert all the diagonal values for this bs */
2068: for (k=0; k<bs; k++) {
2069: (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);
2070: }
2071: } else { /* (diag == 0.0) */
2072: baij->ilen[row/bs] = 0;
2073: } /* end (diag == 0.0) */
2074: } else { /* (sizes[i] != bs) */
2075: #if defined(PETSC_USE_DEBUG)
2076: if (sizes[i] != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal Error. Value should be 1");
2077: #endif
2078: for (k=0; k<count; k++) {
2079: aa[0] = zero;
2080: aa += bs;
2081: }
2082: if (diag != (PetscScalar)0.0) {
2083: (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);
2084: }
2085: }
2086: }
2088: PetscFree2(rows,sizes);
2089: MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);
2090: return(0);
2091: }
2095: PetscErrorCode MatZeroRowsColumns_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b)
2096: {
2097: Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data;
2098: PetscErrorCode ierr;
2099: PetscInt i,j,k,count;
2100: PetscInt bs =A->rmap->bs,bs2=baij->bs2,row,col;
2101: PetscScalar zero = 0.0;
2102: MatScalar *aa;
2103: const PetscScalar *xx;
2104: PetscScalar *bb;
2105: PetscBool *zeroed,vecs = PETSC_FALSE;
2108: /* fix right hand side if needed */
2109: if (x && b) {
2110: VecGetArrayRead(x,&xx);
2111: VecGetArray(b,&bb);
2112: vecs = PETSC_TRUE;
2113: }
2115: /* zero the columns */
2116: PetscCalloc1(A->rmap->n,&zeroed);
2117: for (i=0; i<is_n; i++) {
2118: if (is_idx[i] < 0 || is_idx[i] >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",is_idx[i]);
2119: zeroed[is_idx[i]] = PETSC_TRUE;
2120: }
2121: for (i=0; i<A->rmap->N; i++) {
2122: if (!zeroed[i]) {
2123: row = i/bs;
2124: for (j=baij->i[row]; j<baij->i[row+1]; j++) {
2125: for (k=0; k<bs; k++) {
2126: col = bs*baij->j[j] + k;
2127: if (zeroed[col]) {
2128: aa = ((MatScalar*)(baij->a)) + j*bs2 + (i%bs) + bs*k;
2129: if (vecs) bb[i] -= aa[0]*xx[col];
2130: aa[0] = 0.0;
2131: }
2132: }
2133: }
2134: } else if (vecs) bb[i] = diag*xx[i];
2135: }
2136: PetscFree(zeroed);
2137: if (vecs) {
2138: VecRestoreArrayRead(x,&xx);
2139: VecRestoreArray(b,&bb);
2140: }
2142: /* zero the rows */
2143: for (i=0; i<is_n; i++) {
2144: row = is_idx[i];
2145: count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
2146: aa = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
2147: for (k=0; k<count; k++) {
2148: aa[0] = zero;
2149: aa += bs;
2150: }
2151: if (diag != (PetscScalar)0.0) {
2152: (*A->ops->setvalues)(A,1,&row,1,&row,&diag,INSERT_VALUES);
2153: }
2154: }
2155: MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);
2156: return(0);
2157: }
2161: PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
2162: {
2163: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2164: PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1;
2165: PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen;
2166: PetscInt *aj =a->j,nonew=a->nonew,bs=A->rmap->bs,brow,bcol;
2168: PetscInt ridx,cidx,bs2=a->bs2;
2169: PetscBool roworiented=a->roworiented;
2170: MatScalar *ap,value,*aa=a->a,*bap;
2173: for (k=0; k<m; k++) { /* loop over added rows */
2174: row = im[k];
2175: brow = row/bs;
2176: if (row < 0) continue;
2177: #if defined(PETSC_USE_DEBUG)
2178: if (row >= A->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->N-1);
2179: #endif
2180: rp = aj + ai[brow];
2181: ap = aa + bs2*ai[brow];
2182: rmax = imax[brow];
2183: nrow = ailen[brow];
2184: low = 0;
2185: high = nrow;
2186: for (l=0; l<n; l++) { /* loop over added columns */
2187: if (in[l] < 0) continue;
2188: #if defined(PETSC_USE_DEBUG)
2189: if (in[l] >= A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap->n-1);
2190: #endif
2191: col = in[l]; bcol = col/bs;
2192: ridx = row % bs; cidx = col % bs;
2193: if (roworiented) {
2194: value = v[l + k*n];
2195: } else {
2196: value = v[k + l*m];
2197: }
2198: if (col <= lastcol) low = 0; else high = nrow;
2199: lastcol = col;
2200: while (high-low > 7) {
2201: t = (low+high)/2;
2202: if (rp[t] > bcol) high = t;
2203: else low = t;
2204: }
2205: for (i=low; i<high; i++) {
2206: if (rp[i] > bcol) break;
2207: if (rp[i] == bcol) {
2208: bap = ap + bs2*i + bs*cidx + ridx;
2209: if (is == ADD_VALUES) *bap += value;
2210: else *bap = value;
2211: goto noinsert1;
2212: }
2213: }
2214: if (nonew == 1) goto noinsert1;
2215: if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
2216: MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
2217: N = nrow++ - 1; high++;
2218: /* shift up all the later entries in this row */
2219: for (ii=N; ii>=i; ii--) {
2220: rp[ii+1] = rp[ii];
2221: PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));
2222: }
2223: if (N>=i) {
2224: PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));
2225: }
2226: rp[i] = bcol;
2227: ap[bs2*i + bs*cidx + ridx] = value;
2228: a->nz++;
2229: A->nonzerostate++;
2230: noinsert1:;
2231: low = i;
2232: }
2233: ailen[brow] = nrow;
2234: }
2235: return(0);
2236: }
2240: PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,const MatFactorInfo *info)
2241: {
2242: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inA->data;
2243: Mat outA;
2245: PetscBool row_identity,col_identity;
2248: if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU");
2249: ISIdentity(row,&row_identity);
2250: ISIdentity(col,&col_identity);
2251: if (!row_identity || !col_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU");
2253: outA = inA;
2254: inA->factortype = MAT_FACTOR_LU;
2255: PetscFree(inA->solvertype);
2256: PetscStrallocpy(MATSOLVERPETSC,&inA->solvertype);
2258: MatMarkDiagonal_SeqBAIJ(inA);
2260: PetscObjectReference((PetscObject)row);
2261: ISDestroy(&a->row);
2262: a->row = row;
2263: PetscObjectReference((PetscObject)col);
2264: ISDestroy(&a->col);
2265: a->col = col;
2267: /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
2268: ISDestroy(&a->icol);
2269: ISInvertPermutation(col,PETSC_DECIDE,&a->icol);
2270: PetscLogObjectParent((PetscObject)inA,(PetscObject)a->icol);
2272: MatSeqBAIJSetNumericFactorization_inplace(inA,(PetscBool)(row_identity && col_identity));
2273: if (!a->solve_work) {
2274: PetscMalloc1(inA->rmap->N+inA->rmap->bs,&a->solve_work);
2275: PetscLogObjectMemory((PetscObject)inA,(inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar));
2276: }
2277: MatLUFactorNumeric(outA,inA,info);
2278: return(0);
2279: }
2283: PetscErrorCode MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices)
2284: {
2285: Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)mat->data;
2286: PetscInt i,nz,mbs;
2289: nz = baij->maxnz;
2290: mbs = baij->mbs;
2291: for (i=0; i<nz; i++) {
2292: baij->j[i] = indices[i];
2293: }
2294: baij->nz = nz;
2295: for (i=0; i<mbs; i++) {
2296: baij->ilen[i] = baij->imax[i];
2297: }
2298: return(0);
2299: }
2303: /*@
2304: MatSeqBAIJSetColumnIndices - Set the column indices for all the rows
2305: in the matrix.
2307: Input Parameters:
2308: + mat - the SeqBAIJ matrix
2309: - indices - the column indices
2311: Level: advanced
2313: Notes:
2314: This can be called if you have precomputed the nonzero structure of the
2315: matrix and want to provide it to the matrix object to improve the performance
2316: of the MatSetValues() operation.
2318: You MUST have set the correct numbers of nonzeros per row in the call to
2319: MatCreateSeqBAIJ(), and the columns indices MUST be sorted.
2321: MUST be called before any calls to MatSetValues();
2323: @*/
2324: PetscErrorCode MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices)
2325: {
2331: PetscUseMethod(mat,"MatSeqBAIJSetColumnIndices_C",(Mat,PetscInt*),(mat,indices));
2332: return(0);
2333: }
2337: PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[])
2338: {
2339: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2341: PetscInt i,j,n,row,bs,*ai,*aj,mbs;
2342: PetscReal atmp;
2343: PetscScalar *x,zero = 0.0;
2344: MatScalar *aa;
2345: PetscInt ncols,brow,krow,kcol;
2348: if (A->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
2349: bs = A->rmap->bs;
2350: aa = a->a;
2351: ai = a->i;
2352: aj = a->j;
2353: mbs = a->mbs;
2355: VecSet(v,zero);
2356: VecGetArray(v,&x);
2357: VecGetLocalSize(v,&n);
2358: if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
2359: for (i=0; i<mbs; i++) {
2360: ncols = ai[1] - ai[0]; ai++;
2361: brow = bs*i;
2362: for (j=0; j<ncols; j++) {
2363: for (kcol=0; kcol<bs; kcol++) {
2364: for (krow=0; krow<bs; krow++) {
2365: atmp = PetscAbsScalar(*aa);aa++;
2366: row = brow + krow; /* row index */
2367: if (PetscAbsScalar(x[row]) < atmp) {x[row] = atmp; if (idx) idx[row] = bs*(*aj) + kcol;}
2368: }
2369: }
2370: aj++;
2371: }
2372: }
2373: VecRestoreArray(v,&x);
2374: return(0);
2375: }
2379: PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str)
2380: {
2384: /* If the two matrices have the same copy implementation, use fast copy. */
2385: if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) {
2386: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2387: Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)B->data;
2388: PetscInt ambs=a->mbs,bmbs=b->mbs,abs=A->rmap->bs,bbs=B->rmap->bs,bs2=abs*abs;
2390: if (a->i[ambs] != b->i[bmbs]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Number of nonzero blocks in matrices A %D and B %D are different",a->i[ambs],b->i[bmbs]);
2391: if (abs != bbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Block size A %D and B %D are different",abs,bbs);
2392: PetscMemcpy(b->a,a->a,(bs2*a->i[ambs])*sizeof(PetscScalar));
2393: } else {
2394: MatCopy_Basic(A,B,str);
2395: }
2396: return(0);
2397: }
2401: PetscErrorCode MatSetUp_SeqBAIJ(Mat A)
2402: {
2406: MatSeqBAIJSetPreallocation_SeqBAIJ(A,A->rmap->bs,PETSC_DEFAULT,0);
2407: return(0);
2408: }
2412: PetscErrorCode MatSeqBAIJGetArray_SeqBAIJ(Mat A,PetscScalar *array[])
2413: {
2414: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2417: *array = a->a;
2418: return(0);
2419: }
2423: PetscErrorCode MatSeqBAIJRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[])
2424: {
2426: return(0);
2427: }
2431: PetscErrorCode MatAXPYGetPreallocation_SeqBAIJ(Mat Y,Mat X,PetscInt *nnz)
2432: {
2433: PetscInt bs = Y->rmap->bs,mbs = Y->rmap->N/bs;
2434: Mat_SeqBAIJ *x = (Mat_SeqBAIJ*)X->data;
2435: Mat_SeqBAIJ *y = (Mat_SeqBAIJ*)Y->data;
2439: /* Set the number of nonzeros in the new matrix */
2440: MatAXPYGetPreallocation_SeqX_private(mbs,x->i,x->j,y->i,y->j,nnz);
2441: return(0);
2442: }
2446: PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2447: {
2448: Mat_SeqBAIJ *x = (Mat_SeqBAIJ*)X->data,*y = (Mat_SeqBAIJ*)Y->data;
2450: PetscInt bs=Y->rmap->bs,bs2=bs*bs;
2451: PetscBLASInt one=1;
2454: if (str == SAME_NONZERO_PATTERN) {
2455: PetscScalar alpha = a;
2456: PetscBLASInt bnz;
2457: PetscBLASIntCast(x->nz*bs2,&bnz);
2458: PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2459: PetscObjectStateIncrease((PetscObject)Y);
2460: } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2461: MatAXPY_Basic(Y,a,X,str);
2462: } else {
2463: Mat B;
2464: PetscInt *nnz;
2465: if (bs != X->rmap->bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrices must have same block size");
2466: PetscMalloc1(Y->rmap->N,&nnz);
2467: MatCreate(PetscObjectComm((PetscObject)Y),&B);
2468: PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);
2469: MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);
2470: MatSetBlockSizesFromMats(B,Y,Y);
2471: MatSetType(B,(MatType) ((PetscObject)Y)->type_name);
2472: MatAXPYGetPreallocation_SeqBAIJ(Y,X,nnz);
2473: MatSeqBAIJSetPreallocation(B,bs,0,nnz);
2474: MatAXPY_BasicWithPreallocation(B,Y,a,X,str);
2475: MatHeaderReplace(Y,&B);
2476: PetscFree(nnz);
2477: }
2478: return(0);
2479: }
2483: PetscErrorCode MatRealPart_SeqBAIJ(Mat A)
2484: {
2485: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2486: PetscInt i,nz = a->bs2*a->i[a->mbs];
2487: MatScalar *aa = a->a;
2490: for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]);
2491: return(0);
2492: }
2496: PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A)
2497: {
2498: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2499: PetscInt i,nz = a->bs2*a->i[a->mbs];
2500: MatScalar *aa = a->a;
2503: for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]);
2504: return(0);
2505: }
2509: /*
2510: Code almost idential to MatGetColumnIJ_SeqAIJ() should share common code
2511: */
2512: PetscErrorCode MatGetColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool *done)
2513: {
2514: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2516: PetscInt bs = A->rmap->bs,i,*collengths,*cia,*cja,n = A->cmap->n/bs,m = A->rmap->n/bs;
2517: PetscInt nz = a->i[m],row,*jj,mr,col;
2520: *nn = n;
2521: if (!ia) return(0);
2522: if (symmetric) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not for BAIJ matrices");
2523: else {
2524: PetscCalloc1(n+1,&collengths);
2525: PetscMalloc1(n+1,&cia);
2526: PetscMalloc1(nz+1,&cja);
2527: jj = a->j;
2528: for (i=0; i<nz; i++) {
2529: collengths[jj[i]]++;
2530: }
2531: cia[0] = oshift;
2532: for (i=0; i<n; i++) {
2533: cia[i+1] = cia[i] + collengths[i];
2534: }
2535: PetscMemzero(collengths,n*sizeof(PetscInt));
2536: jj = a->j;
2537: for (row=0; row<m; row++) {
2538: mr = a->i[row+1] - a->i[row];
2539: for (i=0; i<mr; i++) {
2540: col = *jj++;
2542: cja[cia[col] + collengths[col]++ - oshift] = row + oshift;
2543: }
2544: }
2545: PetscFree(collengths);
2546: *ia = cia; *ja = cja;
2547: }
2548: return(0);
2549: }
2553: PetscErrorCode MatRestoreColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscBool *done)
2554: {
2558: if (!ia) return(0);
2559: PetscFree(*ia);
2560: PetscFree(*ja);
2561: return(0);
2562: }
2564: /*
2565: MatGetColumnIJ_SeqBAIJ_Color() and MatRestoreColumnIJ_SeqBAIJ_Color() are customized from
2566: MatGetColumnIJ_SeqBAIJ() and MatRestoreColumnIJ_SeqBAIJ() by adding an output
2567: spidx[], index of a->a, to be used in MatTransposeColoringCreate() and MatFDColoringCreate()
2568: */
2571: PetscErrorCode MatGetColumnIJ_SeqBAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscInt *spidx[],PetscBool *done)
2572: {
2573: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2575: PetscInt i,*collengths,*cia,*cja,n=a->nbs,m=a->mbs;
2576: PetscInt nz = a->i[m],row,*jj,mr,col;
2577: PetscInt *cspidx;
2580: *nn = n;
2581: if (!ia) return(0);
2583: PetscCalloc1(n+1,&collengths);
2584: PetscMalloc1(n+1,&cia);
2585: PetscMalloc1(nz+1,&cja);
2586: PetscMalloc1(nz+1,&cspidx);
2587: jj = a->j;
2588: for (i=0; i<nz; i++) {
2589: collengths[jj[i]]++;
2590: }
2591: cia[0] = oshift;
2592: for (i=0; i<n; i++) {
2593: cia[i+1] = cia[i] + collengths[i];
2594: }
2595: PetscMemzero(collengths,n*sizeof(PetscInt));
2596: jj = a->j;
2597: for (row=0; row<m; row++) {
2598: mr = a->i[row+1] - a->i[row];
2599: for (i=0; i<mr; i++) {
2600: col = *jj++;
2601: cspidx[cia[col] + collengths[col] - oshift] = a->i[row] + i; /* index of a->j */
2602: cja[cia[col] + collengths[col]++ - oshift] = row + oshift;
2603: }
2604: }
2605: PetscFree(collengths);
2606: *ia = cia; *ja = cja;
2607: *spidx = cspidx;
2608: return(0);
2609: }
2613: PetscErrorCode MatRestoreColumnIJ_SeqBAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscInt *spidx[],PetscBool *done)
2614: {
2618: MatRestoreColumnIJ_SeqBAIJ(A,oshift,symmetric,inodecompressed,n,ia,ja,done);
2619: PetscFree(*spidx);
2620: return(0);
2621: }
2625: PetscErrorCode MatShift_SeqBAIJ(Mat Y,PetscScalar a)
2626: {
2628: Mat_SeqBAIJ *aij = (Mat_SeqBAIJ*)Y->data;
2631: if (!Y->preallocated || !aij->nz) {
2632: MatSeqBAIJSetPreallocation(Y,Y->rmap->bs,1,NULL);
2633: }
2634: MatShift_Basic(Y,a);
2635: return(0);
2636: }
2638: /* -------------------------------------------------------------------*/
2639: static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ,
2640: MatGetRow_SeqBAIJ,
2641: MatRestoreRow_SeqBAIJ,
2642: MatMult_SeqBAIJ_N,
2643: /* 4*/ MatMultAdd_SeqBAIJ_N,
2644: MatMultTranspose_SeqBAIJ,
2645: MatMultTransposeAdd_SeqBAIJ,
2646: 0,
2647: 0,
2648: 0,
2649: /* 10*/ 0,
2650: MatLUFactor_SeqBAIJ,
2651: 0,
2652: 0,
2653: MatTranspose_SeqBAIJ,
2654: /* 15*/ MatGetInfo_SeqBAIJ,
2655: MatEqual_SeqBAIJ,
2656: MatGetDiagonal_SeqBAIJ,
2657: MatDiagonalScale_SeqBAIJ,
2658: MatNorm_SeqBAIJ,
2659: /* 20*/ 0,
2660: MatAssemblyEnd_SeqBAIJ,
2661: MatSetOption_SeqBAIJ,
2662: MatZeroEntries_SeqBAIJ,
2663: /* 24*/ MatZeroRows_SeqBAIJ,
2664: 0,
2665: 0,
2666: 0,
2667: 0,
2668: /* 29*/ MatSetUp_SeqBAIJ,
2669: 0,
2670: 0,
2671: 0,
2672: 0,
2673: /* 34*/ MatDuplicate_SeqBAIJ,
2674: 0,
2675: 0,
2676: MatILUFactor_SeqBAIJ,
2677: 0,
2678: /* 39*/ MatAXPY_SeqBAIJ,
2679: MatGetSubMatrices_SeqBAIJ,
2680: MatIncreaseOverlap_SeqBAIJ,
2681: MatGetValues_SeqBAIJ,
2682: MatCopy_SeqBAIJ,
2683: /* 44*/ 0,
2684: MatScale_SeqBAIJ,
2685: MatShift_SeqBAIJ,
2686: 0,
2687: MatZeroRowsColumns_SeqBAIJ,
2688: /* 49*/ 0,
2689: MatGetRowIJ_SeqBAIJ,
2690: MatRestoreRowIJ_SeqBAIJ,
2691: MatGetColumnIJ_SeqBAIJ,
2692: MatRestoreColumnIJ_SeqBAIJ,
2693: /* 54*/ MatFDColoringCreate_SeqXAIJ,
2694: 0,
2695: 0,
2696: 0,
2697: MatSetValuesBlocked_SeqBAIJ,
2698: /* 59*/ MatGetSubMatrix_SeqBAIJ,
2699: MatDestroy_SeqBAIJ,
2700: MatView_SeqBAIJ,
2701: 0,
2702: 0,
2703: /* 64*/ 0,
2704: 0,
2705: 0,
2706: 0,
2707: 0,
2708: /* 69*/ MatGetRowMaxAbs_SeqBAIJ,
2709: 0,
2710: MatConvert_Basic,
2711: 0,
2712: 0,
2713: /* 74*/ 0,
2714: MatFDColoringApply_BAIJ,
2715: 0,
2716: 0,
2717: 0,
2718: /* 79*/ 0,
2719: 0,
2720: 0,
2721: 0,
2722: MatLoad_SeqBAIJ,
2723: /* 84*/ 0,
2724: 0,
2725: 0,
2726: 0,
2727: 0,
2728: /* 89*/ 0,
2729: 0,
2730: 0,
2731: 0,
2732: 0,
2733: /* 94*/ 0,
2734: 0,
2735: 0,
2736: 0,
2737: 0,
2738: /* 99*/ 0,
2739: 0,
2740: 0,
2741: 0,
2742: 0,
2743: /*104*/ 0,
2744: MatRealPart_SeqBAIJ,
2745: MatImaginaryPart_SeqBAIJ,
2746: 0,
2747: 0,
2748: /*109*/ 0,
2749: 0,
2750: 0,
2751: 0,
2752: MatMissingDiagonal_SeqBAIJ,
2753: /*114*/ 0,
2754: 0,
2755: 0,
2756: 0,
2757: 0,
2758: /*119*/ 0,
2759: 0,
2760: MatMultHermitianTranspose_SeqBAIJ,
2761: MatMultHermitianTransposeAdd_SeqBAIJ,
2762: 0,
2763: /*124*/ 0,
2764: 0,
2765: MatInvertBlockDiagonal_SeqBAIJ,
2766: 0,
2767: 0,
2768: /*129*/ 0,
2769: 0,
2770: 0,
2771: 0,
2772: 0,
2773: /*134*/ 0,
2774: 0,
2775: 0,
2776: 0,
2777: 0,
2778: /*139*/ 0,
2779: 0,
2780: 0,
2781: MatFDColoringSetUp_SeqXAIJ,
2782: 0,
2783: /*144*/MatCreateMPIMatConcatenateSeqMat_SeqBAIJ
2784: };
2788: PetscErrorCode MatStoreValues_SeqBAIJ(Mat mat)
2789: {
2790: Mat_SeqBAIJ *aij = (Mat_SeqBAIJ*)mat->data;
2791: PetscInt nz = aij->i[aij->mbs]*aij->bs2;
2795: if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
2797: /* allocate space for values if not already there */
2798: if (!aij->saved_values) {
2799: PetscMalloc1(nz+1,&aij->saved_values);
2800: PetscLogObjectMemory((PetscObject)mat,(nz+1)*sizeof(PetscScalar));
2801: }
2803: /* copy values over */
2804: PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));
2805: return(0);
2806: }
2810: PetscErrorCode MatRetrieveValues_SeqBAIJ(Mat mat)
2811: {
2812: Mat_SeqBAIJ *aij = (Mat_SeqBAIJ*)mat->data;
2814: PetscInt nz = aij->i[aij->mbs]*aij->bs2;
2817: if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
2818: if (!aij->saved_values) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatStoreValues(A);first");
2820: /* copy values over */
2821: PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));
2822: return(0);
2823: }
2825: PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*);
2826: PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*);
2830: PetscErrorCode MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz)
2831: {
2832: Mat_SeqBAIJ *b;
2834: PetscInt i,mbs,nbs,bs2;
2835: PetscBool flg = PETSC_FALSE,skipallocation = PETSC_FALSE,realalloc = PETSC_FALSE;
2838: if (nz >= 0 || nnz) realalloc = PETSC_TRUE;
2839: if (nz == MAT_SKIP_ALLOCATION) {
2840: skipallocation = PETSC_TRUE;
2841: nz = 0;
2842: }
2844: MatSetBlockSize(B,PetscAbs(bs));
2845: PetscLayoutSetUp(B->rmap);
2846: PetscLayoutSetUp(B->cmap);
2847: PetscLayoutGetBlockSize(B->rmap,&bs);
2849: B->preallocated = PETSC_TRUE;
2851: mbs = B->rmap->n/bs;
2852: nbs = B->cmap->n/bs;
2853: bs2 = bs*bs;
2855: if (mbs*bs!=B->rmap->n || nbs*bs!=B->cmap->n) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap->N,B->cmap->n,bs);
2857: if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2858: if (nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz);
2859: if (nnz) {
2860: for (i=0; i<mbs; i++) {
2861: if (nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]);
2862: if (nnz[i] > nbs) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs);
2863: }
2864: }
2866: b = (Mat_SeqBAIJ*)B->data;
2867: PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat");
2868: PetscOptionsBool("-mat_no_unroll","Do not optimize for block size (slow)",NULL,flg,&flg,NULL);
2869: PetscOptionsEnd();
2871: if (!flg) {
2872: switch (bs) {
2873: case 1:
2874: B->ops->mult = MatMult_SeqBAIJ_1;
2875: B->ops->multadd = MatMultAdd_SeqBAIJ_1;
2876: break;
2877: case 2:
2878: B->ops->mult = MatMult_SeqBAIJ_2;
2879: B->ops->multadd = MatMultAdd_SeqBAIJ_2;
2880: break;
2881: case 3:
2882: B->ops->mult = MatMult_SeqBAIJ_3;
2883: B->ops->multadd = MatMultAdd_SeqBAIJ_3;
2884: break;
2885: case 4:
2886: B->ops->mult = MatMult_SeqBAIJ_4;
2887: B->ops->multadd = MatMultAdd_SeqBAIJ_4;
2888: break;
2889: case 5:
2890: B->ops->mult = MatMult_SeqBAIJ_5;
2891: B->ops->multadd = MatMultAdd_SeqBAIJ_5;
2892: break;
2893: case 6:
2894: B->ops->mult = MatMult_SeqBAIJ_6;
2895: B->ops->multadd = MatMultAdd_SeqBAIJ_6;
2896: break;
2897: case 7:
2898: B->ops->mult = MatMult_SeqBAIJ_7;
2899: B->ops->multadd = MatMultAdd_SeqBAIJ_7;
2900: break;
2901: case 15:
2902: B->ops->mult = MatMult_SeqBAIJ_15_ver1;
2903: B->ops->multadd = MatMultAdd_SeqBAIJ_N;
2904: break;
2905: default:
2906: B->ops->mult = MatMult_SeqBAIJ_N;
2907: B->ops->multadd = MatMultAdd_SeqBAIJ_N;
2908: break;
2909: }
2910: }
2911: B->ops->sor = MatSOR_SeqBAIJ;
2912: b->mbs = mbs;
2913: b->nbs = nbs;
2914: if (!skipallocation) {
2915: if (!b->imax) {
2916: PetscMalloc2(mbs,&b->imax,mbs,&b->ilen);
2917: PetscLogObjectMemory((PetscObject)B,2*mbs*sizeof(PetscInt));
2919: b->free_imax_ilen = PETSC_TRUE;
2920: }
2921: /* b->ilen will count nonzeros in each block row so far. */
2922: for (i=0; i<mbs; i++) b->ilen[i] = 0;
2923: if (!nnz) {
2924: if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2925: else if (nz < 0) nz = 1;
2926: for (i=0; i<mbs; i++) b->imax[i] = nz;
2927: nz = nz*mbs;
2928: } else {
2929: nz = 0;
2930: for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];}
2931: }
2933: /* allocate the matrix space */
2934: MatSeqXAIJFreeAIJ(B,&b->a,&b->j,&b->i);
2935: PetscMalloc3(bs2*nz,&b->a,nz,&b->j,B->rmap->N+1,&b->i);
2936: PetscLogObjectMemory((PetscObject)B,(B->rmap->N+1)*sizeof(PetscInt)+nz*(bs2*sizeof(PetscScalar)+sizeof(PetscInt)));
2937: PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));
2938: PetscMemzero(b->j,nz*sizeof(PetscInt));
2940: b->singlemalloc = PETSC_TRUE;
2941: b->i[0] = 0;
2942: for (i=1; i<mbs+1; i++) {
2943: b->i[i] = b->i[i-1] + b->imax[i-1];
2944: }
2945: b->free_a = PETSC_TRUE;
2946: b->free_ij = PETSC_TRUE;
2947: } else {
2948: b->free_a = PETSC_FALSE;
2949: b->free_ij = PETSC_FALSE;
2950: }
2952: b->bs2 = bs2;
2953: b->mbs = mbs;
2954: b->nz = 0;
2955: b->maxnz = nz;
2956: B->info.nz_unneeded = (PetscReal)b->maxnz*bs2;
2957: if (realalloc) {MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);}
2958: return(0);
2959: }
2963: PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
2964: {
2965: PetscInt i,m,nz,nz_max=0,*nnz;
2966: PetscScalar *values=0;
2967: PetscBool roworiented = ((Mat_SeqBAIJ*)B->data)->roworiented;
2971: if (bs < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs);
2972: PetscLayoutSetBlockSize(B->rmap,bs);
2973: PetscLayoutSetBlockSize(B->cmap,bs);
2974: PetscLayoutSetUp(B->rmap);
2975: PetscLayoutSetUp(B->cmap);
2976: PetscLayoutGetBlockSize(B->rmap,&bs);
2977: m = B->rmap->n/bs;
2979: if (ii[0] != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %D",ii[0]);
2980: PetscMalloc1(m+1, &nnz);
2981: for (i=0; i<m; i++) {
2982: nz = ii[i+1]- ii[i];
2983: if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "Local row %D has a negative number of columns %D",i,nz);
2984: nz_max = PetscMax(nz_max, nz);
2985: nnz[i] = nz;
2986: }
2987: MatSeqBAIJSetPreallocation(B,bs,0,nnz);
2988: PetscFree(nnz);
2990: values = (PetscScalar*)V;
2991: if (!values) {
2992: PetscCalloc1(bs*bs*(nz_max+1),&values);
2993: }
2994: for (i=0; i<m; i++) {
2995: PetscInt ncols = ii[i+1] - ii[i];
2996: const PetscInt *icols = jj + ii[i];
2997: const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
2998: if (!roworiented) {
2999: MatSetValuesBlocked_SeqBAIJ(B,1,&i,ncols,icols,svals,INSERT_VALUES);
3000: } else {
3001: PetscInt j;
3002: for (j=0; j<ncols; j++) {
3003: const PetscScalar *svals = values + (V ? (bs*bs*(ii[i]+j)) : 0);
3004: MatSetValuesBlocked_SeqBAIJ(B,1,&i,1,&icols[j],svals,INSERT_VALUES);
3005: }
3006: }
3007: }
3008: if (!V) { PetscFree(values); }
3009: MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
3010: MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
3011: MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);
3012: return(0);
3013: }
3015: /*MC
3016: MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on
3017: block sparse compressed row format.
3019: Options Database Keys:
3020: . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions()
3022: Level: beginner
3024: .seealso: MatCreateSeqBAIJ()
3025: M*/
3027: PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBSTRM(Mat, MatType,MatReuse,Mat*);
3031: PETSC_EXTERN PetscErrorCode MatCreate_SeqBAIJ(Mat B)
3032: {
3034: PetscMPIInt size;
3035: Mat_SeqBAIJ *b;
3038: MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);
3039: if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Comm must be of size 1");
3041: PetscNewLog(B,&b);
3042: B->data = (void*)b;
3043: PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));
3045: b->row = 0;
3046: b->col = 0;
3047: b->icol = 0;
3048: b->reallocs = 0;
3049: b->saved_values = 0;
3051: b->roworiented = PETSC_TRUE;
3052: b->nonew = 0;
3053: b->diag = 0;
3054: B->spptr = 0;
3055: B->info.nz_unneeded = (PetscReal)b->maxnz*b->bs2;
3056: b->keepnonzeropattern = PETSC_FALSE;
3058: PetscObjectComposeFunction((PetscObject)B,"MatInvertBlockDiagonal_C",MatInvertBlockDiagonal_SeqBAIJ);
3059: PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_SeqBAIJ);
3060: PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_SeqBAIJ);
3061: PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",MatSeqBAIJSetColumnIndices_SeqBAIJ);
3062: PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqaij_C",MatConvert_SeqBAIJ_SeqAIJ);
3063: PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C",MatConvert_SeqBAIJ_SeqSBAIJ);
3064: PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",MatSeqBAIJSetPreallocation_SeqBAIJ);
3065: PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C",MatSeqBAIJSetPreallocationCSR_SeqBAIJ);
3066: PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqbstrm_C",MatConvert_SeqBAIJ_SeqBSTRM);
3067: PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_SeqBAIJ);
3068: PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);
3069: return(0);
3070: }
3074: PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C,Mat A,MatDuplicateOption cpvalues,PetscBool mallocmatspace)
3075: {
3076: Mat_SeqBAIJ *c = (Mat_SeqBAIJ*)C->data,*a = (Mat_SeqBAIJ*)A->data;
3078: PetscInt i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2;
3081: if (a->i[mbs] != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupt matrix");
3083: if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3084: c->imax = a->imax;
3085: c->ilen = a->ilen;
3086: c->free_imax_ilen = PETSC_FALSE;
3087: } else {
3088: PetscMalloc2(mbs,&c->imax,mbs,&c->ilen);
3089: PetscLogObjectMemory((PetscObject)C,2*mbs*sizeof(PetscInt));
3090: for (i=0; i<mbs; i++) {
3091: c->imax[i] = a->imax[i];
3092: c->ilen[i] = a->ilen[i];
3093: }
3094: c->free_imax_ilen = PETSC_TRUE;
3095: }
3097: /* allocate the matrix space */
3098: if (mallocmatspace) {
3099: if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3100: PetscCalloc1(bs2*nz,&c->a);
3101: PetscLogObjectMemory((PetscObject)C,a->i[mbs]*bs2*sizeof(PetscScalar));
3103: c->i = a->i;
3104: c->j = a->j;
3105: c->singlemalloc = PETSC_FALSE;
3106: c->free_a = PETSC_TRUE;
3107: c->free_ij = PETSC_FALSE;
3108: c->parent = A;
3109: C->preallocated = PETSC_TRUE;
3110: C->assembled = PETSC_TRUE;
3112: PetscObjectReference((PetscObject)A);
3113: MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);
3114: MatSetOption(C,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);
3115: } else {
3116: PetscMalloc3(bs2*nz,&c->a,nz,&c->j,mbs+1,&c->i);
3117: PetscLogObjectMemory((PetscObject)C,a->i[mbs]*(bs2*sizeof(PetscScalar)+sizeof(PetscInt))+(mbs+1)*sizeof(PetscInt));
3119: c->singlemalloc = PETSC_TRUE;
3120: c->free_a = PETSC_TRUE;
3121: c->free_ij = PETSC_TRUE;
3123: PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));
3124: if (mbs > 0) {
3125: PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));
3126: if (cpvalues == MAT_COPY_VALUES) {
3127: PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));
3128: } else {
3129: PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));
3130: }
3131: }
3132: C->preallocated = PETSC_TRUE;
3133: C->assembled = PETSC_TRUE;
3134: }
3135: }
3137: c->roworiented = a->roworiented;
3138: c->nonew = a->nonew;
3140: PetscLayoutReference(A->rmap,&C->rmap);
3141: PetscLayoutReference(A->cmap,&C->cmap);
3143: c->bs2 = a->bs2;
3144: c->mbs = a->mbs;
3145: c->nbs = a->nbs;
3147: if (a->diag) {
3148: if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3149: c->diag = a->diag;
3150: c->free_diag = PETSC_FALSE;
3151: } else {
3152: PetscMalloc1(mbs+1,&c->diag);
3153: PetscLogObjectMemory((PetscObject)C,(mbs+1)*sizeof(PetscInt));
3154: for (i=0; i<mbs; i++) c->diag[i] = a->diag[i];
3155: c->free_diag = PETSC_TRUE;
3156: }
3157: } else c->diag = 0;
3159: c->nz = a->nz;
3160: c->maxnz = a->nz; /* Since we allocate exactly the right amount */
3161: c->solve_work = NULL;
3162: c->mult_work = NULL;
3163: c->sor_workt = NULL;
3164: c->sor_work = NULL;
3166: c->compressedrow.use = a->compressedrow.use;
3167: c->compressedrow.nrows = a->compressedrow.nrows;
3168: if (a->compressedrow.use) {
3169: i = a->compressedrow.nrows;
3170: PetscMalloc2(i+1,&c->compressedrow.i,i+1,&c->compressedrow.rindex);
3171: PetscLogObjectMemory((PetscObject)C,(2*i+1)*sizeof(PetscInt));
3172: PetscMemcpy(c->compressedrow.i,a->compressedrow.i,(i+1)*sizeof(PetscInt));
3173: PetscMemcpy(c->compressedrow.rindex,a->compressedrow.rindex,i*sizeof(PetscInt));
3174: } else {
3175: c->compressedrow.use = PETSC_FALSE;
3176: c->compressedrow.i = NULL;
3177: c->compressedrow.rindex = NULL;
3178: }
3179: C->nonzerostate = A->nonzerostate;
3181: PetscFunctionListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);
3182: PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));
3183: return(0);
3184: }
3188: PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
3189: {
3193: MatCreate(PetscObjectComm((PetscObject)A),B);
3194: MatSetSizes(*B,A->rmap->N,A->cmap->n,A->rmap->N,A->cmap->n);
3195: MatSetType(*B,MATSEQBAIJ);
3196: MatDuplicateNoCreate_SeqBAIJ(*B,A,cpvalues,PETSC_TRUE);
3197: return(0);
3198: }
3202: PetscErrorCode MatLoad_SeqBAIJ(Mat newmat,PetscViewer viewer)
3203: {
3204: Mat_SeqBAIJ *a;
3206: PetscInt i,nz,header[4],*rowlengths=0,M,N,bs = newmat->rmap->bs;
3207: PetscInt *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
3208: PetscInt kmax,jcount,block,idx,point,nzcountb,extra_rows,rows,cols;
3209: PetscInt *masked,nmask,tmp,bs2,ishift;
3210: PetscMPIInt size;
3211: int fd;
3212: PetscScalar *aa;
3213: MPI_Comm comm;
3216: /* force binary viewer to load .info file if it has not yet done so */
3217: PetscViewerSetUp(viewer);
3218: PetscObjectGetComm((PetscObject)viewer,&comm);
3219: PetscOptionsBegin(comm,NULL,"Options for loading SEQBAIJ matrix","Mat");
3220: PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);
3221: PetscOptionsEnd();
3222: if (bs < 0) bs = 1;
3223: bs2 = bs*bs;
3225: MPI_Comm_size(comm,&size);
3226: if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"view must have one processor");
3227: PetscViewerBinaryGetDescriptor(viewer,&fd);
3228: PetscBinaryRead(fd,header,4,PETSC_INT);
3229: if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not Mat object");
3230: M = header[1]; N = header[2]; nz = header[3];
3232: if (header[3] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ");
3233: if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices");
3235: /*
3236: This code adds extra rows to make sure the number of rows is
3237: divisible by the blocksize
3238: */
3239: mbs = M/bs;
3240: extra_rows = bs - M + bs*(mbs);
3241: if (extra_rows == bs) extra_rows = 0;
3242: else mbs++;
3243: if (extra_rows) {
3244: PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");
3245: }
3247: /* Set global sizes if not already set */
3248: if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) {
3249: MatSetSizes(newmat,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows);
3250: } else { /* Check if the matrix global sizes are correct */
3251: MatGetSize(newmat,&rows,&cols);
3252: if (rows < 0 && cols < 0) { /* user might provide local size instead of global size */
3253: MatGetLocalSize(newmat,&rows,&cols);
3254: }
3255: if (M != rows || N != cols) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix in file of different length (%d, %d) than the input matrix (%d, %d)",M,N,rows,cols);
3256: }
3258: /* read in row lengths */
3259: PetscMalloc1(M+extra_rows,&rowlengths);
3260: PetscBinaryRead(fd,rowlengths,M,PETSC_INT);
3261: for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
3263: /* read in column indices */
3264: PetscMalloc1(nz+extra_rows,&jj);
3265: PetscBinaryRead(fd,jj,nz,PETSC_INT);
3266: for (i=0; i<extra_rows; i++) jj[nz+i] = M+i;
3268: /* loop over row lengths determining block row lengths */
3269: PetscCalloc1(mbs,&browlengths);
3270: PetscMalloc2(mbs,&mask,mbs,&masked);
3271: PetscMemzero(mask,mbs*sizeof(PetscInt));
3272: rowcount = 0;
3273: nzcount = 0;
3274: for (i=0; i<mbs; i++) {
3275: nmask = 0;
3276: for (j=0; j<bs; j++) {
3277: kmax = rowlengths[rowcount];
3278: for (k=0; k<kmax; k++) {
3279: tmp = jj[nzcount++]/bs;
3280: if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
3281: }
3282: rowcount++;
3283: }
3284: browlengths[i] += nmask;
3285: /* zero out the mask elements we set */
3286: for (j=0; j<nmask; j++) mask[masked[j]] = 0;
3287: }
3289: /* Do preallocation */
3290: MatSeqBAIJSetPreallocation_SeqBAIJ(newmat,bs,0,browlengths);
3291: a = (Mat_SeqBAIJ*)newmat->data;
3293: /* set matrix "i" values */
3294: a->i[0] = 0;
3295: for (i=1; i<= mbs; i++) {
3296: a->i[i] = a->i[i-1] + browlengths[i-1];
3297: a->ilen[i-1] = browlengths[i-1];
3298: }
3299: a->nz = 0;
3300: for (i=0; i<mbs; i++) a->nz += browlengths[i];
3302: /* read in nonzero values */
3303: PetscMalloc1(nz+extra_rows,&aa);
3304: PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);
3305: for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0;
3307: /* set "a" and "j" values into matrix */
3308: nzcount = 0; jcount = 0;
3309: for (i=0; i<mbs; i++) {
3310: nzcountb = nzcount;
3311: nmask = 0;
3312: for (j=0; j<bs; j++) {
3313: kmax = rowlengths[i*bs+j];
3314: for (k=0; k<kmax; k++) {
3315: tmp = jj[nzcount++]/bs;
3316: if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
3317: }
3318: }
3319: /* sort the masked values */
3320: PetscSortInt(nmask,masked);
3322: /* set "j" values into matrix */
3323: maskcount = 1;
3324: for (j=0; j<nmask; j++) {
3325: a->j[jcount++] = masked[j];
3326: mask[masked[j]] = maskcount++;
3327: }
3328: /* set "a" values into matrix */
3329: ishift = bs2*a->i[i];
3330: for (j=0; j<bs; j++) {
3331: kmax = rowlengths[i*bs+j];
3332: for (k=0; k<kmax; k++) {
3333: tmp = jj[nzcountb]/bs;
3334: block = mask[tmp] - 1;
3335: point = jj[nzcountb] - bs*tmp;
3336: idx = ishift + bs2*block + j + bs*point;
3337: a->a[idx] = (MatScalar)aa[nzcountb++];
3338: }
3339: }
3340: /* zero out the mask elements we set */
3341: for (j=0; j<nmask; j++) mask[masked[j]] = 0;
3342: }
3343: if (jcount != a->nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix");
3345: PetscFree(rowlengths);
3346: PetscFree(browlengths);
3347: PetscFree(aa);
3348: PetscFree(jj);
3349: PetscFree2(mask,masked);
3351: MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);
3352: MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);
3353: return(0);
3354: }
3358: /*@C
3359: MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
3360: compressed row) format. For good matrix assembly performance the
3361: user should preallocate the matrix storage by setting the parameter nz
3362: (or the array nnz). By setting these parameters accurately, performance
3363: during matrix assembly can be increased by more than a factor of 50.
3365: Collective on MPI_Comm
3367: Input Parameters:
3368: + comm - MPI communicator, set to PETSC_COMM_SELF
3369: . bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
3370: blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
3371: . m - number of rows
3372: . n - number of columns
3373: . nz - number of nonzero blocks per block row (same for all rows)
3374: - nnz - array containing the number of nonzero blocks in the various block rows
3375: (possibly different for each block row) or NULL
3377: Output Parameter:
3378: . A - the matrix
3380: It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3381: MatXXXXSetPreallocation() paradgm instead of this routine directly.
3382: [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3384: Options Database Keys:
3385: . -mat_no_unroll - uses code that does not unroll the loops in the
3386: block calculations (much slower)
3387: . -mat_block_size - size of the blocks to use
3389: Level: intermediate
3391: Notes:
3392: The number of rows and columns must be divisible by blocksize.
3394: If the nnz parameter is given then the nz parameter is ignored
3396: A nonzero block is any block that as 1 or more nonzeros in it
3398: The block AIJ format is fully compatible with standard Fortran 77
3399: storage. That is, the stored row and column indices can begin at
3400: either one (as in Fortran) or zero. See the users' manual for details.
3402: Specify the preallocated storage with either nz or nnz (not both).
3403: Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory
3404: allocation. See Users-Manual: ch_mat for details.
3405: matrices.
3407: .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ()
3408: @*/
3409: PetscErrorCode MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
3410: {
3414: MatCreate(comm,A);
3415: MatSetSizes(*A,m,n,m,n);
3416: MatSetType(*A,MATSEQBAIJ);
3417: MatSeqBAIJSetPreallocation_SeqBAIJ(*A,bs,nz,(PetscInt*)nnz);
3418: return(0);
3419: }
3423: /*@C
3424: MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros
3425: per row in the matrix. For good matrix assembly performance the
3426: user should preallocate the matrix storage by setting the parameter nz
3427: (or the array nnz). By setting these parameters accurately, performance
3428: during matrix assembly can be increased by more than a factor of 50.
3430: Collective on MPI_Comm
3432: Input Parameters:
3433: + B - the matrix
3434: . bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
3435: blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
3436: . nz - number of block nonzeros per block row (same for all rows)
3437: - nnz - array containing the number of block nonzeros in the various block rows
3438: (possibly different for each block row) or NULL
3440: Options Database Keys:
3441: . -mat_no_unroll - uses code that does not unroll the loops in the
3442: block calculations (much slower)
3443: . -mat_block_size - size of the blocks to use
3445: Level: intermediate
3447: Notes:
3448: If the nnz parameter is given then the nz parameter is ignored
3450: You can call MatGetInfo() to get information on how effective the preallocation was;
3451: for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3452: You can also run with the option -info and look for messages with the string
3453: malloc in them to see if additional memory allocation was needed.
3455: The block AIJ format is fully compatible with standard Fortran 77
3456: storage. That is, the stored row and column indices can begin at
3457: either one (as in Fortran) or zero. See the users' manual for details.
3459: Specify the preallocated storage with either nz or nnz (not both).
3460: Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory
3461: allocation. See Users-Manual: ch_mat for details.
3463: .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ(), MatGetInfo()
3464: @*/
3465: PetscErrorCode MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[])
3466: {
3473: PetscTryMethod(B,"MatSeqBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[]),(B,bs,nz,nnz));
3474: return(0);
3475: }
3479: /*@C
3480: MatSeqBAIJSetPreallocationCSR - Allocates memory for a sparse sequential matrix in AIJ format
3481: (the default sequential PETSc format).
3483: Collective on MPI_Comm
3485: Input Parameters:
3486: + B - the matrix
3487: . i - the indices into j for the start of each local row (starts with zero)
3488: . j - the column indices for each local row (starts with zero) these must be sorted for each row
3489: - v - optional values in the matrix
3491: Level: developer
3493: Notes:
3494: The order of the entries in values is specified by the MatOption MAT_ROW_ORIENTED. For example, C programs
3495: may want to use the default MAT_ROW_ORIENTED=PETSC_TRUE and use an array v[nnz][bs][bs] where the second index is
3496: over rows within a block and the last index is over columns within a block row. Fortran programs will likely set
3497: MAT_ROW_ORIENTED=PETSC_FALSE and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a
3498: block column and the second index is over columns within a block.
3500: .keywords: matrix, aij, compressed row, sparse
3502: .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatSeqBAIJSetPreallocation(), MATSEQBAIJ
3503: @*/
3504: PetscErrorCode MatSeqBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3505: {
3512: PetscTryMethod(B,"MatSeqBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));
3513: return(0);
3514: }
3519: /*@
3520: MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements provided by the user.
3522: Collective on MPI_Comm
3524: Input Parameters:
3525: + comm - must be an MPI communicator of size 1
3526: . bs - size of block
3527: . m - number of rows
3528: . n - number of columns
3529: . i - row indices
3530: . j - column indices
3531: - a - matrix values
3533: Output Parameter:
3534: . mat - the matrix
3536: Level: advanced
3538: Notes:
3539: The i, j, and a arrays are not copied by this routine, the user must free these arrays
3540: once the matrix is destroyed
3542: You cannot set new nonzero locations into this matrix, that will generate an error.
3544: The i and j indices are 0 based
3546: When block size is greater than 1 the matrix values must be stored using the BAIJ storage format (see the BAIJ code to determine this).
3548: The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is
3549: the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first
3550: block, followed by the second column of the first block etc etc. That is, the blocks are contiguous in memory
3551: with column-major ordering within blocks.
3553: .seealso: MatCreate(), MatCreateBAIJ(), MatCreateSeqBAIJ()
3555: @*/
3556: PetscErrorCode MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt *i,PetscInt *j,PetscScalar *a,Mat *mat)
3557: {
3559: PetscInt ii;
3560: Mat_SeqBAIJ *baij;
3563: if (bs != 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs);
3564: if (m > 0 && i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3566: MatCreate(comm,mat);
3567: MatSetSizes(*mat,m,n,m,n);
3568: MatSetType(*mat,MATSEQBAIJ);
3569: MatSeqBAIJSetPreallocation_SeqBAIJ(*mat,bs,MAT_SKIP_ALLOCATION,0);
3570: baij = (Mat_SeqBAIJ*)(*mat)->data;
3571: PetscMalloc2(m,&baij->imax,m,&baij->ilen);
3572: PetscLogObjectMemory((PetscObject)*mat,2*m*sizeof(PetscInt));
3574: baij->i = i;
3575: baij->j = j;
3576: baij->a = a;
3578: baij->singlemalloc = PETSC_FALSE;
3579: baij->nonew = -1; /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/
3580: baij->free_a = PETSC_FALSE;
3581: baij->free_ij = PETSC_FALSE;
3583: for (ii=0; ii<m; ii++) {
3584: baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii];
3585: #if defined(PETSC_USE_DEBUG)
3586: if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]);
3587: #endif
3588: }
3589: #if defined(PETSC_USE_DEBUG)
3590: for (ii=0; ii<baij->i[m]; ii++) {
3591: if (j[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]);
3592: if (j[ii] > n - 1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]);
3593: }
3594: #endif
3596: MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);
3597: MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);
3598: return(0);
3599: }
3603: PetscErrorCode MatCreateMPIMatConcatenateSeqMat_SeqBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
3604: {
3608: MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(comm,inmat,n,scall,outmat);
3609: return(0);
3610: }