Actual source code: baij.c
petsc-3.4.5 2014-06-29
2: /*
3: Defines the basic matrix operations for the BAIJ (compressed row)
4: matrix storage format.
5: */
6: #include <../src/mat/impls/baij/seq/baij.h> /*I "petscmat.h" I*/
7: #include <petscblaslapack.h>
8: #include <petsc-private/kernels/blockinvert.h>
9: #include <petsc-private/kernels/blockmatmult.h>
13: PetscErrorCode MatInvertBlockDiagonal_SeqBAIJ(Mat A,const PetscScalar **values)
14: {
15: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*) A->data;
17: PetscInt *diag_offset,i,bs = A->rmap->bs,mbs = a->mbs,ipvt[5],bs2 = bs*bs,*v_pivots;
18: MatScalar *v = a->a,*odiag,*diag,*mdiag,work[25],*v_work;
19: PetscReal shift = 0.0;
22: if (a->idiagvalid) {
23: if (values) *values = a->idiag;
24: return(0);
25: }
26: MatMarkDiagonal_SeqBAIJ(A);
27: diag_offset = a->diag;
28: if (!a->idiag) {
29: PetscMalloc(2*bs2*mbs*sizeof(PetscScalar),&a->idiag);
30: PetscLogObjectMemory(A,2*bs2*mbs*sizeof(PetscScalar));
31: }
32: diag = a->idiag;
33: mdiag = a->idiag+bs2*mbs;
34: if (values) *values = a->idiag;
35: /* factor and invert each block */
36: switch (bs) {
37: case 1:
38: for (i=0; i<mbs; i++) {
39: odiag = v + 1*diag_offset[i];
40: diag[0] = odiag[0];
41: mdiag[0] = odiag[0];
42: diag[0] = (PetscScalar)1.0 / (diag[0] + shift);
43: diag += 1;
44: mdiag += 1;
45: }
46: break;
47: case 2:
48: for (i=0; i<mbs; i++) {
49: odiag = v + 4*diag_offset[i];
50: diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
51: mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
52: PetscKernel_A_gets_inverse_A_2(diag,shift);
53: diag += 4;
54: mdiag += 4;
55: }
56: break;
57: case 3:
58: for (i=0; i<mbs; i++) {
59: odiag = v + 9*diag_offset[i];
60: diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
61: diag[4] = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7];
62: diag[8] = odiag[8];
63: mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
64: mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7];
65: mdiag[8] = odiag[8];
66: PetscKernel_A_gets_inverse_A_3(diag,shift);
67: diag += 9;
68: mdiag += 9;
69: }
70: break;
71: case 4:
72: for (i=0; i<mbs; i++) {
73: odiag = v + 16*diag_offset[i];
74: PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));
75: PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));
76: PetscKernel_A_gets_inverse_A_4(diag,shift);
77: diag += 16;
78: mdiag += 16;
79: }
80: break;
81: case 5:
82: for (i=0; i<mbs; i++) {
83: odiag = v + 25*diag_offset[i];
84: PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));
85: PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));
86: PetscKernel_A_gets_inverse_A_5(diag,ipvt,work,shift);
87: diag += 25;
88: mdiag += 25;
89: }
90: break;
91: case 6:
92: for (i=0; i<mbs; i++) {
93: odiag = v + 36*diag_offset[i];
94: PetscMemcpy(diag,odiag,36*sizeof(PetscScalar));
95: PetscMemcpy(mdiag,odiag,36*sizeof(PetscScalar));
96: PetscKernel_A_gets_inverse_A_6(diag,shift);
97: diag += 36;
98: mdiag += 36;
99: }
100: break;
101: case 7:
102: for (i=0; i<mbs; i++) {
103: odiag = v + 49*diag_offset[i];
104: PetscMemcpy(diag,odiag,49*sizeof(PetscScalar));
105: PetscMemcpy(mdiag,odiag,49*sizeof(PetscScalar));
106: PetscKernel_A_gets_inverse_A_7(diag,shift);
107: diag += 49;
108: mdiag += 49;
109: }
110: break;
111: default:
112: PetscMalloc2(bs,MatScalar,&v_work,bs,PetscInt,&v_pivots);
113: for (i=0; i<mbs; i++) {
114: odiag = v + bs2*diag_offset[i];
115: PetscMemcpy(diag,odiag,bs2*sizeof(PetscScalar));
116: PetscMemcpy(mdiag,odiag,bs2*sizeof(PetscScalar));
117: PetscKernel_A_gets_inverse_A(bs,diag,v_pivots,v_work);
118: diag += bs2;
119: mdiag += bs2;
120: }
121: PetscFree2(v_work,v_pivots);
122: }
123: a->idiagvalid = PETSC_TRUE;
124: return(0);
125: }
129: PetscErrorCode MatSOR_SeqBAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
130: {
131: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
132: PetscScalar *x,*work,*w,*workt,*t;
133: const MatScalar *v,*aa = a->a, *idiag;
134: const PetscScalar *b,*xb;
135: PetscScalar s[7], xw[7]={0}; /* avoid some compilers thinking xw is uninitialized */
136: PetscErrorCode ierr;
137: PetscInt m = a->mbs,i,i2,nz,bs = A->rmap->bs,bs2 = bs*bs,k,j,idx,it;
138: const PetscInt *diag,*ai = a->i,*aj = a->j,*vi;
141: its = its*lits;
142: if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
143: if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
144: if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
145: if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
146: if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
148: if (!a->idiagvalid) {MatInvertBlockDiagonal(A,NULL);}
150: if (!m) return(0);
151: diag = a->diag;
152: idiag = a->idiag;
153: k = PetscMax(A->rmap->n,A->cmap->n);
154: if (!a->mult_work) {
155: PetscMalloc((2*k+1)*sizeof(PetscScalar),&a->mult_work);
156: }
157: work = a->mult_work;
158: t = work + k+1;
159: if (!a->sor_work) {
160: PetscMalloc(bs*sizeof(PetscScalar),&a->sor_work);
161: }
162: w = a->sor_work;
164: VecGetArray(xx,&x);
165: VecGetArrayRead(bb,&b);
167: if (flag & SOR_ZERO_INITIAL_GUESS) {
168: if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
169: switch (bs) {
170: case 1:
171: PetscKernel_v_gets_A_times_w_1(x,idiag,b);
172: t[0] = b[0];
173: i2 = 1;
174: idiag += 1;
175: for (i=1; i<m; i++) {
176: v = aa + ai[i];
177: vi = aj + ai[i];
178: nz = diag[i] - ai[i];
179: s[0] = b[i2];
180: for (j=0; j<nz; j++) {
181: xw[0] = x[vi[j]];
182: PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
183: }
184: t[i2] = s[0];
185: PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
186: x[i2] = xw[0];
187: idiag += 1;
188: i2 += 1;
189: }
190: break;
191: case 2:
192: PetscKernel_v_gets_A_times_w_2(x,idiag,b);
193: t[0] = b[0]; t[1] = b[1];
194: i2 = 2;
195: idiag += 4;
196: for (i=1; i<m; i++) {
197: v = aa + 4*ai[i];
198: vi = aj + ai[i];
199: nz = diag[i] - ai[i];
200: s[0] = b[i2]; s[1] = b[i2+1];
201: for (j=0; j<nz; j++) {
202: idx = 2*vi[j];
203: it = 4*j;
204: xw[0] = x[idx]; xw[1] = x[1+idx];
205: PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
206: }
207: t[i2] = s[0]; t[i2+1] = s[1];
208: PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
209: x[i2] = xw[0]; x[i2+1] = xw[1];
210: idiag += 4;
211: i2 += 2;
212: }
213: break;
214: case 3:
215: PetscKernel_v_gets_A_times_w_3(x,idiag,b);
216: t[0] = b[0]; t[1] = b[1]; t[2] = b[2];
217: i2 = 3;
218: idiag += 9;
219: for (i=1; i<m; i++) {
220: v = aa + 9*ai[i];
221: vi = aj + ai[i];
222: nz = diag[i] - ai[i];
223: s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2];
224: while (nz--) {
225: idx = 3*(*vi++);
226: xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
227: PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
228: v += 9;
229: }
230: t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2];
231: PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
232: x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
233: idiag += 9;
234: i2 += 3;
235: }
236: break;
237: case 4:
238: PetscKernel_v_gets_A_times_w_4(x,idiag,b);
239: t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3];
240: i2 = 4;
241: idiag += 16;
242: for (i=1; i<m; i++) {
243: v = aa + 16*ai[i];
244: vi = aj + ai[i];
245: nz = diag[i] - ai[i];
246: s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3];
247: while (nz--) {
248: idx = 4*(*vi++);
249: xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
250: PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
251: v += 16;
252: }
253: t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; t[i2 + 3] = s[3];
254: PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
255: x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3];
256: idiag += 16;
257: i2 += 4;
258: }
259: break;
260: case 5:
261: PetscKernel_v_gets_A_times_w_5(x,idiag,b);
262: t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; t[4] = b[4];
263: i2 = 5;
264: idiag += 25;
265: for (i=1; i<m; i++) {
266: v = aa + 25*ai[i];
267: vi = aj + ai[i];
268: nz = diag[i] - ai[i];
269: s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4];
270: while (nz--) {
271: idx = 5*(*vi++);
272: xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
273: PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
274: v += 25;
275: }
276: t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; t[i2+3] = s[3]; t[i2+4] = s[4];
277: PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
278: x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4];
279: idiag += 25;
280: i2 += 5;
281: }
282: break;
283: case 6:
284: PetscKernel_v_gets_A_times_w_6(x,idiag,b);
285: t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; t[4] = b[4]; t[5] = b[5];
286: i2 = 6;
287: idiag += 36;
288: for (i=1; i<m; i++) {
289: v = aa + 36*ai[i];
290: vi = aj + ai[i];
291: nz = diag[i] - ai[i];
292: s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5];
293: while (nz--) {
294: idx = 6*(*vi++);
295: xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
296: xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
297: PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
298: v += 36;
299: }
300: t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2];
301: t[i2+3] = s[3]; t[i2+4] = s[4]; t[i2+5] = s[5];
302: PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
303: x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5];
304: idiag += 36;
305: i2 += 6;
306: }
307: break;
308: case 7:
309: PetscKernel_v_gets_A_times_w_7(x,idiag,b);
310: t[0] = b[0]; t[1] = b[1]; t[2] = b[2];
311: t[3] = b[3]; t[4] = b[4]; t[5] = b[5]; t[6] = b[6];
312: i2 = 7;
313: idiag += 49;
314: for (i=1; i<m; i++) {
315: v = aa + 49*ai[i];
316: vi = aj + ai[i];
317: nz = diag[i] - ai[i];
318: s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2];
319: s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6];
320: while (nz--) {
321: idx = 7*(*vi++);
322: xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
323: xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
324: PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
325: v += 49;
326: }
327: t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2];
328: t[i2+3] = s[3]; t[i2+4] = s[4]; t[i2+5] = s[5]; t[i2+6] = s[6];
329: PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
330: x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
331: x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6];
332: idiag += 49;
333: i2 += 7;
334: }
335: break;
336: default:
337: PetscKernel_w_gets_Ar_times_v(bs,bs,b,idiag,x);
338: PetscMemcpy(t,b,bs*sizeof(PetscScalar));
339: i2 = bs;
340: idiag += bs2;
341: for (i=1; i<m; i++) {
342: v = aa + bs2*ai[i];
343: vi = aj + ai[i];
344: nz = diag[i] - ai[i];
346: PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));
347: /* copy all rows of x that are needed into contiguous space */
348: workt = work;
349: for (j=0; j<nz; j++) {
350: PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));
351: workt += bs;
352: }
353: PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
354: PetscMemcpy(t+i2,w,bs*sizeof(PetscScalar));
355: PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
357: idiag += bs2;
358: i2 += bs;
359: }
360: break;
361: }
362: /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
363: PetscLogFlops(1.0*bs2*a->nz);
364: xb = t;
365: }
366: else xb = b;
367: if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
368: idiag = a->idiag+bs2*(a->mbs-1);
369: i2 = bs * (m-1);
370: switch (bs) {
371: case 1:
372: s[0] = xb[i2];
373: PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
374: x[i2] = xw[0];
375: i2 -= 1;
376: for (i=m-2; i>=0; i--) {
377: v = aa + (diag[i]+1);
378: vi = aj + diag[i] + 1;
379: nz = ai[i+1] - diag[i] - 1;
380: s[0] = xb[i2];
381: for (j=0; j<nz; j++) {
382: xw[0] = x[vi[j]];
383: PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
384: }
385: PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
386: x[i2] = xw[0];
387: idiag -= 1;
388: i2 -= 1;
389: }
390: break;
391: case 2:
392: s[0] = xb[i2]; s[1] = xb[i2+1];
393: PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
394: x[i2] = xw[0]; x[i2+1] = xw[1];
395: i2 -= 2;
396: idiag -= 4;
397: for (i=m-2; i>=0; i--) {
398: v = aa + 4*(diag[i] + 1);
399: vi = aj + diag[i] + 1;
400: nz = ai[i+1] - diag[i] - 1;
401: s[0] = xb[i2]; s[1] = xb[i2+1];
402: for (j=0; j<nz; j++) {
403: idx = 2*vi[j];
404: it = 4*j;
405: xw[0] = x[idx]; xw[1] = x[1+idx];
406: PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
407: }
408: PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
409: x[i2] = xw[0]; x[i2+1] = xw[1];
410: idiag -= 4;
411: i2 -= 2;
412: }
413: break;
414: case 3:
415: s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2];
416: PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
417: x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
418: i2 -= 3;
419: idiag -= 9;
420: for (i=m-2; i>=0; i--) {
421: v = aa + 9*(diag[i]+1);
422: vi = aj + diag[i] + 1;
423: nz = ai[i+1] - diag[i] - 1;
424: s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2];
425: while (nz--) {
426: idx = 3*(*vi++);
427: xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
428: PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
429: v += 9;
430: }
431: PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
432: x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
433: idiag -= 9;
434: i2 -= 3;
435: }
436: break;
437: case 4:
438: s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3];
439: PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
440: x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3];
441: i2 -= 4;
442: idiag -= 16;
443: for (i=m-2; i>=0; i--) {
444: v = aa + 16*(diag[i]+1);
445: vi = aj + diag[i] + 1;
446: nz = ai[i+1] - diag[i] - 1;
447: s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3];
448: while (nz--) {
449: idx = 4*(*vi++);
450: xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
451: PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
452: v += 16;
453: }
454: PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
455: x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3];
456: idiag -= 16;
457: i2 -= 4;
458: }
459: break;
460: case 5:
461: s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4];
462: PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
463: x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4];
464: i2 -= 5;
465: idiag -= 25;
466: for (i=m-2; i>=0; i--) {
467: v = aa + 25*(diag[i]+1);
468: vi = aj + diag[i] + 1;
469: nz = ai[i+1] - diag[i] - 1;
470: s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4];
471: while (nz--) {
472: idx = 5*(*vi++);
473: xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
474: PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
475: v += 25;
476: }
477: PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
478: x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4];
479: idiag -= 25;
480: i2 -= 5;
481: }
482: break;
483: case 6:
484: s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5];
485: PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
486: x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5];
487: i2 -= 6;
488: idiag -= 36;
489: for (i=m-2; i>=0; i--) {
490: v = aa + 36*(diag[i]+1);
491: vi = aj + diag[i] + 1;
492: nz = ai[i+1] - diag[i] - 1;
493: s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5];
494: while (nz--) {
495: idx = 6*(*vi++);
496: xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
497: xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
498: PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
499: v += 36;
500: }
501: PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
502: x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5];
503: idiag -= 36;
504: i2 -= 6;
505: }
506: break;
507: case 7:
508: s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2];
509: s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; s[6] = xb[i2+6];
510: PetscKernel_v_gets_A_times_w_7(x,idiag,b);
511: x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
512: x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6];
513: i2 -= 7;
514: idiag -= 49;
515: for (i=m-2; i>=0; i--) {
516: v = aa + 49*(diag[i]+1);
517: vi = aj + diag[i] + 1;
518: nz = ai[i+1] - diag[i] - 1;
519: s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2];
520: s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; s[6] = xb[i2+6];
521: while (nz--) {
522: idx = 7*(*vi++);
523: xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
524: xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
525: PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
526: v += 49;
527: }
528: PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
529: x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
530: x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6];
531: idiag -= 49;
532: i2 -= 7;
533: }
534: break;
535: default:
536: PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));
537: PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
538: i2 -= bs;
539: idiag -= bs2;
540: for (i=m-2; i>=0; i--) {
541: v = aa + bs2*(diag[i]+1);
542: vi = aj + diag[i] + 1;
543: nz = ai[i+1] - diag[i] - 1;
545: PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));
546: /* copy all rows of x that are needed into contiguous space */
547: workt = work;
548: for (j=0; j<nz; j++) {
549: PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));
550: workt += bs;
551: }
552: PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
553: PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
555: idiag -= bs2;
556: i2 -= bs;
557: }
558: break;
559: }
560: PetscLogFlops(1.0*bs2*(a->nz));
561: }
562: its--;
563: }
564: while (its--) {
565: if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
566: idiag = a->idiag;
567: i2 = 0;
568: switch (bs) {
569: case 1:
570: for (i=0; i<m; i++) {
571: v = aa + ai[i];
572: vi = aj + ai[i];
573: nz = ai[i+1] - ai[i];
574: s[0] = b[i2];
575: for (j=0; j<nz; j++) {
576: xw[0] = x[vi[j]];
577: PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
578: }
579: PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
580: x[i2] += xw[0];
581: idiag += 1;
582: i2 += 1;
583: }
584: break;
585: case 2:
586: for (i=0; i<m; i++) {
587: v = aa + 4*ai[i];
588: vi = aj + ai[i];
589: nz = ai[i+1] - ai[i];
590: s[0] = b[i2]; s[1] = b[i2+1];
591: for (j=0; j<nz; j++) {
592: idx = 2*vi[j];
593: it = 4*j;
594: xw[0] = x[idx]; xw[1] = x[1+idx];
595: PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
596: }
597: PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
598: x[i2] += xw[0]; x[i2+1] += xw[1];
599: idiag += 4;
600: i2 += 2;
601: }
602: break;
603: case 3:
604: for (i=0; i<m; i++) {
605: v = aa + 9*ai[i];
606: vi = aj + ai[i];
607: nz = ai[i+1] - ai[i];
608: s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2];
609: while (nz--) {
610: idx = 3*(*vi++);
611: xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
612: PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
613: v += 9;
614: }
615: PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
616: x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
617: idiag += 9;
618: i2 += 3;
619: }
620: break;
621: case 4:
622: for (i=0; i<m; i++) {
623: v = aa + 16*ai[i];
624: vi = aj + ai[i];
625: nz = ai[i+1] - ai[i];
626: s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3];
627: while (nz--) {
628: idx = 4*(*vi++);
629: xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
630: PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
631: v += 16;
632: }
633: PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
634: x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3];
635: idiag += 16;
636: i2 += 4;
637: }
638: break;
639: case 5:
640: for (i=0; i<m; i++) {
641: v = aa + 25*ai[i];
642: vi = aj + ai[i];
643: nz = ai[i+1] - ai[i];
644: s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4];
645: while (nz--) {
646: idx = 5*(*vi++);
647: xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
648: PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
649: v += 25;
650: }
651: PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
652: x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; x[i2+4] += xw[4];
653: idiag += 25;
654: i2 += 5;
655: }
656: break;
657: case 6:
658: for (i=0; i<m; i++) {
659: v = aa + 36*ai[i];
660: vi = aj + ai[i];
661: nz = ai[i+1] - ai[i];
662: s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5];
663: while (nz--) {
664: idx = 6*(*vi++);
665: xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
666: xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
667: PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
668: v += 36;
669: }
670: PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
671: x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
672: x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5];
673: idiag += 36;
674: i2 += 6;
675: }
676: break;
677: case 7:
678: for (i=0; i<m; i++) {
679: v = aa + 49*ai[i];
680: vi = aj + ai[i];
681: nz = ai[i+1] - ai[i];
682: s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2];
683: s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6];
684: while (nz--) {
685: idx = 7*(*vi++);
686: xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
687: xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
688: PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
689: v += 49;
690: }
691: PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
692: x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
693: x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; x[i2+6] += xw[6];
694: idiag += 49;
695: i2 += 7;
696: }
697: break;
698: default:
699: for (i=0; i<m; i++) {
700: v = aa + bs2*ai[i];
701: vi = aj + ai[i];
702: nz = ai[i+1] - ai[i];
704: PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));
705: /* copy all rows of x that are needed into contiguous space */
706: workt = work;
707: for (j=0; j<nz; j++) {
708: PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));
709: workt += bs;
710: }
711: PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
712: PetscKernel_w_gets_w_plus_Ar_times_v(bs,bs,w,idiag,x+i2);
714: idiag += bs2;
715: i2 += bs;
716: }
717: break;
718: }
719: PetscLogFlops(2.0*bs2*a->nz);
720: }
721: if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
722: idiag = a->idiag+bs2*(a->mbs-1);
723: i2 = bs * (m-1);
724: switch (bs) {
725: case 1:
726: for (i=m-1; i>=0; i--) {
727: v = aa + ai[i];
728: vi = aj + ai[i];
729: nz = ai[i+1] - ai[i];
730: s[0] = b[i2];
731: for (j=0; j<nz; j++) {
732: xw[0] = x[vi[j]];
733: PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
734: }
735: PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
736: x[i2] += xw[0];
737: idiag -= 1;
738: i2 -= 1;
739: }
740: break;
741: case 2:
742: for (i=m-1; i>=0; i--) {
743: v = aa + 4*ai[i];
744: vi = aj + ai[i];
745: nz = ai[i+1] - ai[i];
746: s[0] = b[i2]; s[1] = b[i2+1];
747: for (j=0; j<nz; j++) {
748: idx = 2*vi[j];
749: it = 4*j;
750: xw[0] = x[idx]; xw[1] = x[1+idx];
751: PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
752: }
753: PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
754: x[i2] += xw[0]; x[i2+1] += xw[1];
755: idiag -= 4;
756: i2 -= 2;
757: }
758: break;
759: case 3:
760: for (i=m-1; i>=0; i--) {
761: v = aa + 9*ai[i];
762: vi = aj + ai[i];
763: nz = ai[i+1] - ai[i];
764: s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2];
765: while (nz--) {
766: idx = 3*(*vi++);
767: xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
768: PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
769: v += 9;
770: }
771: PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
772: x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
773: idiag -= 9;
774: i2 -= 3;
775: }
776: break;
777: case 4:
778: for (i=m-1; i>=0; i--) {
779: v = aa + 16*ai[i];
780: vi = aj + ai[i];
781: nz = ai[i+1] - ai[i];
782: s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3];
783: while (nz--) {
784: idx = 4*(*vi++);
785: xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
786: PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
787: v += 16;
788: }
789: PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
790: x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3];
791: idiag -= 16;
792: i2 -= 4;
793: }
794: break;
795: case 5:
796: for (i=m-1; i>=0; i--) {
797: v = aa + 25*ai[i];
798: vi = aj + ai[i];
799: nz = ai[i+1] - ai[i];
800: s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4];
801: while (nz--) {
802: idx = 5*(*vi++);
803: xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
804: PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
805: v += 25;
806: }
807: PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
808: x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; x[i2+4] += xw[4];
809: idiag -= 25;
810: i2 -= 5;
811: }
812: break;
813: case 6:
814: for (i=m-1; i>=0; i--) {
815: v = aa + 36*ai[i];
816: vi = aj + ai[i];
817: nz = ai[i+1] - ai[i];
818: s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5];
819: while (nz--) {
820: idx = 6*(*vi++);
821: xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
822: xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
823: PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
824: v += 36;
825: }
826: PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
827: x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
828: x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5];
829: idiag -= 36;
830: i2 -= 6;
831: }
832: break;
833: case 7:
834: for (i=m-1; i>=0; i--) {
835: v = aa + 49*ai[i];
836: vi = aj + ai[i];
837: nz = ai[i+1] - ai[i];
838: s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2];
839: s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6];
840: while (nz--) {
841: idx = 7*(*vi++);
842: xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
843: xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
844: PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
845: v += 49;
846: }
847: PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
848: x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
849: x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; x[i2+6] += xw[6];
850: idiag -= 49;
851: i2 -= 7;
852: }
853: break;
854: default:
855: for (i=m-1; i>=0; i--) {
856: v = aa + bs2*ai[i];
857: vi = aj + ai[i];
858: nz = ai[i+1] - ai[i];
860: PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));
861: /* copy all rows of x that are needed into contiguous space */
862: workt = work;
863: for (j=0; j<nz; j++) {
864: PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));
865: workt += bs;
866: }
867: PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
868: PetscKernel_w_gets_w_plus_Ar_times_v(bs,bs,w,idiag,x+i2);
870: idiag -= bs2;
871: i2 -= bs;
872: }
873: break;
874: }
875: PetscLogFlops(2.0*bs2*(a->nz));
876: }
877: }
878: VecRestoreArray(xx,&x);
879: VecRestoreArrayRead(bb,&b);
880: return(0);
881: }
884: /*
885: Special version for direct calls from Fortran (Used in PETSc-fun3d)
886: */
887: #if defined(PETSC_HAVE_FORTRAN_CAPS)
888: #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4
889: #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
890: #define matsetvaluesblocked4_ matsetvaluesblocked4
891: #endif
895: PETSC_EXTERN void matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[])
896: {
897: Mat A = *AA;
898: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
899: PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn;
900: PetscInt *ai =a->i,*ailen=a->ilen;
901: PetscInt *aj =a->j,stepval,lastcol = -1;
902: const PetscScalar *value = v;
903: MatScalar *ap,*aa = a->a,*bap;
906: if (A->rmap->bs != 4) SETERRABORT(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4");
907: stepval = (n-1)*4;
908: for (k=0; k<m; k++) { /* loop over added rows */
909: row = im[k];
910: rp = aj + ai[row];
911: ap = aa + 16*ai[row];
912: nrow = ailen[row];
913: low = 0;
914: high = nrow;
915: for (l=0; l<n; l++) { /* loop over added columns */
916: col = in[l];
917: if (col <= lastcol) low = 0;
918: else high = nrow;
919: lastcol = col;
920: value = v + k*(stepval+4 + l)*4;
921: while (high-low > 7) {
922: t = (low+high)/2;
923: if (rp[t] > col) high = t;
924: else low = t;
925: }
926: for (i=low; i<high; i++) {
927: if (rp[i] > col) break;
928: if (rp[i] == col) {
929: bap = ap + 16*i;
930: for (ii=0; ii<4; ii++,value+=stepval) {
931: for (jj=ii; jj<16; jj+=4) {
932: bap[jj] += *value++;
933: }
934: }
935: goto noinsert2;
936: }
937: }
938: N = nrow++ - 1;
939: high++; /* added new column index thus must search to one higher than before */
940: /* shift up all the later entries in this row */
941: for (ii=N; ii>=i; ii--) {
942: rp[ii+1] = rp[ii];
943: PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
944: }
945: if (N >= i) {
946: PetscMemzero(ap+16*i,16*sizeof(MatScalar));
947: }
948: rp[i] = col;
949: bap = ap + 16*i;
950: for (ii=0; ii<4; ii++,value+=stepval) {
951: for (jj=ii; jj<16; jj+=4) {
952: bap[jj] = *value++;
953: }
954: }
955: noinsert2:;
956: low = i;
957: }
958: ailen[row] = nrow;
959: }
960: PetscFunctionReturnVoid();
961: }
963: #if defined(PETSC_HAVE_FORTRAN_CAPS)
964: #define matsetvalues4_ MATSETVALUES4
965: #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
966: #define matsetvalues4_ matsetvalues4
967: #endif
971: PETSC_EXTERN void matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v)
972: {
973: Mat A = *AA;
974: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
975: PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm;
976: PetscInt *ai=a->i,*ailen=a->ilen;
977: PetscInt *aj=a->j,brow,bcol;
978: PetscInt ridx,cidx,lastcol = -1;
979: MatScalar *ap,value,*aa=a->a,*bap;
982: for (k=0; k<m; k++) { /* loop over added rows */
983: row = im[k]; brow = row/4;
984: rp = aj + ai[brow];
985: ap = aa + 16*ai[brow];
986: nrow = ailen[brow];
987: low = 0;
988: high = nrow;
989: for (l=0; l<n; l++) { /* loop over added columns */
990: col = in[l]; bcol = col/4;
991: ridx = row % 4; cidx = col % 4;
992: value = v[l + k*n];
993: if (col <= lastcol) low = 0;
994: else high = nrow;
995: lastcol = col;
996: while (high-low > 7) {
997: t = (low+high)/2;
998: if (rp[t] > bcol) high = t;
999: else low = t;
1000: }
1001: for (i=low; i<high; i++) {
1002: if (rp[i] > bcol) break;
1003: if (rp[i] == bcol) {
1004: bap = ap + 16*i + 4*cidx + ridx;
1005: *bap += value;
1006: goto noinsert1;
1007: }
1008: }
1009: N = nrow++ - 1;
1010: high++; /* added new column thus must search to one higher than before */
1011: /* shift up all the later entries in this row */
1012: for (ii=N; ii>=i; ii--) {
1013: rp[ii+1] = rp[ii];
1014: PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
1015: }
1016: if (N>=i) {
1017: PetscMemzero(ap+16*i,16*sizeof(MatScalar));
1018: }
1019: rp[i] = bcol;
1020: ap[16*i + 4*cidx + ridx] = value;
1021: noinsert1:;
1022: low = i;
1023: }
1024: ailen[brow] = nrow;
1025: }
1026: PetscFunctionReturnVoid();
1027: }
1029: /*
1030: Checks for missing diagonals
1031: */
1034: PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A,PetscBool *missing,PetscInt *d)
1035: {
1036: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1038: PetscInt *diag,*jj = a->j,i;
1041: MatMarkDiagonal_SeqBAIJ(A);
1042: *missing = PETSC_FALSE;
1043: if (A->rmap->n > 0 && !jj) {
1044: *missing = PETSC_TRUE;
1045: if (d) *d = 0;
1046: PetscInfo(A,"Matrix has no entries therefore is missing diagonal");
1047: } else {
1048: diag = a->diag;
1049: for (i=0; i<a->mbs; i++) {
1050: if (jj[diag[i]] != i) {
1051: *missing = PETSC_TRUE;
1052: if (d) *d = i;
1053: PetscInfo1(A,"Matrix is missing block diagonal number %D",i);
1054: break;
1055: }
1056: }
1057: }
1058: return(0);
1059: }
1063: PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A)
1064: {
1065: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1067: PetscInt i,j,m = a->mbs;
1070: if (!a->diag) {
1071: PetscMalloc(m*sizeof(PetscInt),&a->diag);
1072: PetscLogObjectMemory(A,m*sizeof(PetscInt));
1073: a->free_diag = PETSC_TRUE;
1074: }
1075: for (i=0; i<m; i++) {
1076: a->diag[i] = a->i[i+1];
1077: for (j=a->i[i]; j<a->i[i+1]; j++) {
1078: if (a->j[j] == i) {
1079: a->diag[i] = j;
1080: break;
1081: }
1082: }
1083: }
1084: return(0);
1085: }
1090: static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *inia[],const PetscInt *inja[],PetscBool *done)
1091: {
1092: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1094: PetscInt i,j,n = a->mbs,nz = a->i[n],*tia,*tja,bs = A->rmap->bs,k,l,cnt;
1095: PetscInt **ia = (PetscInt**)inia,**ja = (PetscInt**)inja;
1098: *nn = n;
1099: if (!ia) return(0);
1100: if (symmetric) {
1101: MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,0,&tia,&tja);
1102: nz = tia[n];
1103: } else {
1104: tia = a->i; tja = a->j;
1105: }
1107: if (!blockcompressed && bs > 1) {
1108: (*nn) *= bs;
1109: /* malloc & create the natural set of indices */
1110: PetscMalloc((n+1)*bs*sizeof(PetscInt),ia);
1111: if (n) {
1112: (*ia)[0] = 0;
1113: for (j=1; j<bs; j++) {
1114: (*ia)[j] = (tia[1]-tia[0])*bs+(*ia)[j-1];
1115: }
1116: }
1118: for (i=1; i<n; i++) {
1119: (*ia)[i*bs] = (tia[i]-tia[i-1])*bs + (*ia)[i*bs-1];
1120: for (j=1; j<bs; j++) {
1121: (*ia)[i*bs+j] = (tia[i+1]-tia[i])*bs + (*ia)[i*bs+j-1];
1122: }
1123: }
1124: if (n) {
1125: (*ia)[n*bs] = (tia[n]-tia[n-1])*bs + (*ia)[n*bs-1];
1126: }
1128: if (inja) {
1129: PetscMalloc(nz*bs*bs*sizeof(PetscInt),ja);
1130: cnt = 0;
1131: for (i=0; i<n; i++) {
1132: for (j=0; j<bs; j++) {
1133: for (k=tia[i]; k<tia[i+1]; k++) {
1134: for (l=0; l<bs; l++) {
1135: (*ja)[cnt++] = bs*tja[k] + l;
1136: }
1137: }
1138: }
1139: }
1140: }
1142: if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */
1143: PetscFree(tia);
1144: PetscFree(tja);
1145: }
1146: } else if (oshift == 1) {
1147: if (symmetric) {
1148: nz = tia[A->rmap->n/bs];
1149: /* add 1 to i and j indices */
1150: for (i=0; i<A->rmap->n/bs+1; i++) tia[i] = tia[i] + 1;
1151: *ia = tia;
1152: if (ja) {
1153: for (i=0; i<nz; i++) tja[i] = tja[i] + 1;
1154: *ja = tja;
1155: }
1156: } else {
1157: nz = a->i[A->rmap->n/bs];
1158: /* malloc space and add 1 to i and j indices */
1159: PetscMalloc((A->rmap->n/bs+1)*sizeof(PetscInt),ia);
1160: for (i=0; i<A->rmap->n/bs+1; i++) (*ia)[i] = a->i[i] + 1;
1161: if (ja) {
1162: PetscMalloc(nz*sizeof(PetscInt),ja);
1163: for (i=0; i<nz; i++) (*ja)[i] = a->j[i] + 1;
1164: }
1165: }
1166: } else {
1167: *ia = tia;
1168: if (ja) *ja = tja;
1169: }
1170: return(0);
1171: }
1175: static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool *done)
1176: {
1180: if (!ia) return(0);
1181: if ((!blockcompressed && A->rmap->bs > 1) || (symmetric || oshift == 1)) {
1182: PetscFree(*ia);
1183: if (ja) {PetscFree(*ja);}
1184: }
1185: return(0);
1186: }
1190: PetscErrorCode MatDestroy_SeqBAIJ(Mat A)
1191: {
1192: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1196: #if defined(PETSC_USE_LOG)
1197: PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap->N,A->cmap->n,a->nz);
1198: #endif
1199: MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);
1200: ISDestroy(&a->row);
1201: ISDestroy(&a->col);
1202: if (a->free_diag) {PetscFree(a->diag);}
1203: PetscFree(a->idiag);
1204: if (a->free_imax_ilen) {PetscFree2(a->imax,a->ilen);}
1205: PetscFree(a->solve_work);
1206: PetscFree(a->mult_work);
1207: PetscFree(a->sor_work);
1208: ISDestroy(&a->icol);
1209: PetscFree(a->saved_values);
1210: PetscFree(a->xtoy);
1211: PetscFree2(a->compressedrow.i,a->compressedrow.rindex);
1213: MatDestroy(&a->sbaijMat);
1214: MatDestroy(&a->parent);
1215: PetscFree(A->data);
1217: PetscObjectChangeTypeName((PetscObject)A,0);
1218: PetscObjectComposeFunction((PetscObject)A,"MatInvertBlockDiagonal_C",NULL);
1219: PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C",NULL);
1220: PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C",NULL);
1221: PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C",NULL);
1222: PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C",NULL);
1223: PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C",NULL);
1224: PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C",NULL);
1225: PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocationCSR_C",NULL);
1226: PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqbstrm_C",NULL);
1227: PetscObjectComposeFunction((PetscObject)A,"MatIsTranspose_C",NULL);
1228: return(0);
1229: }
1233: PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op,PetscBool flg)
1234: {
1235: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1239: switch (op) {
1240: case MAT_ROW_ORIENTED:
1241: a->roworiented = flg;
1242: break;
1243: case MAT_KEEP_NONZERO_PATTERN:
1244: a->keepnonzeropattern = flg;
1245: break;
1246: case MAT_NEW_NONZERO_LOCATIONS:
1247: a->nonew = (flg ? 0 : 1);
1248: break;
1249: case MAT_NEW_NONZERO_LOCATION_ERR:
1250: a->nonew = (flg ? -1 : 0);
1251: break;
1252: case MAT_NEW_NONZERO_ALLOCATION_ERR:
1253: a->nonew = (flg ? -2 : 0);
1254: break;
1255: case MAT_UNUSED_NONZERO_LOCATION_ERR:
1256: a->nounused = (flg ? -1 : 0);
1257: break;
1258: case MAT_CHECK_COMPRESSED_ROW:
1259: a->compressedrow.check = flg;
1260: break;
1261: case MAT_NEW_DIAGONALS:
1262: case MAT_IGNORE_OFF_PROC_ENTRIES:
1263: case MAT_USE_HASH_TABLE:
1264: PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);
1265: break;
1266: case MAT_SPD:
1267: case MAT_SYMMETRIC:
1268: case MAT_STRUCTURALLY_SYMMETRIC:
1269: case MAT_HERMITIAN:
1270: case MAT_SYMMETRY_ETERNAL:
1271: /* These options are handled directly by MatSetOption() */
1272: break;
1273: default:
1274: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1275: }
1276: return(0);
1277: }
1281: PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1282: {
1283: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1285: PetscInt itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2;
1286: MatScalar *aa,*aa_i;
1287: PetscScalar *v_i;
1290: bs = A->rmap->bs;
1291: ai = a->i;
1292: aj = a->j;
1293: aa = a->a;
1294: bs2 = a->bs2;
1296: if (row < 0 || row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row);
1298: bn = row/bs; /* Block number */
1299: bp = row % bs; /* Block Position */
1300: M = ai[bn+1] - ai[bn];
1301: *nz = bs*M;
1303: if (v) {
1304: *v = 0;
1305: if (*nz) {
1306: PetscMalloc((*nz)*sizeof(PetscScalar),v);
1307: for (i=0; i<M; i++) { /* for each block in the block row */
1308: v_i = *v + i*bs;
1309: aa_i = aa + bs2*(ai[bn] + i);
1310: for (j=bp,k=0; j<bs2; j+=bs,k++) v_i[k] = aa_i[j];
1311: }
1312: }
1313: }
1315: if (idx) {
1316: *idx = 0;
1317: if (*nz) {
1318: PetscMalloc((*nz)*sizeof(PetscInt),idx);
1319: for (i=0; i<M; i++) { /* for each block in the block row */
1320: idx_i = *idx + i*bs;
1321: itmp = bs*aj[ai[bn] + i];
1322: for (j=0; j<bs; j++) idx_i[j] = itmp++;
1323: }
1324: }
1325: }
1326: return(0);
1327: }
1331: PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1332: {
1336: if (idx) {PetscFree(*idx);}
1337: if (v) {PetscFree(*v);}
1338: return(0);
1339: }
1341: extern PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
1345: PetscErrorCode MatTranspose_SeqBAIJ(Mat A,MatReuse reuse,Mat *B)
1346: {
1347: Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data;
1348: Mat C;
1350: PetscInt i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap->bs,mbs=a->mbs,nbs=a->nbs,len,*col;
1351: PetscInt *rows,*cols,bs2=a->bs2;
1352: MatScalar *array;
1355: if (reuse == MAT_REUSE_MATRIX && A == *B && mbs != nbs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1356: if (reuse == MAT_INITIAL_MATRIX || A == *B) {
1357: PetscMalloc((1+nbs)*sizeof(PetscInt),&col);
1358: PetscMemzero(col,(1+nbs)*sizeof(PetscInt));
1360: for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1;
1361: MatCreate(PetscObjectComm((PetscObject)A),&C);
1362: MatSetSizes(C,A->cmap->n,A->rmap->N,A->cmap->n,A->rmap->N);
1363: MatSetType(C,((PetscObject)A)->type_name);
1364: MatSeqBAIJSetPreallocation_SeqBAIJ(C,bs,0,col);
1365: PetscFree(col);
1366: } else {
1367: C = *B;
1368: }
1370: array = a->a;
1371: PetscMalloc2(bs,PetscInt,&rows,bs,PetscInt,&cols);
1372: for (i=0; i<mbs; i++) {
1373: cols[0] = i*bs;
1374: for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1;
1375: len = ai[i+1] - ai[i];
1376: for (j=0; j<len; j++) {
1377: rows[0] = (*aj++)*bs;
1378: for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1;
1379: MatSetValues_SeqBAIJ(C,bs,rows,bs,cols,array,INSERT_VALUES);
1380: array += bs2;
1381: }
1382: }
1383: PetscFree2(rows,cols);
1385: MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);
1386: MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);
1388: if (reuse == MAT_INITIAL_MATRIX || *B != A) {
1389: *B = C;
1390: } else {
1391: MatHeaderMerge(A,C);
1392: }
1393: return(0);
1394: }
1398: PetscErrorCode MatIsTranspose_SeqBAIJ(Mat A,Mat B,PetscReal tol,PetscBool *f)
1399: {
1401: Mat Btrans;
1404: *f = PETSC_FALSE;
1405: MatTranspose_SeqBAIJ(A,MAT_INITIAL_MATRIX,&Btrans);
1406: MatEqual_SeqBAIJ(B,Btrans,f);
1407: MatDestroy(&Btrans);
1408: return(0);
1409: }
1413: static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer)
1414: {
1415: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1417: PetscInt i,*col_lens,bs = A->rmap->bs,count,*jj,j,k,l,bs2=a->bs2;
1418: int fd;
1419: PetscScalar *aa;
1420: FILE *file;
1423: PetscViewerBinaryGetDescriptor(viewer,&fd);
1424: PetscMalloc((4+A->rmap->N)*sizeof(PetscInt),&col_lens);
1425: col_lens[0] = MAT_FILE_CLASSID;
1427: col_lens[1] = A->rmap->N;
1428: col_lens[2] = A->cmap->n;
1429: col_lens[3] = a->nz*bs2;
1431: /* store lengths of each row and write (including header) to file */
1432: count = 0;
1433: for (i=0; i<a->mbs; i++) {
1434: for (j=0; j<bs; j++) {
1435: col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
1436: }
1437: }
1438: PetscBinaryWrite(fd,col_lens,4+A->rmap->N,PETSC_INT,PETSC_TRUE);
1439: PetscFree(col_lens);
1441: /* store column indices (zero start index) */
1442: PetscMalloc((a->nz+1)*bs2*sizeof(PetscInt),&jj);
1443: count = 0;
1444: for (i=0; i<a->mbs; i++) {
1445: for (j=0; j<bs; j++) {
1446: for (k=a->i[i]; k<a->i[i+1]; k++) {
1447: for (l=0; l<bs; l++) {
1448: jj[count++] = bs*a->j[k] + l;
1449: }
1450: }
1451: }
1452: }
1453: PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);
1454: PetscFree(jj);
1456: /* store nonzero values */
1457: PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);
1458: count = 0;
1459: for (i=0; i<a->mbs; i++) {
1460: for (j=0; j<bs; j++) {
1461: for (k=a->i[i]; k<a->i[i+1]; k++) {
1462: for (l=0; l<bs; l++) {
1463: aa[count++] = a->a[bs2*k + l*bs + j];
1464: }
1465: }
1466: }
1467: }
1468: PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);
1469: PetscFree(aa);
1471: PetscViewerBinaryGetInfoPointer(viewer,&file);
1472: if (file) {
1473: fprintf(file,"-matload_block_size %d\n",(int)A->rmap->bs);
1474: }
1475: return(0);
1476: }
1480: static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer)
1481: {
1482: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1483: PetscErrorCode ierr;
1484: PetscInt i,j,bs = A->rmap->bs,k,l,bs2=a->bs2;
1485: PetscViewerFormat format;
1488: PetscViewerGetFormat(viewer,&format);
1489: if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1490: PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);
1491: } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
1492: Mat aij;
1493: MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);
1494: MatView(aij,viewer);
1495: MatDestroy(&aij);
1496: } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1497: return(0);
1498: } else if (format == PETSC_VIEWER_ASCII_COMMON) {
1499: PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);
1500: PetscObjectPrintClassNamePrefixType((PetscObject)A,viewer,"Matrix Object");
1501: for (i=0; i<a->mbs; i++) {
1502: for (j=0; j<bs; j++) {
1503: PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);
1504: for (k=a->i[i]; k<a->i[i+1]; k++) {
1505: for (l=0; l<bs; l++) {
1506: #if defined(PETSC_USE_COMPLEX)
1507: if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1508: PetscViewerASCIIPrintf(viewer," (%D, %G + %Gi) ",bs*a->j[k]+l,
1509: PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));
1510: } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1511: PetscViewerASCIIPrintf(viewer," (%D, %G - %Gi) ",bs*a->j[k]+l,
1512: PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));
1513: } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1514: PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));
1515: }
1516: #else
1517: if (a->a[bs2*k + l*bs + j] != 0.0) {
1518: PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);
1519: }
1520: #endif
1521: }
1522: }
1523: PetscViewerASCIIPrintf(viewer,"\n");
1524: }
1525: }
1526: PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);
1527: } else {
1528: PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);
1529: PetscObjectPrintClassNamePrefixType((PetscObject)A,viewer,"Matrix Object");
1530: for (i=0; i<a->mbs; i++) {
1531: for (j=0; j<bs; j++) {
1532: PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);
1533: for (k=a->i[i]; k<a->i[i+1]; k++) {
1534: for (l=0; l<bs; l++) {
1535: #if defined(PETSC_USE_COMPLEX)
1536: if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
1537: PetscViewerASCIIPrintf(viewer," (%D, %G + %G i) ",bs*a->j[k]+l,
1538: PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));
1539: } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
1540: PetscViewerASCIIPrintf(viewer," (%D, %G - %G i) ",bs*a->j[k]+l,
1541: PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));
1542: } else {
1543: PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));
1544: }
1545: #else
1546: PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);
1547: #endif
1548: }
1549: }
1550: PetscViewerASCIIPrintf(viewer,"\n");
1551: }
1552: }
1553: PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);
1554: }
1555: PetscViewerFlush(viewer);
1556: return(0);
1557: }
1559: #include <petscdraw.h>
1562: static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
1563: {
1564: Mat A = (Mat) Aa;
1565: Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data;
1566: PetscErrorCode ierr;
1567: PetscInt row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap->bs,bs2=a->bs2;
1568: PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r;
1569: MatScalar *aa;
1570: PetscViewer viewer;
1571: PetscViewerFormat format;
1574: PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);
1575: PetscViewerGetFormat(viewer,&format);
1577: PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);
1579: /* loop over matrix elements drawing boxes */
1581: if (format != PETSC_VIEWER_DRAW_CONTOUR) {
1582: color = PETSC_DRAW_BLUE;
1583: for (i=0,row=0; i<mbs; i++,row+=bs) {
1584: for (j=a->i[i]; j<a->i[i+1]; j++) {
1585: y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1586: x_l = a->j[j]*bs; x_r = x_l + 1.0;
1587: aa = a->a + j*bs2;
1588: for (k=0; k<bs; k++) {
1589: for (l=0; l<bs; l++) {
1590: if (PetscRealPart(*aa++) >= 0.) continue;
1591: PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
1592: }
1593: }
1594: }
1595: }
1596: color = PETSC_DRAW_CYAN;
1597: for (i=0,row=0; i<mbs; i++,row+=bs) {
1598: for (j=a->i[i]; j<a->i[i+1]; j++) {
1599: y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1600: x_l = a->j[j]*bs; x_r = x_l + 1.0;
1601: aa = a->a + j*bs2;
1602: for (k=0; k<bs; k++) {
1603: for (l=0; l<bs; l++) {
1604: if (PetscRealPart(*aa++) != 0.) continue;
1605: PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
1606: }
1607: }
1608: }
1609: }
1610: color = PETSC_DRAW_RED;
1611: for (i=0,row=0; i<mbs; i++,row+=bs) {
1612: for (j=a->i[i]; j<a->i[i+1]; j++) {
1613: y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1614: x_l = a->j[j]*bs; x_r = x_l + 1.0;
1615: aa = a->a + j*bs2;
1616: for (k=0; k<bs; k++) {
1617: for (l=0; l<bs; l++) {
1618: if (PetscRealPart(*aa++) <= 0.) continue;
1619: PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
1620: }
1621: }
1622: }
1623: }
1624: } else {
1625: /* use contour shading to indicate magnitude of values */
1626: /* first determine max of all nonzero values */
1627: PetscDraw popup;
1628: PetscReal scale,maxv = 0.0;
1630: for (i=0; i<a->nz*a->bs2; i++) {
1631: if (PetscAbsScalar(a->a[i]) > maxv) maxv = PetscAbsScalar(a->a[i]);
1632: }
1633: scale = (245.0 - PETSC_DRAW_BASIC_COLORS)/maxv;
1634: PetscDrawGetPopup(draw,&popup);
1635: if (popup) {
1636: PetscDrawScalePopup(popup,0.0,maxv);
1637: }
1638: for (i=0,row=0; i<mbs; i++,row+=bs) {
1639: for (j=a->i[i]; j<a->i[i+1]; j++) {
1640: y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1641: x_l = a->j[j]*bs; x_r = x_l + 1.0;
1642: aa = a->a + j*bs2;
1643: for (k=0; k<bs; k++) {
1644: for (l=0; l<bs; l++) {
1645: color = PETSC_DRAW_BASIC_COLORS + (PetscInt)(scale*PetscAbsScalar(*aa++));
1646: PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
1647: }
1648: }
1649: }
1650: }
1651: }
1652: return(0);
1653: }
1657: static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer)
1658: {
1660: PetscReal xl,yl,xr,yr,w,h;
1661: PetscDraw draw;
1662: PetscBool isnull;
1665: PetscViewerDrawGetDraw(viewer,0,&draw);
1666: PetscDrawIsNull(draw,&isnull); if (isnull) return(0);
1668: PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);
1669: xr = A->cmap->n; yr = A->rmap->N; h = yr/10.0; w = xr/10.0;
1670: xr += w; yr += h; xl = -w; yl = -h;
1671: PetscDrawSetCoordinates(draw,xl,yl,xr,yr);
1672: PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);
1673: PetscObjectCompose((PetscObject)A,"Zoomviewer",NULL);
1674: return(0);
1675: }
1679: PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer)
1680: {
1682: PetscBool iascii,isbinary,isdraw;
1685: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);
1686: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);
1687: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);
1688: if (iascii) {
1689: MatView_SeqBAIJ_ASCII(A,viewer);
1690: } else if (isbinary) {
1691: MatView_SeqBAIJ_Binary(A,viewer);
1692: } else if (isdraw) {
1693: MatView_SeqBAIJ_Draw(A,viewer);
1694: } else {
1695: Mat B;
1696: MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);
1697: MatView(B,viewer);
1698: MatDestroy(&B);
1699: }
1700: return(0);
1701: }
1706: PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[])
1707: {
1708: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1709: PetscInt *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
1710: PetscInt *ai = a->i,*ailen = a->ilen;
1711: PetscInt brow,bcol,ridx,cidx,bs=A->rmap->bs,bs2=a->bs2;
1712: MatScalar *ap,*aa = a->a;
1715: for (k=0; k<m; k++) { /* loop over rows */
1716: row = im[k]; brow = row/bs;
1717: if (row < 0) {v += n; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
1718: if (row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row);
1719: rp = aj + ai[brow]; ap = aa + bs2*ai[brow];
1720: nrow = ailen[brow];
1721: for (l=0; l<n; l++) { /* loop over columns */
1722: if (in[l] < 0) {v++; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
1723: if (in[l] >= A->cmap->n) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]);
1724: col = in[l];
1725: bcol = col/bs;
1726: cidx = col%bs;
1727: ridx = row%bs;
1728: high = nrow;
1729: low = 0; /* assume unsorted */
1730: while (high-low > 5) {
1731: t = (low+high)/2;
1732: if (rp[t] > bcol) high = t;
1733: else low = t;
1734: }
1735: for (i=low; i<high; i++) {
1736: if (rp[i] > bcol) break;
1737: if (rp[i] == bcol) {
1738: *v++ = ap[bs2*i+bs*cidx+ridx];
1739: goto finished;
1740: }
1741: }
1742: *v++ = 0.0;
1743: finished:;
1744: }
1745: }
1746: return(0);
1747: }
1751: PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
1752: {
1753: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1754: PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1;
1755: PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen;
1756: PetscErrorCode ierr;
1757: PetscInt *aj =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs,stepval;
1758: PetscBool roworiented=a->roworiented;
1759: const PetscScalar *value = v;
1760: MatScalar *ap,*aa = a->a,*bap;
1763: if (roworiented) {
1764: stepval = (n-1)*bs;
1765: } else {
1766: stepval = (m-1)*bs;
1767: }
1768: for (k=0; k<m; k++) { /* loop over added rows */
1769: row = im[k];
1770: if (row < 0) continue;
1771: #if defined(PETSC_USE_DEBUG)
1772: if (row >= a->mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,a->mbs-1);
1773: #endif
1774: rp = aj + ai[row];
1775: ap = aa + bs2*ai[row];
1776: rmax = imax[row];
1777: nrow = ailen[row];
1778: low = 0;
1779: high = nrow;
1780: for (l=0; l<n; l++) { /* loop over added columns */
1781: if (in[l] < 0) continue;
1782: #if defined(PETSC_USE_DEBUG)
1783: if (in[l] >= a->nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],a->nbs-1);
1784: #endif
1785: col = in[l];
1786: if (roworiented) {
1787: value = v + (k*(stepval+bs) + l)*bs;
1788: } else {
1789: value = v + (l*(stepval+bs) + k)*bs;
1790: }
1791: if (col <= lastcol) low = 0;
1792: else high = nrow;
1793: lastcol = col;
1794: while (high-low > 7) {
1795: t = (low+high)/2;
1796: if (rp[t] > col) high = t;
1797: else low = t;
1798: }
1799: for (i=low; i<high; i++) {
1800: if (rp[i] > col) break;
1801: if (rp[i] == col) {
1802: bap = ap + bs2*i;
1803: if (roworiented) {
1804: if (is == ADD_VALUES) {
1805: for (ii=0; ii<bs; ii++,value+=stepval) {
1806: for (jj=ii; jj<bs2; jj+=bs) {
1807: bap[jj] += *value++;
1808: }
1809: }
1810: } else {
1811: for (ii=0; ii<bs; ii++,value+=stepval) {
1812: for (jj=ii; jj<bs2; jj+=bs) {
1813: bap[jj] = *value++;
1814: }
1815: }
1816: }
1817: } else {
1818: if (is == ADD_VALUES) {
1819: for (ii=0; ii<bs; ii++,value+=bs+stepval) {
1820: for (jj=0; jj<bs; jj++) {
1821: bap[jj] += value[jj];
1822: }
1823: bap += bs;
1824: }
1825: } else {
1826: for (ii=0; ii<bs; ii++,value+=bs+stepval) {
1827: for (jj=0; jj<bs; jj++) {
1828: bap[jj] = value[jj];
1829: }
1830: bap += bs;
1831: }
1832: }
1833: }
1834: goto noinsert2;
1835: }
1836: }
1837: if (nonew == 1) goto noinsert2;
1838: if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
1839: MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
1840: N = nrow++ - 1; high++;
1841: /* shift up all the later entries in this row */
1842: for (ii=N; ii>=i; ii--) {
1843: rp[ii+1] = rp[ii];
1844: PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));
1845: }
1846: if (N >= i) {
1847: PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));
1848: }
1849: rp[i] = col;
1850: bap = ap + bs2*i;
1851: if (roworiented) {
1852: for (ii=0; ii<bs; ii++,value+=stepval) {
1853: for (jj=ii; jj<bs2; jj+=bs) {
1854: bap[jj] = *value++;
1855: }
1856: }
1857: } else {
1858: for (ii=0; ii<bs; ii++,value+=stepval) {
1859: for (jj=0; jj<bs; jj++) {
1860: *bap++ = *value++;
1861: }
1862: }
1863: }
1864: noinsert2:;
1865: low = i;
1866: }
1867: ailen[row] = nrow;
1868: }
1869: return(0);
1870: }
1874: PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
1875: {
1876: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1877: PetscInt fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax;
1878: PetscInt m = A->rmap->N,*ip,N,*ailen = a->ilen;
1880: PetscInt mbs = a->mbs,bs2 = a->bs2,rmax = 0;
1881: MatScalar *aa = a->a,*ap;
1882: PetscReal ratio=0.6;
1885: if (mode == MAT_FLUSH_ASSEMBLY) return(0);
1887: if (m) rmax = ailen[0];
1888: for (i=1; i<mbs; i++) {
1889: /* move each row back by the amount of empty slots (fshift) before it*/
1890: fshift += imax[i-1] - ailen[i-1];
1891: rmax = PetscMax(rmax,ailen[i]);
1892: if (fshift) {
1893: ip = aj + ai[i]; ap = aa + bs2*ai[i];
1894: N = ailen[i];
1895: for (j=0; j<N; j++) {
1896: ip[j-fshift] = ip[j];
1898: PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));
1899: }
1900: }
1901: ai[i] = ai[i-1] + ailen[i-1];
1902: }
1903: if (mbs) {
1904: fshift += imax[mbs-1] - ailen[mbs-1];
1905: ai[mbs] = ai[mbs-1] + ailen[mbs-1];
1906: }
1907: /* reset ilen and imax for each row */
1908: for (i=0; i<mbs; i++) {
1909: ailen[i] = imax[i] = ai[i+1] - ai[i];
1910: }
1911: a->nz = ai[mbs];
1913: /* diagonals may have moved, so kill the diagonal pointers */
1914: a->idiagvalid = PETSC_FALSE;
1915: if (fshift && a->diag) {
1916: PetscFree(a->diag);
1917: PetscLogObjectMemory(A,-(mbs+1)*sizeof(PetscInt));
1918: a->diag = 0;
1919: }
1920: if (fshift && a->nounused == -1) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "Unused space detected in matrix: %D X %D block size %D, %D unneeded", m, A->cmap->n, A->rmap->bs, fshift*bs2);
1921: PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap->n,A->rmap->bs,fshift*bs2,a->nz*bs2);
1922: PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);
1923: PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);
1925: A->info.mallocs += a->reallocs;
1926: a->reallocs = 0;
1927: A->info.nz_unneeded = (PetscReal)fshift*bs2;
1929: MatCheckCompressedRow(A,&a->compressedrow,a->i,mbs,ratio);
1931: A->same_nonzero = PETSC_TRUE;
1932: return(0);
1933: }
1935: /*
1936: This function returns an array of flags which indicate the locations of contiguous
1937: blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9]
1938: then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
1939: Assume: sizes should be long enough to hold all the values.
1940: */
1943: static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max)
1944: {
1945: PetscInt i,j,k,row;
1946: PetscBool flg;
1949: for (i=0,j=0; i<n; j++) {
1950: row = idx[i];
1951: if (row%bs!=0) { /* Not the begining of a block */
1952: sizes[j] = 1;
1953: i++;
1954: } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */
1955: sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */
1956: i++;
1957: } else { /* Begining of the block, so check if the complete block exists */
1958: flg = PETSC_TRUE;
1959: for (k=1; k<bs; k++) {
1960: if (row+k != idx[i+k]) { /* break in the block */
1961: flg = PETSC_FALSE;
1962: break;
1963: }
1964: }
1965: if (flg) { /* No break in the bs */
1966: sizes[j] = bs;
1967: i += bs;
1968: } else {
1969: sizes[j] = 1;
1970: i++;
1971: }
1972: }
1973: }
1974: *bs_max = j;
1975: return(0);
1976: }
1980: PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b)
1981: {
1982: Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data;
1983: PetscErrorCode ierr;
1984: PetscInt i,j,k,count,*rows;
1985: PetscInt bs=A->rmap->bs,bs2=baij->bs2,*sizes,row,bs_max;
1986: PetscScalar zero = 0.0;
1987: MatScalar *aa;
1988: const PetscScalar *xx;
1989: PetscScalar *bb;
1992: /* fix right hand side if needed */
1993: if (x && b) {
1994: VecGetArrayRead(x,&xx);
1995: VecGetArray(b,&bb);
1996: for (i=0; i<is_n; i++) {
1997: bb[is_idx[i]] = diag*xx[is_idx[i]];
1998: }
1999: VecRestoreArrayRead(x,&xx);
2000: VecRestoreArray(b,&bb);
2001: }
2003: /* Make a copy of the IS and sort it */
2004: /* allocate memory for rows,sizes */
2005: PetscMalloc2(is_n,PetscInt,&rows,2*is_n,PetscInt,&sizes);
2007: /* copy IS values to rows, and sort them */
2008: for (i=0; i<is_n; i++) rows[i] = is_idx[i];
2009: PetscSortInt(is_n,rows);
2011: if (baij->keepnonzeropattern) {
2012: for (i=0; i<is_n; i++) sizes[i] = 1;
2013: bs_max = is_n;
2014: A->same_nonzero = PETSC_TRUE;
2015: } else {
2016: MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);
2018: A->same_nonzero = PETSC_FALSE;
2019: }
2021: for (i=0,j=0; i<bs_max; j+=sizes[i],i++) {
2022: row = rows[j];
2023: if (row < 0 || row > A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row);
2024: count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
2025: aa = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
2026: if (sizes[i] == bs && !baij->keepnonzeropattern) {
2027: if (diag != (PetscScalar)0.0) {
2028: if (baij->ilen[row/bs] > 0) {
2029: baij->ilen[row/bs] = 1;
2030: baij->j[baij->i[row/bs]] = row/bs;
2032: PetscMemzero(aa,count*bs*sizeof(MatScalar));
2033: }
2034: /* Now insert all the diagonal values for this bs */
2035: for (k=0; k<bs; k++) {
2036: (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);
2037: }
2038: } else { /* (diag == 0.0) */
2039: baij->ilen[row/bs] = 0;
2040: } /* end (diag == 0.0) */
2041: } else { /* (sizes[i] != bs) */
2042: #if defined(PETSC_USE_DEBUG)
2043: if (sizes[i] != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal Error. Value should be 1");
2044: #endif
2045: for (k=0; k<count; k++) {
2046: aa[0] = zero;
2047: aa += bs;
2048: }
2049: if (diag != (PetscScalar)0.0) {
2050: (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);
2051: }
2052: }
2053: }
2055: PetscFree2(rows,sizes);
2056: MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);
2057: return(0);
2058: }
2062: PetscErrorCode MatZeroRowsColumns_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b)
2063: {
2064: Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data;
2065: PetscErrorCode ierr;
2066: PetscInt i,j,k,count;
2067: PetscInt bs =A->rmap->bs,bs2=baij->bs2,row,col;
2068: PetscScalar zero = 0.0;
2069: MatScalar *aa;
2070: const PetscScalar *xx;
2071: PetscScalar *bb;
2072: PetscBool *zeroed,vecs = PETSC_FALSE;
2075: /* fix right hand side if needed */
2076: if (x && b) {
2077: VecGetArrayRead(x,&xx);
2078: VecGetArray(b,&bb);
2079: vecs = PETSC_TRUE;
2080: }
2081: A->same_nonzero = PETSC_TRUE;
2083: /* zero the columns */
2084: PetscMalloc(A->rmap->n*sizeof(PetscBool),&zeroed);
2085: PetscMemzero(zeroed,A->rmap->n*sizeof(PetscBool));
2086: for (i=0; i<is_n; i++) {
2087: if (is_idx[i] < 0 || is_idx[i] >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",is_idx[i]);
2088: zeroed[is_idx[i]] = PETSC_TRUE;
2089: }
2090: for (i=0; i<A->rmap->N; i++) {
2091: if (!zeroed[i]) {
2092: row = i/bs;
2093: for (j=baij->i[row]; j<baij->i[row+1]; j++) {
2094: for (k=0; k<bs; k++) {
2095: col = bs*baij->j[j] + k;
2096: if (zeroed[col]) {
2097: aa = ((MatScalar*)(baij->a)) + j*bs2 + (i%bs) + bs*k;
2098: if (vecs) bb[i] -= aa[0]*xx[col];
2099: aa[0] = 0.0;
2100: }
2101: }
2102: }
2103: } else if (vecs) bb[i] = diag*xx[i];
2104: }
2105: PetscFree(zeroed);
2106: if (vecs) {
2107: VecRestoreArrayRead(x,&xx);
2108: VecRestoreArray(b,&bb);
2109: }
2111: /* zero the rows */
2112: for (i=0; i<is_n; i++) {
2113: row = is_idx[i];
2114: count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
2115: aa = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
2116: for (k=0; k<count; k++) {
2117: aa[0] = zero;
2118: aa += bs;
2119: }
2120: if (diag != (PetscScalar)0.0) {
2121: (*A->ops->setvalues)(A,1,&row,1,&row,&diag,INSERT_VALUES);
2122: }
2123: }
2124: MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);
2125: return(0);
2126: }
2130: PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
2131: {
2132: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2133: PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1;
2134: PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen;
2135: PetscInt *aj =a->j,nonew=a->nonew,bs=A->rmap->bs,brow,bcol;
2137: PetscInt ridx,cidx,bs2=a->bs2;
2138: PetscBool roworiented=a->roworiented;
2139: MatScalar *ap,value,*aa=a->a,*bap;
2143: for (k=0; k<m; k++) { /* loop over added rows */
2144: row = im[k];
2145: brow = row/bs;
2146: if (row < 0) continue;
2147: #if defined(PETSC_USE_DEBUG)
2148: if (row >= A->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->N-1);
2149: #endif
2150: rp = aj + ai[brow];
2151: ap = aa + bs2*ai[brow];
2152: rmax = imax[brow];
2153: nrow = ailen[brow];
2154: low = 0;
2155: high = nrow;
2156: for (l=0; l<n; l++) { /* loop over added columns */
2157: if (in[l] < 0) continue;
2158: #if defined(PETSC_USE_DEBUG)
2159: if (in[l] >= A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap->n-1);
2160: #endif
2161: col = in[l]; bcol = col/bs;
2162: ridx = row % bs; cidx = col % bs;
2163: if (roworiented) {
2164: value = v[l + k*n];
2165: } else {
2166: value = v[k + l*m];
2167: }
2168: if (col <= lastcol) low = 0; else high = nrow;
2169: lastcol = col;
2170: while (high-low > 7) {
2171: t = (low+high)/2;
2172: if (rp[t] > bcol) high = t;
2173: else low = t;
2174: }
2175: for (i=low; i<high; i++) {
2176: if (rp[i] > bcol) break;
2177: if (rp[i] == bcol) {
2178: bap = ap + bs2*i + bs*cidx + ridx;
2179: if (is == ADD_VALUES) *bap += value;
2180: else *bap = value;
2181: goto noinsert1;
2182: }
2183: }
2184: if (nonew == 1) goto noinsert1;
2185: if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
2186: MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
2187: N = nrow++ - 1; high++;
2188: /* shift up all the later entries in this row */
2189: for (ii=N; ii>=i; ii--) {
2190: rp[ii+1] = rp[ii];
2191: PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));
2192: }
2193: if (N>=i) {
2194: PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));
2195: }
2196: rp[i] = bcol;
2197: ap[bs2*i + bs*cidx + ridx] = value;
2198: a->nz++;
2199: noinsert1:;
2200: low = i;
2201: }
2202: ailen[brow] = nrow;
2203: }
2204: A->same_nonzero = PETSC_FALSE;
2205: return(0);
2206: }
2210: PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,const MatFactorInfo *info)
2211: {
2212: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inA->data;
2213: Mat outA;
2215: PetscBool row_identity,col_identity;
2218: if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU");
2219: ISIdentity(row,&row_identity);
2220: ISIdentity(col,&col_identity);
2221: if (!row_identity || !col_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU");
2223: outA = inA;
2224: inA->factortype = MAT_FACTOR_LU;
2226: MatMarkDiagonal_SeqBAIJ(inA);
2228: PetscObjectReference((PetscObject)row);
2229: ISDestroy(&a->row);
2230: a->row = row;
2231: PetscObjectReference((PetscObject)col);
2232: ISDestroy(&a->col);
2233: a->col = col;
2235: /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
2236: ISDestroy(&a->icol);
2237: ISInvertPermutation(col,PETSC_DECIDE,&a->icol);
2238: PetscLogObjectParent(inA,a->icol);
2240: MatSeqBAIJSetNumericFactorization_inplace(inA,(PetscBool)(row_identity && col_identity));
2241: if (!a->solve_work) {
2242: PetscMalloc((inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar),&a->solve_work);
2243: PetscLogObjectMemory(inA,(inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar));
2244: }
2245: MatLUFactorNumeric(outA,inA,info);
2246: return(0);
2247: }
2251: PetscErrorCode MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices)
2252: {
2253: Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)mat->data;
2254: PetscInt i,nz,mbs;
2257: nz = baij->maxnz;
2258: mbs = baij->mbs;
2259: for (i=0; i<nz; i++) {
2260: baij->j[i] = indices[i];
2261: }
2262: baij->nz = nz;
2263: for (i=0; i<mbs; i++) {
2264: baij->ilen[i] = baij->imax[i];
2265: }
2266: return(0);
2267: }
2271: /*@
2272: MatSeqBAIJSetColumnIndices - Set the column indices for all the rows
2273: in the matrix.
2275: Input Parameters:
2276: + mat - the SeqBAIJ matrix
2277: - indices - the column indices
2279: Level: advanced
2281: Notes:
2282: This can be called if you have precomputed the nonzero structure of the
2283: matrix and want to provide it to the matrix object to improve the performance
2284: of the MatSetValues() operation.
2286: You MUST have set the correct numbers of nonzeros per row in the call to
2287: MatCreateSeqBAIJ(), and the columns indices MUST be sorted.
2289: MUST be called before any calls to MatSetValues();
2291: @*/
2292: PetscErrorCode MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices)
2293: {
2299: PetscUseMethod(mat,"MatSeqBAIJSetColumnIndices_C",(Mat,PetscInt*),(mat,indices));
2300: return(0);
2301: }
2305: PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[])
2306: {
2307: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2309: PetscInt i,j,n,row,bs,*ai,*aj,mbs;
2310: PetscReal atmp;
2311: PetscScalar *x,zero = 0.0;
2312: MatScalar *aa;
2313: PetscInt ncols,brow,krow,kcol;
2316: if (A->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
2317: bs = A->rmap->bs;
2318: aa = a->a;
2319: ai = a->i;
2320: aj = a->j;
2321: mbs = a->mbs;
2323: VecSet(v,zero);
2324: VecGetArray(v,&x);
2325: VecGetLocalSize(v,&n);
2326: if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
2327: for (i=0; i<mbs; i++) {
2328: ncols = ai[1] - ai[0]; ai++;
2329: brow = bs*i;
2330: for (j=0; j<ncols; j++) {
2331: for (kcol=0; kcol<bs; kcol++) {
2332: for (krow=0; krow<bs; krow++) {
2333: atmp = PetscAbsScalar(*aa);aa++;
2334: row = brow + krow; /* row index */
2335: /* printf("val[%d,%d]: %G\n",row,bcol+kcol,atmp); */
2336: if (PetscAbsScalar(x[row]) < atmp) {x[row] = atmp; if (idx) idx[row] = bs*(*aj) + kcol;}
2337: }
2338: }
2339: aj++;
2340: }
2341: }
2342: VecRestoreArray(v,&x);
2343: return(0);
2344: }
2348: PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str)
2349: {
2353: /* If the two matrices have the same copy implementation, use fast copy. */
2354: if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) {
2355: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2356: Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)B->data;
2357: PetscInt ambs=a->mbs,bmbs=b->mbs,abs=A->rmap->bs,bbs=B->rmap->bs,bs2=abs*abs;
2359: if (a->i[ambs] != b->i[bmbs]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Number of nonzero blocks in matrices A %D and B %D are different",a->i[ambs],b->i[bmbs]);
2360: if (abs != bbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Block size A %D and B %D are different",abs,bbs);
2361: PetscMemcpy(b->a,a->a,(bs2*a->i[ambs])*sizeof(PetscScalar));
2362: } else {
2363: MatCopy_Basic(A,B,str);
2364: }
2365: return(0);
2366: }
2370: PetscErrorCode MatSetUp_SeqBAIJ(Mat A)
2371: {
2375: MatSeqBAIJSetPreallocation_SeqBAIJ(A,A->rmap->bs,PETSC_DEFAULT,0);
2376: return(0);
2377: }
2381: PetscErrorCode MatSeqBAIJGetArray_SeqBAIJ(Mat A,PetscScalar *array[])
2382: {
2383: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2386: *array = a->a;
2387: return(0);
2388: }
2392: PetscErrorCode MatSeqBAIJRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[])
2393: {
2395: return(0);
2396: }
2400: PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2401: {
2402: Mat_SeqBAIJ *x = (Mat_SeqBAIJ*)X->data,*y = (Mat_SeqBAIJ*)Y->data;
2404: PetscInt i,bs=Y->rmap->bs,j,bs2=bs*bs;
2405: PetscBLASInt one=1;
2408: if (str == SAME_NONZERO_PATTERN) {
2409: PetscScalar alpha = a;
2410: PetscBLASInt bnz;
2411: PetscBLASIntCast(x->nz*bs2,&bnz);
2412: PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2413: } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2414: if (y->xtoy && y->XtoY != X) {
2415: PetscFree(y->xtoy);
2416: MatDestroy(&y->XtoY);
2417: }
2418: if (!y->xtoy) { /* get xtoy */
2419: MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,NULL, y->i,y->j,NULL, &y->xtoy);
2420: y->XtoY = X;
2421: PetscObjectReference((PetscObject)X);
2422: }
2423: for (i=0; i<x->nz; i++) {
2424: j = 0;
2425: while (j < bs2) {
2426: y->a[bs2*y->xtoy[i]+j] += a*(x->a[bs2*i+j]);
2427: j++;
2428: }
2429: }
2430: PetscInfo3(Y,"ratio of nnz(X)/nnz(Y): %D/%D = %G\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz));
2431: } else {
2432: MatAXPY_Basic(Y,a,X,str);
2433: }
2434: return(0);
2435: }
2439: PetscErrorCode MatRealPart_SeqBAIJ(Mat A)
2440: {
2441: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2442: PetscInt i,nz = a->bs2*a->i[a->mbs];
2443: MatScalar *aa = a->a;
2446: for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]);
2447: return(0);
2448: }
2452: PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A)
2453: {
2454: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2455: PetscInt i,nz = a->bs2*a->i[a->mbs];
2456: MatScalar *aa = a->a;
2459: for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]);
2460: return(0);
2461: }
2463: extern PetscErrorCode MatFDColoringCreate_SeqAIJ(Mat,ISColoring,MatFDColoring);
2467: /*
2468: Code almost idential to MatGetColumnIJ_SeqAIJ() should share common code
2469: */
2470: PetscErrorCode MatGetColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool *done)
2471: {
2472: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2474: PetscInt bs = A->rmap->bs,i,*collengths,*cia,*cja,n = A->cmap->n/bs,m = A->rmap->n/bs;
2475: PetscInt nz = a->i[m],row,*jj,mr,col;
2478: *nn = n;
2479: if (!ia) return(0);
2480: if (symmetric) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not for BAIJ matrices");
2481: else {
2482: PetscMalloc((n+1)*sizeof(PetscInt),&collengths);
2483: PetscMemzero(collengths,n*sizeof(PetscInt));
2484: PetscMalloc((n+1)*sizeof(PetscInt),&cia);
2485: PetscMalloc((nz+1)*sizeof(PetscInt),&cja);
2486: jj = a->j;
2487: for (i=0; i<nz; i++) {
2488: collengths[jj[i]]++;
2489: }
2490: cia[0] = oshift;
2491: for (i=0; i<n; i++) {
2492: cia[i+1] = cia[i] + collengths[i];
2493: }
2494: PetscMemzero(collengths,n*sizeof(PetscInt));
2495: jj = a->j;
2496: for (row=0; row<m; row++) {
2497: mr = a->i[row+1] - a->i[row];
2498: for (i=0; i<mr; i++) {
2499: col = *jj++;
2501: cja[cia[col] + collengths[col]++ - oshift] = row + oshift;
2502: }
2503: }
2504: PetscFree(collengths);
2505: *ia = cia; *ja = cja;
2506: }
2507: return(0);
2508: }
2512: PetscErrorCode MatRestoreColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscBool *done)
2513: {
2517: if (!ia) return(0);
2518: PetscFree(*ia);
2519: PetscFree(*ja);
2520: return(0);
2521: }
2525: PetscErrorCode MatFDColoringApply_BAIJ(Mat J,MatFDColoring coloring,Vec x1,MatStructure *flag,void *sctx)
2526: {
2527: PetscErrorCode (*f)(void*,Vec,Vec,void*) = (PetscErrorCode (*)(void*,Vec,Vec,void*))coloring->f;
2529: PetscInt bs = J->rmap->bs,i,j,k,start,end,l,row,col,*srows,**vscaleforrow;
2530: PetscScalar dx,*y,*xx,*w3_array;
2531: PetscScalar *vscale_array;
2532: PetscReal epsilon = coloring->error_rel,umin = coloring->umin,unorm;
2533: Vec w1 = coloring->w1,w2=coloring->w2,w3;
2534: void *fctx = coloring->fctx;
2535: PetscBool flg = PETSC_FALSE;
2536: PetscInt ctype = coloring->ctype,N,col_start=0,col_end=0;
2537: Vec x1_tmp;
2543: if (!f) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call MatFDColoringSetFunction()");
2545: PetscLogEventBegin(MAT_FDColoringApply,coloring,J,x1,0);
2546: MatSetUnfactored(J);
2547: PetscOptionsGetBool(NULL,"-mat_fd_coloring_dont_rezero",&flg,NULL);
2548: if (flg) {
2549: PetscInfo(coloring,"Not calling MatZeroEntries()\n");
2550: } else {
2551: PetscBool assembled;
2552: MatAssembled(J,&assembled);
2553: if (assembled) {
2554: MatZeroEntries(J);
2555: }
2556: }
2558: x1_tmp = x1;
2559: if (!coloring->vscale) {
2560: VecDuplicate(x1_tmp,&coloring->vscale);
2561: }
2563: if (coloring->htype[0] == 'w') { /* tacky test; need to make systematic if we add other approaches to computing h*/
2564: VecNorm(x1_tmp,NORM_2,&unorm);
2565: }
2566: VecGetOwnershipRange(w1,&start,&end); /* OwnershipRange is used by ghosted x! */
2568: /* Set w1 = F(x1) */
2569: if (!coloring->fset) {
2570: PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);
2571: (*f)(sctx,x1_tmp,w1,fctx);
2572: PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);
2573: } else {
2574: coloring->fset = PETSC_FALSE;
2575: }
2577: if (!coloring->w3) {
2578: VecDuplicate(x1_tmp,&coloring->w3);
2579: PetscLogObjectParent(coloring,coloring->w3);
2580: }
2581: w3 = coloring->w3;
2583: /* Compute all the local scale factors, including ghost points */
2584: VecGetLocalSize(x1_tmp,&N);
2585: VecGetArray(x1_tmp,&xx);
2586: VecGetArray(coloring->vscale,&vscale_array);
2587: if (ctype == IS_COLORING_GHOSTED) {
2588: col_start = 0; col_end = N;
2589: } else if (ctype == IS_COLORING_GLOBAL) {
2590: xx = xx - start;
2591: vscale_array = vscale_array - start;
2592: col_start = start; col_end = N + start;
2593: }
2594: for (col=col_start; col<col_end; col++) {
2595: /* Loop over each local column, vscale[col] = 1./(epsilon*dx[col]) */
2596: if (coloring->htype[0] == 'w') {
2597: dx = 1.0 + unorm;
2598: } else {
2599: dx = xx[col];
2600: }
2601: if (dx == (PetscScalar)0.0) dx = 1.0;
2602: if (PetscAbsScalar(dx) < umin && PetscRealPart(dx) >= 0.0) dx = umin;
2603: else if (PetscRealPart(dx) < 0.0 && PetscAbsScalar(dx) < umin) dx = -umin;
2604: dx *= epsilon;
2605: vscale_array[col] = (PetscScalar)1.0/dx;
2606: }
2607: if (ctype == IS_COLORING_GLOBAL) vscale_array = vscale_array + start;
2608: VecRestoreArray(coloring->vscale,&vscale_array);
2609: if (ctype == IS_COLORING_GLOBAL) {
2610: VecGhostUpdateBegin(coloring->vscale,INSERT_VALUES,SCATTER_FORWARD);
2611: VecGhostUpdateEnd(coloring->vscale,INSERT_VALUES,SCATTER_FORWARD);
2612: }
2613: if (coloring->vscaleforrow) {
2614: vscaleforrow = coloring->vscaleforrow;
2615: } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_NULL,"Null Object: coloring->vscaleforrow");
2617: PetscMalloc(bs*sizeof(PetscInt),&srows);
2618: /*
2619: Loop over each color
2620: */
2621: VecGetArray(coloring->vscale,&vscale_array);
2622: for (k=0; k<coloring->ncolors; k++) {
2623: coloring->currentcolor = k;
2624: for (i=0; i<bs; i++) {
2625: VecCopy(x1_tmp,w3);
2626: VecGetArray(w3,&w3_array);
2627: if (ctype == IS_COLORING_GLOBAL) w3_array = w3_array - start;
2628: /*
2629: Loop over each column associated with color
2630: adding the perturbation to the vector w3.
2631: */
2632: for (l=0; l<coloring->ncolumns[k]; l++) {
2633: col = i + bs*coloring->columns[k][l]; /* local column of the matrix we are probing for */
2634: if (coloring->htype[0] == 'w') {
2635: dx = 1.0 + unorm;
2636: } else {
2637: dx = xx[col];
2638: }
2639: if (dx == (PetscScalar)0.0) dx = 1.0;
2640: if (PetscAbsScalar(dx) < umin && PetscRealPart(dx) >= 0.0) dx = umin;
2641: else if (PetscRealPart(dx) < 0.0 && PetscAbsScalar(dx) < umin) dx = -umin;
2642: dx *= epsilon;
2643: if (!PetscAbsScalar(dx)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Computed 0 differencing parameter");
2644: w3_array[col] += dx;
2645: }
2646: if (ctype == IS_COLORING_GLOBAL) w3_array = w3_array + start;
2647: VecRestoreArray(w3,&w3_array);
2649: /*
2650: Evaluate function at w3 = x1 + dx (here dx is a vector of perturbations)
2651: w2 = F(x1 + dx) - F(x1)
2652: */
2653: PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);
2654: (*f)(sctx,w3,w2,fctx);
2655: PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);
2656: VecAXPY(w2,-1.0,w1);
2658: /*
2659: Loop over rows of vector, putting results into Jacobian matrix
2660: */
2661: VecGetArray(w2,&y);
2662: for (l=0; l<coloring->nrows[k]; l++) {
2663: row = bs*coloring->rows[k][l]; /* local row index */
2664: col = i + bs*coloring->columnsforrow[k][l]; /* global column index */
2665: for (j=0; j<bs; j++) {
2666: y[row+j] *= vscale_array[j+bs*vscaleforrow[k][l]];
2667: srows[j] = row + start + j;
2668: }
2669: MatSetValues(J,bs,srows,1,&col,y+row,INSERT_VALUES);
2670: }
2671: VecRestoreArray(w2,&y);
2672: }
2673: } /* endof for each color */
2674: if (ctype == IS_COLORING_GLOBAL) xx = xx + start;
2675: VecRestoreArray(coloring->vscale,&vscale_array);
2676: VecRestoreArray(x1_tmp,&xx);
2677: PetscFree(srows);
2679: coloring->currentcolor = -1;
2681: MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);
2682: MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);
2683: PetscLogEventEnd(MAT_FDColoringApply,coloring,J,x1,0);
2684: return(0);
2685: }
2687: /* -------------------------------------------------------------------*/
2688: static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ,
2689: MatGetRow_SeqBAIJ,
2690: MatRestoreRow_SeqBAIJ,
2691: MatMult_SeqBAIJ_N,
2692: /* 4*/ MatMultAdd_SeqBAIJ_N,
2693: MatMultTranspose_SeqBAIJ,
2694: MatMultTransposeAdd_SeqBAIJ,
2695: 0,
2696: 0,
2697: 0,
2698: /* 10*/ 0,
2699: MatLUFactor_SeqBAIJ,
2700: 0,
2701: 0,
2702: MatTranspose_SeqBAIJ,
2703: /* 15*/ MatGetInfo_SeqBAIJ,
2704: MatEqual_SeqBAIJ,
2705: MatGetDiagonal_SeqBAIJ,
2706: MatDiagonalScale_SeqBAIJ,
2707: MatNorm_SeqBAIJ,
2708: /* 20*/ 0,
2709: MatAssemblyEnd_SeqBAIJ,
2710: MatSetOption_SeqBAIJ,
2711: MatZeroEntries_SeqBAIJ,
2712: /* 24*/ MatZeroRows_SeqBAIJ,
2713: 0,
2714: 0,
2715: 0,
2716: 0,
2717: /* 29*/ MatSetUp_SeqBAIJ,
2718: 0,
2719: 0,
2720: 0,
2721: 0,
2722: /* 34*/ MatDuplicate_SeqBAIJ,
2723: 0,
2724: 0,
2725: MatILUFactor_SeqBAIJ,
2726: 0,
2727: /* 39*/ MatAXPY_SeqBAIJ,
2728: MatGetSubMatrices_SeqBAIJ,
2729: MatIncreaseOverlap_SeqBAIJ,
2730: MatGetValues_SeqBAIJ,
2731: MatCopy_SeqBAIJ,
2732: /* 44*/ 0,
2733: MatScale_SeqBAIJ,
2734: 0,
2735: 0,
2736: MatZeroRowsColumns_SeqBAIJ,
2737: /* 49*/ 0,
2738: MatGetRowIJ_SeqBAIJ,
2739: MatRestoreRowIJ_SeqBAIJ,
2740: MatGetColumnIJ_SeqBAIJ,
2741: MatRestoreColumnIJ_SeqBAIJ,
2742: /* 54*/ MatFDColoringCreate_SeqAIJ,
2743: 0,
2744: 0,
2745: 0,
2746: MatSetValuesBlocked_SeqBAIJ,
2747: /* 59*/ MatGetSubMatrix_SeqBAIJ,
2748: MatDestroy_SeqBAIJ,
2749: MatView_SeqBAIJ,
2750: 0,
2751: 0,
2752: /* 64*/ 0,
2753: 0,
2754: 0,
2755: 0,
2756: 0,
2757: /* 69*/ MatGetRowMaxAbs_SeqBAIJ,
2758: 0,
2759: MatConvert_Basic,
2760: 0,
2761: 0,
2762: /* 74*/ 0,
2763: MatFDColoringApply_BAIJ,
2764: 0,
2765: 0,
2766: 0,
2767: /* 79*/ 0,
2768: 0,
2769: 0,
2770: 0,
2771: MatLoad_SeqBAIJ,
2772: /* 84*/ 0,
2773: 0,
2774: 0,
2775: 0,
2776: 0,
2777: /* 89*/ 0,
2778: 0,
2779: 0,
2780: 0,
2781: 0,
2782: /* 94*/ 0,
2783: 0,
2784: 0,
2785: 0,
2786: 0,
2787: /* 99*/ 0,
2788: 0,
2789: 0,
2790: 0,
2791: 0,
2792: /*104*/ 0,
2793: MatRealPart_SeqBAIJ,
2794: MatImaginaryPart_SeqBAIJ,
2795: 0,
2796: 0,
2797: /*109*/ 0,
2798: 0,
2799: 0,
2800: 0,
2801: MatMissingDiagonal_SeqBAIJ,
2802: /*114*/ 0,
2803: 0,
2804: 0,
2805: 0,
2806: 0,
2807: /*119*/ 0,
2808: 0,
2809: MatMultHermitianTranspose_SeqBAIJ,
2810: MatMultHermitianTransposeAdd_SeqBAIJ,
2811: 0,
2812: /*124*/ 0,
2813: 0,
2814: MatInvertBlockDiagonal_SeqBAIJ,
2815: 0,
2816: 0,
2817: /*129*/ 0,
2818: 0,
2819: 0,
2820: 0,
2821: 0,
2822: /*134*/ 0,
2823: 0,
2824: 0,
2825: 0,
2826: 0,
2827: /*139*/ 0,
2828: 0
2829: };
2833: PetscErrorCode MatStoreValues_SeqBAIJ(Mat mat)
2834: {
2835: Mat_SeqBAIJ *aij = (Mat_SeqBAIJ*)mat->data;
2836: PetscInt nz = aij->i[aij->mbs]*aij->bs2;
2840: if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
2842: /* allocate space for values if not already there */
2843: if (!aij->saved_values) {
2844: PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);
2845: PetscLogObjectMemory(mat,(nz+1)*sizeof(PetscScalar));
2846: }
2848: /* copy values over */
2849: PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));
2850: return(0);
2851: }
2855: PetscErrorCode MatRetrieveValues_SeqBAIJ(Mat mat)
2856: {
2857: Mat_SeqBAIJ *aij = (Mat_SeqBAIJ*)mat->data;
2859: PetscInt nz = aij->i[aij->mbs]*aij->bs2;
2862: if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
2863: if (!aij->saved_values) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatStoreValues(A);first");
2865: /* copy values over */
2866: PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));
2867: return(0);
2868: }
2870: PETSC_EXTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*);
2871: PETSC_EXTERN PetscErrorCode MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*);
2875: PetscErrorCode MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz)
2876: {
2877: Mat_SeqBAIJ *b;
2879: PetscInt i,mbs,nbs,bs2;
2880: PetscBool flg,skipallocation = PETSC_FALSE,realalloc = PETSC_FALSE;
2883: if (nz >= 0 || nnz) realalloc = PETSC_TRUE;
2884: if (nz == MAT_SKIP_ALLOCATION) {
2885: skipallocation = PETSC_TRUE;
2886: nz = 0;
2887: }
2889: PetscLayoutSetBlockSize(B->rmap,bs);
2890: PetscLayoutSetBlockSize(B->cmap,bs);
2891: PetscLayoutSetUp(B->rmap);
2892: PetscLayoutSetUp(B->cmap);
2893: PetscLayoutGetBlockSize(B->rmap,&bs);
2895: B->preallocated = PETSC_TRUE;
2897: mbs = B->rmap->n/bs;
2898: nbs = B->cmap->n/bs;
2899: bs2 = bs*bs;
2901: if (mbs*bs!=B->rmap->n || nbs*bs!=B->cmap->n) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap->N,B->cmap->n,bs);
2903: if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2904: if (nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz);
2905: if (nnz) {
2906: for (i=0; i<mbs; i++) {
2907: if (nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]);
2908: if (nnz[i] > nbs) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs);
2909: }
2910: }
2912: b = (Mat_SeqBAIJ*)B->data;
2913: PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat");
2914: PetscOptionsBool("-mat_no_unroll","Do not optimize for block size (slow)",NULL,PETSC_FALSE,&flg,NULL);
2915: PetscOptionsEnd();
2917: if (!flg) {
2918: switch (bs) {
2919: case 1:
2920: B->ops->mult = MatMult_SeqBAIJ_1;
2921: B->ops->multadd = MatMultAdd_SeqBAIJ_1;
2922: break;
2923: case 2:
2924: B->ops->mult = MatMult_SeqBAIJ_2;
2925: B->ops->multadd = MatMultAdd_SeqBAIJ_2;
2926: break;
2927: case 3:
2928: B->ops->mult = MatMult_SeqBAIJ_3;
2929: B->ops->multadd = MatMultAdd_SeqBAIJ_3;
2930: break;
2931: case 4:
2932: B->ops->mult = MatMult_SeqBAIJ_4;
2933: B->ops->multadd = MatMultAdd_SeqBAIJ_4;
2934: break;
2935: case 5:
2936: B->ops->mult = MatMult_SeqBAIJ_5;
2937: B->ops->multadd = MatMultAdd_SeqBAIJ_5;
2938: break;
2939: case 6:
2940: B->ops->mult = MatMult_SeqBAIJ_6;
2941: B->ops->multadd = MatMultAdd_SeqBAIJ_6;
2942: break;
2943: case 7:
2944: B->ops->mult = MatMult_SeqBAIJ_7;
2945: B->ops->multadd = MatMultAdd_SeqBAIJ_7;
2946: break;
2947: case 15:
2948: B->ops->mult = MatMult_SeqBAIJ_15_ver1;
2949: B->ops->multadd = MatMultAdd_SeqBAIJ_N;
2950: break;
2951: default:
2952: B->ops->mult = MatMult_SeqBAIJ_N;
2953: B->ops->multadd = MatMultAdd_SeqBAIJ_N;
2954: break;
2955: }
2956: }
2957: B->ops->sor = MatSOR_SeqBAIJ;
2958: b->mbs = mbs;
2959: b->nbs = nbs;
2960: if (!skipallocation) {
2961: if (!b->imax) {
2962: PetscMalloc2(mbs,PetscInt,&b->imax,mbs,PetscInt,&b->ilen);
2963: PetscLogObjectMemory(B,2*mbs*sizeof(PetscInt));
2965: b->free_imax_ilen = PETSC_TRUE;
2966: }
2967: /* b->ilen will count nonzeros in each block row so far. */
2968: for (i=0; i<mbs; i++) b->ilen[i] = 0;
2969: if (!nnz) {
2970: if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2971: else if (nz < 0) nz = 1;
2972: for (i=0; i<mbs; i++) b->imax[i] = nz;
2973: nz = nz*mbs;
2974: } else {
2975: nz = 0;
2976: for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];}
2977: }
2979: /* allocate the matrix space */
2980: MatSeqXAIJFreeAIJ(B,&b->a,&b->j,&b->i);
2981: PetscMalloc3(bs2*nz,PetscScalar,&b->a,nz,PetscInt,&b->j,B->rmap->N+1,PetscInt,&b->i);
2982: PetscLogObjectMemory(B,(B->rmap->N+1)*sizeof(PetscInt)+nz*(bs2*sizeof(PetscScalar)+sizeof(PetscInt)));
2983: PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));
2984: PetscMemzero(b->j,nz*sizeof(PetscInt));
2986: b->singlemalloc = PETSC_TRUE;
2987: b->i[0] = 0;
2988: for (i=1; i<mbs+1; i++) {
2989: b->i[i] = b->i[i-1] + b->imax[i-1];
2990: }
2991: b->free_a = PETSC_TRUE;
2992: b->free_ij = PETSC_TRUE;
2993: } else {
2994: b->free_a = PETSC_FALSE;
2995: b->free_ij = PETSC_FALSE;
2996: }
2998: b->bs2 = bs2;
2999: b->mbs = mbs;
3000: b->nz = 0;
3001: b->maxnz = nz;
3002: B->info.nz_unneeded = (PetscReal)b->maxnz*bs2;
3003: if (realalloc) {MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);}
3004: return(0);
3005: }
3009: PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
3010: {
3011: PetscInt i,m,nz,nz_max=0,*nnz;
3012: PetscScalar *values=0;
3016: if (bs < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs);
3017: PetscLayoutSetBlockSize(B->rmap,bs);
3018: PetscLayoutSetBlockSize(B->cmap,bs);
3019: PetscLayoutSetUp(B->rmap);
3020: PetscLayoutSetUp(B->cmap);
3021: PetscLayoutGetBlockSize(B->rmap,&bs);
3022: m = B->rmap->n/bs;
3024: if (ii[0] != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %D",ii[0]);
3025: PetscMalloc((m+1) * sizeof(PetscInt), &nnz);
3026: for (i=0; i<m; i++) {
3027: nz = ii[i+1]- ii[i];
3028: if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "Local row %D has a negative number of columns %D",i,nz);
3029: nz_max = PetscMax(nz_max, nz);
3030: nnz[i] = nz;
3031: }
3032: MatSeqBAIJSetPreallocation(B,bs,0,nnz);
3033: PetscFree(nnz);
3035: values = (PetscScalar*)V;
3036: if (!values) {
3037: PetscMalloc(bs*bs*(nz_max+1)*sizeof(PetscScalar),&values);
3038: PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));
3039: }
3040: for (i=0; i<m; i++) {
3041: PetscInt ncols = ii[i+1] - ii[i];
3042: const PetscInt *icols = jj + ii[i];
3043: const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
3044: MatSetValuesBlocked_SeqBAIJ(B,1,&i,ncols,icols,svals,INSERT_VALUES);
3045: }
3046: if (!V) { PetscFree(values); }
3047: MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
3048: MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
3049: MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);
3050: return(0);
3051: }
3053: PETSC_EXTERN PetscErrorCode MatGetFactor_seqbaij_petsc(Mat,MatFactorType,Mat*);
3054: PETSC_EXTERN PetscErrorCode MatGetFactor_seqbaij_bstrm(Mat,MatFactorType,Mat*);
3055: #if defined(PETSC_HAVE_MUMPS)
3056: PETSC_EXTERN PetscErrorCode MatGetFactor_baij_mumps(Mat,MatFactorType,Mat*);
3057: #endif
3058: extern PetscErrorCode MatGetFactorAvailable_seqbaij_petsc(Mat,MatFactorType,PetscBool*);
3060: /*MC
3061: MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on
3062: block sparse compressed row format.
3064: Options Database Keys:
3065: . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions()
3067: Level: beginner
3069: .seealso: MatCreateSeqBAIJ()
3070: M*/
3072: PETSC_EXTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBSTRM(Mat, MatType,MatReuse,Mat*);
3076: PETSC_EXTERN PetscErrorCode MatCreate_SeqBAIJ(Mat B)
3077: {
3079: PetscMPIInt size;
3080: Mat_SeqBAIJ *b;
3083: MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);
3084: if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Comm must be of size 1");
3086: PetscNewLog(B,Mat_SeqBAIJ,&b);
3087: B->data = (void*)b;
3088: PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));
3090: b->row = 0;
3091: b->col = 0;
3092: b->icol = 0;
3093: b->reallocs = 0;
3094: b->saved_values = 0;
3096: b->roworiented = PETSC_TRUE;
3097: b->nonew = 0;
3098: b->diag = 0;
3099: b->solve_work = 0;
3100: b->mult_work = 0;
3101: B->spptr = 0;
3102: B->info.nz_unneeded = (PetscReal)b->maxnz*b->bs2;
3103: b->keepnonzeropattern = PETSC_FALSE;
3104: b->xtoy = 0;
3105: b->XtoY = 0;
3106: B->same_nonzero = PETSC_FALSE;
3108: PetscObjectComposeFunction((PetscObject)B,"MatGetFactorAvailable_petsc_C",MatGetFactorAvailable_seqbaij_petsc);
3109: PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_petsc_C",MatGetFactor_seqbaij_petsc);
3110: PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_bstrm_C",MatGetFactor_seqbaij_bstrm);
3111: #if defined(PETSC_HAVE_MUMPS)
3112: PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_mumps_C", MatGetFactor_baij_mumps);
3113: #endif
3114: PetscObjectComposeFunction((PetscObject)B,"MatInvertBlockDiagonal_C",MatInvertBlockDiagonal_SeqBAIJ);
3115: PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_SeqBAIJ);
3116: PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_SeqBAIJ);
3117: PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",MatSeqBAIJSetColumnIndices_SeqBAIJ);
3118: PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqaij_C",MatConvert_SeqBAIJ_SeqAIJ);
3119: PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C",MatConvert_SeqBAIJ_SeqSBAIJ);
3120: PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",MatSeqBAIJSetPreallocation_SeqBAIJ);
3121: PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C",MatSeqBAIJSetPreallocationCSR_SeqBAIJ);
3122: PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqbstrm_C",MatConvert_SeqBAIJ_SeqBSTRM);
3123: PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_SeqBAIJ);
3124: PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);
3125: return(0);
3126: }
3130: PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C,Mat A,MatDuplicateOption cpvalues,PetscBool mallocmatspace)
3131: {
3132: Mat_SeqBAIJ *c = (Mat_SeqBAIJ*)C->data,*a = (Mat_SeqBAIJ*)A->data;
3134: PetscInt i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2;
3137: if (a->i[mbs] != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupt matrix");
3139: if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3140: c->imax = a->imax;
3141: c->ilen = a->ilen;
3142: c->free_imax_ilen = PETSC_FALSE;
3143: } else {
3144: PetscMalloc2(mbs,PetscInt,&c->imax,mbs,PetscInt,&c->ilen);
3145: PetscLogObjectMemory(C,2*mbs*sizeof(PetscInt));
3146: for (i=0; i<mbs; i++) {
3147: c->imax[i] = a->imax[i];
3148: c->ilen[i] = a->ilen[i];
3149: }
3150: c->free_imax_ilen = PETSC_TRUE;
3151: }
3153: /* allocate the matrix space */
3154: if (mallocmatspace) {
3155: if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3156: PetscMalloc(bs2*nz*sizeof(PetscScalar),&c->a);
3157: PetscLogObjectMemory(C,a->i[mbs]*bs2*sizeof(PetscScalar));
3158: PetscMemzero(c->a,bs2*nz*sizeof(PetscScalar));
3160: c->i = a->i;
3161: c->j = a->j;
3162: c->singlemalloc = PETSC_FALSE;
3163: c->free_a = PETSC_TRUE;
3164: c->free_ij = PETSC_FALSE;
3165: c->parent = A;
3166: C->preallocated = PETSC_TRUE;
3167: C->assembled = PETSC_TRUE;
3169: PetscObjectReference((PetscObject)A);
3170: MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);
3171: MatSetOption(C,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);
3172: } else {
3173: PetscMalloc3(bs2*nz,PetscScalar,&c->a,nz,PetscInt,&c->j,mbs+1,PetscInt,&c->i);
3174: PetscLogObjectMemory(C,a->i[mbs]*(bs2*sizeof(PetscScalar)+sizeof(PetscInt))+(mbs+1)*sizeof(PetscInt));
3176: c->singlemalloc = PETSC_TRUE;
3177: c->free_a = PETSC_TRUE;
3178: c->free_ij = PETSC_TRUE;
3180: PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));
3181: if (mbs > 0) {
3182: PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));
3183: if (cpvalues == MAT_COPY_VALUES) {
3184: PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));
3185: } else {
3186: PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));
3187: }
3188: }
3189: C->preallocated = PETSC_TRUE;
3190: C->assembled = PETSC_TRUE;
3191: }
3192: }
3194: c->roworiented = a->roworiented;
3195: c->nonew = a->nonew;
3197: PetscLayoutReference(A->rmap,&C->rmap);
3198: PetscLayoutReference(A->cmap,&C->cmap);
3200: c->bs2 = a->bs2;
3201: c->mbs = a->mbs;
3202: c->nbs = a->nbs;
3204: if (a->diag) {
3205: if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3206: c->diag = a->diag;
3207: c->free_diag = PETSC_FALSE;
3208: } else {
3209: PetscMalloc((mbs+1)*sizeof(PetscInt),&c->diag);
3210: PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt));
3211: for (i=0; i<mbs; i++) c->diag[i] = a->diag[i];
3212: c->free_diag = PETSC_TRUE;
3213: }
3214: } else c->diag = 0;
3216: c->nz = a->nz;
3217: c->maxnz = a->nz; /* Since we allocate exactly the right amount */
3218: c->solve_work = 0;
3219: c->mult_work = 0;
3221: c->compressedrow.use = a->compressedrow.use;
3222: c->compressedrow.nrows = a->compressedrow.nrows;
3223: c->compressedrow.check = a->compressedrow.check;
3224: if (a->compressedrow.use) {
3225: i = a->compressedrow.nrows;
3226: PetscMalloc2(i+1,PetscInt,&c->compressedrow.i,i+1,PetscInt,&c->compressedrow.rindex);
3227: PetscLogObjectMemory(C,(2*i+1)*sizeof(PetscInt));
3228: PetscMemcpy(c->compressedrow.i,a->compressedrow.i,(i+1)*sizeof(PetscInt));
3229: PetscMemcpy(c->compressedrow.rindex,a->compressedrow.rindex,i*sizeof(PetscInt));
3230: } else {
3231: c->compressedrow.use = PETSC_FALSE;
3232: c->compressedrow.i = NULL;
3233: c->compressedrow.rindex = NULL;
3234: }
3235: C->same_nonzero = A->same_nonzero;
3237: PetscFunctionListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);
3238: PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));
3239: return(0);
3240: }
3244: PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
3245: {
3249: MatCreate(PetscObjectComm((PetscObject)A),B);
3250: MatSetSizes(*B,A->rmap->N,A->cmap->n,A->rmap->N,A->cmap->n);
3251: MatSetType(*B,MATSEQBAIJ);
3252: MatDuplicateNoCreate_SeqBAIJ(*B,A,cpvalues,PETSC_TRUE);
3253: return(0);
3254: }
3258: PetscErrorCode MatLoad_SeqBAIJ(Mat newmat,PetscViewer viewer)
3259: {
3260: Mat_SeqBAIJ *a;
3262: PetscInt i,nz,header[4],*rowlengths=0,M,N,bs=1;
3263: PetscInt *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
3264: PetscInt kmax,jcount,block,idx,point,nzcountb,extra_rows,rows,cols;
3265: PetscInt *masked,nmask,tmp,bs2,ishift;
3266: PetscMPIInt size;
3267: int fd;
3268: PetscScalar *aa;
3269: MPI_Comm comm;
3272: PetscObjectGetComm((PetscObject)viewer,&comm);
3273: PetscOptionsBegin(comm,NULL,"Options for loading SEQBAIJ matrix","Mat");
3274: PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);
3275: PetscOptionsEnd();
3276: bs2 = bs*bs;
3278: MPI_Comm_size(comm,&size);
3279: if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"view must have one processor");
3280: PetscViewerBinaryGetDescriptor(viewer,&fd);
3281: PetscBinaryRead(fd,header,4,PETSC_INT);
3282: if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not Mat object");
3283: M = header[1]; N = header[2]; nz = header[3];
3285: if (header[3] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ");
3286: if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices");
3288: /*
3289: This code adds extra rows to make sure the number of rows is
3290: divisible by the blocksize
3291: */
3292: mbs = M/bs;
3293: extra_rows = bs - M + bs*(mbs);
3294: if (extra_rows == bs) extra_rows = 0;
3295: else mbs++;
3296: if (extra_rows) {
3297: PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");
3298: }
3300: /* Set global sizes if not already set */
3301: if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) {
3302: MatSetSizes(newmat,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows);
3303: } else { /* Check if the matrix global sizes are correct */
3304: MatGetSize(newmat,&rows,&cols);
3305: if (rows < 0 && cols < 0) { /* user might provide local size instead of global size */
3306: MatGetLocalSize(newmat,&rows,&cols);
3307: }
3308: if (M != rows || N != cols) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix in file of different length (%d, %d) than the input matrix (%d, %d)",M,N,rows,cols);
3309: }
3311: /* read in row lengths */
3312: PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);
3313: PetscBinaryRead(fd,rowlengths,M,PETSC_INT);
3314: for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
3316: /* read in column indices */
3317: PetscMalloc((nz+extra_rows)*sizeof(PetscInt),&jj);
3318: PetscBinaryRead(fd,jj,nz,PETSC_INT);
3319: for (i=0; i<extra_rows; i++) jj[nz+i] = M+i;
3321: /* loop over row lengths determining block row lengths */
3322: PetscMalloc(mbs*sizeof(PetscInt),&browlengths);
3323: PetscMemzero(browlengths,mbs*sizeof(PetscInt));
3324: PetscMalloc2(mbs,PetscInt,&mask,mbs,PetscInt,&masked);
3325: PetscMemzero(mask,mbs*sizeof(PetscInt));
3326: rowcount = 0;
3327: nzcount = 0;
3328: for (i=0; i<mbs; i++) {
3329: nmask = 0;
3330: for (j=0; j<bs; j++) {
3331: kmax = rowlengths[rowcount];
3332: for (k=0; k<kmax; k++) {
3333: tmp = jj[nzcount++]/bs;
3334: if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
3335: }
3336: rowcount++;
3337: }
3338: browlengths[i] += nmask;
3339: /* zero out the mask elements we set */
3340: for (j=0; j<nmask; j++) mask[masked[j]] = 0;
3341: }
3343: /* Do preallocation */
3344: MatSeqBAIJSetPreallocation_SeqBAIJ(newmat,bs,0,browlengths);
3345: a = (Mat_SeqBAIJ*)newmat->data;
3347: /* set matrix "i" values */
3348: a->i[0] = 0;
3349: for (i=1; i<= mbs; i++) {
3350: a->i[i] = a->i[i-1] + browlengths[i-1];
3351: a->ilen[i-1] = browlengths[i-1];
3352: }
3353: a->nz = 0;
3354: for (i=0; i<mbs; i++) a->nz += browlengths[i];
3356: /* read in nonzero values */
3357: PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);
3358: PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);
3359: for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0;
3361: /* set "a" and "j" values into matrix */
3362: nzcount = 0; jcount = 0;
3363: for (i=0; i<mbs; i++) {
3364: nzcountb = nzcount;
3365: nmask = 0;
3366: for (j=0; j<bs; j++) {
3367: kmax = rowlengths[i*bs+j];
3368: for (k=0; k<kmax; k++) {
3369: tmp = jj[nzcount++]/bs;
3370: if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
3371: }
3372: }
3373: /* sort the masked values */
3374: PetscSortInt(nmask,masked);
3376: /* set "j" values into matrix */
3377: maskcount = 1;
3378: for (j=0; j<nmask; j++) {
3379: a->j[jcount++] = masked[j];
3380: mask[masked[j]] = maskcount++;
3381: }
3382: /* set "a" values into matrix */
3383: ishift = bs2*a->i[i];
3384: for (j=0; j<bs; j++) {
3385: kmax = rowlengths[i*bs+j];
3386: for (k=0; k<kmax; k++) {
3387: tmp = jj[nzcountb]/bs;
3388: block = mask[tmp] - 1;
3389: point = jj[nzcountb] - bs*tmp;
3390: idx = ishift + bs2*block + j + bs*point;
3391: a->a[idx] = (MatScalar)aa[nzcountb++];
3392: }
3393: }
3394: /* zero out the mask elements we set */
3395: for (j=0; j<nmask; j++) mask[masked[j]] = 0;
3396: }
3397: if (jcount != a->nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix");
3399: PetscFree(rowlengths);
3400: PetscFree(browlengths);
3401: PetscFree(aa);
3402: PetscFree(jj);
3403: PetscFree2(mask,masked);
3405: MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);
3406: MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);
3407: return(0);
3408: }
3412: /*@C
3413: MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
3414: compressed row) format. For good matrix assembly performance the
3415: user should preallocate the matrix storage by setting the parameter nz
3416: (or the array nnz). By setting these parameters accurately, performance
3417: during matrix assembly can be increased by more than a factor of 50.
3419: Collective on MPI_Comm
3421: Input Parameters:
3422: + comm - MPI communicator, set to PETSC_COMM_SELF
3423: . bs - size of block
3424: . m - number of rows
3425: . n - number of columns
3426: . nz - number of nonzero blocks per block row (same for all rows)
3427: - nnz - array containing the number of nonzero blocks in the various block rows
3428: (possibly different for each block row) or NULL
3430: Output Parameter:
3431: . A - the matrix
3433: It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3434: MatXXXXSetPreallocation() paradgm instead of this routine directly.
3435: [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3437: Options Database Keys:
3438: . -mat_no_unroll - uses code that does not unroll the loops in the
3439: block calculations (much slower)
3440: . -mat_block_size - size of the blocks to use
3442: Level: intermediate
3444: Notes:
3445: The number of rows and columns must be divisible by blocksize.
3447: If the nnz parameter is given then the nz parameter is ignored
3449: A nonzero block is any block that as 1 or more nonzeros in it
3451: The block AIJ format is fully compatible with standard Fortran 77
3452: storage. That is, the stored row and column indices can begin at
3453: either one (as in Fortran) or zero. See the users' manual for details.
3455: Specify the preallocated storage with either nz or nnz (not both).
3456: Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory
3457: allocation. See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details.
3458: matrices.
3460: .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ()
3461: @*/
3462: PetscErrorCode MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
3463: {
3467: MatCreate(comm,A);
3468: MatSetSizes(*A,m,n,m,n);
3469: MatSetType(*A,MATSEQBAIJ);
3470: MatSeqBAIJSetPreallocation_SeqBAIJ(*A,bs,nz,(PetscInt*)nnz);
3471: return(0);
3472: }
3476: /*@C
3477: MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros
3478: per row in the matrix. For good matrix assembly performance the
3479: user should preallocate the matrix storage by setting the parameter nz
3480: (or the array nnz). By setting these parameters accurately, performance
3481: during matrix assembly can be increased by more than a factor of 50.
3483: Collective on MPI_Comm
3485: Input Parameters:
3486: + A - the matrix
3487: . bs - size of block
3488: . nz - number of block nonzeros per block row (same for all rows)
3489: - nnz - array containing the number of block nonzeros in the various block rows
3490: (possibly different for each block row) or NULL
3492: Options Database Keys:
3493: . -mat_no_unroll - uses code that does not unroll the loops in the
3494: block calculations (much slower)
3495: . -mat_block_size - size of the blocks to use
3497: Level: intermediate
3499: Notes:
3500: If the nnz parameter is given then the nz parameter is ignored
3502: You can call MatGetInfo() to get information on how effective the preallocation was;
3503: for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3504: You can also run with the option -info and look for messages with the string
3505: malloc in them to see if additional memory allocation was needed.
3507: The block AIJ format is fully compatible with standard Fortran 77
3508: storage. That is, the stored row and column indices can begin at
3509: either one (as in Fortran) or zero. See the users' manual for details.
3511: Specify the preallocated storage with either nz or nnz (not both).
3512: Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory
3513: allocation. See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details.
3515: .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ(), MatGetInfo()
3516: @*/
3517: PetscErrorCode MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[])
3518: {
3525: PetscTryMethod(B,"MatSeqBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[]),(B,bs,nz,nnz));
3526: return(0);
3527: }
3531: /*@C
3532: MatSeqBAIJSetPreallocationCSR - Allocates memory for a sparse sequential matrix in AIJ format
3533: (the default sequential PETSc format).
3535: Collective on MPI_Comm
3537: Input Parameters:
3538: + A - the matrix
3539: . i - the indices into j for the start of each local row (starts with zero)
3540: . j - the column indices for each local row (starts with zero) these must be sorted for each row
3541: - v - optional values in the matrix
3543: Level: developer
3545: .keywords: matrix, aij, compressed row, sparse
3547: .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatSeqBAIJSetPreallocation(), MATSEQBAIJ
3548: @*/
3549: PetscErrorCode MatSeqBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3550: {
3557: PetscTryMethod(B,"MatSeqBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));
3558: return(0);
3559: }
3564: /*@
3565: MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements provided by the user.
3567: Collective on MPI_Comm
3569: Input Parameters:
3570: + comm - must be an MPI communicator of size 1
3571: . bs - size of block
3572: . m - number of rows
3573: . n - number of columns
3574: . i - row indices
3575: . j - column indices
3576: - a - matrix values
3578: Output Parameter:
3579: . mat - the matrix
3581: Level: advanced
3583: Notes:
3584: The i, j, and a arrays are not copied by this routine, the user must free these arrays
3585: once the matrix is destroyed
3587: You cannot set new nonzero locations into this matrix, that will generate an error.
3589: The i and j indices are 0 based
3591: When block size is greater than 1 the matrix values must be stored using the BAIJ storage format (see the BAIJ code to determine this).
3594: .seealso: MatCreate(), MatCreateBAIJ(), MatCreateSeqBAIJ()
3596: @*/
3597: PetscErrorCode MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt *i,PetscInt *j,PetscScalar *a,Mat *mat)
3598: {
3600: PetscInt ii;
3601: Mat_SeqBAIJ *baij;
3604: if (bs != 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs);
3605: if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3607: MatCreate(comm,mat);
3608: MatSetSizes(*mat,m,n,m,n);
3609: MatSetType(*mat,MATSEQBAIJ);
3610: MatSeqBAIJSetPreallocation_SeqBAIJ(*mat,bs,MAT_SKIP_ALLOCATION,0);
3611: baij = (Mat_SeqBAIJ*)(*mat)->data;
3612: PetscMalloc2(m,PetscInt,&baij->imax,m,PetscInt,&baij->ilen);
3613: PetscLogObjectMemory(*mat,2*m*sizeof(PetscInt));
3615: baij->i = i;
3616: baij->j = j;
3617: baij->a = a;
3619: baij->singlemalloc = PETSC_FALSE;
3620: baij->nonew = -1; /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/
3621: baij->free_a = PETSC_FALSE;
3622: baij->free_ij = PETSC_FALSE;
3624: for (ii=0; ii<m; ii++) {
3625: baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii];
3626: #if defined(PETSC_USE_DEBUG)
3627: if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]);
3628: #endif
3629: }
3630: #if defined(PETSC_USE_DEBUG)
3631: for (ii=0; ii<baij->i[m]; ii++) {
3632: if (j[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]);
3633: if (j[ii] > n - 1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]);
3634: }
3635: #endif
3637: MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);
3638: MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);
3639: return(0);
3640: }