Actual source code: baij.c
petsc-3.6.1 2015-08-06
2: /*
3: Defines the basic matrix operations for the BAIJ (compressed row)
4: matrix storage format.
5: */
6: #include <../src/mat/impls/baij/seq/baij.h> /*I "petscmat.h" I*/
7: #include <petscblaslapack.h>
8: #include <petsc/private/kernels/blockinvert.h>
9: #include <petsc/private/kernels/blockmatmult.h>
13: PetscErrorCode MatInvertBlockDiagonal_SeqBAIJ(Mat A,const PetscScalar **values)
14: {
15: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*) A->data;
17: PetscInt *diag_offset,i,bs = A->rmap->bs,mbs = a->mbs,ipvt[5],bs2 = bs*bs,*v_pivots;
18: MatScalar *v = a->a,*odiag,*diag,*mdiag,work[25],*v_work;
19: PetscReal shift = 0.0;
22: if (a->idiagvalid) {
23: if (values) *values = a->idiag;
24: return(0);
25: }
26: MatMarkDiagonal_SeqBAIJ(A);
27: diag_offset = a->diag;
28: if (!a->idiag) {
29: PetscMalloc1(2*bs2*mbs,&a->idiag);
30: PetscLogObjectMemory((PetscObject)A,2*bs2*mbs*sizeof(PetscScalar));
31: }
32: diag = a->idiag;
33: mdiag = a->idiag+bs2*mbs;
34: if (values) *values = a->idiag;
35: /* factor and invert each block */
36: switch (bs) {
37: case 1:
38: for (i=0; i<mbs; i++) {
39: odiag = v + 1*diag_offset[i];
40: diag[0] = odiag[0];
41: mdiag[0] = odiag[0];
42: diag[0] = (PetscScalar)1.0 / (diag[0] + shift);
43: diag += 1;
44: mdiag += 1;
45: }
46: break;
47: case 2:
48: for (i=0; i<mbs; i++) {
49: odiag = v + 4*diag_offset[i];
50: diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
51: mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
52: PetscKernel_A_gets_inverse_A_2(diag,shift);
53: diag += 4;
54: mdiag += 4;
55: }
56: break;
57: case 3:
58: for (i=0; i<mbs; i++) {
59: odiag = v + 9*diag_offset[i];
60: diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
61: diag[4] = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7];
62: diag[8] = odiag[8];
63: mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
64: mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7];
65: mdiag[8] = odiag[8];
66: PetscKernel_A_gets_inverse_A_3(diag,shift);
67: diag += 9;
68: mdiag += 9;
69: }
70: break;
71: case 4:
72: for (i=0; i<mbs; i++) {
73: odiag = v + 16*diag_offset[i];
74: PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));
75: PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));
76: PetscKernel_A_gets_inverse_A_4(diag,shift);
77: diag += 16;
78: mdiag += 16;
79: }
80: break;
81: case 5:
82: for (i=0; i<mbs; i++) {
83: odiag = v + 25*diag_offset[i];
84: PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));
85: PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));
86: PetscKernel_A_gets_inverse_A_5(diag,ipvt,work,shift);
87: diag += 25;
88: mdiag += 25;
89: }
90: break;
91: case 6:
92: for (i=0; i<mbs; i++) {
93: odiag = v + 36*diag_offset[i];
94: PetscMemcpy(diag,odiag,36*sizeof(PetscScalar));
95: PetscMemcpy(mdiag,odiag,36*sizeof(PetscScalar));
96: PetscKernel_A_gets_inverse_A_6(diag,shift);
97: diag += 36;
98: mdiag += 36;
99: }
100: break;
101: case 7:
102: for (i=0; i<mbs; i++) {
103: odiag = v + 49*diag_offset[i];
104: PetscMemcpy(diag,odiag,49*sizeof(PetscScalar));
105: PetscMemcpy(mdiag,odiag,49*sizeof(PetscScalar));
106: PetscKernel_A_gets_inverse_A_7(diag,shift);
107: diag += 49;
108: mdiag += 49;
109: }
110: break;
111: default:
112: PetscMalloc2(bs,&v_work,bs,&v_pivots);
113: for (i=0; i<mbs; i++) {
114: odiag = v + bs2*diag_offset[i];
115: PetscMemcpy(diag,odiag,bs2*sizeof(PetscScalar));
116: PetscMemcpy(mdiag,odiag,bs2*sizeof(PetscScalar));
117: PetscKernel_A_gets_inverse_A(bs,diag,v_pivots,v_work);
118: diag += bs2;
119: mdiag += bs2;
120: }
121: PetscFree2(v_work,v_pivots);
122: }
123: a->idiagvalid = PETSC_TRUE;
124: return(0);
125: }
129: PetscErrorCode MatSOR_SeqBAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
130: {
131: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
132: PetscScalar *x,*work,*w,*workt,*t;
133: const MatScalar *v,*aa = a->a, *idiag;
134: const PetscScalar *b,*xb;
135: PetscScalar s[7], xw[7]={0}; /* avoid some compilers thinking xw is uninitialized */
136: PetscErrorCode ierr;
137: PetscInt m = a->mbs,i,i2,nz,bs = A->rmap->bs,bs2 = bs*bs,k,j,idx,it;
138: const PetscInt *diag,*ai = a->i,*aj = a->j,*vi;
141: if (fshift == -1.0) fshift = 0.0; /* negative fshift indicates do not error on zero diagonal; this code never errors on zero diagonal */
142: its = its*lits;
143: if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
144: if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
145: if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
146: if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
147: if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
149: if (!a->idiagvalid) {MatInvertBlockDiagonal(A,NULL);}
151: if (!m) return(0);
152: diag = a->diag;
153: idiag = a->idiag;
154: k = PetscMax(A->rmap->n,A->cmap->n);
155: if (!a->mult_work) {
156: PetscMalloc1(k+1,&a->mult_work);
157: }
158: if (!a->sor_workt) {
159: PetscMalloc1(k,&a->sor_workt);
160: }
161: if (!a->sor_work) {
162: PetscMalloc1(bs,&a->sor_work);
163: }
164: work = a->mult_work;
165: t = a->sor_workt;
166: w = a->sor_work;
168: VecGetArray(xx,&x);
169: VecGetArrayRead(bb,&b);
171: if (flag & SOR_ZERO_INITIAL_GUESS) {
172: if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
173: switch (bs) {
174: case 1:
175: PetscKernel_v_gets_A_times_w_1(x,idiag,b);
176: t[0] = b[0];
177: i2 = 1;
178: idiag += 1;
179: for (i=1; i<m; i++) {
180: v = aa + ai[i];
181: vi = aj + ai[i];
182: nz = diag[i] - ai[i];
183: s[0] = b[i2];
184: for (j=0; j<nz; j++) {
185: xw[0] = x[vi[j]];
186: PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
187: }
188: t[i2] = s[0];
189: PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
190: x[i2] = xw[0];
191: idiag += 1;
192: i2 += 1;
193: }
194: break;
195: case 2:
196: PetscKernel_v_gets_A_times_w_2(x,idiag,b);
197: t[0] = b[0]; t[1] = b[1];
198: i2 = 2;
199: idiag += 4;
200: for (i=1; i<m; i++) {
201: v = aa + 4*ai[i];
202: vi = aj + ai[i];
203: nz = diag[i] - ai[i];
204: s[0] = b[i2]; s[1] = b[i2+1];
205: for (j=0; j<nz; j++) {
206: idx = 2*vi[j];
207: it = 4*j;
208: xw[0] = x[idx]; xw[1] = x[1+idx];
209: PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
210: }
211: t[i2] = s[0]; t[i2+1] = s[1];
212: PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
213: x[i2] = xw[0]; x[i2+1] = xw[1];
214: idiag += 4;
215: i2 += 2;
216: }
217: break;
218: case 3:
219: PetscKernel_v_gets_A_times_w_3(x,idiag,b);
220: t[0] = b[0]; t[1] = b[1]; t[2] = b[2];
221: i2 = 3;
222: idiag += 9;
223: for (i=1; i<m; i++) {
224: v = aa + 9*ai[i];
225: vi = aj + ai[i];
226: nz = diag[i] - ai[i];
227: s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2];
228: while (nz--) {
229: idx = 3*(*vi++);
230: xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
231: PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
232: v += 9;
233: }
234: t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2];
235: PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
236: x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
237: idiag += 9;
238: i2 += 3;
239: }
240: break;
241: case 4:
242: PetscKernel_v_gets_A_times_w_4(x,idiag,b);
243: t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3];
244: i2 = 4;
245: idiag += 16;
246: for (i=1; i<m; i++) {
247: v = aa + 16*ai[i];
248: vi = aj + ai[i];
249: nz = diag[i] - ai[i];
250: s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3];
251: while (nz--) {
252: idx = 4*(*vi++);
253: xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
254: PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
255: v += 16;
256: }
257: t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; t[i2 + 3] = s[3];
258: PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
259: x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3];
260: idiag += 16;
261: i2 += 4;
262: }
263: break;
264: case 5:
265: PetscKernel_v_gets_A_times_w_5(x,idiag,b);
266: t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; t[4] = b[4];
267: i2 = 5;
268: idiag += 25;
269: for (i=1; i<m; i++) {
270: v = aa + 25*ai[i];
271: vi = aj + ai[i];
272: nz = diag[i] - ai[i];
273: s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4];
274: while (nz--) {
275: idx = 5*(*vi++);
276: xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
277: PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
278: v += 25;
279: }
280: t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; t[i2+3] = s[3]; t[i2+4] = s[4];
281: PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
282: x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4];
283: idiag += 25;
284: i2 += 5;
285: }
286: break;
287: case 6:
288: PetscKernel_v_gets_A_times_w_6(x,idiag,b);
289: t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; t[4] = b[4]; t[5] = b[5];
290: i2 = 6;
291: idiag += 36;
292: for (i=1; i<m; i++) {
293: v = aa + 36*ai[i];
294: vi = aj + ai[i];
295: nz = diag[i] - ai[i];
296: s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5];
297: while (nz--) {
298: idx = 6*(*vi++);
299: xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
300: xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
301: PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
302: v += 36;
303: }
304: t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2];
305: t[i2+3] = s[3]; t[i2+4] = s[4]; t[i2+5] = s[5];
306: PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
307: x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5];
308: idiag += 36;
309: i2 += 6;
310: }
311: break;
312: case 7:
313: PetscKernel_v_gets_A_times_w_7(x,idiag,b);
314: t[0] = b[0]; t[1] = b[1]; t[2] = b[2];
315: t[3] = b[3]; t[4] = b[4]; t[5] = b[5]; t[6] = b[6];
316: i2 = 7;
317: idiag += 49;
318: for (i=1; i<m; i++) {
319: v = aa + 49*ai[i];
320: vi = aj + ai[i];
321: nz = diag[i] - ai[i];
322: s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2];
323: s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6];
324: while (nz--) {
325: idx = 7*(*vi++);
326: xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
327: xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
328: PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
329: v += 49;
330: }
331: t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2];
332: t[i2+3] = s[3]; t[i2+4] = s[4]; t[i2+5] = s[5]; t[i2+6] = s[6];
333: PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
334: x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
335: x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6];
336: idiag += 49;
337: i2 += 7;
338: }
339: break;
340: default:
341: PetscKernel_w_gets_Ar_times_v(bs,bs,b,idiag,x);
342: PetscMemcpy(t,b,bs*sizeof(PetscScalar));
343: i2 = bs;
344: idiag += bs2;
345: for (i=1; i<m; i++) {
346: v = aa + bs2*ai[i];
347: vi = aj + ai[i];
348: nz = diag[i] - ai[i];
350: PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));
351: /* copy all rows of x that are needed into contiguous space */
352: workt = work;
353: for (j=0; j<nz; j++) {
354: PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));
355: workt += bs;
356: }
357: PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
358: PetscMemcpy(t+i2,w,bs*sizeof(PetscScalar));
359: PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
361: idiag += bs2;
362: i2 += bs;
363: }
364: break;
365: }
366: /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
367: PetscLogFlops(1.0*bs2*a->nz);
368: xb = t;
369: }
370: else xb = b;
371: if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
372: idiag = a->idiag+bs2*(a->mbs-1);
373: i2 = bs * (m-1);
374: switch (bs) {
375: case 1:
376: s[0] = xb[i2];
377: PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
378: x[i2] = xw[0];
379: i2 -= 1;
380: for (i=m-2; i>=0; i--) {
381: v = aa + (diag[i]+1);
382: vi = aj + diag[i] + 1;
383: nz = ai[i+1] - diag[i] - 1;
384: s[0] = xb[i2];
385: for (j=0; j<nz; j++) {
386: xw[0] = x[vi[j]];
387: PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
388: }
389: PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
390: x[i2] = xw[0];
391: idiag -= 1;
392: i2 -= 1;
393: }
394: break;
395: case 2:
396: s[0] = xb[i2]; s[1] = xb[i2+1];
397: PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
398: x[i2] = xw[0]; x[i2+1] = xw[1];
399: i2 -= 2;
400: idiag -= 4;
401: for (i=m-2; i>=0; i--) {
402: v = aa + 4*(diag[i] + 1);
403: vi = aj + diag[i] + 1;
404: nz = ai[i+1] - diag[i] - 1;
405: s[0] = xb[i2]; s[1] = xb[i2+1];
406: for (j=0; j<nz; j++) {
407: idx = 2*vi[j];
408: it = 4*j;
409: xw[0] = x[idx]; xw[1] = x[1+idx];
410: PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
411: }
412: PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
413: x[i2] = xw[0]; x[i2+1] = xw[1];
414: idiag -= 4;
415: i2 -= 2;
416: }
417: break;
418: case 3:
419: s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2];
420: PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
421: x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
422: i2 -= 3;
423: idiag -= 9;
424: for (i=m-2; i>=0; i--) {
425: v = aa + 9*(diag[i]+1);
426: vi = aj + diag[i] + 1;
427: nz = ai[i+1] - diag[i] - 1;
428: s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2];
429: while (nz--) {
430: idx = 3*(*vi++);
431: xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
432: PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
433: v += 9;
434: }
435: PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
436: x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
437: idiag -= 9;
438: i2 -= 3;
439: }
440: break;
441: case 4:
442: s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3];
443: PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
444: x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3];
445: i2 -= 4;
446: idiag -= 16;
447: for (i=m-2; i>=0; i--) {
448: v = aa + 16*(diag[i]+1);
449: vi = aj + diag[i] + 1;
450: nz = ai[i+1] - diag[i] - 1;
451: s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3];
452: while (nz--) {
453: idx = 4*(*vi++);
454: xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
455: PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
456: v += 16;
457: }
458: PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
459: x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3];
460: idiag -= 16;
461: i2 -= 4;
462: }
463: break;
464: case 5:
465: s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4];
466: PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
467: x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4];
468: i2 -= 5;
469: idiag -= 25;
470: for (i=m-2; i>=0; i--) {
471: v = aa + 25*(diag[i]+1);
472: vi = aj + diag[i] + 1;
473: nz = ai[i+1] - diag[i] - 1;
474: s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4];
475: while (nz--) {
476: idx = 5*(*vi++);
477: xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
478: PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
479: v += 25;
480: }
481: PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
482: x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4];
483: idiag -= 25;
484: i2 -= 5;
485: }
486: break;
487: case 6:
488: s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5];
489: PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
490: x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5];
491: i2 -= 6;
492: idiag -= 36;
493: for (i=m-2; i>=0; i--) {
494: v = aa + 36*(diag[i]+1);
495: vi = aj + diag[i] + 1;
496: nz = ai[i+1] - diag[i] - 1;
497: s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5];
498: while (nz--) {
499: idx = 6*(*vi++);
500: xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
501: xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
502: PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
503: v += 36;
504: }
505: PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
506: x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5];
507: idiag -= 36;
508: i2 -= 6;
509: }
510: break;
511: case 7:
512: s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2];
513: s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; s[6] = xb[i2+6];
514: PetscKernel_v_gets_A_times_w_7(x,idiag,b);
515: x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
516: x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6];
517: i2 -= 7;
518: idiag -= 49;
519: for (i=m-2; i>=0; i--) {
520: v = aa + 49*(diag[i]+1);
521: vi = aj + diag[i] + 1;
522: nz = ai[i+1] - diag[i] - 1;
523: s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2];
524: s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; s[6] = xb[i2+6];
525: while (nz--) {
526: idx = 7*(*vi++);
527: xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
528: xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
529: PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
530: v += 49;
531: }
532: PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
533: x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
534: x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6];
535: idiag -= 49;
536: i2 -= 7;
537: }
538: break;
539: default:
540: PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));
541: PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
542: i2 -= bs;
543: idiag -= bs2;
544: for (i=m-2; i>=0; i--) {
545: v = aa + bs2*(diag[i]+1);
546: vi = aj + diag[i] + 1;
547: nz = ai[i+1] - diag[i] - 1;
549: PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));
550: /* copy all rows of x that are needed into contiguous space */
551: workt = work;
552: for (j=0; j<nz; j++) {
553: PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));
554: workt += bs;
555: }
556: PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
557: PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
559: idiag -= bs2;
560: i2 -= bs;
561: }
562: break;
563: }
564: PetscLogFlops(1.0*bs2*(a->nz));
565: }
566: its--;
567: }
568: while (its--) {
569: if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
570: idiag = a->idiag;
571: i2 = 0;
572: switch (bs) {
573: case 1:
574: for (i=0; i<m; i++) {
575: v = aa + ai[i];
576: vi = aj + ai[i];
577: nz = ai[i+1] - ai[i];
578: s[0] = b[i2];
579: for (j=0; j<nz; j++) {
580: xw[0] = x[vi[j]];
581: PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
582: }
583: PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
584: x[i2] += xw[0];
585: idiag += 1;
586: i2 += 1;
587: }
588: break;
589: case 2:
590: for (i=0; i<m; i++) {
591: v = aa + 4*ai[i];
592: vi = aj + ai[i];
593: nz = ai[i+1] - ai[i];
594: s[0] = b[i2]; s[1] = b[i2+1];
595: for (j=0; j<nz; j++) {
596: idx = 2*vi[j];
597: it = 4*j;
598: xw[0] = x[idx]; xw[1] = x[1+idx];
599: PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
600: }
601: PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
602: x[i2] += xw[0]; x[i2+1] += xw[1];
603: idiag += 4;
604: i2 += 2;
605: }
606: break;
607: case 3:
608: for (i=0; i<m; i++) {
609: v = aa + 9*ai[i];
610: vi = aj + ai[i];
611: nz = ai[i+1] - ai[i];
612: s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2];
613: while (nz--) {
614: idx = 3*(*vi++);
615: xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
616: PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
617: v += 9;
618: }
619: PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
620: x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
621: idiag += 9;
622: i2 += 3;
623: }
624: break;
625: case 4:
626: for (i=0; i<m; i++) {
627: v = aa + 16*ai[i];
628: vi = aj + ai[i];
629: nz = ai[i+1] - ai[i];
630: s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3];
631: while (nz--) {
632: idx = 4*(*vi++);
633: xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
634: PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
635: v += 16;
636: }
637: PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
638: x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3];
639: idiag += 16;
640: i2 += 4;
641: }
642: break;
643: case 5:
644: for (i=0; i<m; i++) {
645: v = aa + 25*ai[i];
646: vi = aj + ai[i];
647: nz = ai[i+1] - ai[i];
648: s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4];
649: while (nz--) {
650: idx = 5*(*vi++);
651: xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
652: PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
653: v += 25;
654: }
655: PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
656: x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; x[i2+4] += xw[4];
657: idiag += 25;
658: i2 += 5;
659: }
660: break;
661: case 6:
662: for (i=0; i<m; i++) {
663: v = aa + 36*ai[i];
664: vi = aj + ai[i];
665: nz = ai[i+1] - ai[i];
666: s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5];
667: while (nz--) {
668: idx = 6*(*vi++);
669: xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
670: xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
671: PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
672: v += 36;
673: }
674: PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
675: x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
676: x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5];
677: idiag += 36;
678: i2 += 6;
679: }
680: break;
681: case 7:
682: for (i=0; i<m; i++) {
683: v = aa + 49*ai[i];
684: vi = aj + ai[i];
685: nz = ai[i+1] - ai[i];
686: s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2];
687: s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6];
688: while (nz--) {
689: idx = 7*(*vi++);
690: xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
691: xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
692: PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
693: v += 49;
694: }
695: PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
696: x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
697: x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; x[i2+6] += xw[6];
698: idiag += 49;
699: i2 += 7;
700: }
701: break;
702: default:
703: for (i=0; i<m; i++) {
704: v = aa + bs2*ai[i];
705: vi = aj + ai[i];
706: nz = ai[i+1] - ai[i];
708: PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));
709: /* copy all rows of x that are needed into contiguous space */
710: workt = work;
711: for (j=0; j<nz; j++) {
712: PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));
713: workt += bs;
714: }
715: PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
716: PetscKernel_w_gets_w_plus_Ar_times_v(bs,bs,w,idiag,x+i2);
718: idiag += bs2;
719: i2 += bs;
720: }
721: break;
722: }
723: PetscLogFlops(2.0*bs2*a->nz);
724: }
725: if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
726: idiag = a->idiag+bs2*(a->mbs-1);
727: i2 = bs * (m-1);
728: switch (bs) {
729: case 1:
730: for (i=m-1; i>=0; i--) {
731: v = aa + ai[i];
732: vi = aj + ai[i];
733: nz = ai[i+1] - ai[i];
734: s[0] = b[i2];
735: for (j=0; j<nz; j++) {
736: xw[0] = x[vi[j]];
737: PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
738: }
739: PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
740: x[i2] += xw[0];
741: idiag -= 1;
742: i2 -= 1;
743: }
744: break;
745: case 2:
746: for (i=m-1; i>=0; i--) {
747: v = aa + 4*ai[i];
748: vi = aj + ai[i];
749: nz = ai[i+1] - ai[i];
750: s[0] = b[i2]; s[1] = b[i2+1];
751: for (j=0; j<nz; j++) {
752: idx = 2*vi[j];
753: it = 4*j;
754: xw[0] = x[idx]; xw[1] = x[1+idx];
755: PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
756: }
757: PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
758: x[i2] += xw[0]; x[i2+1] += xw[1];
759: idiag -= 4;
760: i2 -= 2;
761: }
762: break;
763: case 3:
764: for (i=m-1; i>=0; i--) {
765: v = aa + 9*ai[i];
766: vi = aj + ai[i];
767: nz = ai[i+1] - ai[i];
768: s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2];
769: while (nz--) {
770: idx = 3*(*vi++);
771: xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
772: PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
773: v += 9;
774: }
775: PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
776: x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
777: idiag -= 9;
778: i2 -= 3;
779: }
780: break;
781: case 4:
782: for (i=m-1; i>=0; i--) {
783: v = aa + 16*ai[i];
784: vi = aj + ai[i];
785: nz = ai[i+1] - ai[i];
786: s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3];
787: while (nz--) {
788: idx = 4*(*vi++);
789: xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
790: PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
791: v += 16;
792: }
793: PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
794: x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3];
795: idiag -= 16;
796: i2 -= 4;
797: }
798: break;
799: case 5:
800: for (i=m-1; i>=0; i--) {
801: v = aa + 25*ai[i];
802: vi = aj + ai[i];
803: nz = ai[i+1] - ai[i];
804: s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4];
805: while (nz--) {
806: idx = 5*(*vi++);
807: xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
808: PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
809: v += 25;
810: }
811: PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
812: x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; x[i2+4] += xw[4];
813: idiag -= 25;
814: i2 -= 5;
815: }
816: break;
817: case 6:
818: for (i=m-1; i>=0; i--) {
819: v = aa + 36*ai[i];
820: vi = aj + ai[i];
821: nz = ai[i+1] - ai[i];
822: s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5];
823: while (nz--) {
824: idx = 6*(*vi++);
825: xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
826: xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
827: PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
828: v += 36;
829: }
830: PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
831: x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
832: x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5];
833: idiag -= 36;
834: i2 -= 6;
835: }
836: break;
837: case 7:
838: for (i=m-1; i>=0; i--) {
839: v = aa + 49*ai[i];
840: vi = aj + ai[i];
841: nz = ai[i+1] - ai[i];
842: s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2];
843: s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6];
844: while (nz--) {
845: idx = 7*(*vi++);
846: xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
847: xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
848: PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
849: v += 49;
850: }
851: PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
852: x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
853: x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; x[i2+6] += xw[6];
854: idiag -= 49;
855: i2 -= 7;
856: }
857: break;
858: default:
859: for (i=m-1; i>=0; i--) {
860: v = aa + bs2*ai[i];
861: vi = aj + ai[i];
862: nz = ai[i+1] - ai[i];
864: PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));
865: /* copy all rows of x that are needed into contiguous space */
866: workt = work;
867: for (j=0; j<nz; j++) {
868: PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));
869: workt += bs;
870: }
871: PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
872: PetscKernel_w_gets_w_plus_Ar_times_v(bs,bs,w,idiag,x+i2);
874: idiag -= bs2;
875: i2 -= bs;
876: }
877: break;
878: }
879: PetscLogFlops(2.0*bs2*(a->nz));
880: }
881: }
882: VecRestoreArray(xx,&x);
883: VecRestoreArrayRead(bb,&b);
884: return(0);
885: }
888: /*
889: Special version for direct calls from Fortran (Used in PETSc-fun3d)
890: */
891: #if defined(PETSC_HAVE_FORTRAN_CAPS)
892: #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4
893: #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
894: #define matsetvaluesblocked4_ matsetvaluesblocked4
895: #endif
899: PETSC_EXTERN void matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[])
900: {
901: Mat A = *AA;
902: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
903: PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn;
904: PetscInt *ai =a->i,*ailen=a->ilen;
905: PetscInt *aj =a->j,stepval,lastcol = -1;
906: const PetscScalar *value = v;
907: MatScalar *ap,*aa = a->a,*bap;
910: if (A->rmap->bs != 4) SETERRABORT(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4");
911: stepval = (n-1)*4;
912: for (k=0; k<m; k++) { /* loop over added rows */
913: row = im[k];
914: rp = aj + ai[row];
915: ap = aa + 16*ai[row];
916: nrow = ailen[row];
917: low = 0;
918: high = nrow;
919: for (l=0; l<n; l++) { /* loop over added columns */
920: col = in[l];
921: if (col <= lastcol) low = 0;
922: else high = nrow;
923: lastcol = col;
924: value = v + k*(stepval+4 + l)*4;
925: while (high-low > 7) {
926: t = (low+high)/2;
927: if (rp[t] > col) high = t;
928: else low = t;
929: }
930: for (i=low; i<high; i++) {
931: if (rp[i] > col) break;
932: if (rp[i] == col) {
933: bap = ap + 16*i;
934: for (ii=0; ii<4; ii++,value+=stepval) {
935: for (jj=ii; jj<16; jj+=4) {
936: bap[jj] += *value++;
937: }
938: }
939: goto noinsert2;
940: }
941: }
942: N = nrow++ - 1;
943: high++; /* added new column index thus must search to one higher than before */
944: /* shift up all the later entries in this row */
945: for (ii=N; ii>=i; ii--) {
946: rp[ii+1] = rp[ii];
947: PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
948: }
949: if (N >= i) {
950: PetscMemzero(ap+16*i,16*sizeof(MatScalar));
951: }
952: rp[i] = col;
953: bap = ap + 16*i;
954: for (ii=0; ii<4; ii++,value+=stepval) {
955: for (jj=ii; jj<16; jj+=4) {
956: bap[jj] = *value++;
957: }
958: }
959: noinsert2:;
960: low = i;
961: }
962: ailen[row] = nrow;
963: }
964: PetscFunctionReturnVoid();
965: }
967: #if defined(PETSC_HAVE_FORTRAN_CAPS)
968: #define matsetvalues4_ MATSETVALUES4
969: #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
970: #define matsetvalues4_ matsetvalues4
971: #endif
975: PETSC_EXTERN void matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v)
976: {
977: Mat A = *AA;
978: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
979: PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm;
980: PetscInt *ai=a->i,*ailen=a->ilen;
981: PetscInt *aj=a->j,brow,bcol;
982: PetscInt ridx,cidx,lastcol = -1;
983: MatScalar *ap,value,*aa=a->a,*bap;
986: for (k=0; k<m; k++) { /* loop over added rows */
987: row = im[k]; brow = row/4;
988: rp = aj + ai[brow];
989: ap = aa + 16*ai[brow];
990: nrow = ailen[brow];
991: low = 0;
992: high = nrow;
993: for (l=0; l<n; l++) { /* loop over added columns */
994: col = in[l]; bcol = col/4;
995: ridx = row % 4; cidx = col % 4;
996: value = v[l + k*n];
997: if (col <= lastcol) low = 0;
998: else high = nrow;
999: lastcol = col;
1000: while (high-low > 7) {
1001: t = (low+high)/2;
1002: if (rp[t] > bcol) high = t;
1003: else low = t;
1004: }
1005: for (i=low; i<high; i++) {
1006: if (rp[i] > bcol) break;
1007: if (rp[i] == bcol) {
1008: bap = ap + 16*i + 4*cidx + ridx;
1009: *bap += value;
1010: goto noinsert1;
1011: }
1012: }
1013: N = nrow++ - 1;
1014: high++; /* added new column thus must search to one higher than before */
1015: /* shift up all the later entries in this row */
1016: for (ii=N; ii>=i; ii--) {
1017: rp[ii+1] = rp[ii];
1018: PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
1019: }
1020: if (N>=i) {
1021: PetscMemzero(ap+16*i,16*sizeof(MatScalar));
1022: }
1023: rp[i] = bcol;
1024: ap[16*i + 4*cidx + ridx] = value;
1025: noinsert1:;
1026: low = i;
1027: }
1028: ailen[brow] = nrow;
1029: }
1030: PetscFunctionReturnVoid();
1031: }
1033: /*
1034: Checks for missing diagonals
1035: */
1038: PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A,PetscBool *missing,PetscInt *d)
1039: {
1040: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1042: PetscInt *diag,*ii = a->i,i;
1045: MatMarkDiagonal_SeqBAIJ(A);
1046: *missing = PETSC_FALSE;
1047: if (A->rmap->n > 0 && !ii) {
1048: *missing = PETSC_TRUE;
1049: if (d) *d = 0;
1050: PetscInfo(A,"Matrix has no entries therefore is missing diagonal\n");
1051: } else {
1052: diag = a->diag;
1053: for (i=0; i<a->mbs; i++) {
1054: if (diag[i] >= ii[i+1]) {
1055: *missing = PETSC_TRUE;
1056: if (d) *d = i;
1057: PetscInfo1(A,"Matrix is missing block diagonal number %D\n",i);
1058: break;
1059: }
1060: }
1061: }
1062: return(0);
1063: }
1067: PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A)
1068: {
1069: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1071: PetscInt i,j,m = a->mbs;
1074: if (!a->diag) {
1075: PetscMalloc1(m,&a->diag);
1076: PetscLogObjectMemory((PetscObject)A,m*sizeof(PetscInt));
1077: a->free_diag = PETSC_TRUE;
1078: }
1079: for (i=0; i<m; i++) {
1080: a->diag[i] = a->i[i+1];
1081: for (j=a->i[i]; j<a->i[i+1]; j++) {
1082: if (a->j[j] == i) {
1083: a->diag[i] = j;
1084: break;
1085: }
1086: }
1087: }
1088: return(0);
1089: }
1094: static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *inia[],const PetscInt *inja[],PetscBool *done)
1095: {
1096: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1098: PetscInt i,j,n = a->mbs,nz = a->i[n],*tia,*tja,bs = A->rmap->bs,k,l,cnt;
1099: PetscInt **ia = (PetscInt**)inia,**ja = (PetscInt**)inja;
1102: *nn = n;
1103: if (!ia) return(0);
1104: if (symmetric) {
1105: MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,0,&tia,&tja);
1106: nz = tia[n];
1107: } else {
1108: tia = a->i; tja = a->j;
1109: }
1111: if (!blockcompressed && bs > 1) {
1112: (*nn) *= bs;
1113: /* malloc & create the natural set of indices */
1114: PetscMalloc1((n+1)*bs,ia);
1115: if (n) {
1116: (*ia)[0] = 0;
1117: for (j=1; j<bs; j++) {
1118: (*ia)[j] = (tia[1]-tia[0])*bs+(*ia)[j-1];
1119: }
1120: }
1122: for (i=1; i<n; i++) {
1123: (*ia)[i*bs] = (tia[i]-tia[i-1])*bs + (*ia)[i*bs-1];
1124: for (j=1; j<bs; j++) {
1125: (*ia)[i*bs+j] = (tia[i+1]-tia[i])*bs + (*ia)[i*bs+j-1];
1126: }
1127: }
1128: if (n) {
1129: (*ia)[n*bs] = (tia[n]-tia[n-1])*bs + (*ia)[n*bs-1];
1130: }
1132: if (inja) {
1133: PetscMalloc1(nz*bs*bs,ja);
1134: cnt = 0;
1135: for (i=0; i<n; i++) {
1136: for (j=0; j<bs; j++) {
1137: for (k=tia[i]; k<tia[i+1]; k++) {
1138: for (l=0; l<bs; l++) {
1139: (*ja)[cnt++] = bs*tja[k] + l;
1140: }
1141: }
1142: }
1143: }
1144: }
1146: if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */
1147: PetscFree(tia);
1148: PetscFree(tja);
1149: }
1150: } else if (oshift == 1) {
1151: if (symmetric) {
1152: nz = tia[A->rmap->n/bs];
1153: /* add 1 to i and j indices */
1154: for (i=0; i<A->rmap->n/bs+1; i++) tia[i] = tia[i] + 1;
1155: *ia = tia;
1156: if (ja) {
1157: for (i=0; i<nz; i++) tja[i] = tja[i] + 1;
1158: *ja = tja;
1159: }
1160: } else {
1161: nz = a->i[A->rmap->n/bs];
1162: /* malloc space and add 1 to i and j indices */
1163: PetscMalloc1(A->rmap->n/bs+1,ia);
1164: for (i=0; i<A->rmap->n/bs+1; i++) (*ia)[i] = a->i[i] + 1;
1165: if (ja) {
1166: PetscMalloc1(nz,ja);
1167: for (i=0; i<nz; i++) (*ja)[i] = a->j[i] + 1;
1168: }
1169: }
1170: } else {
1171: *ia = tia;
1172: if (ja) *ja = tja;
1173: }
1174: return(0);
1175: }
1179: static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool *done)
1180: {
1184: if (!ia) return(0);
1185: if ((!blockcompressed && A->rmap->bs > 1) || (symmetric || oshift == 1)) {
1186: PetscFree(*ia);
1187: if (ja) {PetscFree(*ja);}
1188: }
1189: return(0);
1190: }
1194: PetscErrorCode MatDestroy_SeqBAIJ(Mat A)
1195: {
1196: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1200: #if defined(PETSC_USE_LOG)
1201: PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap->N,A->cmap->n,a->nz);
1202: #endif
1203: MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);
1204: ISDestroy(&a->row);
1205: ISDestroy(&a->col);
1206: if (a->free_diag) {PetscFree(a->diag);}
1207: PetscFree(a->idiag);
1208: if (a->free_imax_ilen) {PetscFree2(a->imax,a->ilen);}
1209: PetscFree(a->solve_work);
1210: PetscFree(a->mult_work);
1211: PetscFree(a->sor_workt);
1212: PetscFree(a->sor_work);
1213: ISDestroy(&a->icol);
1214: PetscFree(a->saved_values);
1215: PetscFree2(a->compressedrow.i,a->compressedrow.rindex);
1217: MatDestroy(&a->sbaijMat);
1218: MatDestroy(&a->parent);
1219: PetscFree(A->data);
1221: PetscObjectChangeTypeName((PetscObject)A,0);
1222: PetscObjectComposeFunction((PetscObject)A,"MatInvertBlockDiagonal_C",NULL);
1223: PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C",NULL);
1224: PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C",NULL);
1225: PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C",NULL);
1226: PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C",NULL);
1227: PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C",NULL);
1228: PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C",NULL);
1229: PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocationCSR_C",NULL);
1230: PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqbstrm_C",NULL);
1231: PetscObjectComposeFunction((PetscObject)A,"MatIsTranspose_C",NULL);
1232: return(0);
1233: }
1237: PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op,PetscBool flg)
1238: {
1239: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1243: switch (op) {
1244: case MAT_ROW_ORIENTED:
1245: a->roworiented = flg;
1246: break;
1247: case MAT_KEEP_NONZERO_PATTERN:
1248: a->keepnonzeropattern = flg;
1249: break;
1250: case MAT_NEW_NONZERO_LOCATIONS:
1251: a->nonew = (flg ? 0 : 1);
1252: break;
1253: case MAT_NEW_NONZERO_LOCATION_ERR:
1254: a->nonew = (flg ? -1 : 0);
1255: break;
1256: case MAT_NEW_NONZERO_ALLOCATION_ERR:
1257: a->nonew = (flg ? -2 : 0);
1258: break;
1259: case MAT_UNUSED_NONZERO_LOCATION_ERR:
1260: a->nounused = (flg ? -1 : 0);
1261: break;
1262: case MAT_NEW_DIAGONALS:
1263: case MAT_IGNORE_OFF_PROC_ENTRIES:
1264: case MAT_USE_HASH_TABLE:
1265: PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);
1266: break;
1267: case MAT_SPD:
1268: case MAT_SYMMETRIC:
1269: case MAT_STRUCTURALLY_SYMMETRIC:
1270: case MAT_HERMITIAN:
1271: case MAT_SYMMETRY_ETERNAL:
1272: /* These options are handled directly by MatSetOption() */
1273: break;
1274: default:
1275: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1276: }
1277: return(0);
1278: }
1280: /* used for both SeqBAIJ and SeqSBAIJ matrices */
1283: PetscErrorCode MatGetRow_SeqBAIJ_private(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v,PetscInt *ai,PetscInt *aj,PetscScalar *aa)
1284: {
1286: PetscInt itmp,i,j,k,M,bn,bp,*idx_i,bs,bs2;
1287: MatScalar *aa_i;
1288: PetscScalar *v_i;
1291: bs = A->rmap->bs;
1292: bs2 = bs*bs;
1293: if (row < 0 || row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row);
1295: bn = row/bs; /* Block number */
1296: bp = row % bs; /* Block Position */
1297: M = ai[bn+1] - ai[bn];
1298: *nz = bs*M;
1300: if (v) {
1301: *v = 0;
1302: if (*nz) {
1303: PetscMalloc1(*nz,v);
1304: for (i=0; i<M; i++) { /* for each block in the block row */
1305: v_i = *v + i*bs;
1306: aa_i = aa + bs2*(ai[bn] + i);
1307: for (j=bp,k=0; j<bs2; j+=bs,k++) v_i[k] = aa_i[j];
1308: }
1309: }
1310: }
1312: if (idx) {
1313: *idx = 0;
1314: if (*nz) {
1315: PetscMalloc1(*nz,idx);
1316: for (i=0; i<M; i++) { /* for each block in the block row */
1317: idx_i = *idx + i*bs;
1318: itmp = bs*aj[ai[bn] + i];
1319: for (j=0; j<bs; j++) idx_i[j] = itmp++;
1320: }
1321: }
1322: }
1323: return(0);
1324: }
1328: PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1329: {
1330: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1334: MatGetRow_SeqBAIJ_private(A,row,nz,idx,v,a->i,a->j,a->a);
1335: return(0);
1336: }
1340: PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1341: {
1345: if (idx) {PetscFree(*idx);}
1346: if (v) {PetscFree(*v);}
1347: return(0);
1348: }
1350: extern PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
1354: PetscErrorCode MatTranspose_SeqBAIJ(Mat A,MatReuse reuse,Mat *B)
1355: {
1356: Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data;
1357: Mat C;
1359: PetscInt i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap->bs,mbs=a->mbs,nbs=a->nbs,len,*col;
1360: PetscInt *rows,*cols,bs2=a->bs2;
1361: MatScalar *array;
1364: if (reuse == MAT_REUSE_MATRIX && A == *B && mbs != nbs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1365: if (reuse == MAT_INITIAL_MATRIX || A == *B) {
1366: PetscCalloc1(1+nbs,&col);
1368: for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1;
1369: MatCreate(PetscObjectComm((PetscObject)A),&C);
1370: MatSetSizes(C,A->cmap->n,A->rmap->N,A->cmap->n,A->rmap->N);
1371: MatSetType(C,((PetscObject)A)->type_name);
1372: MatSeqBAIJSetPreallocation_SeqBAIJ(C,bs,0,col);
1373: PetscFree(col);
1374: } else {
1375: C = *B;
1376: }
1378: array = a->a;
1379: PetscMalloc2(bs,&rows,bs,&cols);
1380: for (i=0; i<mbs; i++) {
1381: cols[0] = i*bs;
1382: for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1;
1383: len = ai[i+1] - ai[i];
1384: for (j=0; j<len; j++) {
1385: rows[0] = (*aj++)*bs;
1386: for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1;
1387: MatSetValues_SeqBAIJ(C,bs,rows,bs,cols,array,INSERT_VALUES);
1388: array += bs2;
1389: }
1390: }
1391: PetscFree2(rows,cols);
1393: MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);
1394: MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);
1396: if (reuse == MAT_INITIAL_MATRIX || *B != A) {
1397: *B = C;
1398: } else {
1399: MatHeaderMerge(A,C);
1400: }
1401: return(0);
1402: }
1406: PetscErrorCode MatIsTranspose_SeqBAIJ(Mat A,Mat B,PetscReal tol,PetscBool *f)
1407: {
1409: Mat Btrans;
1412: *f = PETSC_FALSE;
1413: MatTranspose_SeqBAIJ(A,MAT_INITIAL_MATRIX,&Btrans);
1414: MatEqual_SeqBAIJ(B,Btrans,f);
1415: MatDestroy(&Btrans);
1416: return(0);
1417: }
1421: static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer)
1422: {
1423: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1425: PetscInt i,*col_lens,bs = A->rmap->bs,count,*jj,j,k,l,bs2=a->bs2;
1426: int fd;
1427: PetscScalar *aa;
1428: FILE *file;
1431: PetscViewerBinaryGetDescriptor(viewer,&fd);
1432: PetscMalloc1(4+A->rmap->N,&col_lens);
1433: col_lens[0] = MAT_FILE_CLASSID;
1435: col_lens[1] = A->rmap->N;
1436: col_lens[2] = A->cmap->n;
1437: col_lens[3] = a->nz*bs2;
1439: /* store lengths of each row and write (including header) to file */
1440: count = 0;
1441: for (i=0; i<a->mbs; i++) {
1442: for (j=0; j<bs; j++) {
1443: col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
1444: }
1445: }
1446: PetscBinaryWrite(fd,col_lens,4+A->rmap->N,PETSC_INT,PETSC_TRUE);
1447: PetscFree(col_lens);
1449: /* store column indices (zero start index) */
1450: PetscMalloc1((a->nz+1)*bs2,&jj);
1451: count = 0;
1452: for (i=0; i<a->mbs; i++) {
1453: for (j=0; j<bs; j++) {
1454: for (k=a->i[i]; k<a->i[i+1]; k++) {
1455: for (l=0; l<bs; l++) {
1456: jj[count++] = bs*a->j[k] + l;
1457: }
1458: }
1459: }
1460: }
1461: PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);
1462: PetscFree(jj);
1464: /* store nonzero values */
1465: PetscMalloc1((a->nz+1)*bs2,&aa);
1466: count = 0;
1467: for (i=0; i<a->mbs; i++) {
1468: for (j=0; j<bs; j++) {
1469: for (k=a->i[i]; k<a->i[i+1]; k++) {
1470: for (l=0; l<bs; l++) {
1471: aa[count++] = a->a[bs2*k + l*bs + j];
1472: }
1473: }
1474: }
1475: }
1476: PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);
1477: PetscFree(aa);
1479: PetscViewerBinaryGetInfoPointer(viewer,&file);
1480: if (file) {
1481: fprintf(file,"-matload_block_size %d\n",(int)A->rmap->bs);
1482: }
1483: return(0);
1484: }
1488: static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer)
1489: {
1490: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1491: PetscErrorCode ierr;
1492: PetscInt i,j,bs = A->rmap->bs,k,l,bs2=a->bs2;
1493: PetscViewerFormat format;
1496: PetscViewerGetFormat(viewer,&format);
1497: if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1498: PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);
1499: } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
1500: const char *matname;
1501: Mat aij;
1502: MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);
1503: PetscObjectGetName((PetscObject)A,&matname);
1504: PetscObjectSetName((PetscObject)aij,matname);
1505: MatView(aij,viewer);
1506: MatDestroy(&aij);
1507: } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1508: return(0);
1509: } else if (format == PETSC_VIEWER_ASCII_COMMON) {
1510: PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);
1511: for (i=0; i<a->mbs; i++) {
1512: for (j=0; j<bs; j++) {
1513: PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);
1514: for (k=a->i[i]; k<a->i[i+1]; k++) {
1515: for (l=0; l<bs; l++) {
1516: #if defined(PETSC_USE_COMPLEX)
1517: if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1518: PetscViewerASCIIPrintf(viewer," (%D, %g + %gi) ",bs*a->j[k]+l,
1519: (double)PetscRealPart(a->a[bs2*k + l*bs + j]),(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));
1520: } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1521: PetscViewerASCIIPrintf(viewer," (%D, %g - %gi) ",bs*a->j[k]+l,
1522: (double)PetscRealPart(a->a[bs2*k + l*bs + j]),-(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));
1523: } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1524: PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)PetscRealPart(a->a[bs2*k + l*bs + j]));
1525: }
1526: #else
1527: if (a->a[bs2*k + l*bs + j] != 0.0) {
1528: PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)a->a[bs2*k + l*bs + j]);
1529: }
1530: #endif
1531: }
1532: }
1533: PetscViewerASCIIPrintf(viewer,"\n");
1534: }
1535: }
1536: PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);
1537: } else {
1538: PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);
1539: for (i=0; i<a->mbs; i++) {
1540: for (j=0; j<bs; j++) {
1541: PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);
1542: for (k=a->i[i]; k<a->i[i+1]; k++) {
1543: for (l=0; l<bs; l++) {
1544: #if defined(PETSC_USE_COMPLEX)
1545: if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
1546: PetscViewerASCIIPrintf(viewer," (%D, %g + %g i) ",bs*a->j[k]+l,
1547: (double)PetscRealPart(a->a[bs2*k + l*bs + j]),(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));
1548: } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
1549: PetscViewerASCIIPrintf(viewer," (%D, %g - %g i) ",bs*a->j[k]+l,
1550: (double)PetscRealPart(a->a[bs2*k + l*bs + j]),-(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));
1551: } else {
1552: PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)PetscRealPart(a->a[bs2*k + l*bs + j]));
1553: }
1554: #else
1555: PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)a->a[bs2*k + l*bs + j]);
1556: #endif
1557: }
1558: }
1559: PetscViewerASCIIPrintf(viewer,"\n");
1560: }
1561: }
1562: PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);
1563: }
1564: PetscViewerFlush(viewer);
1565: return(0);
1566: }
1568: #include <petscdraw.h>
1571: static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
1572: {
1573: Mat A = (Mat) Aa;
1574: Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data;
1575: PetscErrorCode ierr;
1576: PetscInt row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap->bs,bs2=a->bs2;
1577: PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r;
1578: MatScalar *aa;
1579: PetscViewer viewer;
1580: PetscViewerFormat format;
1583: PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);
1584: PetscViewerGetFormat(viewer,&format);
1586: PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);
1588: /* loop over matrix elements drawing boxes */
1590: if (format != PETSC_VIEWER_DRAW_CONTOUR) {
1591: color = PETSC_DRAW_BLUE;
1592: for (i=0,row=0; i<mbs; i++,row+=bs) {
1593: for (j=a->i[i]; j<a->i[i+1]; j++) {
1594: y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1595: x_l = a->j[j]*bs; x_r = x_l + 1.0;
1596: aa = a->a + j*bs2;
1597: for (k=0; k<bs; k++) {
1598: for (l=0; l<bs; l++) {
1599: if (PetscRealPart(*aa++) >= 0.) continue;
1600: PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
1601: }
1602: }
1603: }
1604: }
1605: color = PETSC_DRAW_CYAN;
1606: for (i=0,row=0; i<mbs; i++,row+=bs) {
1607: for (j=a->i[i]; j<a->i[i+1]; j++) {
1608: y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1609: x_l = a->j[j]*bs; x_r = x_l + 1.0;
1610: aa = a->a + j*bs2;
1611: for (k=0; k<bs; k++) {
1612: for (l=0; l<bs; l++) {
1613: if (PetscRealPart(*aa++) != 0.) continue;
1614: PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
1615: }
1616: }
1617: }
1618: }
1619: color = PETSC_DRAW_RED;
1620: for (i=0,row=0; i<mbs; i++,row+=bs) {
1621: for (j=a->i[i]; j<a->i[i+1]; j++) {
1622: y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1623: x_l = a->j[j]*bs; x_r = x_l + 1.0;
1624: aa = a->a + j*bs2;
1625: for (k=0; k<bs; k++) {
1626: for (l=0; l<bs; l++) {
1627: if (PetscRealPart(*aa++) <= 0.) continue;
1628: PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
1629: }
1630: }
1631: }
1632: }
1633: } else {
1634: /* use contour shading to indicate magnitude of values */
1635: /* first determine max of all nonzero values */
1636: PetscDraw popup;
1637: PetscReal scale,maxv = 0.0;
1639: for (i=0; i<a->nz*a->bs2; i++) {
1640: if (PetscAbsScalar(a->a[i]) > maxv) maxv = PetscAbsScalar(a->a[i]);
1641: }
1642: scale = (245.0 - PETSC_DRAW_BASIC_COLORS)/maxv;
1643: PetscDrawGetPopup(draw,&popup);
1644: if (popup) {
1645: PetscDrawScalePopup(popup,0.0,maxv);
1646: }
1647: for (i=0,row=0; i<mbs; i++,row+=bs) {
1648: for (j=a->i[i]; j<a->i[i+1]; j++) {
1649: y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1650: x_l = a->j[j]*bs; x_r = x_l + 1.0;
1651: aa = a->a + j*bs2;
1652: for (k=0; k<bs; k++) {
1653: for (l=0; l<bs; l++) {
1654: color = PETSC_DRAW_BASIC_COLORS + (PetscInt)(scale*PetscAbsScalar(*aa++));
1655: PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
1656: }
1657: }
1658: }
1659: }
1660: }
1661: return(0);
1662: }
1666: static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer)
1667: {
1669: PetscReal xl,yl,xr,yr,w,h;
1670: PetscDraw draw;
1671: PetscBool isnull;
1674: PetscViewerDrawGetDraw(viewer,0,&draw);
1675: PetscDrawIsNull(draw,&isnull); if (isnull) return(0);
1677: PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);
1678: xr = A->cmap->n; yr = A->rmap->N; h = yr/10.0; w = xr/10.0;
1679: xr += w; yr += h; xl = -w; yl = -h;
1680: PetscDrawSetCoordinates(draw,xl,yl,xr,yr);
1681: PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);
1682: PetscObjectCompose((PetscObject)A,"Zoomviewer",NULL);
1683: return(0);
1684: }
1688: PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer)
1689: {
1691: PetscBool iascii,isbinary,isdraw;
1694: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);
1695: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);
1696: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);
1697: if (iascii) {
1698: MatView_SeqBAIJ_ASCII(A,viewer);
1699: } else if (isbinary) {
1700: MatView_SeqBAIJ_Binary(A,viewer);
1701: } else if (isdraw) {
1702: MatView_SeqBAIJ_Draw(A,viewer);
1703: } else {
1704: Mat B;
1705: MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);
1706: MatView(B,viewer);
1707: MatDestroy(&B);
1708: }
1709: return(0);
1710: }
1715: PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[])
1716: {
1717: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1718: PetscInt *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
1719: PetscInt *ai = a->i,*ailen = a->ilen;
1720: PetscInt brow,bcol,ridx,cidx,bs=A->rmap->bs,bs2=a->bs2;
1721: MatScalar *ap,*aa = a->a;
1724: for (k=0; k<m; k++) { /* loop over rows */
1725: row = im[k]; brow = row/bs;
1726: if (row < 0) {v += n; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
1727: if (row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row);
1728: rp = aj + ai[brow]; ap = aa + bs2*ai[brow];
1729: nrow = ailen[brow];
1730: for (l=0; l<n; l++) { /* loop over columns */
1731: if (in[l] < 0) {v++; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
1732: if (in[l] >= A->cmap->n) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]);
1733: col = in[l];
1734: bcol = col/bs;
1735: cidx = col%bs;
1736: ridx = row%bs;
1737: high = nrow;
1738: low = 0; /* assume unsorted */
1739: while (high-low > 5) {
1740: t = (low+high)/2;
1741: if (rp[t] > bcol) high = t;
1742: else low = t;
1743: }
1744: for (i=low; i<high; i++) {
1745: if (rp[i] > bcol) break;
1746: if (rp[i] == bcol) {
1747: *v++ = ap[bs2*i+bs*cidx+ridx];
1748: goto finished;
1749: }
1750: }
1751: *v++ = 0.0;
1752: finished:;
1753: }
1754: }
1755: return(0);
1756: }
1760: PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
1761: {
1762: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1763: PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1;
1764: PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen;
1765: PetscErrorCode ierr;
1766: PetscInt *aj =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs,stepval;
1767: PetscBool roworiented=a->roworiented;
1768: const PetscScalar *value = v;
1769: MatScalar *ap,*aa = a->a,*bap;
1772: if (roworiented) {
1773: stepval = (n-1)*bs;
1774: } else {
1775: stepval = (m-1)*bs;
1776: }
1777: for (k=0; k<m; k++) { /* loop over added rows */
1778: row = im[k];
1779: if (row < 0) continue;
1780: #if defined(PETSC_USE_DEBUG)
1781: if (row >= a->mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block row index too large %D max %D",row,a->mbs-1);
1782: #endif
1783: rp = aj + ai[row];
1784: ap = aa + bs2*ai[row];
1785: rmax = imax[row];
1786: nrow = ailen[row];
1787: low = 0;
1788: high = nrow;
1789: for (l=0; l<n; l++) { /* loop over added columns */
1790: if (in[l] < 0) continue;
1791: #if defined(PETSC_USE_DEBUG)
1792: if (in[l] >= a->nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block column index too large %D max %D",in[l],a->nbs-1);
1793: #endif
1794: col = in[l];
1795: if (roworiented) {
1796: value = v + (k*(stepval+bs) + l)*bs;
1797: } else {
1798: value = v + (l*(stepval+bs) + k)*bs;
1799: }
1800: if (col <= lastcol) low = 0;
1801: else high = nrow;
1802: lastcol = col;
1803: while (high-low > 7) {
1804: t = (low+high)/2;
1805: if (rp[t] > col) high = t;
1806: else low = t;
1807: }
1808: for (i=low; i<high; i++) {
1809: if (rp[i] > col) break;
1810: if (rp[i] == col) {
1811: bap = ap + bs2*i;
1812: if (roworiented) {
1813: if (is == ADD_VALUES) {
1814: for (ii=0; ii<bs; ii++,value+=stepval) {
1815: for (jj=ii; jj<bs2; jj+=bs) {
1816: bap[jj] += *value++;
1817: }
1818: }
1819: } else {
1820: for (ii=0; ii<bs; ii++,value+=stepval) {
1821: for (jj=ii; jj<bs2; jj+=bs) {
1822: bap[jj] = *value++;
1823: }
1824: }
1825: }
1826: } else {
1827: if (is == ADD_VALUES) {
1828: for (ii=0; ii<bs; ii++,value+=bs+stepval) {
1829: for (jj=0; jj<bs; jj++) {
1830: bap[jj] += value[jj];
1831: }
1832: bap += bs;
1833: }
1834: } else {
1835: for (ii=0; ii<bs; ii++,value+=bs+stepval) {
1836: for (jj=0; jj<bs; jj++) {
1837: bap[jj] = value[jj];
1838: }
1839: bap += bs;
1840: }
1841: }
1842: }
1843: goto noinsert2;
1844: }
1845: }
1846: if (nonew == 1) goto noinsert2;
1847: if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new blocked index new nonzero block (%D, %D) in the matrix", row, col);
1848: MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
1849: N = nrow++ - 1; high++;
1850: /* shift up all the later entries in this row */
1851: for (ii=N; ii>=i; ii--) {
1852: rp[ii+1] = rp[ii];
1853: PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));
1854: }
1855: if (N >= i) {
1856: PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));
1857: }
1858: rp[i] = col;
1859: bap = ap + bs2*i;
1860: if (roworiented) {
1861: for (ii=0; ii<bs; ii++,value+=stepval) {
1862: for (jj=ii; jj<bs2; jj+=bs) {
1863: bap[jj] = *value++;
1864: }
1865: }
1866: } else {
1867: for (ii=0; ii<bs; ii++,value+=stepval) {
1868: for (jj=0; jj<bs; jj++) {
1869: *bap++ = *value++;
1870: }
1871: }
1872: }
1873: noinsert2:;
1874: low = i;
1875: }
1876: ailen[row] = nrow;
1877: }
1878: return(0);
1879: }
1883: PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
1884: {
1885: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1886: PetscInt fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax;
1887: PetscInt m = A->rmap->N,*ip,N,*ailen = a->ilen;
1889: PetscInt mbs = a->mbs,bs2 = a->bs2,rmax = 0;
1890: MatScalar *aa = a->a,*ap;
1891: PetscReal ratio=0.6;
1894: if (mode == MAT_FLUSH_ASSEMBLY) return(0);
1896: if (m) rmax = ailen[0];
1897: for (i=1; i<mbs; i++) {
1898: /* move each row back by the amount of empty slots (fshift) before it*/
1899: fshift += imax[i-1] - ailen[i-1];
1900: rmax = PetscMax(rmax,ailen[i]);
1901: if (fshift) {
1902: ip = aj + ai[i]; ap = aa + bs2*ai[i];
1903: N = ailen[i];
1904: for (j=0; j<N; j++) {
1905: ip[j-fshift] = ip[j];
1907: PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));
1908: }
1909: }
1910: ai[i] = ai[i-1] + ailen[i-1];
1911: }
1912: if (mbs) {
1913: fshift += imax[mbs-1] - ailen[mbs-1];
1914: ai[mbs] = ai[mbs-1] + ailen[mbs-1];
1915: }
1917: /* reset ilen and imax for each row */
1918: a->nonzerorowcnt = 0;
1919: for (i=0; i<mbs; i++) {
1920: ailen[i] = imax[i] = ai[i+1] - ai[i];
1921: a->nonzerorowcnt += ((ai[i+1] - ai[i]) > 0);
1922: }
1923: a->nz = ai[mbs];
1925: /* diagonals may have moved, so kill the diagonal pointers */
1926: a->idiagvalid = PETSC_FALSE;
1927: if (fshift && a->diag) {
1928: PetscFree(a->diag);
1929: PetscLogObjectMemory((PetscObject)A,-(mbs+1)*sizeof(PetscInt));
1930: a->diag = 0;
1931: }
1932: if (fshift && a->nounused == -1) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "Unused space detected in matrix: %D X %D block size %D, %D unneeded", m, A->cmap->n, A->rmap->bs, fshift*bs2);
1933: PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap->n,A->rmap->bs,fshift*bs2,a->nz*bs2);
1934: PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);
1935: PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);
1937: A->info.mallocs += a->reallocs;
1938: a->reallocs = 0;
1939: A->info.nz_unneeded = (PetscReal)fshift*bs2;
1940: a->rmax = rmax;
1942: MatCheckCompressedRow(A,a->nonzerorowcnt,&a->compressedrow,a->i,mbs,ratio);
1943: return(0);
1944: }
1946: /*
1947: This function returns an array of flags which indicate the locations of contiguous
1948: blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9]
1949: then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
1950: Assume: sizes should be long enough to hold all the values.
1951: */
1954: static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max)
1955: {
1956: PetscInt i,j,k,row;
1957: PetscBool flg;
1960: for (i=0,j=0; i<n; j++) {
1961: row = idx[i];
1962: if (row%bs!=0) { /* Not the begining of a block */
1963: sizes[j] = 1;
1964: i++;
1965: } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */
1966: sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */
1967: i++;
1968: } else { /* Begining of the block, so check if the complete block exists */
1969: flg = PETSC_TRUE;
1970: for (k=1; k<bs; k++) {
1971: if (row+k != idx[i+k]) { /* break in the block */
1972: flg = PETSC_FALSE;
1973: break;
1974: }
1975: }
1976: if (flg) { /* No break in the bs */
1977: sizes[j] = bs;
1978: i += bs;
1979: } else {
1980: sizes[j] = 1;
1981: i++;
1982: }
1983: }
1984: }
1985: *bs_max = j;
1986: return(0);
1987: }
1991: PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b)
1992: {
1993: Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data;
1994: PetscErrorCode ierr;
1995: PetscInt i,j,k,count,*rows;
1996: PetscInt bs=A->rmap->bs,bs2=baij->bs2,*sizes,row,bs_max;
1997: PetscScalar zero = 0.0;
1998: MatScalar *aa;
1999: const PetscScalar *xx;
2000: PetscScalar *bb;
2003: /* fix right hand side if needed */
2004: if (x && b) {
2005: VecGetArrayRead(x,&xx);
2006: VecGetArray(b,&bb);
2007: for (i=0; i<is_n; i++) {
2008: bb[is_idx[i]] = diag*xx[is_idx[i]];
2009: }
2010: VecRestoreArrayRead(x,&xx);
2011: VecRestoreArray(b,&bb);
2012: }
2014: /* Make a copy of the IS and sort it */
2015: /* allocate memory for rows,sizes */
2016: PetscMalloc2(is_n,&rows,2*is_n,&sizes);
2018: /* copy IS values to rows, and sort them */
2019: for (i=0; i<is_n; i++) rows[i] = is_idx[i];
2020: PetscSortInt(is_n,rows);
2022: if (baij->keepnonzeropattern) {
2023: for (i=0; i<is_n; i++) sizes[i] = 1;
2024: bs_max = is_n;
2025: } else {
2026: MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);
2027: A->nonzerostate++;
2028: }
2030: for (i=0,j=0; i<bs_max; j+=sizes[i],i++) {
2031: row = rows[j];
2032: if (row < 0 || row > A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row);
2033: count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
2034: aa = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
2035: if (sizes[i] == bs && !baij->keepnonzeropattern) {
2036: if (diag != (PetscScalar)0.0) {
2037: if (baij->ilen[row/bs] > 0) {
2038: baij->ilen[row/bs] = 1;
2039: baij->j[baij->i[row/bs]] = row/bs;
2041: PetscMemzero(aa,count*bs*sizeof(MatScalar));
2042: }
2043: /* Now insert all the diagonal values for this bs */
2044: for (k=0; k<bs; k++) {
2045: (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);
2046: }
2047: } else { /* (diag == 0.0) */
2048: baij->ilen[row/bs] = 0;
2049: } /* end (diag == 0.0) */
2050: } else { /* (sizes[i] != bs) */
2051: #if defined(PETSC_USE_DEBUG)
2052: if (sizes[i] != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal Error. Value should be 1");
2053: #endif
2054: for (k=0; k<count; k++) {
2055: aa[0] = zero;
2056: aa += bs;
2057: }
2058: if (diag != (PetscScalar)0.0) {
2059: (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);
2060: }
2061: }
2062: }
2064: PetscFree2(rows,sizes);
2065: MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);
2066: return(0);
2067: }
2071: PetscErrorCode MatZeroRowsColumns_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b)
2072: {
2073: Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data;
2074: PetscErrorCode ierr;
2075: PetscInt i,j,k,count;
2076: PetscInt bs =A->rmap->bs,bs2=baij->bs2,row,col;
2077: PetscScalar zero = 0.0;
2078: MatScalar *aa;
2079: const PetscScalar *xx;
2080: PetscScalar *bb;
2081: PetscBool *zeroed,vecs = PETSC_FALSE;
2084: /* fix right hand side if needed */
2085: if (x && b) {
2086: VecGetArrayRead(x,&xx);
2087: VecGetArray(b,&bb);
2088: vecs = PETSC_TRUE;
2089: }
2091: /* zero the columns */
2092: PetscCalloc1(A->rmap->n,&zeroed);
2093: for (i=0; i<is_n; i++) {
2094: if (is_idx[i] < 0 || is_idx[i] >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",is_idx[i]);
2095: zeroed[is_idx[i]] = PETSC_TRUE;
2096: }
2097: for (i=0; i<A->rmap->N; i++) {
2098: if (!zeroed[i]) {
2099: row = i/bs;
2100: for (j=baij->i[row]; j<baij->i[row+1]; j++) {
2101: for (k=0; k<bs; k++) {
2102: col = bs*baij->j[j] + k;
2103: if (zeroed[col]) {
2104: aa = ((MatScalar*)(baij->a)) + j*bs2 + (i%bs) + bs*k;
2105: if (vecs) bb[i] -= aa[0]*xx[col];
2106: aa[0] = 0.0;
2107: }
2108: }
2109: }
2110: } else if (vecs) bb[i] = diag*xx[i];
2111: }
2112: PetscFree(zeroed);
2113: if (vecs) {
2114: VecRestoreArrayRead(x,&xx);
2115: VecRestoreArray(b,&bb);
2116: }
2118: /* zero the rows */
2119: for (i=0; i<is_n; i++) {
2120: row = is_idx[i];
2121: count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
2122: aa = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
2123: for (k=0; k<count; k++) {
2124: aa[0] = zero;
2125: aa += bs;
2126: }
2127: if (diag != (PetscScalar)0.0) {
2128: (*A->ops->setvalues)(A,1,&row,1,&row,&diag,INSERT_VALUES);
2129: }
2130: }
2131: MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);
2132: return(0);
2133: }
2137: PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
2138: {
2139: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2140: PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1;
2141: PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen;
2142: PetscInt *aj =a->j,nonew=a->nonew,bs=A->rmap->bs,brow,bcol;
2144: PetscInt ridx,cidx,bs2=a->bs2;
2145: PetscBool roworiented=a->roworiented;
2146: MatScalar *ap,value,*aa=a->a,*bap;
2149: for (k=0; k<m; k++) { /* loop over added rows */
2150: row = im[k];
2151: brow = row/bs;
2152: if (row < 0) continue;
2153: #if defined(PETSC_USE_DEBUG)
2154: if (row >= A->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->N-1);
2155: #endif
2156: rp = aj + ai[brow];
2157: ap = aa + bs2*ai[brow];
2158: rmax = imax[brow];
2159: nrow = ailen[brow];
2160: low = 0;
2161: high = nrow;
2162: for (l=0; l<n; l++) { /* loop over added columns */
2163: if (in[l] < 0) continue;
2164: #if defined(PETSC_USE_DEBUG)
2165: if (in[l] >= A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap->n-1);
2166: #endif
2167: col = in[l]; bcol = col/bs;
2168: ridx = row % bs; cidx = col % bs;
2169: if (roworiented) {
2170: value = v[l + k*n];
2171: } else {
2172: value = v[k + l*m];
2173: }
2174: if (col <= lastcol) low = 0; else high = nrow;
2175: lastcol = col;
2176: while (high-low > 7) {
2177: t = (low+high)/2;
2178: if (rp[t] > bcol) high = t;
2179: else low = t;
2180: }
2181: for (i=low; i<high; i++) {
2182: if (rp[i] > bcol) break;
2183: if (rp[i] == bcol) {
2184: bap = ap + bs2*i + bs*cidx + ridx;
2185: if (is == ADD_VALUES) *bap += value;
2186: else *bap = value;
2187: goto noinsert1;
2188: }
2189: }
2190: if (nonew == 1) goto noinsert1;
2191: if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
2192: MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
2193: N = nrow++ - 1; high++;
2194: /* shift up all the later entries in this row */
2195: for (ii=N; ii>=i; ii--) {
2196: rp[ii+1] = rp[ii];
2197: PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));
2198: }
2199: if (N>=i) {
2200: PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));
2201: }
2202: rp[i] = bcol;
2203: ap[bs2*i + bs*cidx + ridx] = value;
2204: a->nz++;
2205: A->nonzerostate++;
2206: noinsert1:;
2207: low = i;
2208: }
2209: ailen[brow] = nrow;
2210: }
2211: return(0);
2212: }
2216: PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,const MatFactorInfo *info)
2217: {
2218: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inA->data;
2219: Mat outA;
2221: PetscBool row_identity,col_identity;
2224: if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU");
2225: ISIdentity(row,&row_identity);
2226: ISIdentity(col,&col_identity);
2227: if (!row_identity || !col_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU");
2229: outA = inA;
2230: inA->factortype = MAT_FACTOR_LU;
2232: MatMarkDiagonal_SeqBAIJ(inA);
2234: PetscObjectReference((PetscObject)row);
2235: ISDestroy(&a->row);
2236: a->row = row;
2237: PetscObjectReference((PetscObject)col);
2238: ISDestroy(&a->col);
2239: a->col = col;
2241: /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
2242: ISDestroy(&a->icol);
2243: ISInvertPermutation(col,PETSC_DECIDE,&a->icol);
2244: PetscLogObjectParent((PetscObject)inA,(PetscObject)a->icol);
2246: MatSeqBAIJSetNumericFactorization_inplace(inA,(PetscBool)(row_identity && col_identity));
2247: if (!a->solve_work) {
2248: PetscMalloc1(inA->rmap->N+inA->rmap->bs,&a->solve_work);
2249: PetscLogObjectMemory((PetscObject)inA,(inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar));
2250: }
2251: MatLUFactorNumeric(outA,inA,info);
2252: return(0);
2253: }
2257: PetscErrorCode MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices)
2258: {
2259: Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)mat->data;
2260: PetscInt i,nz,mbs;
2263: nz = baij->maxnz;
2264: mbs = baij->mbs;
2265: for (i=0; i<nz; i++) {
2266: baij->j[i] = indices[i];
2267: }
2268: baij->nz = nz;
2269: for (i=0; i<mbs; i++) {
2270: baij->ilen[i] = baij->imax[i];
2271: }
2272: return(0);
2273: }
2277: /*@
2278: MatSeqBAIJSetColumnIndices - Set the column indices for all the rows
2279: in the matrix.
2281: Input Parameters:
2282: + mat - the SeqBAIJ matrix
2283: - indices - the column indices
2285: Level: advanced
2287: Notes:
2288: This can be called if you have precomputed the nonzero structure of the
2289: matrix and want to provide it to the matrix object to improve the performance
2290: of the MatSetValues() operation.
2292: You MUST have set the correct numbers of nonzeros per row in the call to
2293: MatCreateSeqBAIJ(), and the columns indices MUST be sorted.
2295: MUST be called before any calls to MatSetValues();
2297: @*/
2298: PetscErrorCode MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices)
2299: {
2305: PetscUseMethod(mat,"MatSeqBAIJSetColumnIndices_C",(Mat,PetscInt*),(mat,indices));
2306: return(0);
2307: }
2311: PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[])
2312: {
2313: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2315: PetscInt i,j,n,row,bs,*ai,*aj,mbs;
2316: PetscReal atmp;
2317: PetscScalar *x,zero = 0.0;
2318: MatScalar *aa;
2319: PetscInt ncols,brow,krow,kcol;
2322: if (A->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
2323: bs = A->rmap->bs;
2324: aa = a->a;
2325: ai = a->i;
2326: aj = a->j;
2327: mbs = a->mbs;
2329: VecSet(v,zero);
2330: VecGetArray(v,&x);
2331: VecGetLocalSize(v,&n);
2332: if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
2333: for (i=0; i<mbs; i++) {
2334: ncols = ai[1] - ai[0]; ai++;
2335: brow = bs*i;
2336: for (j=0; j<ncols; j++) {
2337: for (kcol=0; kcol<bs; kcol++) {
2338: for (krow=0; krow<bs; krow++) {
2339: atmp = PetscAbsScalar(*aa);aa++;
2340: row = brow + krow; /* row index */
2341: if (PetscAbsScalar(x[row]) < atmp) {x[row] = atmp; if (idx) idx[row] = bs*(*aj) + kcol;}
2342: }
2343: }
2344: aj++;
2345: }
2346: }
2347: VecRestoreArray(v,&x);
2348: return(0);
2349: }
2353: PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str)
2354: {
2358: /* If the two matrices have the same copy implementation, use fast copy. */
2359: if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) {
2360: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2361: Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)B->data;
2362: PetscInt ambs=a->mbs,bmbs=b->mbs,abs=A->rmap->bs,bbs=B->rmap->bs,bs2=abs*abs;
2364: if (a->i[ambs] != b->i[bmbs]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Number of nonzero blocks in matrices A %D and B %D are different",a->i[ambs],b->i[bmbs]);
2365: if (abs != bbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Block size A %D and B %D are different",abs,bbs);
2366: PetscMemcpy(b->a,a->a,(bs2*a->i[ambs])*sizeof(PetscScalar));
2367: } else {
2368: MatCopy_Basic(A,B,str);
2369: }
2370: return(0);
2371: }
2375: PetscErrorCode MatSetUp_SeqBAIJ(Mat A)
2376: {
2380: MatSeqBAIJSetPreallocation_SeqBAIJ(A,A->rmap->bs,PETSC_DEFAULT,0);
2381: return(0);
2382: }
2386: PetscErrorCode MatSeqBAIJGetArray_SeqBAIJ(Mat A,PetscScalar *array[])
2387: {
2388: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2391: *array = a->a;
2392: return(0);
2393: }
2397: PetscErrorCode MatSeqBAIJRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[])
2398: {
2400: return(0);
2401: }
2405: PetscErrorCode MatAXPYGetPreallocation_SeqBAIJ(Mat Y,Mat X,PetscInt *nnz)
2406: {
2407: PetscInt bs = Y->rmap->bs,mbs = Y->rmap->N/bs;
2408: Mat_SeqBAIJ *x = (Mat_SeqBAIJ*)X->data;
2409: Mat_SeqBAIJ *y = (Mat_SeqBAIJ*)Y->data;
2413: /* Set the number of nonzeros in the new matrix */
2414: MatAXPYGetPreallocation_SeqX_private(mbs,x->i,x->j,y->i,y->j,nnz);
2415: return(0);
2416: }
2420: PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2421: {
2422: Mat_SeqBAIJ *x = (Mat_SeqBAIJ*)X->data,*y = (Mat_SeqBAIJ*)Y->data;
2424: PetscInt bs=Y->rmap->bs,bs2=bs*bs;
2425: PetscBLASInt one=1;
2428: if (str == SAME_NONZERO_PATTERN) {
2429: PetscScalar alpha = a;
2430: PetscBLASInt bnz;
2431: PetscBLASIntCast(x->nz*bs2,&bnz);
2432: PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2433: PetscObjectStateIncrease((PetscObject)Y);
2434: } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2435: MatAXPY_Basic(Y,a,X,str);
2436: } else {
2437: Mat B;
2438: PetscInt *nnz;
2439: if (bs != X->rmap->bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrices must have same block size");
2440: PetscMalloc1(Y->rmap->N,&nnz);
2441: MatCreate(PetscObjectComm((PetscObject)Y),&B);
2442: PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);
2443: MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);
2444: MatSetBlockSizesFromMats(B,Y,Y);
2445: MatSetType(B,(MatType) ((PetscObject)Y)->type_name);
2446: MatAXPYGetPreallocation_SeqBAIJ(Y,X,nnz);
2447: MatSeqBAIJSetPreallocation(B,bs,0,nnz);
2448: MatAXPY_BasicWithPreallocation(B,Y,a,X,str);
2449: MatHeaderReplace(Y,B);
2450: PetscFree(nnz);
2451: }
2452: return(0);
2453: }
2457: PetscErrorCode MatRealPart_SeqBAIJ(Mat A)
2458: {
2459: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2460: PetscInt i,nz = a->bs2*a->i[a->mbs];
2461: MatScalar *aa = a->a;
2464: for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]);
2465: return(0);
2466: }
2470: PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A)
2471: {
2472: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2473: PetscInt i,nz = a->bs2*a->i[a->mbs];
2474: MatScalar *aa = a->a;
2477: for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]);
2478: return(0);
2479: }
2483: /*
2484: Code almost idential to MatGetColumnIJ_SeqAIJ() should share common code
2485: */
2486: PetscErrorCode MatGetColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool *done)
2487: {
2488: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2490: PetscInt bs = A->rmap->bs,i,*collengths,*cia,*cja,n = A->cmap->n/bs,m = A->rmap->n/bs;
2491: PetscInt nz = a->i[m],row,*jj,mr,col;
2494: *nn = n;
2495: if (!ia) return(0);
2496: if (symmetric) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not for BAIJ matrices");
2497: else {
2498: PetscCalloc1(n+1,&collengths);
2499: PetscMalloc1(n+1,&cia);
2500: PetscMalloc1(nz+1,&cja);
2501: jj = a->j;
2502: for (i=0; i<nz; i++) {
2503: collengths[jj[i]]++;
2504: }
2505: cia[0] = oshift;
2506: for (i=0; i<n; i++) {
2507: cia[i+1] = cia[i] + collengths[i];
2508: }
2509: PetscMemzero(collengths,n*sizeof(PetscInt));
2510: jj = a->j;
2511: for (row=0; row<m; row++) {
2512: mr = a->i[row+1] - a->i[row];
2513: for (i=0; i<mr; i++) {
2514: col = *jj++;
2516: cja[cia[col] + collengths[col]++ - oshift] = row + oshift;
2517: }
2518: }
2519: PetscFree(collengths);
2520: *ia = cia; *ja = cja;
2521: }
2522: return(0);
2523: }
2527: PetscErrorCode MatRestoreColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscBool *done)
2528: {
2532: if (!ia) return(0);
2533: PetscFree(*ia);
2534: PetscFree(*ja);
2535: return(0);
2536: }
2538: /*
2539: MatGetColumnIJ_SeqBAIJ_Color() and MatRestoreColumnIJ_SeqBAIJ_Color() are customized from
2540: MatGetColumnIJ_SeqBAIJ() and MatRestoreColumnIJ_SeqBAIJ() by adding an output
2541: spidx[], index of a->a, to be used in MatTransposeColoringCreate() and MatFDColoringCreate()
2542: */
2545: PetscErrorCode MatGetColumnIJ_SeqBAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscInt *spidx[],PetscBool *done)
2546: {
2547: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2549: PetscInt i,*collengths,*cia,*cja,n=a->nbs,m=a->mbs;
2550: PetscInt nz = a->i[m],row,*jj,mr,col;
2551: PetscInt *cspidx;
2554: *nn = n;
2555: if (!ia) return(0);
2557: PetscCalloc1(n+1,&collengths);
2558: PetscMalloc1(n+1,&cia);
2559: PetscMalloc1(nz+1,&cja);
2560: PetscMalloc1(nz+1,&cspidx);
2561: jj = a->j;
2562: for (i=0; i<nz; i++) {
2563: collengths[jj[i]]++;
2564: }
2565: cia[0] = oshift;
2566: for (i=0; i<n; i++) {
2567: cia[i+1] = cia[i] + collengths[i];
2568: }
2569: PetscMemzero(collengths,n*sizeof(PetscInt));
2570: jj = a->j;
2571: for (row=0; row<m; row++) {
2572: mr = a->i[row+1] - a->i[row];
2573: for (i=0; i<mr; i++) {
2574: col = *jj++;
2575: cspidx[cia[col] + collengths[col] - oshift] = a->i[row] + i; /* index of a->j */
2576: cja[cia[col] + collengths[col]++ - oshift] = row + oshift;
2577: }
2578: }
2579: PetscFree(collengths);
2580: *ia = cia; *ja = cja;
2581: *spidx = cspidx;
2582: return(0);
2583: }
2587: PetscErrorCode MatRestoreColumnIJ_SeqBAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscInt *spidx[],PetscBool *done)
2588: {
2592: MatRestoreColumnIJ_SeqBAIJ(A,oshift,symmetric,inodecompressed,n,ia,ja,done);
2593: PetscFree(*spidx);
2594: return(0);
2595: }
2599: PetscErrorCode MatShift_SeqBAIJ(Mat Y,PetscScalar a)
2600: {
2602: Mat_SeqBAIJ *aij = (Mat_SeqBAIJ*)Y->data;
2605: if (!aij->nz) {
2606: MatSeqBAIJSetPreallocation(Y,Y->rmap->bs,1,NULL);
2607: }
2608: MatShift_Basic(Y,a);
2609: return(0);
2610: }
2612: /* -------------------------------------------------------------------*/
2613: static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ,
2614: MatGetRow_SeqBAIJ,
2615: MatRestoreRow_SeqBAIJ,
2616: MatMult_SeqBAIJ_N,
2617: /* 4*/ MatMultAdd_SeqBAIJ_N,
2618: MatMultTranspose_SeqBAIJ,
2619: MatMultTransposeAdd_SeqBAIJ,
2620: 0,
2621: 0,
2622: 0,
2623: /* 10*/ 0,
2624: MatLUFactor_SeqBAIJ,
2625: 0,
2626: 0,
2627: MatTranspose_SeqBAIJ,
2628: /* 15*/ MatGetInfo_SeqBAIJ,
2629: MatEqual_SeqBAIJ,
2630: MatGetDiagonal_SeqBAIJ,
2631: MatDiagonalScale_SeqBAIJ,
2632: MatNorm_SeqBAIJ,
2633: /* 20*/ 0,
2634: MatAssemblyEnd_SeqBAIJ,
2635: MatSetOption_SeqBAIJ,
2636: MatZeroEntries_SeqBAIJ,
2637: /* 24*/ MatZeroRows_SeqBAIJ,
2638: 0,
2639: 0,
2640: 0,
2641: 0,
2642: /* 29*/ MatSetUp_SeqBAIJ,
2643: 0,
2644: 0,
2645: 0,
2646: 0,
2647: /* 34*/ MatDuplicate_SeqBAIJ,
2648: 0,
2649: 0,
2650: MatILUFactor_SeqBAIJ,
2651: 0,
2652: /* 39*/ MatAXPY_SeqBAIJ,
2653: MatGetSubMatrices_SeqBAIJ,
2654: MatIncreaseOverlap_SeqBAIJ,
2655: MatGetValues_SeqBAIJ,
2656: MatCopy_SeqBAIJ,
2657: /* 44*/ 0,
2658: MatScale_SeqBAIJ,
2659: MatShift_SeqBAIJ,
2660: 0,
2661: MatZeroRowsColumns_SeqBAIJ,
2662: /* 49*/ 0,
2663: MatGetRowIJ_SeqBAIJ,
2664: MatRestoreRowIJ_SeqBAIJ,
2665: MatGetColumnIJ_SeqBAIJ,
2666: MatRestoreColumnIJ_SeqBAIJ,
2667: /* 54*/ MatFDColoringCreate_SeqXAIJ,
2668: 0,
2669: 0,
2670: 0,
2671: MatSetValuesBlocked_SeqBAIJ,
2672: /* 59*/ MatGetSubMatrix_SeqBAIJ,
2673: MatDestroy_SeqBAIJ,
2674: MatView_SeqBAIJ,
2675: 0,
2676: 0,
2677: /* 64*/ 0,
2678: 0,
2679: 0,
2680: 0,
2681: 0,
2682: /* 69*/ MatGetRowMaxAbs_SeqBAIJ,
2683: 0,
2684: MatConvert_Basic,
2685: 0,
2686: 0,
2687: /* 74*/ 0,
2688: MatFDColoringApply_BAIJ,
2689: 0,
2690: 0,
2691: 0,
2692: /* 79*/ 0,
2693: 0,
2694: 0,
2695: 0,
2696: MatLoad_SeqBAIJ,
2697: /* 84*/ 0,
2698: 0,
2699: 0,
2700: 0,
2701: 0,
2702: /* 89*/ 0,
2703: 0,
2704: 0,
2705: 0,
2706: 0,
2707: /* 94*/ 0,
2708: 0,
2709: 0,
2710: 0,
2711: 0,
2712: /* 99*/ 0,
2713: 0,
2714: 0,
2715: 0,
2716: 0,
2717: /*104*/ 0,
2718: MatRealPart_SeqBAIJ,
2719: MatImaginaryPart_SeqBAIJ,
2720: 0,
2721: 0,
2722: /*109*/ 0,
2723: 0,
2724: 0,
2725: 0,
2726: MatMissingDiagonal_SeqBAIJ,
2727: /*114*/ 0,
2728: 0,
2729: 0,
2730: 0,
2731: 0,
2732: /*119*/ 0,
2733: 0,
2734: MatMultHermitianTranspose_SeqBAIJ,
2735: MatMultHermitianTransposeAdd_SeqBAIJ,
2736: 0,
2737: /*124*/ 0,
2738: 0,
2739: MatInvertBlockDiagonal_SeqBAIJ,
2740: 0,
2741: 0,
2742: /*129*/ 0,
2743: 0,
2744: 0,
2745: 0,
2746: 0,
2747: /*134*/ 0,
2748: 0,
2749: 0,
2750: 0,
2751: 0,
2752: /*139*/ 0,
2753: 0,
2754: 0,
2755: MatFDColoringSetUp_SeqXAIJ,
2756: 0,
2757: /*144*/MatCreateMPIMatConcatenateSeqMat_SeqBAIJ
2758: };
2762: PetscErrorCode MatStoreValues_SeqBAIJ(Mat mat)
2763: {
2764: Mat_SeqBAIJ *aij = (Mat_SeqBAIJ*)mat->data;
2765: PetscInt nz = aij->i[aij->mbs]*aij->bs2;
2769: if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
2771: /* allocate space for values if not already there */
2772: if (!aij->saved_values) {
2773: PetscMalloc1(nz+1,&aij->saved_values);
2774: PetscLogObjectMemory((PetscObject)mat,(nz+1)*sizeof(PetscScalar));
2775: }
2777: /* copy values over */
2778: PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));
2779: return(0);
2780: }
2784: PetscErrorCode MatRetrieveValues_SeqBAIJ(Mat mat)
2785: {
2786: Mat_SeqBAIJ *aij = (Mat_SeqBAIJ*)mat->data;
2788: PetscInt nz = aij->i[aij->mbs]*aij->bs2;
2791: if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
2792: if (!aij->saved_values) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatStoreValues(A);first");
2794: /* copy values over */
2795: PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));
2796: return(0);
2797: }
2799: PETSC_EXTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*);
2800: PETSC_EXTERN PetscErrorCode MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*);
2804: PetscErrorCode MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz)
2805: {
2806: Mat_SeqBAIJ *b;
2808: PetscInt i,mbs,nbs,bs2;
2809: PetscBool flg = PETSC_FALSE,skipallocation = PETSC_FALSE,realalloc = PETSC_FALSE;
2812: if (nz >= 0 || nnz) realalloc = PETSC_TRUE;
2813: if (nz == MAT_SKIP_ALLOCATION) {
2814: skipallocation = PETSC_TRUE;
2815: nz = 0;
2816: }
2818: MatSetBlockSize(B,PetscAbs(bs));
2819: PetscLayoutSetUp(B->rmap);
2820: PetscLayoutSetUp(B->cmap);
2821: PetscLayoutGetBlockSize(B->rmap,&bs);
2823: B->preallocated = PETSC_TRUE;
2825: mbs = B->rmap->n/bs;
2826: nbs = B->cmap->n/bs;
2827: bs2 = bs*bs;
2829: if (mbs*bs!=B->rmap->n || nbs*bs!=B->cmap->n) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap->N,B->cmap->n,bs);
2831: if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2832: if (nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz);
2833: if (nnz) {
2834: for (i=0; i<mbs; i++) {
2835: if (nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]);
2836: if (nnz[i] > nbs) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs);
2837: }
2838: }
2840: b = (Mat_SeqBAIJ*)B->data;
2841: PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat");
2842: PetscOptionsBool("-mat_no_unroll","Do not optimize for block size (slow)",NULL,flg,&flg,NULL);
2843: PetscOptionsEnd();
2845: if (!flg) {
2846: switch (bs) {
2847: case 1:
2848: B->ops->mult = MatMult_SeqBAIJ_1;
2849: B->ops->multadd = MatMultAdd_SeqBAIJ_1;
2850: break;
2851: case 2:
2852: B->ops->mult = MatMult_SeqBAIJ_2;
2853: B->ops->multadd = MatMultAdd_SeqBAIJ_2;
2854: break;
2855: case 3:
2856: B->ops->mult = MatMult_SeqBAIJ_3;
2857: B->ops->multadd = MatMultAdd_SeqBAIJ_3;
2858: break;
2859: case 4:
2860: B->ops->mult = MatMult_SeqBAIJ_4;
2861: B->ops->multadd = MatMultAdd_SeqBAIJ_4;
2862: break;
2863: case 5:
2864: B->ops->mult = MatMult_SeqBAIJ_5;
2865: B->ops->multadd = MatMultAdd_SeqBAIJ_5;
2866: break;
2867: case 6:
2868: B->ops->mult = MatMult_SeqBAIJ_6;
2869: B->ops->multadd = MatMultAdd_SeqBAIJ_6;
2870: break;
2871: case 7:
2872: B->ops->mult = MatMult_SeqBAIJ_7;
2873: B->ops->multadd = MatMultAdd_SeqBAIJ_7;
2874: break;
2875: case 15:
2876: B->ops->mult = MatMult_SeqBAIJ_15_ver1;
2877: B->ops->multadd = MatMultAdd_SeqBAIJ_N;
2878: break;
2879: default:
2880: B->ops->mult = MatMult_SeqBAIJ_N;
2881: B->ops->multadd = MatMultAdd_SeqBAIJ_N;
2882: break;
2883: }
2884: }
2885: B->ops->sor = MatSOR_SeqBAIJ;
2886: b->mbs = mbs;
2887: b->nbs = nbs;
2888: if (!skipallocation) {
2889: if (!b->imax) {
2890: PetscMalloc2(mbs,&b->imax,mbs,&b->ilen);
2891: PetscLogObjectMemory((PetscObject)B,2*mbs*sizeof(PetscInt));
2893: b->free_imax_ilen = PETSC_TRUE;
2894: }
2895: /* b->ilen will count nonzeros in each block row so far. */
2896: for (i=0; i<mbs; i++) b->ilen[i] = 0;
2897: if (!nnz) {
2898: if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2899: else if (nz < 0) nz = 1;
2900: for (i=0; i<mbs; i++) b->imax[i] = nz;
2901: nz = nz*mbs;
2902: } else {
2903: nz = 0;
2904: for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];}
2905: }
2907: /* allocate the matrix space */
2908: MatSeqXAIJFreeAIJ(B,&b->a,&b->j,&b->i);
2909: PetscMalloc3(bs2*nz,&b->a,nz,&b->j,B->rmap->N+1,&b->i);
2910: PetscLogObjectMemory((PetscObject)B,(B->rmap->N+1)*sizeof(PetscInt)+nz*(bs2*sizeof(PetscScalar)+sizeof(PetscInt)));
2911: PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));
2912: PetscMemzero(b->j,nz*sizeof(PetscInt));
2914: b->singlemalloc = PETSC_TRUE;
2915: b->i[0] = 0;
2916: for (i=1; i<mbs+1; i++) {
2917: b->i[i] = b->i[i-1] + b->imax[i-1];
2918: }
2919: b->free_a = PETSC_TRUE;
2920: b->free_ij = PETSC_TRUE;
2921: } else {
2922: b->free_a = PETSC_FALSE;
2923: b->free_ij = PETSC_FALSE;
2924: }
2926: b->bs2 = bs2;
2927: b->mbs = mbs;
2928: b->nz = 0;
2929: b->maxnz = nz;
2930: B->info.nz_unneeded = (PetscReal)b->maxnz*bs2;
2931: if (realalloc) {MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);}
2932: return(0);
2933: }
2937: PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
2938: {
2939: PetscInt i,m,nz,nz_max=0,*nnz;
2940: PetscScalar *values=0;
2941: PetscBool roworiented = ((Mat_SeqBAIJ*)B->data)->roworiented;
2945: if (bs < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs);
2946: PetscLayoutSetBlockSize(B->rmap,bs);
2947: PetscLayoutSetBlockSize(B->cmap,bs);
2948: PetscLayoutSetUp(B->rmap);
2949: PetscLayoutSetUp(B->cmap);
2950: PetscLayoutGetBlockSize(B->rmap,&bs);
2951: m = B->rmap->n/bs;
2953: if (ii[0] != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %D",ii[0]);
2954: PetscMalloc1(m+1, &nnz);
2955: for (i=0; i<m; i++) {
2956: nz = ii[i+1]- ii[i];
2957: if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "Local row %D has a negative number of columns %D",i,nz);
2958: nz_max = PetscMax(nz_max, nz);
2959: nnz[i] = nz;
2960: }
2961: MatSeqBAIJSetPreallocation(B,bs,0,nnz);
2962: PetscFree(nnz);
2964: values = (PetscScalar*)V;
2965: if (!values) {
2966: PetscCalloc1(bs*bs*(nz_max+1),&values);
2967: }
2968: for (i=0; i<m; i++) {
2969: PetscInt ncols = ii[i+1] - ii[i];
2970: const PetscInt *icols = jj + ii[i];
2971: const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
2972: if (!roworiented) {
2973: MatSetValuesBlocked_SeqBAIJ(B,1,&i,ncols,icols,svals,INSERT_VALUES);
2974: } else {
2975: PetscInt j;
2976: for (j=0; j<ncols; j++) {
2977: const PetscScalar *svals = values + (V ? (bs*bs*(ii[i]+j)) : 0);
2978: MatSetValuesBlocked_SeqBAIJ(B,1,&i,1,&icols[j],svals,INSERT_VALUES);
2979: }
2980: }
2981: }
2982: if (!V) { PetscFree(values); }
2983: MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
2984: MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
2985: MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);
2986: return(0);
2987: }
2989: /*MC
2990: MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on
2991: block sparse compressed row format.
2993: Options Database Keys:
2994: . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions()
2996: Level: beginner
2998: .seealso: MatCreateSeqBAIJ()
2999: M*/
3001: PETSC_EXTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBSTRM(Mat, MatType,MatReuse,Mat*);
3005: PETSC_EXTERN PetscErrorCode MatCreate_SeqBAIJ(Mat B)
3006: {
3008: PetscMPIInt size;
3009: Mat_SeqBAIJ *b;
3012: MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);
3013: if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Comm must be of size 1");
3015: PetscNewLog(B,&b);
3016: B->data = (void*)b;
3017: PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));
3019: b->row = 0;
3020: b->col = 0;
3021: b->icol = 0;
3022: b->reallocs = 0;
3023: b->saved_values = 0;
3025: b->roworiented = PETSC_TRUE;
3026: b->nonew = 0;
3027: b->diag = 0;
3028: B->spptr = 0;
3029: B->info.nz_unneeded = (PetscReal)b->maxnz*b->bs2;
3030: b->keepnonzeropattern = PETSC_FALSE;
3032: PetscObjectComposeFunction((PetscObject)B,"MatInvertBlockDiagonal_C",MatInvertBlockDiagonal_SeqBAIJ);
3033: PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_SeqBAIJ);
3034: PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_SeqBAIJ);
3035: PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",MatSeqBAIJSetColumnIndices_SeqBAIJ);
3036: PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqaij_C",MatConvert_SeqBAIJ_SeqAIJ);
3037: PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C",MatConvert_SeqBAIJ_SeqSBAIJ);
3038: PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",MatSeqBAIJSetPreallocation_SeqBAIJ);
3039: PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C",MatSeqBAIJSetPreallocationCSR_SeqBAIJ);
3040: PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqbstrm_C",MatConvert_SeqBAIJ_SeqBSTRM);
3041: PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_SeqBAIJ);
3042: PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);
3043: return(0);
3044: }
3048: PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C,Mat A,MatDuplicateOption cpvalues,PetscBool mallocmatspace)
3049: {
3050: Mat_SeqBAIJ *c = (Mat_SeqBAIJ*)C->data,*a = (Mat_SeqBAIJ*)A->data;
3052: PetscInt i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2;
3055: if (a->i[mbs] != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupt matrix");
3057: if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3058: c->imax = a->imax;
3059: c->ilen = a->ilen;
3060: c->free_imax_ilen = PETSC_FALSE;
3061: } else {
3062: PetscMalloc2(mbs,&c->imax,mbs,&c->ilen);
3063: PetscLogObjectMemory((PetscObject)C,2*mbs*sizeof(PetscInt));
3064: for (i=0; i<mbs; i++) {
3065: c->imax[i] = a->imax[i];
3066: c->ilen[i] = a->ilen[i];
3067: }
3068: c->free_imax_ilen = PETSC_TRUE;
3069: }
3071: /* allocate the matrix space */
3072: if (mallocmatspace) {
3073: if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3074: PetscCalloc1(bs2*nz,&c->a);
3075: PetscLogObjectMemory((PetscObject)C,a->i[mbs]*bs2*sizeof(PetscScalar));
3077: c->i = a->i;
3078: c->j = a->j;
3079: c->singlemalloc = PETSC_FALSE;
3080: c->free_a = PETSC_TRUE;
3081: c->free_ij = PETSC_FALSE;
3082: c->parent = A;
3083: C->preallocated = PETSC_TRUE;
3084: C->assembled = PETSC_TRUE;
3086: PetscObjectReference((PetscObject)A);
3087: MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);
3088: MatSetOption(C,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);
3089: } else {
3090: PetscMalloc3(bs2*nz,&c->a,nz,&c->j,mbs+1,&c->i);
3091: PetscLogObjectMemory((PetscObject)C,a->i[mbs]*(bs2*sizeof(PetscScalar)+sizeof(PetscInt))+(mbs+1)*sizeof(PetscInt));
3093: c->singlemalloc = PETSC_TRUE;
3094: c->free_a = PETSC_TRUE;
3095: c->free_ij = PETSC_TRUE;
3097: PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));
3098: if (mbs > 0) {
3099: PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));
3100: if (cpvalues == MAT_COPY_VALUES) {
3101: PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));
3102: } else {
3103: PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));
3104: }
3105: }
3106: C->preallocated = PETSC_TRUE;
3107: C->assembled = PETSC_TRUE;
3108: }
3109: }
3111: c->roworiented = a->roworiented;
3112: c->nonew = a->nonew;
3114: PetscLayoutReference(A->rmap,&C->rmap);
3115: PetscLayoutReference(A->cmap,&C->cmap);
3117: c->bs2 = a->bs2;
3118: c->mbs = a->mbs;
3119: c->nbs = a->nbs;
3121: if (a->diag) {
3122: if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3123: c->diag = a->diag;
3124: c->free_diag = PETSC_FALSE;
3125: } else {
3126: PetscMalloc1(mbs+1,&c->diag);
3127: PetscLogObjectMemory((PetscObject)C,(mbs+1)*sizeof(PetscInt));
3128: for (i=0; i<mbs; i++) c->diag[i] = a->diag[i];
3129: c->free_diag = PETSC_TRUE;
3130: }
3131: } else c->diag = 0;
3133: c->nz = a->nz;
3134: c->maxnz = a->nz; /* Since we allocate exactly the right amount */
3135: c->solve_work = NULL;
3136: c->mult_work = NULL;
3137: c->sor_workt = NULL;
3138: c->sor_work = NULL;
3140: c->compressedrow.use = a->compressedrow.use;
3141: c->compressedrow.nrows = a->compressedrow.nrows;
3142: if (a->compressedrow.use) {
3143: i = a->compressedrow.nrows;
3144: PetscMalloc2(i+1,&c->compressedrow.i,i+1,&c->compressedrow.rindex);
3145: PetscLogObjectMemory((PetscObject)C,(2*i+1)*sizeof(PetscInt));
3146: PetscMemcpy(c->compressedrow.i,a->compressedrow.i,(i+1)*sizeof(PetscInt));
3147: PetscMemcpy(c->compressedrow.rindex,a->compressedrow.rindex,i*sizeof(PetscInt));
3148: } else {
3149: c->compressedrow.use = PETSC_FALSE;
3150: c->compressedrow.i = NULL;
3151: c->compressedrow.rindex = NULL;
3152: }
3153: C->nonzerostate = A->nonzerostate;
3155: PetscFunctionListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);
3156: PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));
3157: return(0);
3158: }
3162: PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
3163: {
3167: MatCreate(PetscObjectComm((PetscObject)A),B);
3168: MatSetSizes(*B,A->rmap->N,A->cmap->n,A->rmap->N,A->cmap->n);
3169: MatSetType(*B,MATSEQBAIJ);
3170: MatDuplicateNoCreate_SeqBAIJ(*B,A,cpvalues,PETSC_TRUE);
3171: return(0);
3172: }
3176: PetscErrorCode MatLoad_SeqBAIJ(Mat newmat,PetscViewer viewer)
3177: {
3178: Mat_SeqBAIJ *a;
3180: PetscInt i,nz,header[4],*rowlengths=0,M,N,bs = newmat->rmap->bs;
3181: PetscInt *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
3182: PetscInt kmax,jcount,block,idx,point,nzcountb,extra_rows,rows,cols;
3183: PetscInt *masked,nmask,tmp,bs2,ishift;
3184: PetscMPIInt size;
3185: int fd;
3186: PetscScalar *aa;
3187: MPI_Comm comm;
3190: /* force binary viewer to load .info file if it has not yet done so */
3191: PetscViewerSetUp(viewer);
3192: PetscObjectGetComm((PetscObject)viewer,&comm);
3193: PetscOptionsBegin(comm,NULL,"Options for loading SEQBAIJ matrix","Mat");
3194: PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);
3195: PetscOptionsEnd();
3196: if (bs < 0) bs = 1;
3197: bs2 = bs*bs;
3199: MPI_Comm_size(comm,&size);
3200: if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"view must have one processor");
3201: PetscViewerBinaryGetDescriptor(viewer,&fd);
3202: PetscBinaryRead(fd,header,4,PETSC_INT);
3203: if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not Mat object");
3204: M = header[1]; N = header[2]; nz = header[3];
3206: if (header[3] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ");
3207: if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices");
3209: /*
3210: This code adds extra rows to make sure the number of rows is
3211: divisible by the blocksize
3212: */
3213: mbs = M/bs;
3214: extra_rows = bs - M + bs*(mbs);
3215: if (extra_rows == bs) extra_rows = 0;
3216: else mbs++;
3217: if (extra_rows) {
3218: PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");
3219: }
3221: /* Set global sizes if not already set */
3222: if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) {
3223: MatSetSizes(newmat,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows);
3224: } else { /* Check if the matrix global sizes are correct */
3225: MatGetSize(newmat,&rows,&cols);
3226: if (rows < 0 && cols < 0) { /* user might provide local size instead of global size */
3227: MatGetLocalSize(newmat,&rows,&cols);
3228: }
3229: if (M != rows || N != cols) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix in file of different length (%d, %d) than the input matrix (%d, %d)",M,N,rows,cols);
3230: }
3232: /* read in row lengths */
3233: PetscMalloc1(M+extra_rows,&rowlengths);
3234: PetscBinaryRead(fd,rowlengths,M,PETSC_INT);
3235: for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
3237: /* read in column indices */
3238: PetscMalloc1(nz+extra_rows,&jj);
3239: PetscBinaryRead(fd,jj,nz,PETSC_INT);
3240: for (i=0; i<extra_rows; i++) jj[nz+i] = M+i;
3242: /* loop over row lengths determining block row lengths */
3243: PetscCalloc1(mbs,&browlengths);
3244: PetscMalloc2(mbs,&mask,mbs,&masked);
3245: PetscMemzero(mask,mbs*sizeof(PetscInt));
3246: rowcount = 0;
3247: nzcount = 0;
3248: for (i=0; i<mbs; i++) {
3249: nmask = 0;
3250: for (j=0; j<bs; j++) {
3251: kmax = rowlengths[rowcount];
3252: for (k=0; k<kmax; k++) {
3253: tmp = jj[nzcount++]/bs;
3254: if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
3255: }
3256: rowcount++;
3257: }
3258: browlengths[i] += nmask;
3259: /* zero out the mask elements we set */
3260: for (j=0; j<nmask; j++) mask[masked[j]] = 0;
3261: }
3263: /* Do preallocation */
3264: MatSeqBAIJSetPreallocation_SeqBAIJ(newmat,bs,0,browlengths);
3265: a = (Mat_SeqBAIJ*)newmat->data;
3267: /* set matrix "i" values */
3268: a->i[0] = 0;
3269: for (i=1; i<= mbs; i++) {
3270: a->i[i] = a->i[i-1] + browlengths[i-1];
3271: a->ilen[i-1] = browlengths[i-1];
3272: }
3273: a->nz = 0;
3274: for (i=0; i<mbs; i++) a->nz += browlengths[i];
3276: /* read in nonzero values */
3277: PetscMalloc1(nz+extra_rows,&aa);
3278: PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);
3279: for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0;
3281: /* set "a" and "j" values into matrix */
3282: nzcount = 0; jcount = 0;
3283: for (i=0; i<mbs; i++) {
3284: nzcountb = nzcount;
3285: nmask = 0;
3286: for (j=0; j<bs; j++) {
3287: kmax = rowlengths[i*bs+j];
3288: for (k=0; k<kmax; k++) {
3289: tmp = jj[nzcount++]/bs;
3290: if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
3291: }
3292: }
3293: /* sort the masked values */
3294: PetscSortInt(nmask,masked);
3296: /* set "j" values into matrix */
3297: maskcount = 1;
3298: for (j=0; j<nmask; j++) {
3299: a->j[jcount++] = masked[j];
3300: mask[masked[j]] = maskcount++;
3301: }
3302: /* set "a" values into matrix */
3303: ishift = bs2*a->i[i];
3304: for (j=0; j<bs; j++) {
3305: kmax = rowlengths[i*bs+j];
3306: for (k=0; k<kmax; k++) {
3307: tmp = jj[nzcountb]/bs;
3308: block = mask[tmp] - 1;
3309: point = jj[nzcountb] - bs*tmp;
3310: idx = ishift + bs2*block + j + bs*point;
3311: a->a[idx] = (MatScalar)aa[nzcountb++];
3312: }
3313: }
3314: /* zero out the mask elements we set */
3315: for (j=0; j<nmask; j++) mask[masked[j]] = 0;
3316: }
3317: if (jcount != a->nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix");
3319: PetscFree(rowlengths);
3320: PetscFree(browlengths);
3321: PetscFree(aa);
3322: PetscFree(jj);
3323: PetscFree2(mask,masked);
3325: MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);
3326: MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);
3327: return(0);
3328: }
3332: /*@C
3333: MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
3334: compressed row) format. For good matrix assembly performance the
3335: user should preallocate the matrix storage by setting the parameter nz
3336: (or the array nnz). By setting these parameters accurately, performance
3337: during matrix assembly can be increased by more than a factor of 50.
3339: Collective on MPI_Comm
3341: Input Parameters:
3342: + comm - MPI communicator, set to PETSC_COMM_SELF
3343: . bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
3344: blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
3345: . m - number of rows
3346: . n - number of columns
3347: . nz - number of nonzero blocks per block row (same for all rows)
3348: - nnz - array containing the number of nonzero blocks in the various block rows
3349: (possibly different for each block row) or NULL
3351: Output Parameter:
3352: . A - the matrix
3354: It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3355: MatXXXXSetPreallocation() paradgm instead of this routine directly.
3356: [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3358: Options Database Keys:
3359: . -mat_no_unroll - uses code that does not unroll the loops in the
3360: block calculations (much slower)
3361: . -mat_block_size - size of the blocks to use
3363: Level: intermediate
3365: Notes:
3366: The number of rows and columns must be divisible by blocksize.
3368: If the nnz parameter is given then the nz parameter is ignored
3370: A nonzero block is any block that as 1 or more nonzeros in it
3372: The block AIJ format is fully compatible with standard Fortran 77
3373: storage. That is, the stored row and column indices can begin at
3374: either one (as in Fortran) or zero. See the users' manual for details.
3376: Specify the preallocated storage with either nz or nnz (not both).
3377: Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory
3378: allocation. See Users-Manual: ch_mat for details.
3379: matrices.
3381: .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ()
3382: @*/
3383: PetscErrorCode MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
3384: {
3388: MatCreate(comm,A);
3389: MatSetSizes(*A,m,n,m,n);
3390: MatSetType(*A,MATSEQBAIJ);
3391: MatSeqBAIJSetPreallocation_SeqBAIJ(*A,bs,nz,(PetscInt*)nnz);
3392: return(0);
3393: }
3397: /*@C
3398: MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros
3399: per row in the matrix. For good matrix assembly performance the
3400: user should preallocate the matrix storage by setting the parameter nz
3401: (or the array nnz). By setting these parameters accurately, performance
3402: during matrix assembly can be increased by more than a factor of 50.
3404: Collective on MPI_Comm
3406: Input Parameters:
3407: + B - the matrix
3408: . bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
3409: blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
3410: . nz - number of block nonzeros per block row (same for all rows)
3411: - nnz - array containing the number of block nonzeros in the various block rows
3412: (possibly different for each block row) or NULL
3414: Options Database Keys:
3415: . -mat_no_unroll - uses code that does not unroll the loops in the
3416: block calculations (much slower)
3417: . -mat_block_size - size of the blocks to use
3419: Level: intermediate
3421: Notes:
3422: If the nnz parameter is given then the nz parameter is ignored
3424: You can call MatGetInfo() to get information on how effective the preallocation was;
3425: for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3426: You can also run with the option -info and look for messages with the string
3427: malloc in them to see if additional memory allocation was needed.
3429: The block AIJ format is fully compatible with standard Fortran 77
3430: storage. That is, the stored row and column indices can begin at
3431: either one (as in Fortran) or zero. See the users' manual for details.
3433: Specify the preallocated storage with either nz or nnz (not both).
3434: Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory
3435: allocation. See Users-Manual: ch_mat for details.
3437: .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ(), MatGetInfo()
3438: @*/
3439: PetscErrorCode MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[])
3440: {
3447: PetscTryMethod(B,"MatSeqBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[]),(B,bs,nz,nnz));
3448: return(0);
3449: }
3453: /*@C
3454: MatSeqBAIJSetPreallocationCSR - Allocates memory for a sparse sequential matrix in AIJ format
3455: (the default sequential PETSc format).
3457: Collective on MPI_Comm
3459: Input Parameters:
3460: + B - the matrix
3461: . i - the indices into j for the start of each local row (starts with zero)
3462: . j - the column indices for each local row (starts with zero) these must be sorted for each row
3463: - v - optional values in the matrix
3465: Level: developer
3467: Notes:
3468: The order of the entries in values is specified by the MatOption MAT_ROW_ORIENTED. For example, C programs
3469: may want to use the default MAT_ROW_ORIENTED=PETSC_TRUE and use an array v[nnz][bs][bs] where the second index is
3470: over rows within a block and the last index is over columns within a block row. Fortran programs will likely set
3471: MAT_ROW_ORIENTED=PETSC_FALSE and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a
3472: block column and the second index is over columns within a block.
3474: .keywords: matrix, aij, compressed row, sparse
3476: .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatSeqBAIJSetPreallocation(), MATSEQBAIJ
3477: @*/
3478: PetscErrorCode MatSeqBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3479: {
3486: PetscTryMethod(B,"MatSeqBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));
3487: return(0);
3488: }
3493: /*@
3494: MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements provided by the user.
3496: Collective on MPI_Comm
3498: Input Parameters:
3499: + comm - must be an MPI communicator of size 1
3500: . bs - size of block
3501: . m - number of rows
3502: . n - number of columns
3503: . i - row indices
3504: . j - column indices
3505: - a - matrix values
3507: Output Parameter:
3508: . mat - the matrix
3510: Level: advanced
3512: Notes:
3513: The i, j, and a arrays are not copied by this routine, the user must free these arrays
3514: once the matrix is destroyed
3516: You cannot set new nonzero locations into this matrix, that will generate an error.
3518: The i and j indices are 0 based
3520: When block size is greater than 1 the matrix values must be stored using the BAIJ storage format (see the BAIJ code to determine this).
3522: The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is
3523: the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first
3524: block, followed by the second column of the first block etc etc. That is, the blocks are contiguous in memory
3525: with column-major ordering within blocks.
3527: .seealso: MatCreate(), MatCreateBAIJ(), MatCreateSeqBAIJ()
3529: @*/
3530: PetscErrorCode MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt *i,PetscInt *j,PetscScalar *a,Mat *mat)
3531: {
3533: PetscInt ii;
3534: Mat_SeqBAIJ *baij;
3537: if (bs != 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs);
3538: if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3540: MatCreate(comm,mat);
3541: MatSetSizes(*mat,m,n,m,n);
3542: MatSetType(*mat,MATSEQBAIJ);
3543: MatSeqBAIJSetPreallocation_SeqBAIJ(*mat,bs,MAT_SKIP_ALLOCATION,0);
3544: baij = (Mat_SeqBAIJ*)(*mat)->data;
3545: PetscMalloc2(m,&baij->imax,m,&baij->ilen);
3546: PetscLogObjectMemory((PetscObject)*mat,2*m*sizeof(PetscInt));
3548: baij->i = i;
3549: baij->j = j;
3550: baij->a = a;
3552: baij->singlemalloc = PETSC_FALSE;
3553: baij->nonew = -1; /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/
3554: baij->free_a = PETSC_FALSE;
3555: baij->free_ij = PETSC_FALSE;
3557: for (ii=0; ii<m; ii++) {
3558: baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii];
3559: #if defined(PETSC_USE_DEBUG)
3560: if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]);
3561: #endif
3562: }
3563: #if defined(PETSC_USE_DEBUG)
3564: for (ii=0; ii<baij->i[m]; ii++) {
3565: if (j[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]);
3566: if (j[ii] > n - 1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]);
3567: }
3568: #endif
3570: MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);
3571: MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);
3572: return(0);
3573: }
3577: PetscErrorCode MatCreateMPIMatConcatenateSeqMat_SeqBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
3578: {
3582: MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(comm,inmat,n,scall,outmat);
3583: return(0);
3584: }