Actual source code: baij.c
petsc-3.3-p7 2013-05-11
2: /*
3: Defines the basic matrix operations for the BAIJ (compressed row)
4: matrix storage format.
5: */
6: #include <../src/mat/impls/baij/seq/baij.h> /*I "petscmat.h" I*/
7: #include <petscblaslapack.h>
8: #include <../src/mat/blockinvert.h>
13: PetscErrorCode MatInvertBlockDiagonal_SeqBAIJ(Mat A,const PetscScalar **values)
14: {
15: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*) A->data;
17: PetscInt *diag_offset,i,bs = A->rmap->bs,mbs = a->mbs,ipvt[5],bs2 = bs*bs,*v_pivots;
18: MatScalar *v = a->a,*odiag,*diag,*mdiag,work[25],*v_work;
19: PetscReal shift = 0.0;
22: if (a->idiagvalid) {
23: if (values)*values = a->idiag;
24: return(0);
25: }
26: MatMarkDiagonal_SeqBAIJ(A);
27: diag_offset = a->diag;
28: if (!a->idiag) {
29: PetscMalloc(2*bs2*mbs*sizeof(PetscScalar),&a->idiag);
30: PetscLogObjectMemory(A,2*bs2*mbs*sizeof(PetscScalar));
31: }
32: diag = a->idiag;
33: mdiag = a->idiag+bs2*mbs;
34: if (values) *values = a->idiag;
35: /* factor and invert each block */
36: switch (bs){
37: case 1:
38: for (i=0; i<mbs; i++) {
39: odiag = v + 1*diag_offset[i];
40: diag[0] = odiag[0];
41: mdiag[0] = odiag[0];
42: diag[0] = (PetscScalar)1.0 / (diag[0] + shift);
43: diag += 1;
44: mdiag += 1;
45: }
46: break;
47: case 2:
48: for (i=0; i<mbs; i++) {
49: odiag = v + 4*diag_offset[i];
50: diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
51: mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
52: PetscKernel_A_gets_inverse_A_2(diag,shift);
53: diag += 4;
54: mdiag += 4;
55: }
56: break;
57: case 3:
58: for (i=0; i<mbs; i++) {
59: odiag = v + 9*diag_offset[i];
60: diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
61: diag[4] = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7];
62: diag[8] = odiag[8];
63: mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
64: mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7];
65: mdiag[8] = odiag[8];
66: PetscKernel_A_gets_inverse_A_3(diag,shift);
67: diag += 9;
68: mdiag += 9;
69: }
70: break;
71: case 4:
72: for (i=0; i<mbs; i++) {
73: odiag = v + 16*diag_offset[i];
74: PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));
75: PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));
76: PetscKernel_A_gets_inverse_A_4(diag,shift);
77: diag += 16;
78: mdiag += 16;
79: }
80: break;
81: case 5:
82: for (i=0; i<mbs; i++) {
83: odiag = v + 25*diag_offset[i];
84: PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));
85: PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));
86: PetscKernel_A_gets_inverse_A_5(diag,ipvt,work,shift);
87: diag += 25;
88: mdiag += 25;
89: }
90: break;
91: case 6:
92: for (i=0; i<mbs; i++) {
93: odiag = v + 36*diag_offset[i];
94: PetscMemcpy(diag,odiag,36*sizeof(PetscScalar));
95: PetscMemcpy(mdiag,odiag,36*sizeof(PetscScalar));
96: PetscKernel_A_gets_inverse_A_6(diag,shift);
97: diag += 36;
98: mdiag += 36;
99: }
100: break;
101: case 7:
102: for (i=0; i<mbs; i++) {
103: odiag = v + 49*diag_offset[i];
104: PetscMemcpy(diag,odiag,49*sizeof(PetscScalar));
105: PetscMemcpy(mdiag,odiag,49*sizeof(PetscScalar));
106: PetscKernel_A_gets_inverse_A_7(diag,shift);
107: diag += 49;
108: mdiag += 49;
109: }
110: break;
111: default:
112: PetscMalloc2(bs,MatScalar,&v_work,bs,PetscInt,&v_pivots);
113: for (i=0; i<mbs; i++) {
114: odiag = v + bs2*diag_offset[i];
115: PetscMemcpy(diag,odiag,bs2*sizeof(PetscScalar));
116: PetscMemcpy(mdiag,odiag,bs2*sizeof(PetscScalar));
117: PetscKernel_A_gets_inverse_A(bs,diag,v_pivots,v_work);
118: diag += bs2;
119: mdiag += bs2;
120: }
121: PetscFree2(v_work,v_pivots);
122: }
123: a->idiagvalid = PETSC_TRUE;
124: return(0);
125: }
129: PetscErrorCode MatSOR_SeqBAIJ_1(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
130: {
131: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
132: PetscScalar *x,x1,s1;
133: const PetscScalar *b;
134: const MatScalar *aa = a->a, *idiag,*mdiag,*v;
135: PetscErrorCode ierr;
136: PetscInt m = a->mbs,i,i2,nz,j;
137: const PetscInt *diag,*ai = a->i,*aj = a->j,*vi;
140: if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
141: its = its*lits;
142: if (its <= 0) SETERRQ2(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
143: if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
144: if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
145: if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
146: if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
148: if (!a->idiagvalid){MatInvertBlockDiagonal(A,PETSC_NULL);}
150: diag = a->diag;
151: idiag = a->idiag;
152: VecGetArray(xx,&x);
153: VecGetArrayRead(bb,&b);
155: if (flag & SOR_ZERO_INITIAL_GUESS) {
156: if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
157: x[0] = b[0]*idiag[0];
158: i2 = 1;
159: idiag += 1;
160: for (i=1; i<m; i++) {
161: v = aa + ai[i];
162: vi = aj + ai[i];
163: nz = diag[i] - ai[i];
164: s1 = b[i2];
165: for (j=0; j<nz; j++) {
166: s1 -= v[j]*x[vi[j]];
167: }
168: x[i2] = idiag[0]*s1;
169: idiag += 1;
170: i2 += 1;
171: }
172: /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
173: PetscLogFlops(a->nz);
174: }
175: if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
176: (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
177: i2 = 0;
178: mdiag = a->idiag+a->mbs;
179: for (i=0; i<m; i++) {
180: x1 = x[i2];
181: x[i2] = mdiag[0]*x1;
182: mdiag += 1;
183: i2 += 1;
184: }
185: PetscLogFlops(m);
186: } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
187: PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));
188: }
189: if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
190: idiag = a->idiag+a->mbs - 1;
191: i2 = m - 1;
192: x1 = x[i2];
193: x[i2] = idiag[0]*x1;
194: idiag -= 1;
195: i2 -= 1;
196: for (i=m-2; i>=0; i--) {
197: v = aa + (diag[i]+1);
198: vi = aj + diag[i] + 1;
199: nz = ai[i+1] - diag[i] - 1;
200: s1 = x[i2];
201: for (j=0; j<nz; j++) {
202: s1 -= v[j]*x[vi[j]];
203: }
204: x[i2] = idiag[0]*s1;
205: idiag -= 1;
206: i2 -= 1;
207: }
208: PetscLogFlops(a->nz);
209: }
210: } else {
211: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
212: }
213: VecRestoreArray(xx,&x);
214: VecRestoreArrayRead(bb,&b);
215: return(0);
216: }
220: PetscErrorCode MatSOR_SeqBAIJ_2(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
221: {
222: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
223: PetscScalar *x,x1,x2,s1,s2;
224: const PetscScalar *b;
225: const MatScalar *v,*aa = a->a, *idiag,*mdiag;
226: PetscErrorCode ierr;
227: PetscInt m = a->mbs,i,i2,nz,idx,j,it;
228: const PetscInt *diag,*ai = a->i,*aj = a->j,*vi;
231: if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
232: its = its*lits;
233: if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
234: if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
235: if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
236: if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
237: if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
239: if (!a->idiagvalid){MatInvertBlockDiagonal(A,PETSC_NULL);}
241: diag = a->diag;
242: idiag = a->idiag;
243: VecGetArray(xx,&x);
244: VecGetArrayRead(bb,&b);
246: if (flag & SOR_ZERO_INITIAL_GUESS) {
247: if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
248: x[0] = b[0]*idiag[0] + b[1]*idiag[2];
249: x[1] = b[0]*idiag[1] + b[1]*idiag[3];
250: i2 = 2;
251: idiag += 4;
252: for (i=1; i<m; i++) {
253: v = aa + 4*ai[i];
254: vi = aj + ai[i];
255: nz = diag[i] - ai[i];
256: s1 = b[i2]; s2 = b[i2+1];
257: for (j=0; j<nz; j++) {
258: idx = 2*vi[j];
259: it = 4*j;
260: x1 = x[idx]; x2 = x[1+idx];
261: s1 -= v[it]*x1 + v[it+2]*x2;
262: s2 -= v[it+1]*x1 + v[it+3]*x2;
263: }
264: x[i2] = idiag[0]*s1 + idiag[2]*s2;
265: x[i2+1] = idiag[1]*s1 + idiag[3]*s2;
266: idiag += 4;
267: i2 += 2;
268: }
269: /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
270: PetscLogFlops(4.0*(a->nz));
271: }
272: if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
273: (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
274: i2 = 0;
275: mdiag = a->idiag+4*a->mbs;
276: for (i=0; i<m; i++) {
277: x1 = x[i2]; x2 = x[i2+1];
278: x[i2] = mdiag[0]*x1 + mdiag[2]*x2;
279: x[i2+1] = mdiag[1]*x1 + mdiag[3]*x2;
280: mdiag += 4;
281: i2 += 2;
282: }
283: PetscLogFlops(6.0*m);
284: } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
285: PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));
286: }
287: if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
288: idiag = a->idiag+4*a->mbs - 4;
289: i2 = 2*m - 2;
290: x1 = x[i2]; x2 = x[i2+1];
291: x[i2] = idiag[0]*x1 + idiag[2]*x2;
292: x[i2+1] = idiag[1]*x1 + idiag[3]*x2;
293: idiag -= 4;
294: i2 -= 2;
295: for (i=m-2; i>=0; i--) {
296: v = aa + 4*(diag[i]+1);
297: vi = aj + diag[i] + 1;
298: nz = ai[i+1] - diag[i] - 1;
299: s1 = x[i2]; s2 = x[i2+1];
300: for (j=0; j<nz; j++) {
301: idx = 2*vi[j];
302: it = 4*j;
303: x1 = x[idx]; x2 = x[1+idx];
304: s1 -= v[it]*x1 + v[it+2]*x2;
305: s2 -= v[it+1]*x1 + v[it+3]*x2;
306: }
307: x[i2] = idiag[0]*s1 + idiag[2]*s2;
308: x[i2+1] = idiag[1]*s1 + idiag[3]*s2;
309: idiag -= 4;
310: i2 -= 2;
311: }
312: PetscLogFlops(4.0*(a->nz));
313: }
314: } else {
315: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
316: }
317: VecRestoreArray(xx,&x);
318: VecRestoreArrayRead(bb,&b);
319: return(0);
320: }
324: PetscErrorCode MatSOR_SeqBAIJ_3(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
325: {
326: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
327: PetscScalar *x,x1,x2,x3,s1,s2,s3;
328: const MatScalar *v,*aa = a->a, *idiag,*mdiag;
329: const PetscScalar *b;
330: PetscErrorCode ierr;
331: PetscInt m = a->mbs,i,i2,nz,idx;
332: const PetscInt *diag,*ai = a->i,*aj = a->j,*vi;
335: its = its*lits;
336: if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
337: if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
338: if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
339: if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
340: if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
341: if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
343: if (!a->idiagvalid){MatInvertBlockDiagonal(A,PETSC_NULL);}
345: diag = a->diag;
346: idiag = a->idiag;
347: VecGetArray(xx,&x);
348: VecGetArrayRead(bb,&b);
350: if (flag & SOR_ZERO_INITIAL_GUESS) {
351: if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
352: x[0] = b[0]*idiag[0] + b[1]*idiag[3] + b[2]*idiag[6];
353: x[1] = b[0]*idiag[1] + b[1]*idiag[4] + b[2]*idiag[7];
354: x[2] = b[0]*idiag[2] + b[1]*idiag[5] + b[2]*idiag[8];
355: i2 = 3;
356: idiag += 9;
357: for (i=1; i<m; i++) {
358: v = aa + 9*ai[i];
359: vi = aj + ai[i];
360: nz = diag[i] - ai[i];
361: s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2];
362: while (nz--) {
363: idx = 3*(*vi++);
364: x1 = x[idx]; x2 = x[1+idx];x3 = x[2+idx];
365: s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3;
366: s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3;
367: s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3;
368: v += 9;
369: }
370: x[i2] = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
371: x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
372: x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;
373: idiag += 9;
374: i2 += 3;
375: }
376: /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
377: PetscLogFlops(9.0*(a->nz));
378: }
379: if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
380: (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
381: i2 = 0;
382: mdiag = a->idiag+9*a->mbs;
383: for (i=0; i<m; i++) {
384: x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
385: x[i2] = mdiag[0]*x1 + mdiag[3]*x2 + mdiag[6]*x3;
386: x[i2+1] = mdiag[1]*x1 + mdiag[4]*x2 + mdiag[7]*x3;
387: x[i2+2] = mdiag[2]*x1 + mdiag[5]*x2 + mdiag[8]*x3;
388: mdiag += 9;
389: i2 += 3;
390: }
391: PetscLogFlops(15.0*m);
392: } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
393: PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));
394: }
395: if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
396: idiag = a->idiag+9*a->mbs - 9;
397: i2 = 3*m - 3;
398: x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
399: x[i2] = idiag[0]*x1 + idiag[3]*x2 + idiag[6]*x3;
400: x[i2+1] = idiag[1]*x1 + idiag[4]*x2 + idiag[7]*x3;
401: x[i2+2] = idiag[2]*x1 + idiag[5]*x2 + idiag[8]*x3;
402: idiag -= 9;
403: i2 -= 3;
404: for (i=m-2; i>=0; i--) {
405: v = aa + 9*(diag[i]+1);
406: vi = aj + diag[i] + 1;
407: nz = ai[i+1] - diag[i] - 1;
408: s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2];
409: while (nz--) {
410: idx = 3*(*vi++);
411: x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx];
412: s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3;
413: s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3;
414: s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3;
415: v += 9;
416: }
417: x[i2] = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
418: x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
419: x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;
420: idiag -= 9;
421: i2 -= 3;
422: }
423: PetscLogFlops(9.0*(a->nz));
424: }
425: } else {
426: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
427: }
428: VecRestoreArray(xx,&x);
429: VecRestoreArrayRead(bb,&b);
430: return(0);
431: }
435: PetscErrorCode MatSOR_SeqBAIJ_4(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
436: {
437: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
438: PetscScalar *x,x1,x2,x3,x4,s1,s2,s3,s4;
439: const MatScalar *v,*aa = a->a, *idiag,*mdiag;
440: const PetscScalar *b;
441: PetscErrorCode ierr;
442: PetscInt m = a->mbs,i,i2,nz,idx;
443: const PetscInt *diag,*ai = a->i,*aj = a->j,*vi;
446: if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
447: its = its*lits;
448: if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
449: if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
450: if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
451: if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
452: if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
454: if (!a->idiagvalid){MatInvertBlockDiagonal(A,PETSC_NULL);}
456: diag = a->diag;
457: idiag = a->idiag;
458: VecGetArray(xx,&x);
459: VecGetArrayRead(bb,&b);
461: if (flag & SOR_ZERO_INITIAL_GUESS) {
462: if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
463: x[0] = b[0]*idiag[0] + b[1]*idiag[4] + b[2]*idiag[8] + b[3]*idiag[12];
464: x[1] = b[0]*idiag[1] + b[1]*idiag[5] + b[2]*idiag[9] + b[3]*idiag[13];
465: x[2] = b[0]*idiag[2] + b[1]*idiag[6] + b[2]*idiag[10] + b[3]*idiag[14];
466: x[3] = b[0]*idiag[3] + b[1]*idiag[7] + b[2]*idiag[11] + b[3]*idiag[15];
467: i2 = 4;
468: idiag += 16;
469: for (i=1; i<m; i++) {
470: v = aa + 16*ai[i];
471: vi = aj + ai[i];
472: nz = diag[i] - ai[i];
473: s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3];
474: while (nz--) {
475: idx = 4*(*vi++);
476: x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx];
477: s1 -= v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4;
478: s2 -= v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4;
479: s3 -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4;
480: s4 -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4;
481: v += 16;
482: }
483: x[i2] = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3 + idiag[12]*s4;
484: x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3 + idiag[13]*s4;
485: x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4;
486: x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4;
487: idiag += 16;
488: i2 += 4;
489: }
490: /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
491: PetscLogFlops(16.0*(a->nz));
492: }
493: if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
494: (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
495: i2 = 0;
496: mdiag = a->idiag+16*a->mbs;
497: for (i=0; i<m; i++) {
498: x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3];
499: x[i2] = mdiag[0]*x1 + mdiag[4]*x2 + mdiag[8]*x3 + mdiag[12]*x4;
500: x[i2+1] = mdiag[1]*x1 + mdiag[5]*x2 + mdiag[9]*x3 + mdiag[13]*x4;
501: x[i2+2] = mdiag[2]*x1 + mdiag[6]*x2 + mdiag[10]*x3 + mdiag[14]*x4;
502: x[i2+3] = mdiag[3]*x1 + mdiag[7]*x2 + mdiag[11]*x3 + mdiag[15]*x4;
503: mdiag += 16;
504: i2 += 4;
505: }
506: PetscLogFlops(28.0*m);
507: } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
508: PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));
509: }
510: if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
511: idiag = a->idiag+16*a->mbs - 16;
512: i2 = 4*m - 4;
513: x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3];
514: x[i2] = idiag[0]*x1 + idiag[4]*x2 + idiag[8]*x3 + idiag[12]*x4;
515: x[i2+1] = idiag[1]*x1 + idiag[5]*x2 + idiag[9]*x3 + idiag[13]*x4;
516: x[i2+2] = idiag[2]*x1 + idiag[6]*x2 + idiag[10]*x3 + idiag[14]*x4;
517: x[i2+3] = idiag[3]*x1 + idiag[7]*x2 + idiag[11]*x3 + idiag[15]*x4;
518: idiag -= 16;
519: i2 -= 4;
520: for (i=m-2; i>=0; i--) {
521: v = aa + 16*(diag[i]+1);
522: vi = aj + diag[i] + 1;
523: nz = ai[i+1] - diag[i] - 1;
524: s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3];
525: while (nz--) {
526: idx = 4*(*vi++);
527: x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx];
528: s1 -= v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4;
529: s2 -= v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4;
530: s3 -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4;
531: s4 -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4;
532: v += 16;
533: }
534: x[i2] = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3 + idiag[12]*s4;
535: x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3 + idiag[13]*s4;
536: x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4;
537: x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4;
538: idiag -= 16;
539: i2 -= 4;
540: }
541: PetscLogFlops(16.0*(a->nz));
542: }
543: } else {
544: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
545: }
546: VecRestoreArray(xx,&x);
547: VecRestoreArrayRead(bb,&b);
548: return(0);
549: }
553: PetscErrorCode MatSOR_SeqBAIJ_5(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
554: {
555: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
556: PetscScalar *x,x1,x2,x3,x4,x5,s1,s2,s3,s4,s5;
557: const MatScalar *v,*aa = a->a, *idiag,*mdiag;
558: const PetscScalar *b;
559: PetscErrorCode ierr;
560: PetscInt m = a->mbs,i,i2,nz,idx;
561: const PetscInt *diag,*ai = a->i,*aj = a->j,*vi;
564: if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
565: its = its*lits;
566: if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
567: if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
568: if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
569: if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
570: if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
572: if (!a->idiagvalid){MatInvertBlockDiagonal(A,PETSC_NULL);}
574: diag = a->diag;
575: idiag = a->idiag;
576: VecGetArray(xx,&x);
577: VecGetArrayRead(bb,&b);
579: if (flag & SOR_ZERO_INITIAL_GUESS) {
580: if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
581: x[0] = b[0]*idiag[0] + b[1]*idiag[5] + b[2]*idiag[10] + b[3]*idiag[15] + b[4]*idiag[20];
582: x[1] = b[0]*idiag[1] + b[1]*idiag[6] + b[2]*idiag[11] + b[3]*idiag[16] + b[4]*idiag[21];
583: x[2] = b[0]*idiag[2] + b[1]*idiag[7] + b[2]*idiag[12] + b[3]*idiag[17] + b[4]*idiag[22];
584: x[3] = b[0]*idiag[3] + b[1]*idiag[8] + b[2]*idiag[13] + b[3]*idiag[18] + b[4]*idiag[23];
585: x[4] = b[0]*idiag[4] + b[1]*idiag[9] + b[2]*idiag[14] + b[3]*idiag[19] + b[4]*idiag[24];
586: i2 = 5;
587: idiag += 25;
588: for (i=1; i<m; i++) {
589: v = aa + 25*ai[i];
590: vi = aj + ai[i];
591: nz = diag[i] - ai[i];
592: s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4];
593: while (nz--) {
594: idx = 5*(*vi++);
595: x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx];
596: s1 -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5;
597: s2 -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5;
598: s3 -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5;
599: s4 -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5;
600: s5 -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5;
601: v += 25;
602: }
603: x[i2] = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5;
604: x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5;
605: x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5;
606: x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5;
607: x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5;
608: idiag += 25;
609: i2 += 5;
610: }
611: /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
612: PetscLogFlops(25.0*(a->nz));
613: }
614: if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
615: (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
616: i2 = 0;
617: mdiag = a->idiag+25*a->mbs;
618: for (i=0; i<m; i++) {
619: x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4];
620: x[i2] = mdiag[0]*x1 + mdiag[5]*x2 + mdiag[10]*x3 + mdiag[15]*x4 + mdiag[20]*x5;
621: x[i2+1] = mdiag[1]*x1 + mdiag[6]*x2 + mdiag[11]*x3 + mdiag[16]*x4 + mdiag[21]*x5;
622: x[i2+2] = mdiag[2]*x1 + mdiag[7]*x2 + mdiag[12]*x3 + mdiag[17]*x4 + mdiag[22]*x5;
623: x[i2+3] = mdiag[3]*x1 + mdiag[8]*x2 + mdiag[13]*x3 + mdiag[18]*x4 + mdiag[23]*x5;
624: x[i2+4] = mdiag[4]*x1 + mdiag[9]*x2 + mdiag[14]*x3 + mdiag[19]*x4 + mdiag[24]*x5;
625: mdiag += 25;
626: i2 += 5;
627: }
628: PetscLogFlops(45.0*m);
629: } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
630: PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));
631: }
632: if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
633: idiag = a->idiag+25*a->mbs - 25;
634: i2 = 5*m - 5;
635: x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4];
636: x[i2] = idiag[0]*x1 + idiag[5]*x2 + idiag[10]*x3 + idiag[15]*x4 + idiag[20]*x5;
637: x[i2+1] = idiag[1]*x1 + idiag[6]*x2 + idiag[11]*x3 + idiag[16]*x4 + idiag[21]*x5;
638: x[i2+2] = idiag[2]*x1 + idiag[7]*x2 + idiag[12]*x3 + idiag[17]*x4 + idiag[22]*x5;
639: x[i2+3] = idiag[3]*x1 + idiag[8]*x2 + idiag[13]*x3 + idiag[18]*x4 + idiag[23]*x5;
640: x[i2+4] = idiag[4]*x1 + idiag[9]*x2 + idiag[14]*x3 + idiag[19]*x4 + idiag[24]*x5;
641: idiag -= 25;
642: i2 -= 5;
643: for (i=m-2; i>=0; i--) {
644: v = aa + 25*(diag[i]+1);
645: vi = aj + diag[i] + 1;
646: nz = ai[i+1] - diag[i] - 1;
647: s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4];
648: while (nz--) {
649: idx = 5*(*vi++);
650: x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx];
651: s1 -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5;
652: s2 -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5;
653: s3 -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5;
654: s4 -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5;
655: s5 -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5;
656: v += 25;
657: }
658: x[i2] = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5;
659: x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5;
660: x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5;
661: x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5;
662: x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5;
663: idiag -= 25;
664: i2 -= 5;
665: }
666: PetscLogFlops(25.0*(a->nz));
667: }
668: } else {
669: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
670: }
671: VecRestoreArray(xx,&x);
672: VecRestoreArrayRead(bb,&b);
673: return(0);
674: }
678: PetscErrorCode MatSOR_SeqBAIJ_6(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
679: {
680: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
681: PetscScalar *x,x1,x2,x3,x4,x5,x6,s1,s2,s3,s4,s5,s6;
682: const MatScalar *v,*aa = a->a, *idiag,*mdiag;
683: const PetscScalar *b;
684: PetscErrorCode ierr;
685: PetscInt m = a->mbs,i,i2,nz,idx;
686: const PetscInt *diag,*ai = a->i,*aj = a->j,*vi;
689: if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
690: its = its*lits;
691: if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
692: if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
693: if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
694: if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
695: if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
697: if (!a->idiagvalid){MatInvertBlockDiagonal(A,PETSC_NULL);}
699: diag = a->diag;
700: idiag = a->idiag;
701: VecGetArray(xx,&x);
702: VecGetArrayRead(bb,&b);
704: if (flag & SOR_ZERO_INITIAL_GUESS) {
705: if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
706: x[0] = b[0]*idiag[0] + b[1]*idiag[6] + b[2]*idiag[12] + b[3]*idiag[18] + b[4]*idiag[24] + b[5]*idiag[30];
707: x[1] = b[0]*idiag[1] + b[1]*idiag[7] + b[2]*idiag[13] + b[3]*idiag[19] + b[4]*idiag[25] + b[5]*idiag[31];
708: x[2] = b[0]*idiag[2] + b[1]*idiag[8] + b[2]*idiag[14] + b[3]*idiag[20] + b[4]*idiag[26] + b[5]*idiag[32];
709: x[3] = b[0]*idiag[3] + b[1]*idiag[9] + b[2]*idiag[15] + b[3]*idiag[21] + b[4]*idiag[27] + b[5]*idiag[33];
710: x[4] = b[0]*idiag[4] + b[1]*idiag[10] + b[2]*idiag[16] + b[3]*idiag[22] + b[4]*idiag[28] + b[5]*idiag[34];
711: x[5] = b[0]*idiag[5] + b[1]*idiag[11] + b[2]*idiag[17] + b[3]*idiag[23] + b[4]*idiag[29] + b[5]*idiag[35];
712: i2 = 6;
713: idiag += 36;
714: for (i=1; i<m; i++) {
715: v = aa + 36*ai[i];
716: vi = aj + ai[i];
717: nz = diag[i] - ai[i];
718: s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; s6 = b[i2+5];
719: while (nz--) {
720: idx = 6*(*vi++);
721: x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx];
722: s1 -= v[0]*x1 + v[6]*x2 + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6;
723: s2 -= v[1]*x1 + v[7]*x2 + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6;
724: s3 -= v[2]*x1 + v[8]*x2 + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6;
725: s4 -= v[3]*x1 + v[9]*x2 + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6;
726: s5 -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6;
727: s6 -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6;
728: v += 36;
729: }
730: x[i2] = idiag[0]*s1 + idiag[6]*s2 + idiag[12]*s3 + idiag[18]*s4 + idiag[24]*s5 + idiag[30]*s6;
731: x[i2+1] = idiag[1]*s1 + idiag[7]*s2 + idiag[13]*s3 + idiag[19]*s4 + idiag[25]*s5 + idiag[31]*s6;
732: x[i2+2] = idiag[2]*s1 + idiag[8]*s2 + idiag[14]*s3 + idiag[20]*s4 + idiag[26]*s5 + idiag[32]*s6;
733: x[i2+3] = idiag[3]*s1 + idiag[9]*s2 + idiag[15]*s3 + idiag[21]*s4 + idiag[27]*s5 + idiag[33]*s6;
734: x[i2+4] = idiag[4]*s1 + idiag[10]*s2 + idiag[16]*s3 + idiag[22]*s4 + idiag[28]*s5 + idiag[34]*s6;
735: x[i2+5] = idiag[5]*s1 + idiag[11]*s2 + idiag[17]*s3 + idiag[23]*s4 + idiag[29]*s5 + idiag[35]*s6;
736: idiag += 36;
737: i2 += 6;
738: }
739: /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
740: PetscLogFlops(36.0*(a->nz));
741: }
742: if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
743: (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
744: i2 = 0;
745: mdiag = a->idiag+36*a->mbs;
746: for (i=0; i<m; i++) {
747: x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5];
748: x[i2] = mdiag[0]*x1 + mdiag[6]*x2 + mdiag[12]*x3 + mdiag[18]*x4 + mdiag[24]*x5 + mdiag[30]*x6;
749: x[i2+1] = mdiag[1]*x1 + mdiag[7]*x2 + mdiag[13]*x3 + mdiag[19]*x4 + mdiag[25]*x5 + mdiag[31]*x6;
750: x[i2+2] = mdiag[2]*x1 + mdiag[8]*x2 + mdiag[14]*x3 + mdiag[20]*x4 + mdiag[26]*x5 + mdiag[32]*x6;
751: x[i2+3] = mdiag[3]*x1 + mdiag[9]*x2 + mdiag[15]*x3 + mdiag[21]*x4 + mdiag[27]*x5 + mdiag[33]*x6;
752: x[i2+4] = mdiag[4]*x1 + mdiag[10]*x2 + mdiag[16]*x3 + mdiag[22]*x4 + mdiag[28]*x5 + mdiag[34]*x6;
753: x[i2+5] = mdiag[5]*x1 + mdiag[11]*x2 + mdiag[17]*x3 + mdiag[23]*x4 + mdiag[29]*x5 + mdiag[35]*x6;
754: mdiag += 36;
755: i2 += 6;
756: }
757: PetscLogFlops(60.0*m);
758: } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
759: PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));
760: }
761: if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
762: idiag = a->idiag+36*a->mbs - 36;
763: i2 = 6*m - 6;
764: x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5];
765: x[i2] = idiag[0]*x1 + idiag[6]*x2 + idiag[12]*x3 + idiag[18]*x4 + idiag[24]*x5 + idiag[30]*x6;
766: x[i2+1] = idiag[1]*x1 + idiag[7]*x2 + idiag[13]*x3 + idiag[19]*x4 + idiag[25]*x5 + idiag[31]*x6;
767: x[i2+2] = idiag[2]*x1 + idiag[8]*x2 + idiag[14]*x3 + idiag[20]*x4 + idiag[26]*x5 + idiag[32]*x6;
768: x[i2+3] = idiag[3]*x1 + idiag[9]*x2 + idiag[15]*x3 + idiag[21]*x4 + idiag[27]*x5 + idiag[33]*x6;
769: x[i2+4] = idiag[4]*x1 + idiag[10]*x2 + idiag[16]*x3 + idiag[22]*x4 + idiag[28]*x5 + idiag[34]*x6;
770: x[i2+5] = idiag[5]*x1 + idiag[11]*x2 + idiag[17]*x3 + idiag[23]*x4 + idiag[29]*x5 + idiag[35]*x6;
771: idiag -= 36;
772: i2 -= 6;
773: for (i=m-2; i>=0; i--) {
774: v = aa + 36*(diag[i]+1);
775: vi = aj + diag[i] + 1;
776: nz = ai[i+1] - diag[i] - 1;
777: s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; s6 = x[i2+5];
778: while (nz--) {
779: idx = 6*(*vi++);
780: x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx];
781: s1 -= v[0]*x1 + v[6]*x2 + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6;
782: s2 -= v[1]*x1 + v[7]*x2 + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6;
783: s3 -= v[2]*x1 + v[8]*x2 + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6;
784: s4 -= v[3]*x1 + v[9]*x2 + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6;
785: s5 -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6;
786: s6 -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6;
787: v += 36;
788: }
789: x[i2] = idiag[0]*s1 + idiag[6]*s2 + idiag[12]*s3 + idiag[18]*s4 + idiag[24]*s5 + idiag[30]*s6;
790: x[i2+1] = idiag[1]*s1 + idiag[7]*s2 + idiag[13]*s3 + idiag[19]*s4 + idiag[25]*s5 + idiag[31]*s6;
791: x[i2+2] = idiag[2]*s1 + idiag[8]*s2 + idiag[14]*s3 + idiag[20]*s4 + idiag[26]*s5 + idiag[32]*s6;
792: x[i2+3] = idiag[3]*s1 + idiag[9]*s2 + idiag[15]*s3 + idiag[21]*s4 + idiag[27]*s5 + idiag[33]*s6;
793: x[i2+4] = idiag[4]*s1 + idiag[10]*s2 + idiag[16]*s3 + idiag[22]*s4 + idiag[28]*s5 + idiag[34]*s6;
794: x[i2+5] = idiag[5]*s1 + idiag[11]*s2 + idiag[17]*s3 + idiag[23]*s4 + idiag[29]*s5 + idiag[35]*s6;
795: idiag -= 36;
796: i2 -= 6;
797: }
798: PetscLogFlops(36.0*(a->nz));
799: }
800: } else {
801: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
802: }
803: VecRestoreArray(xx,&x);
804: VecRestoreArrayRead(bb,&b);
805: return(0);
806: }
810: PetscErrorCode MatSOR_SeqBAIJ_7(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
811: {
812: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
813: PetscScalar *x,x1,x2,x3,x4,x5,x6,x7,s1,s2,s3,s4,s5,s6,s7;
814: const MatScalar *v,*aa = a->a, *idiag,*mdiag;
815: const PetscScalar *b;
816: PetscErrorCode ierr;
817: PetscInt m = a->mbs,i,i2,nz,idx;
818: const PetscInt *diag,*ai = a->i,*aj = a->j,*vi;
821: if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
822: its = its*lits;
823: if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
824: if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
825: if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
826: if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
827: if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
829: if (!a->idiagvalid){MatInvertBlockDiagonal(A,PETSC_NULL);}
831: diag = a->diag;
832: idiag = a->idiag;
833: VecGetArray(xx,&x);
834: VecGetArrayRead(bb,&b);
836: if (flag & SOR_ZERO_INITIAL_GUESS) {
837: if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
838: x[0] = b[0]*idiag[0] + b[1]*idiag[7] + b[2]*idiag[14] + b[3]*idiag[21] + b[4]*idiag[28] + b[5]*idiag[35] + b[6]*idiag[42];
839: x[1] = b[0]*idiag[1] + b[1]*idiag[8] + b[2]*idiag[15] + b[3]*idiag[22] + b[4]*idiag[29] + b[5]*idiag[36] + b[6]*idiag[43];
840: x[2] = b[0]*idiag[2] + b[1]*idiag[9] + b[2]*idiag[16] + b[3]*idiag[23] + b[4]*idiag[30] + b[5]*idiag[37] + b[6]*idiag[44];
841: x[3] = b[0]*idiag[3] + b[1]*idiag[10] + b[2]*idiag[17] + b[3]*idiag[24] + b[4]*idiag[31] + b[5]*idiag[38] + b[6]*idiag[45];
842: x[4] = b[0]*idiag[4] + b[1]*idiag[11] + b[2]*idiag[18] + b[3]*idiag[25] + b[4]*idiag[32] + b[5]*idiag[39] + b[6]*idiag[46];
843: x[5] = b[0]*idiag[5] + b[1]*idiag[12] + b[2]*idiag[19] + b[3]*idiag[26] + b[4]*idiag[33] + b[5]*idiag[40] + b[6]*idiag[47];
844: x[6] = b[0]*idiag[6] + b[1]*idiag[13] + b[2]*idiag[20] + b[3]*idiag[27] + b[4]*idiag[34] + b[5]*idiag[41] + b[6]*idiag[48];
845: i2 = 7;
846: idiag += 49;
847: for (i=1; i<m; i++) {
848: v = aa + 49*ai[i];
849: vi = aj + ai[i];
850: nz = diag[i] - ai[i];
851: s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; s6 = b[i2+5]; s7 = b[i2+6];
852: while (nz--) {
853: idx = 7*(*vi++);
854: x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; x7 = x[6+idx];
855: s1 -= v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7;
856: s2 -= v[1]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7;
857: s3 -= v[2]*x1 + v[9]*x2 + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7;
858: s4 -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7;
859: s5 -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7;
860: s6 -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7;
861: s7 -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7;
862: v += 49;
863: }
864: x[i2] = idiag[0]*s1 + idiag[7]*s2 + idiag[14]*s3 + idiag[21]*s4 + idiag[28]*s5 + idiag[35]*s6 + idiag[42]*s7;
865: x[i2+1] = idiag[1]*s1 + idiag[8]*s2 + idiag[15]*s3 + idiag[22]*s4 + idiag[29]*s5 + idiag[36]*s6 + idiag[43]*s7;
866: x[i2+2] = idiag[2]*s1 + idiag[9]*s2 + idiag[16]*s3 + idiag[23]*s4 + idiag[30]*s5 + idiag[37]*s6 + idiag[44]*s7;
867: x[i2+3] = idiag[3]*s1 + idiag[10]*s2 + idiag[17]*s3 + idiag[24]*s4 + idiag[31]*s5 + idiag[38]*s6 + idiag[45]*s7;
868: x[i2+4] = idiag[4]*s1 + idiag[11]*s2 + idiag[18]*s3 + idiag[25]*s4 + idiag[32]*s5 + idiag[39]*s6 + idiag[46]*s7;
869: x[i2+5] = idiag[5]*s1 + idiag[12]*s2 + idiag[19]*s3 + idiag[26]*s4 + idiag[33]*s5 + idiag[40]*s6 + idiag[47]*s7;
870: x[i2+6] = idiag[6]*s1 + idiag[13]*s2 + idiag[20]*s3 + idiag[27]*s4 + idiag[34]*s5 + idiag[41]*s6 + idiag[48]*s7;
871: idiag += 49;
872: i2 += 7;
873: }
874: /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
875: PetscLogFlops(49.0*(a->nz));
876: }
877: if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
878: (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
879: i2 = 0;
880: mdiag = a->idiag+49*a->mbs;
881: for (i=0; i<m; i++) {
882: x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; x7 = x[i2+6];
883: x[i2] = mdiag[0]*x1 + mdiag[7]*x2 + mdiag[14]*x3 + mdiag[21]*x4 + mdiag[28]*x5 + mdiag[35]*x6 + mdiag[42]*x7;
884: x[i2+1] = mdiag[1]*x1 + mdiag[8]*x2 + mdiag[15]*x3 + mdiag[22]*x4 + mdiag[29]*x5 + mdiag[36]*x6 + mdiag[43]*x7;
885: x[i2+2] = mdiag[2]*x1 + mdiag[9]*x2 + mdiag[16]*x3 + mdiag[23]*x4 + mdiag[30]*x5 + mdiag[37]*x6 + mdiag[44]*x7;
886: x[i2+3] = mdiag[3]*x1 + mdiag[10]*x2 + mdiag[17]*x3 + mdiag[24]*x4 + mdiag[31]*x5 + mdiag[38]*x6 + mdiag[45]*x7;
887: x[i2+4] = mdiag[4]*x1 + mdiag[11]*x2 + mdiag[18]*x3 + mdiag[25]*x4 + mdiag[32]*x5 + mdiag[39]*x6 + mdiag[46]*x7;
888: x[i2+5] = mdiag[5]*x1 + mdiag[12]*x2 + mdiag[19]*x3 + mdiag[26]*x4 + mdiag[33]*x5 + mdiag[40]*x6 + mdiag[47]*x7;
889: x[i2+6] = mdiag[6]*x1 + mdiag[13]*x2 + mdiag[20]*x3 + mdiag[27]*x4 + mdiag[34]*x5 + mdiag[41]*x6 + mdiag[48]*x7;
890: mdiag += 49;
891: i2 += 7;
892: }
893: PetscLogFlops(93.0*m);
894: } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
895: PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));
896: }
897: if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
898: idiag = a->idiag+49*a->mbs - 49;
899: i2 = 7*m - 7;
900: x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; x7 = x[i2+6];
901: x[i2] = idiag[0]*x1 + idiag[7]*x2 + idiag[14]*x3 + idiag[21]*x4 + idiag[28]*x5 + idiag[35]*x6 + idiag[42]*x7;
902: x[i2+1] = idiag[1]*x1 + idiag[8]*x2 + idiag[15]*x3 + idiag[22]*x4 + idiag[29]*x5 + idiag[36]*x6 + idiag[43]*x7;
903: x[i2+2] = idiag[2]*x1 + idiag[9]*x2 + idiag[16]*x3 + idiag[23]*x4 + idiag[30]*x5 + idiag[37]*x6 + idiag[44]*x7;
904: x[i2+3] = idiag[3]*x1 + idiag[10]*x2 + idiag[17]*x3 + idiag[24]*x4 + idiag[31]*x5 + idiag[38]*x6 + idiag[45]*x7;
905: x[i2+4] = idiag[4]*x1 + idiag[11]*x2 + idiag[18]*x3 + idiag[25]*x4 + idiag[32]*x5 + idiag[39]*x6 + idiag[46]*x7;
906: x[i2+5] = idiag[5]*x1 + idiag[12]*x2 + idiag[19]*x3 + idiag[26]*x4 + idiag[33]*x5 + idiag[40]*x6 + idiag[47]*x7;
907: x[i2+6] = idiag[6]*x1 + idiag[13]*x2 + idiag[20]*x3 + idiag[27]*x4 + idiag[34]*x5 + idiag[41]*x6 + idiag[48]*x7;
908: idiag -= 49;
909: i2 -= 7;
910: for (i=m-2; i>=0; i--) {
911: v = aa + 49*(diag[i]+1);
912: vi = aj + diag[i] + 1;
913: nz = ai[i+1] - diag[i] - 1;
914: s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; s6 = x[i2+5]; s7 = x[i2+6];
915: while (nz--) {
916: idx = 7*(*vi++);
917: x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; x7 = x[6+idx];
918: s1 -= v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7;
919: s2 -= v[1]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7;
920: s3 -= v[2]*x1 + v[9]*x2 + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7;
921: s4 -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7;
922: s5 -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7;
923: s6 -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7;
924: s7 -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7;
925: v += 49;
926: }
927: x[i2] = idiag[0]*s1 + idiag[7]*s2 + idiag[14]*s3 + idiag[21]*s4 + idiag[28]*s5 + idiag[35]*s6 + idiag[42]*s7;
928: x[i2+1] = idiag[1]*s1 + idiag[8]*s2 + idiag[15]*s3 + idiag[22]*s4 + idiag[29]*s5 + idiag[36]*s6 + idiag[43]*s7;
929: x[i2+2] = idiag[2]*s1 + idiag[9]*s2 + idiag[16]*s3 + idiag[23]*s4 + idiag[30]*s5 + idiag[37]*s6 + idiag[44]*s7;
930: x[i2+3] = idiag[3]*s1 + idiag[10]*s2 + idiag[17]*s3 + idiag[24]*s4 + idiag[31]*s5 + idiag[38]*s6 + idiag[45]*s7;
931: x[i2+4] = idiag[4]*s1 + idiag[11]*s2 + idiag[18]*s3 + idiag[25]*s4 + idiag[32]*s5 + idiag[39]*s6 + idiag[46]*s7;
932: x[i2+5] = idiag[5]*s1 + idiag[12]*s2 + idiag[19]*s3 + idiag[26]*s4 + idiag[33]*s5 + idiag[40]*s6 + idiag[47]*s7;
933: x[i2+6] = idiag[6]*s1 + idiag[13]*s2 + idiag[20]*s3 + idiag[27]*s4 + idiag[34]*s5 + idiag[41]*s6 + idiag[48]*s7;
934: idiag -= 49;
935: i2 -= 7;
936: }
937: PetscLogFlops(49.0*(a->nz));
938: }
939: } else {
940: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
941: }
942: VecRestoreArray(xx,&x);
943: VecRestoreArrayRead(bb,&b);
944: return(0);
945: }
949: PetscErrorCode MatSOR_SeqBAIJ_N(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
950: {
951: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
952: PetscScalar *x,*work,*w,*workt;
953: const MatScalar *v,*aa = a->a, *idiag,*mdiag;
954: const PetscScalar *b;
955: PetscErrorCode ierr;
956: PetscInt m = a->mbs,i,i2,nz,bs = A->rmap->bs,bs2 = bs*bs,k,j;
957: const PetscInt *diag,*ai = a->i,*aj = a->j,*vi;
960: its = its*lits;
961: if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
962: if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
963: if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
964: if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
965: if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
966: if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
968: if (!a->idiagvalid){MatInvertBlockDiagonal(A,PETSC_NULL);}
970: diag = a->diag;
971: idiag = a->idiag;
972: if (!a->mult_work) {
973: k = PetscMax(A->rmap->n,A->cmap->n);
974: PetscMalloc((k+1)*sizeof(PetscScalar),&a->mult_work);
975: }
976: work = a->mult_work;
977: if (!a->sor_work) {
978: PetscMalloc(bs*sizeof(PetscScalar),&a->sor_work);
979: }
980: w = a->sor_work;
982: VecGetArray(xx,&x);
983: VecGetArrayRead(bb,&b);
985: if (flag & SOR_ZERO_INITIAL_GUESS) {
986: if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
987: PetscKernel_w_gets_Ar_times_v(bs,bs,b,idiag,x);
988: /*x[0] = b[0]*idiag[0] + b[1]*idiag[3] + b[2]*idiag[6];
989: x[1] = b[0]*idiag[1] + b[1]*idiag[4] + b[2]*idiag[7];
990: x[2] = b[0]*idiag[2] + b[1]*idiag[5] + b[2]*idiag[8];*/
991: i2 = bs;
992: idiag += bs2;
993: for (i=1; i<m; i++) {
994: v = aa + bs2*ai[i];
995: vi = aj + ai[i];
996: nz = diag[i] - ai[i];
998: PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));
999: /* copy all rows of x that are needed into contiguous space */
1000: workt = work;
1001: for (j=0; j<nz; j++) {
1002: PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));
1003: workt += bs;
1004: }
1005: PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
1006: /*s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2];
1007: while (nz--) {
1008: idx = N*(*vi++);
1009: x1 = x[idx]; x2 = x[1+idx];x3 = x[2+idx];
1010: s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3;
1011: s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3;
1012: s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3;
1013: v += N2;
1014: } */
1016: PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
1017: /* x[i2] = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
1018: x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
1019: x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;*/
1021: idiag += bs2;
1022: i2 += bs;
1023: }
1024: /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
1025: PetscLogFlops(1.0*bs2*(a->nz));
1026: }
1027: if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
1028: (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
1029: i2 = 0;
1030: mdiag = a->idiag+bs2*a->mbs;
1031: PetscMemcpy(work,x,m*bs*sizeof(PetscScalar));
1032: for (i=0; i<m; i++) {
1033: PetscKernel_w_gets_Ar_times_v(bs,bs,work+i2,mdiag,x+i2);
1034: /* x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
1035: x[i2] = mdiag[0]*x1 + mdiag[3]*x2 + mdiag[6]*x3;
1036: x[i2+1] = mdiag[1]*x1 + mdiag[4]*x2 + mdiag[7]*x3;
1037: x[i2+2] = mdiag[2]*x1 + mdiag[5]*x2 + mdiag[8]*x3; */
1039: mdiag += bs2;
1040: i2 += bs;
1041: }
1042: PetscLogFlops(2.0*bs*(bs-1)*m);
1043: } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
1044: PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));
1045: }
1046: if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
1047: idiag = a->idiag+bs2*a->mbs - bs2;
1048: i2 = bs*m - bs;
1049: PetscMemcpy(w,x+i2,bs*sizeof(PetscScalar));
1050: PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
1051: /*x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
1052: x[i2] = idiag[0]*x1 + idiag[3]*x2 + idiag[6]*x3;
1053: x[i2+1] = idiag[1]*x1 + idiag[4]*x2 + idiag[7]*x3;
1054: x[i2+2] = idiag[2]*x1 + idiag[5]*x2 + idiag[8]*x3;*/
1055: idiag -= bs2;
1056: i2 -= bs;
1057: for (i=m-2; i>=0; i--) {
1058: v = aa + bs2*(diag[i]+1);
1059: vi = aj + diag[i] + 1;
1060: nz = ai[i+1] - diag[i] - 1;
1062: PetscMemcpy(w,x+i2,bs*sizeof(PetscScalar));
1063: /* copy all rows of x that are needed into contiguous space */
1064: workt = work;
1065: for (j=0; j<nz; j++) {
1066: PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));
1067: workt += bs;
1068: }
1069: PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
1070: /* s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2];
1071: while (nz--) {
1072: idx = N*(*vi++);
1073: x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx];
1074: s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3;
1075: s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3;
1076: s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3;
1077: v += N2;
1078: } */
1080: PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
1081: /*x[i2] = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
1082: x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
1083: x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3; */
1084: idiag -= bs2;
1085: i2 -= bs;
1086: }
1087: PetscLogFlops(1.0*bs2*(a->nz));
1088: }
1089: } else {
1090: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
1091: }
1092: VecRestoreArray(xx,&x);
1093: VecRestoreArrayRead(bb,&b);
1094: return(0);
1095: }
1097: /*
1098: Special version for direct calls from Fortran (Used in PETSc-fun3d)
1099: */
1100: #if defined(PETSC_HAVE_FORTRAN_CAPS)
1101: #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4
1102: #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
1103: #define matsetvaluesblocked4_ matsetvaluesblocked4
1104: #endif
1106: EXTERN_C_BEGIN
1109: void matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[])
1110: {
1111: Mat A = *AA;
1112: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1113: PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn;
1114: PetscInt *ai=a->i,*ailen=a->ilen;
1115: PetscInt *aj=a->j,stepval,lastcol = -1;
1116: const PetscScalar *value = v;
1117: MatScalar *ap,*aa = a->a,*bap;
1120: if (A->rmap->bs != 4) SETERRABORT(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4");
1121: stepval = (n-1)*4;
1122: for (k=0; k<m; k++) { /* loop over added rows */
1123: row = im[k];
1124: rp = aj + ai[row];
1125: ap = aa + 16*ai[row];
1126: nrow = ailen[row];
1127: low = 0;
1128: high = nrow;
1129: for (l=0; l<n; l++) { /* loop over added columns */
1130: col = in[l];
1131: if (col <= lastcol) low = 0; else high = nrow;
1132: lastcol = col;
1133: value = v + k*(stepval+4 + l)*4;
1134: while (high-low > 7) {
1135: t = (low+high)/2;
1136: if (rp[t] > col) high = t;
1137: else low = t;
1138: }
1139: for (i=low; i<high; i++) {
1140: if (rp[i] > col) break;
1141: if (rp[i] == col) {
1142: bap = ap + 16*i;
1143: for (ii=0; ii<4; ii++,value+=stepval) {
1144: for (jj=ii; jj<16; jj+=4) {
1145: bap[jj] += *value++;
1146: }
1147: }
1148: goto noinsert2;
1149: }
1150: }
1151: N = nrow++ - 1;
1152: high++; /* added new column index thus must search to one higher than before */
1153: /* shift up all the later entries in this row */
1154: for (ii=N; ii>=i; ii--) {
1155: rp[ii+1] = rp[ii];
1156: PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
1157: }
1158: if (N >= i) {
1159: PetscMemzero(ap+16*i,16*sizeof(MatScalar));
1160: }
1161: rp[i] = col;
1162: bap = ap + 16*i;
1163: for (ii=0; ii<4; ii++,value+=stepval) {
1164: for (jj=ii; jj<16; jj+=4) {
1165: bap[jj] = *value++;
1166: }
1167: }
1168: noinsert2:;
1169: low = i;
1170: }
1171: ailen[row] = nrow;
1172: }
1173: PetscFunctionReturnVoid();
1174: }
1175: EXTERN_C_END
1177: #if defined(PETSC_HAVE_FORTRAN_CAPS)
1178: #define matsetvalues4_ MATSETVALUES4
1179: #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
1180: #define matsetvalues4_ matsetvalues4
1181: #endif
1183: EXTERN_C_BEGIN
1186: void matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v)
1187: {
1188: Mat A = *AA;
1189: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1190: PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm;
1191: PetscInt *ai=a->i,*ailen=a->ilen;
1192: PetscInt *aj=a->j,brow,bcol;
1193: PetscInt ridx,cidx,lastcol = -1;
1194: MatScalar *ap,value,*aa=a->a,*bap;
1195:
1197: for (k=0; k<m; k++) { /* loop over added rows */
1198: row = im[k]; brow = row/4;
1199: rp = aj + ai[brow];
1200: ap = aa + 16*ai[brow];
1201: nrow = ailen[brow];
1202: low = 0;
1203: high = nrow;
1204: for (l=0; l<n; l++) { /* loop over added columns */
1205: col = in[l]; bcol = col/4;
1206: ridx = row % 4; cidx = col % 4;
1207: value = v[l + k*n];
1208: if (col <= lastcol) low = 0; else high = nrow;
1209: lastcol = col;
1210: while (high-low > 7) {
1211: t = (low+high)/2;
1212: if (rp[t] > bcol) high = t;
1213: else low = t;
1214: }
1215: for (i=low; i<high; i++) {
1216: if (rp[i] > bcol) break;
1217: if (rp[i] == bcol) {
1218: bap = ap + 16*i + 4*cidx + ridx;
1219: *bap += value;
1220: goto noinsert1;
1221: }
1222: }
1223: N = nrow++ - 1;
1224: high++; /* added new column thus must search to one higher than before */
1225: /* shift up all the later entries in this row */
1226: for (ii=N; ii>=i; ii--) {
1227: rp[ii+1] = rp[ii];
1228: PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
1229: }
1230: if (N>=i) {
1231: PetscMemzero(ap+16*i,16*sizeof(MatScalar));
1232: }
1233: rp[i] = bcol;
1234: ap[16*i + 4*cidx + ridx] = value;
1235: noinsert1:;
1236: low = i;
1237: }
1238: ailen[brow] = nrow;
1239: }
1240: PetscFunctionReturnVoid();
1241: }
1242: EXTERN_C_END
1244: /*
1245: Checks for missing diagonals
1246: */
1249: PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A,PetscBool *missing,PetscInt *d)
1250: {
1251: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1253: PetscInt *diag,*jj = a->j,i;
1256: MatMarkDiagonal_SeqBAIJ(A);
1257: *missing = PETSC_FALSE;
1258: if (A->rmap->n > 0 && !jj) {
1259: *missing = PETSC_TRUE;
1260: if (d) *d = 0;
1261: PetscInfo(A,"Matrix has no entries therefore is missing diagonal");
1262: } else {
1263: diag = a->diag;
1264: for (i=0; i<a->mbs; i++) {
1265: if (jj[diag[i]] != i) {
1266: *missing = PETSC_TRUE;
1267: if (d) *d = i;
1268: PetscInfo1(A,"Matrix is missing block diagonal number %D",i);
1269: break;
1270: }
1271: }
1272: }
1273: return(0);
1274: }
1278: PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A)
1279: {
1280: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1282: PetscInt i,j,m = a->mbs;
1285: if (!a->diag) {
1286: PetscMalloc(m*sizeof(PetscInt),&a->diag);
1287: PetscLogObjectMemory(A,m*sizeof(PetscInt));
1288: a->free_diag = PETSC_TRUE;
1289: }
1290: for (i=0; i<m; i++) {
1291: a->diag[i] = a->i[i+1];
1292: for (j=a->i[i]; j<a->i[i+1]; j++) {
1293: if (a->j[j] == i) {
1294: a->diag[i] = j;
1295: break;
1296: }
1297: }
1298: }
1299: return(0);
1300: }
1303: extern PetscErrorCode MatToSymmetricIJ_SeqAIJ(PetscInt,PetscInt*,PetscInt*,PetscInt,PetscInt,PetscInt**,PetscInt**);
1307: static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscBool *done)
1308: {
1309: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1311: PetscInt i,j,n = a->mbs,nz = a->i[n],bs = A->rmap->bs,k,l,cnt;
1312: PetscInt *tia, *tja;
1315: *nn = n;
1316: if (!ia) return(0);
1317: if (symmetric) {
1318: MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,0,&tia,&tja);
1319: nz = tia[n];
1320: } else {
1321: tia = a->i; tja = a->j;
1322: }
1323:
1324: if (!blockcompressed && bs > 1) {
1325: (*nn) *= bs;
1326: /* malloc & create the natural set of indices */
1327: PetscMalloc((n+1)*bs*sizeof(PetscInt),ia);
1328: if (n) {
1329: (*ia)[0] = 0;
1330: for (j=1; j<bs; j++) {
1331: (*ia)[j] = (tia[1]-tia[0])*bs+(*ia)[j-1];
1332: }
1333: }
1335: for (i=1; i<n; i++) {
1336: (*ia)[i*bs] = (tia[i]-tia[i-1])*bs + (*ia)[i*bs-1];
1337: for (j=1; j<bs; j++) {
1338: (*ia)[i*bs+j] = (tia[i+1]-tia[i])*bs + (*ia)[i*bs+j-1];
1339: }
1340: }
1341: if (n) {
1342: (*ia)[n*bs] = (tia[n]-tia[n-1])*bs + (*ia)[n*bs-1];
1343: }
1345: if (ja) {
1346: PetscMalloc(nz*bs*bs*sizeof(PetscInt),ja);
1347: cnt = 0;
1348: for (i=0; i<n; i++) {
1349: for (j=0; j<bs; j++) {
1350: for (k=tia[i]; k<tia[i+1]; k++) {
1351: for (l=0; l<bs; l++) {
1352: (*ja)[cnt++] = bs*tja[k] + l;
1353: }
1354: }
1355: }
1356: }
1357: }
1359: n *= bs;
1360: nz *= bs*bs;
1361: if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */
1362: PetscFree(tia);
1363: PetscFree(tja);
1364: }
1365: } else if (oshift == 1) {
1366: if (symmetric) {
1367: PetscInt nz = tia[A->rmap->n/bs];
1368: /* add 1 to i and j indices */
1369: for (i=0; i<A->rmap->n/bs+1; i++) tia[i] = tia[i] + 1;
1370: *ia = tia;
1371: if (ja) {
1372: for (i=0; i<nz; i++) tja[i] = tja[i] + 1;
1373: *ja = tja;
1374: }
1375: } else {
1376: PetscInt nz = a->i[A->rmap->n/bs];
1377: /* malloc space and add 1 to i and j indices */
1378: PetscMalloc((A->rmap->n/bs+1)*sizeof(PetscInt),ia);
1379: for (i=0; i<A->rmap->n/bs+1; i++) (*ia)[i] = a->i[i] + 1;
1380: if (ja) {
1381: PetscMalloc(nz*sizeof(PetscInt),ja);
1382: for (i=0; i<nz; i++) (*ja)[i] = a->j[i] + 1;
1383: }
1384: }
1385: } else {
1386: *ia = tia;
1387: if (ja) *ja = tja;
1388: }
1389:
1390: return(0);
1391: }
1395: static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscBool *done)
1396: {
1400: if (!ia) return(0);
1401: if ((!blockcompressed && A->rmap->bs > 1) || (symmetric || oshift == 1)) {
1402: PetscFree(*ia);
1403: if (ja) {PetscFree(*ja);}
1404: }
1405: return(0);
1406: }
1410: PetscErrorCode MatDestroy_SeqBAIJ(Mat A)
1411: {
1412: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1416: #if defined(PETSC_USE_LOG)
1417: PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap->N,A->cmap->n,a->nz);
1418: #endif
1419: MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);
1420: ISDestroy(&a->row);
1421: ISDestroy(&a->col);
1422: if (a->free_diag) {PetscFree(a->diag);}
1423: PetscFree(a->idiag);
1424: if (a->free_imax_ilen) {PetscFree2(a->imax,a->ilen);}
1425: PetscFree(a->solve_work);
1426: PetscFree(a->mult_work);
1427: PetscFree(a->sor_work);
1428: ISDestroy(&a->icol);
1429: PetscFree(a->saved_values);
1430: PetscFree(a->xtoy);
1431: PetscFree2(a->compressedrow.i,a->compressedrow.rindex);
1433: MatDestroy(&a->sbaijMat);
1434: MatDestroy(&a->parent);
1435: PetscFree(A->data);
1437: PetscObjectChangeTypeName((PetscObject)A,0);
1438: PetscObjectComposeFunction((PetscObject)A,"MatInvertBlockDiagonal_C","",PETSC_NULL);
1439: PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C","",PETSC_NULL);
1440: PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C","",PETSC_NULL);
1441: PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C","",PETSC_NULL);
1442: PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C","",PETSC_NULL);
1443: PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C","",PETSC_NULL);
1444: PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C","",PETSC_NULL);
1445: PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocationCSR_C","",PETSC_NULL);
1446: PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqbstrm_C","",PETSC_NULL);
1447: PetscObjectComposeFunction((PetscObject)A,"MatIsTranspose_C","",PETSC_NULL);
1448: return(0);
1449: }
1453: PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op,PetscBool flg)
1454: {
1455: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1459: switch (op) {
1460: case MAT_ROW_ORIENTED:
1461: a->roworiented = flg;
1462: break;
1463: case MAT_KEEP_NONZERO_PATTERN:
1464: a->keepnonzeropattern = flg;
1465: break;
1466: case MAT_NEW_NONZERO_LOCATIONS:
1467: a->nonew = (flg ? 0 : 1);
1468: break;
1469: case MAT_NEW_NONZERO_LOCATION_ERR:
1470: a->nonew = (flg ? -1 : 0);
1471: break;
1472: case MAT_NEW_NONZERO_ALLOCATION_ERR:
1473: a->nonew = (flg ? -2 : 0);
1474: break;
1475: case MAT_UNUSED_NONZERO_LOCATION_ERR:
1476: a->nounused = (flg ? -1 : 0);
1477: break;
1478: case MAT_CHECK_COMPRESSED_ROW:
1479: a->compressedrow.check = flg;
1480: break;
1481: case MAT_NEW_DIAGONALS:
1482: case MAT_IGNORE_OFF_PROC_ENTRIES:
1483: case MAT_USE_HASH_TABLE:
1484: PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);
1485: break;
1486: case MAT_SYMMETRIC:
1487: case MAT_STRUCTURALLY_SYMMETRIC:
1488: case MAT_HERMITIAN:
1489: case MAT_SYMMETRY_ETERNAL:
1490: PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);
1491: break;
1492: default:
1493: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1494: }
1495: return(0);
1496: }
1500: PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1501: {
1502: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1504: PetscInt itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2;
1505: MatScalar *aa,*aa_i;
1506: PetscScalar *v_i;
1509: bs = A->rmap->bs;
1510: ai = a->i;
1511: aj = a->j;
1512: aa = a->a;
1513: bs2 = a->bs2;
1514:
1515: if (row < 0 || row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row);
1516:
1517: bn = row/bs; /* Block number */
1518: bp = row % bs; /* Block Position */
1519: M = ai[bn+1] - ai[bn];
1520: *nz = bs*M;
1521:
1522: if (v) {
1523: *v = 0;
1524: if (*nz) {
1525: PetscMalloc((*nz)*sizeof(PetscScalar),v);
1526: for (i=0; i<M; i++) { /* for each block in the block row */
1527: v_i = *v + i*bs;
1528: aa_i = aa + bs2*(ai[bn] + i);
1529: for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];}
1530: }
1531: }
1532: }
1534: if (idx) {
1535: *idx = 0;
1536: if (*nz) {
1537: PetscMalloc((*nz)*sizeof(PetscInt),idx);
1538: for (i=0; i<M; i++) { /* for each block in the block row */
1539: idx_i = *idx + i*bs;
1540: itmp = bs*aj[ai[bn] + i];
1541: for (j=0; j<bs; j++) {idx_i[j] = itmp++;}
1542: }
1543: }
1544: }
1545: return(0);
1546: }
1550: PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1551: {
1555: if (idx) {PetscFree(*idx);}
1556: if (v) {PetscFree(*v);}
1557: return(0);
1558: }
1560: extern PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
1564: PetscErrorCode MatTranspose_SeqBAIJ(Mat A,MatReuse reuse,Mat *B)
1565: {
1566: Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data;
1567: Mat C;
1569: PetscInt i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap->bs,mbs=a->mbs,nbs=a->nbs,len,*col;
1570: PetscInt *rows,*cols,bs2=a->bs2;
1571: MatScalar *array;
1574: if (reuse == MAT_REUSE_MATRIX && A == *B && mbs != nbs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1575: if (reuse == MAT_INITIAL_MATRIX || A == *B) {
1576: PetscMalloc((1+nbs)*sizeof(PetscInt),&col);
1577: PetscMemzero(col,(1+nbs)*sizeof(PetscInt));
1579: for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1;
1580: MatCreate(((PetscObject)A)->comm,&C);
1581: MatSetSizes(C,A->cmap->n,A->rmap->N,A->cmap->n,A->rmap->N);
1582: MatSetType(C,((PetscObject)A)->type_name);
1583: MatSeqBAIJSetPreallocation_SeqBAIJ(C,bs,0,col);
1584: PetscFree(col);
1585: } else {
1586: C = *B;
1587: }
1589: array = a->a;
1590: PetscMalloc2(bs,PetscInt,&rows,bs,PetscInt,&cols);
1591: for (i=0; i<mbs; i++) {
1592: cols[0] = i*bs;
1593: for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1;
1594: len = ai[i+1] - ai[i];
1595: for (j=0; j<len; j++) {
1596: rows[0] = (*aj++)*bs;
1597: for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1;
1598: MatSetValues_SeqBAIJ(C,bs,rows,bs,cols,array,INSERT_VALUES);
1599: array += bs2;
1600: }
1601: }
1602: PetscFree2(rows,cols);
1603:
1604: MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);
1605: MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);
1606:
1607: if (reuse == MAT_INITIAL_MATRIX || *B != A) {
1608: *B = C;
1609: } else {
1610: MatHeaderMerge(A,C);
1611: }
1612: return(0);
1613: }
1615: EXTERN_C_BEGIN
1618: PetscErrorCode MatIsTranspose_SeqBAIJ(Mat A,Mat B,PetscReal tol,PetscBool *f)
1619: {
1621: Mat Btrans;
1624: *f = PETSC_FALSE;
1625: MatTranspose_SeqBAIJ(A,MAT_INITIAL_MATRIX,&Btrans);
1626: MatEqual_SeqBAIJ(B,Btrans,f);
1627: MatDestroy(&Btrans);
1628: return(0);
1629: }
1630: EXTERN_C_END
1634: static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer)
1635: {
1636: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1638: PetscInt i,*col_lens,bs = A->rmap->bs,count,*jj,j,k,l,bs2=a->bs2;
1639: int fd;
1640: PetscScalar *aa;
1641: FILE *file;
1644: PetscViewerBinaryGetDescriptor(viewer,&fd);
1645: PetscMalloc((4+A->rmap->N)*sizeof(PetscInt),&col_lens);
1646: col_lens[0] = MAT_FILE_CLASSID;
1648: col_lens[1] = A->rmap->N;
1649: col_lens[2] = A->cmap->n;
1650: col_lens[3] = a->nz*bs2;
1652: /* store lengths of each row and write (including header) to file */
1653: count = 0;
1654: for (i=0; i<a->mbs; i++) {
1655: for (j=0; j<bs; j++) {
1656: col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
1657: }
1658: }
1659: PetscBinaryWrite(fd,col_lens,4+A->rmap->N,PETSC_INT,PETSC_TRUE);
1660: PetscFree(col_lens);
1662: /* store column indices (zero start index) */
1663: PetscMalloc((a->nz+1)*bs2*sizeof(PetscInt),&jj);
1664: count = 0;
1665: for (i=0; i<a->mbs; i++) {
1666: for (j=0; j<bs; j++) {
1667: for (k=a->i[i]; k<a->i[i+1]; k++) {
1668: for (l=0; l<bs; l++) {
1669: jj[count++] = bs*a->j[k] + l;
1670: }
1671: }
1672: }
1673: }
1674: PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);
1675: PetscFree(jj);
1677: /* store nonzero values */
1678: PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);
1679: count = 0;
1680: for (i=0; i<a->mbs; i++) {
1681: for (j=0; j<bs; j++) {
1682: for (k=a->i[i]; k<a->i[i+1]; k++) {
1683: for (l=0; l<bs; l++) {
1684: aa[count++] = a->a[bs2*k + l*bs + j];
1685: }
1686: }
1687: }
1688: }
1689: PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);
1690: PetscFree(aa);
1692: PetscViewerBinaryGetInfoPointer(viewer,&file);
1693: if (file) {
1694: fprintf(file,"-matload_block_size %d\n",(int)A->rmap->bs);
1695: }
1696: return(0);
1697: }
1701: static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer)
1702: {
1703: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1704: PetscErrorCode ierr;
1705: PetscInt i,j,bs = A->rmap->bs,k,l,bs2=a->bs2;
1706: PetscViewerFormat format;
1709: PetscViewerGetFormat(viewer,&format);
1710: if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1711: PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);
1712: } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
1713: Mat aij;
1714: MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);
1715: MatView(aij,viewer);
1716: MatDestroy(&aij);
1717: } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1718: return(0);
1719: } else if (format == PETSC_VIEWER_ASCII_COMMON) {
1720: PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);
1721: PetscObjectPrintClassNamePrefixType((PetscObject)A,viewer,"Matrix Object");
1722: for (i=0; i<a->mbs; i++) {
1723: for (j=0; j<bs; j++) {
1724: PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);
1725: for (k=a->i[i]; k<a->i[i+1]; k++) {
1726: for (l=0; l<bs; l++) {
1727: #if defined(PETSC_USE_COMPLEX)
1728: if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1729: PetscViewerASCIIPrintf(viewer," (%D, %G + %Gi) ",bs*a->j[k]+l,
1730: PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));
1731: } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1732: PetscViewerASCIIPrintf(viewer," (%D, %G - %Gi) ",bs*a->j[k]+l,
1733: PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));
1734: } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1735: PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));
1736: }
1737: #else
1738: if (a->a[bs2*k + l*bs + j] != 0.0) {
1739: PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);
1740: }
1741: #endif
1742: }
1743: }
1744: PetscViewerASCIIPrintf(viewer,"\n");
1745: }
1746: }
1747: PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);
1748: } else {
1749: PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);
1750: PetscObjectPrintClassNamePrefixType((PetscObject)A,viewer,"Matrix Object");
1751: for (i=0; i<a->mbs; i++) {
1752: for (j=0; j<bs; j++) {
1753: PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);
1754: for (k=a->i[i]; k<a->i[i+1]; k++) {
1755: for (l=0; l<bs; l++) {
1756: #if defined(PETSC_USE_COMPLEX)
1757: if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
1758: PetscViewerASCIIPrintf(viewer," (%D, %G + %G i) ",bs*a->j[k]+l,
1759: PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));
1760: } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
1761: PetscViewerASCIIPrintf(viewer," (%D, %G - %G i) ",bs*a->j[k]+l,
1762: PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));
1763: } else {
1764: PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));
1765: }
1766: #else
1767: PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);
1768: #endif
1769: }
1770: }
1771: PetscViewerASCIIPrintf(viewer,"\n");
1772: }
1773: }
1774: PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);
1775: }
1776: PetscViewerFlush(viewer);
1777: return(0);
1778: }
1782: static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
1783: {
1784: Mat A = (Mat) Aa;
1785: Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data;
1787: PetscInt row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap->bs,bs2=a->bs2;
1788: PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r;
1789: MatScalar *aa;
1790: PetscViewer viewer;
1791: PetscViewerFormat format;
1794: PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);
1795: PetscViewerGetFormat(viewer,&format);
1797: PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);
1799: /* loop over matrix elements drawing boxes */
1801: if (format != PETSC_VIEWER_DRAW_CONTOUR) {
1802: color = PETSC_DRAW_BLUE;
1803: for (i=0,row=0; i<mbs; i++,row+=bs) {
1804: for (j=a->i[i]; j<a->i[i+1]; j++) {
1805: y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1806: x_l = a->j[j]*bs; x_r = x_l + 1.0;
1807: aa = a->a + j*bs2;
1808: for (k=0; k<bs; k++) {
1809: for (l=0; l<bs; l++) {
1810: if (PetscRealPart(*aa++) >= 0.) continue;
1811: PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
1812: }
1813: }
1814: }
1815: }
1816: color = PETSC_DRAW_CYAN;
1817: for (i=0,row=0; i<mbs; i++,row+=bs) {
1818: for (j=a->i[i]; j<a->i[i+1]; j++) {
1819: y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1820: x_l = a->j[j]*bs; x_r = x_l + 1.0;
1821: aa = a->a + j*bs2;
1822: for (k=0; k<bs; k++) {
1823: for (l=0; l<bs; l++) {
1824: if (PetscRealPart(*aa++) != 0.) continue;
1825: PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
1826: }
1827: }
1828: }
1829: }
1830: color = PETSC_DRAW_RED;
1831: for (i=0,row=0; i<mbs; i++,row+=bs) {
1832: for (j=a->i[i]; j<a->i[i+1]; j++) {
1833: y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1834: x_l = a->j[j]*bs; x_r = x_l + 1.0;
1835: aa = a->a + j*bs2;
1836: for (k=0; k<bs; k++) {
1837: for (l=0; l<bs; l++) {
1838: if (PetscRealPart(*aa++) <= 0.) continue;
1839: PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
1840: }
1841: }
1842: }
1843: }
1844: } else {
1845: /* use contour shading to indicate magnitude of values */
1846: /* first determine max of all nonzero values */
1847: PetscDraw popup;
1848: PetscReal scale,maxv = 0.0;
1850: for (i=0; i<a->nz*a->bs2; i++) {
1851: if (PetscAbsScalar(a->a[i]) > maxv) maxv = PetscAbsScalar(a->a[i]);
1852: }
1853: scale = (245.0 - PETSC_DRAW_BASIC_COLORS)/maxv;
1854: PetscDrawGetPopup(draw,&popup);
1855: if (popup) {PetscDrawScalePopup(popup,0.0,maxv);}
1856: for (i=0,row=0; i<mbs; i++,row+=bs) {
1857: for (j=a->i[i]; j<a->i[i+1]; j++) {
1858: y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1859: x_l = a->j[j]*bs; x_r = x_l + 1.0;
1860: aa = a->a + j*bs2;
1861: for (k=0; k<bs; k++) {
1862: for (l=0; l<bs; l++) {
1863: color = PETSC_DRAW_BASIC_COLORS + (PetscInt)(scale*PetscAbsScalar(*aa++));
1864: PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
1865: }
1866: }
1867: }
1868: }
1869: }
1870: return(0);
1871: }
1875: static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer)
1876: {
1878: PetscReal xl,yl,xr,yr,w,h;
1879: PetscDraw draw;
1880: PetscBool isnull;
1884: PetscViewerDrawGetDraw(viewer,0,&draw);
1885: PetscDrawIsNull(draw,&isnull); if (isnull) return(0);
1887: PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);
1888: xr = A->cmap->n; yr = A->rmap->N; h = yr/10.0; w = xr/10.0;
1889: xr += w; yr += h; xl = -w; yl = -h;
1890: PetscDrawSetCoordinates(draw,xl,yl,xr,yr);
1891: PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);
1892: PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);
1893: return(0);
1894: }
1898: PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer)
1899: {
1901: PetscBool iascii,isbinary,isdraw;
1904: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);
1905: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);
1906: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);
1907: if (iascii){
1908: MatView_SeqBAIJ_ASCII(A,viewer);
1909: } else if (isbinary) {
1910: MatView_SeqBAIJ_Binary(A,viewer);
1911: } else if (isdraw) {
1912: MatView_SeqBAIJ_Draw(A,viewer);
1913: } else {
1914: Mat B;
1915: MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);
1916: MatView(B,viewer);
1917: MatDestroy(&B);
1918: }
1919: return(0);
1920: }
1925: PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[])
1926: {
1927: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1928: PetscInt *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
1929: PetscInt *ai = a->i,*ailen = a->ilen;
1930: PetscInt brow,bcol,ridx,cidx,bs=A->rmap->bs,bs2=a->bs2;
1931: MatScalar *ap,*aa = a->a;
1934: for (k=0; k<m; k++) { /* loop over rows */
1935: row = im[k]; brow = row/bs;
1936: if (row < 0) {v += n; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
1937: if (row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row);
1938: rp = aj + ai[brow] ; ap = aa + bs2*ai[brow] ;
1939: nrow = ailen[brow];
1940: for (l=0; l<n; l++) { /* loop over columns */
1941: if (in[l] < 0) {v++; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
1942: if (in[l] >= A->cmap->n) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]);
1943: col = in[l] ;
1944: bcol = col/bs;
1945: cidx = col%bs;
1946: ridx = row%bs;
1947: high = nrow;
1948: low = 0; /* assume unsorted */
1949: while (high-low > 5) {
1950: t = (low+high)/2;
1951: if (rp[t] > bcol) high = t;
1952: else low = t;
1953: }
1954: for (i=low; i<high; i++) {
1955: if (rp[i] > bcol) break;
1956: if (rp[i] == bcol) {
1957: *v++ = ap[bs2*i+bs*cidx+ridx];
1958: goto finished;
1959: }
1960: }
1961: *v++ = 0.0;
1962: finished:;
1963: }
1964: }
1965: return(0);
1966: }
1970: PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
1971: {
1972: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1973: PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1;
1974: PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen;
1975: PetscErrorCode ierr;
1976: PetscInt *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs,stepval;
1977: PetscBool roworiented=a->roworiented;
1978: const PetscScalar *value = v;
1979: MatScalar *ap,*aa = a->a,*bap;
1982: if (roworiented) {
1983: stepval = (n-1)*bs;
1984: } else {
1985: stepval = (m-1)*bs;
1986: }
1987: for (k=0; k<m; k++) { /* loop over added rows */
1988: row = im[k];
1989: if (row < 0) continue;
1990: #if defined(PETSC_USE_DEBUG)
1991: if (row >= a->mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,a->mbs-1);
1992: #endif
1993: rp = aj + ai[row];
1994: ap = aa + bs2*ai[row];
1995: rmax = imax[row];
1996: nrow = ailen[row];
1997: low = 0;
1998: high = nrow;
1999: for (l=0; l<n; l++) { /* loop over added columns */
2000: if (in[l] < 0) continue;
2001: #if defined(PETSC_USE_DEBUG)
2002: if (in[l] >= a->nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],a->nbs-1);
2003: #endif
2004: col = in[l];
2005: if (roworiented) {
2006: value = v + (k*(stepval+bs) + l)*bs;
2007: } else {
2008: value = v + (l*(stepval+bs) + k)*bs;
2009: }
2010: if (col <= lastcol) low = 0; else high = nrow;
2011: lastcol = col;
2012: while (high-low > 7) {
2013: t = (low+high)/2;
2014: if (rp[t] > col) high = t;
2015: else low = t;
2016: }
2017: for (i=low; i<high; i++) {
2018: if (rp[i] > col) break;
2019: if (rp[i] == col) {
2020: bap = ap + bs2*i;
2021: if (roworiented) {
2022: if (is == ADD_VALUES) {
2023: for (ii=0; ii<bs; ii++,value+=stepval) {
2024: for (jj=ii; jj<bs2; jj+=bs) {
2025: bap[jj] += *value++;
2026: }
2027: }
2028: } else {
2029: for (ii=0; ii<bs; ii++,value+=stepval) {
2030: for (jj=ii; jj<bs2; jj+=bs) {
2031: bap[jj] = *value++;
2032: }
2033: }
2034: }
2035: } else {
2036: if (is == ADD_VALUES) {
2037: for (ii=0; ii<bs; ii++,value+=bs+stepval) {
2038: for (jj=0; jj<bs; jj++) {
2039: bap[jj] += value[jj];
2040: }
2041: bap += bs;
2042: }
2043: } else {
2044: for (ii=0; ii<bs; ii++,value+=bs+stepval) {
2045: for (jj=0; jj<bs; jj++) {
2046: bap[jj] = value[jj];
2047: }
2048: bap += bs;
2049: }
2050: }
2051: }
2052: goto noinsert2;
2053: }
2054: }
2055: if (nonew == 1) goto noinsert2;
2056: if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
2057: MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
2058: N = nrow++ - 1; high++;
2059: /* shift up all the later entries in this row */
2060: for (ii=N; ii>=i; ii--) {
2061: rp[ii+1] = rp[ii];
2062: PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));
2063: }
2064: if (N >= i) {
2065: PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));
2066: }
2067: rp[i] = col;
2068: bap = ap + bs2*i;
2069: if (roworiented) {
2070: for (ii=0; ii<bs; ii++,value+=stepval) {
2071: for (jj=ii; jj<bs2; jj+=bs) {
2072: bap[jj] = *value++;
2073: }
2074: }
2075: } else {
2076: for (ii=0; ii<bs; ii++,value+=stepval) {
2077: for (jj=0; jj<bs; jj++) {
2078: *bap++ = *value++;
2079: }
2080: }
2081: }
2082: noinsert2:;
2083: low = i;
2084: }
2085: ailen[row] = nrow;
2086: }
2087: return(0);
2088: }
2092: PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
2093: {
2094: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2095: PetscInt fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax;
2096: PetscInt m = A->rmap->N,*ip,N,*ailen = a->ilen;
2098: PetscInt mbs = a->mbs,bs2 = a->bs2,rmax = 0;
2099: MatScalar *aa = a->a,*ap;
2100: PetscReal ratio=0.6;
2103: if (mode == MAT_FLUSH_ASSEMBLY) return(0);
2105: if (m) rmax = ailen[0];
2106: for (i=1; i<mbs; i++) {
2107: /* move each row back by the amount of empty slots (fshift) before it*/
2108: fshift += imax[i-1] - ailen[i-1];
2109: rmax = PetscMax(rmax,ailen[i]);
2110: if (fshift) {
2111: ip = aj + ai[i]; ap = aa + bs2*ai[i];
2112: N = ailen[i];
2113: for (j=0; j<N; j++) {
2114: ip[j-fshift] = ip[j];
2115: PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));
2116: }
2117: }
2118: ai[i] = ai[i-1] + ailen[i-1];
2119: }
2120: if (mbs) {
2121: fshift += imax[mbs-1] - ailen[mbs-1];
2122: ai[mbs] = ai[mbs-1] + ailen[mbs-1];
2123: }
2124: /* reset ilen and imax for each row */
2125: for (i=0; i<mbs; i++) {
2126: ailen[i] = imax[i] = ai[i+1] - ai[i];
2127: }
2128: a->nz = ai[mbs];
2130: /* diagonals may have moved, so kill the diagonal pointers */
2131: a->idiagvalid = PETSC_FALSE;
2132: if (fshift && a->diag) {
2133: PetscFree(a->diag);
2134: PetscLogObjectMemory(A,-(mbs+1)*sizeof(PetscInt));
2135: a->diag = 0;
2136: }
2137: if (fshift && a->nounused == -1) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "Unused space detected in matrix: %D X %D block size %D, %D unneeded", m, A->cmap->n, A->rmap->bs, fshift*bs2);
2138: PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap->n,A->rmap->bs,fshift*bs2,a->nz*bs2);
2139: PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);
2140: PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);
2141: A->info.mallocs += a->reallocs;
2142: a->reallocs = 0;
2143: A->info.nz_unneeded = (PetscReal)fshift*bs2;
2145: MatCheckCompressedRow(A,&a->compressedrow,a->i,mbs,ratio);
2146: A->same_nonzero = PETSC_TRUE;
2147: return(0);
2148: }
2150: /*
2151: This function returns an array of flags which indicate the locations of contiguous
2152: blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9]
2153: then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
2154: Assume: sizes should be long enough to hold all the values.
2155: */
2158: static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max)
2159: {
2160: PetscInt i,j,k,row;
2161: PetscBool flg;
2164: for (i=0,j=0; i<n; j++) {
2165: row = idx[i];
2166: if (row%bs!=0) { /* Not the begining of a block */
2167: sizes[j] = 1;
2168: i++;
2169: } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */
2170: sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */
2171: i++;
2172: } else { /* Begining of the block, so check if the complete block exists */
2173: flg = PETSC_TRUE;
2174: for (k=1; k<bs; k++) {
2175: if (row+k != idx[i+k]) { /* break in the block */
2176: flg = PETSC_FALSE;
2177: break;
2178: }
2179: }
2180: if (flg) { /* No break in the bs */
2181: sizes[j] = bs;
2182: i+= bs;
2183: } else {
2184: sizes[j] = 1;
2185: i++;
2186: }
2187: }
2188: }
2189: *bs_max = j;
2190: return(0);
2191: }
2192:
2195: PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b)
2196: {
2197: Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data;
2198: PetscErrorCode ierr;
2199: PetscInt i,j,k,count,*rows;
2200: PetscInt bs=A->rmap->bs,bs2=baij->bs2,*sizes,row,bs_max;
2201: PetscScalar zero = 0.0;
2202: MatScalar *aa;
2203: const PetscScalar *xx;
2204: PetscScalar *bb;
2207: /* fix right hand side if needed */
2208: if (x && b) {
2209: VecGetArrayRead(x,&xx);
2210: VecGetArray(b,&bb);
2211: for (i=0; i<is_n; i++) {
2212: bb[is_idx[i]] = diag*xx[is_idx[i]];
2213: }
2214: VecRestoreArrayRead(x,&xx);
2215: VecRestoreArray(b,&bb);
2216: }
2218: /* Make a copy of the IS and sort it */
2219: /* allocate memory for rows,sizes */
2220: PetscMalloc2(is_n,PetscInt,&rows,2*is_n,PetscInt,&sizes);
2222: /* copy IS values to rows, and sort them */
2223: for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; }
2224: PetscSortInt(is_n,rows);
2226: if (baij->keepnonzeropattern) {
2227: for (i=0; i<is_n; i++) { sizes[i] = 1; }
2228: bs_max = is_n;
2229: A->same_nonzero = PETSC_TRUE;
2230: } else {
2231: MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);
2232: A->same_nonzero = PETSC_FALSE;
2233: }
2235: for (i=0,j=0; i<bs_max; j+=sizes[i],i++) {
2236: row = rows[j];
2237: if (row < 0 || row > A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row);
2238: count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
2239: aa = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
2240: if (sizes[i] == bs && !baij->keepnonzeropattern) {
2241: if (diag != (PetscScalar)0.0) {
2242: if (baij->ilen[row/bs] > 0) {
2243: baij->ilen[row/bs] = 1;
2244: baij->j[baij->i[row/bs]] = row/bs;
2245: PetscMemzero(aa,count*bs*sizeof(MatScalar));
2246: }
2247: /* Now insert all the diagonal values for this bs */
2248: for (k=0; k<bs; k++) {
2249: (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);
2250: }
2251: } else { /* (diag == 0.0) */
2252: baij->ilen[row/bs] = 0;
2253: } /* end (diag == 0.0) */
2254: } else { /* (sizes[i] != bs) */
2255: #if defined (PETSC_USE_DEBUG)
2256: if (sizes[i] != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal Error. Value should be 1");
2257: #endif
2258: for (k=0; k<count; k++) {
2259: aa[0] = zero;
2260: aa += bs;
2261: }
2262: if (diag != (PetscScalar)0.0) {
2263: (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);
2264: }
2265: }
2266: }
2268: PetscFree2(rows,sizes);
2269: MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);
2270: return(0);
2271: }
2275: PetscErrorCode MatZeroRowsColumns_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b)
2276: {
2277: Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data;
2278: PetscErrorCode ierr;
2279: PetscInt i,j,k,count;
2280: PetscInt bs=A->rmap->bs,bs2=baij->bs2,row,col;
2281: PetscScalar zero = 0.0;
2282: MatScalar *aa;
2283: const PetscScalar *xx;
2284: PetscScalar *bb;
2285: PetscBool *zeroed,vecs = PETSC_FALSE;
2288: /* fix right hand side if needed */
2289: if (x && b) {
2290: VecGetArrayRead(x,&xx);
2291: VecGetArray(b,&bb);
2292: vecs = PETSC_TRUE;
2293: }
2294: A->same_nonzero = PETSC_TRUE;
2296: /* zero the columns */
2297: PetscMalloc(A->rmap->n*sizeof(PetscBool),&zeroed);
2298: PetscMemzero(zeroed,A->rmap->n*sizeof(PetscBool));
2299: for (i=0; i<is_n; i++) {
2300: if (is_idx[i] < 0 || is_idx[i] >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",is_idx[i]);
2301: zeroed[is_idx[i]] = PETSC_TRUE;
2302: }
2303: for (i=0; i<A->rmap->N; i++) {
2304: if (!zeroed[i]) {
2305: row = i/bs;
2306: for (j=baij->i[row]; j<baij->i[row+1]; j++) {
2307: for (k=0; k<bs; k++) {
2308: col = bs*baij->j[j] + k;
2309: if (zeroed[col]) {
2310: aa = ((MatScalar*)(baij->a)) + j*bs2 + (i%bs) + bs*k;
2311: if (vecs) bb[i] -= aa[0]*xx[col];
2312: aa[0] = 0.0;
2313: }
2314: }
2315: }
2316: } else if (vecs) bb[i] = diag*xx[i];
2317: }
2318: PetscFree(zeroed);
2319: if (vecs) {
2320: VecRestoreArrayRead(x,&xx);
2321: VecRestoreArray(b,&bb);
2322: }
2324: /* zero the rows */
2325: for (i=0; i<is_n; i++) {
2326: row = is_idx[i];
2327: count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
2328: aa = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
2329: for (k=0; k<count; k++) {
2330: aa[0] = zero;
2331: aa += bs;
2332: }
2333: if (diag != (PetscScalar)0.0) {
2334: (*A->ops->setvalues)(A,1,&row,1,&row,&diag,INSERT_VALUES);
2335: }
2336: }
2337: MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);
2338: return(0);
2339: }
2343: PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
2344: {
2345: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2346: PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1;
2347: PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen;
2348: PetscInt *aj=a->j,nonew=a->nonew,bs=A->rmap->bs,brow,bcol;
2350: PetscInt ridx,cidx,bs2=a->bs2;
2351: PetscBool roworiented=a->roworiented;
2352: MatScalar *ap,value,*aa=a->a,*bap;
2356: for (k=0; k<m; k++) { /* loop over added rows */
2357: row = im[k];
2358: brow = row/bs;
2359: if (row < 0) continue;
2360: #if defined(PETSC_USE_DEBUG)
2361: if (row >= A->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->N-1);
2362: #endif
2363: rp = aj + ai[brow];
2364: ap = aa + bs2*ai[brow];
2365: rmax = imax[brow];
2366: nrow = ailen[brow];
2367: low = 0;
2368: high = nrow;
2369: for (l=0; l<n; l++) { /* loop over added columns */
2370: if (in[l] < 0) continue;
2371: #if defined(PETSC_USE_DEBUG)
2372: if (in[l] >= A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap->n-1);
2373: #endif
2374: col = in[l]; bcol = col/bs;
2375: ridx = row % bs; cidx = col % bs;
2376: if (roworiented) {
2377: value = v[l + k*n];
2378: } else {
2379: value = v[k + l*m];
2380: }
2381: if (col <= lastcol) low = 0; else high = nrow;
2382: lastcol = col;
2383: while (high-low > 7) {
2384: t = (low+high)/2;
2385: if (rp[t] > bcol) high = t;
2386: else low = t;
2387: }
2388: for (i=low; i<high; i++) {
2389: if (rp[i] > bcol) break;
2390: if (rp[i] == bcol) {
2391: bap = ap + bs2*i + bs*cidx + ridx;
2392: if (is == ADD_VALUES) *bap += value;
2393: else *bap = value;
2394: goto noinsert1;
2395: }
2396: }
2397: if (nonew == 1) goto noinsert1;
2398: if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
2399: MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
2400: N = nrow++ - 1; high++;
2401: /* shift up all the later entries in this row */
2402: for (ii=N; ii>=i; ii--) {
2403: rp[ii+1] = rp[ii];
2404: PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));
2405: }
2406: if (N>=i) {
2407: PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));
2408: }
2409: rp[i] = bcol;
2410: ap[bs2*i + bs*cidx + ridx] = value;
2411: a->nz++;
2412: noinsert1:;
2413: low = i;
2414: }
2415: ailen[brow] = nrow;
2416: }
2417: A->same_nonzero = PETSC_FALSE;
2418: return(0);
2419: }
2423: PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,const MatFactorInfo *info)
2424: {
2425: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inA->data;
2426: Mat outA;
2428: PetscBool row_identity,col_identity;
2431: if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU");
2432: ISIdentity(row,&row_identity);
2433: ISIdentity(col,&col_identity);
2434: if (!row_identity || !col_identity) {
2435: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU");
2436: }
2438: outA = inA;
2439: inA->factortype = MAT_FACTOR_LU;
2441: MatMarkDiagonal_SeqBAIJ(inA);
2443: PetscObjectReference((PetscObject)row);
2444: ISDestroy(&a->row);
2445: a->row = row;
2446: PetscObjectReference((PetscObject)col);
2447: ISDestroy(&a->col);
2448: a->col = col;
2449:
2450: /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
2451: ISDestroy(&a->icol);
2452: ISInvertPermutation(col,PETSC_DECIDE,&a->icol);
2453: PetscLogObjectParent(inA,a->icol);
2454:
2455: MatSeqBAIJSetNumericFactorization_inplace(inA,(PetscBool)(row_identity && col_identity));
2456: if (!a->solve_work) {
2457: PetscMalloc((inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar),&a->solve_work);
2458: PetscLogObjectMemory(inA,(inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar));
2459: }
2460: MatLUFactorNumeric(outA,inA,info);
2462: return(0);
2463: }
2465: EXTERN_C_BEGIN
2468: PetscErrorCode MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices)
2469: {
2470: Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data;
2471: PetscInt i,nz,mbs;
2474: nz = baij->maxnz;
2475: mbs = baij->mbs;
2476: for (i=0; i<nz; i++) {
2477: baij->j[i] = indices[i];
2478: }
2479: baij->nz = nz;
2480: for (i=0; i<mbs; i++) {
2481: baij->ilen[i] = baij->imax[i];
2482: }
2483: return(0);
2484: }
2485: EXTERN_C_END
2489: /*@
2490: MatSeqBAIJSetColumnIndices - Set the column indices for all the rows
2491: in the matrix.
2493: Input Parameters:
2494: + mat - the SeqBAIJ matrix
2495: - indices - the column indices
2497: Level: advanced
2499: Notes:
2500: This can be called if you have precomputed the nonzero structure of the
2501: matrix and want to provide it to the matrix object to improve the performance
2502: of the MatSetValues() operation.
2504: You MUST have set the correct numbers of nonzeros per row in the call to
2505: MatCreateSeqBAIJ(), and the columns indices MUST be sorted.
2507: MUST be called before any calls to MatSetValues();
2509: @*/
2510: PetscErrorCode MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices)
2511: {
2517: PetscUseMethod(mat,"MatSeqBAIJSetColumnIndices_C",(Mat,PetscInt *),(mat,indices));
2518: return(0);
2519: }
2523: PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[])
2524: {
2525: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2527: PetscInt i,j,n,row,bs,*ai,*aj,mbs;
2528: PetscReal atmp;
2529: PetscScalar *x,zero = 0.0;
2530: MatScalar *aa;
2531: PetscInt ncols,brow,krow,kcol;
2534: if (A->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
2535: bs = A->rmap->bs;
2536: aa = a->a;
2537: ai = a->i;
2538: aj = a->j;
2539: mbs = a->mbs;
2541: VecSet(v,zero);
2542: VecGetArray(v,&x);
2543: VecGetLocalSize(v,&n);
2544: if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
2545: for (i=0; i<mbs; i++) {
2546: ncols = ai[1] - ai[0]; ai++;
2547: brow = bs*i;
2548: for (j=0; j<ncols; j++){
2549: for (kcol=0; kcol<bs; kcol++){
2550: for (krow=0; krow<bs; krow++){
2551: atmp = PetscAbsScalar(*aa);aa++;
2552: row = brow + krow; /* row index */
2553: /* printf("val[%d,%d]: %G\n",row,bcol+kcol,atmp); */
2554: if (PetscAbsScalar(x[row]) < atmp) {x[row] = atmp; if (idx) idx[row] = bs*(*aj) + kcol;}
2555: }
2556: }
2557: aj++;
2558: }
2559: }
2560: VecRestoreArray(v,&x);
2561: return(0);
2562: }
2566: PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str)
2567: {
2571: /* If the two matrices have the same copy implementation, use fast copy. */
2572: if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) {
2573: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2574: Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)B->data;
2575: PetscInt ambs=a->mbs,bmbs=b->mbs,abs=A->rmap->bs,bbs=B->rmap->bs,bs2=abs*abs;
2577: if (a->i[ambs] != b->i[bmbs]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Number of nonzero blocks in matrices A %D and B %D are different",a->i[ambs],b->i[bmbs]);
2578: if (abs != bbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Block size A %D and B %D are different",abs,bbs);
2579: PetscMemcpy(b->a,a->a,(bs2*a->i[ambs])*sizeof(PetscScalar));
2580: } else {
2581: MatCopy_Basic(A,B,str);
2582: }
2583: return(0);
2584: }
2588: PetscErrorCode MatSetUp_SeqBAIJ(Mat A)
2589: {
2593: MatSeqBAIJSetPreallocation_SeqBAIJ(A,A->rmap->bs,PETSC_DEFAULT,0);
2594: return(0);
2595: }
2599: PetscErrorCode MatGetArray_SeqBAIJ(Mat A,PetscScalar *array[])
2600: {
2601: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2603: *array = a->a;
2604: return(0);
2605: }
2609: PetscErrorCode MatRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[])
2610: {
2612: return(0);
2613: }
2617: PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2618: {
2619: Mat_SeqBAIJ *x = (Mat_SeqBAIJ *)X->data,*y = (Mat_SeqBAIJ *)Y->data;
2621: PetscInt i,bs=Y->rmap->bs,j,bs2=bs*bs;
2622: PetscBLASInt one=1;
2625: if (str == SAME_NONZERO_PATTERN) {
2626: PetscScalar alpha = a;
2627: PetscBLASInt bnz = PetscBLASIntCast(x->nz*bs2);
2628: BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
2629: } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2630: if (y->xtoy && y->XtoY != X) {
2631: PetscFree(y->xtoy);
2632: MatDestroy(&y->XtoY);
2633: }
2634: if (!y->xtoy) { /* get xtoy */
2635: MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);
2636: y->XtoY = X;
2637: PetscObjectReference((PetscObject)X);
2638: }
2639: for (i=0; i<x->nz; i++) {
2640: j = 0;
2641: while (j < bs2){
2642: y->a[bs2*y->xtoy[i]+j] += a*(x->a[bs2*i+j]);
2643: j++;
2644: }
2645: }
2646: PetscInfo3(Y,"ratio of nnz(X)/nnz(Y): %D/%D = %G\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz));
2647: } else {
2648: MatAXPY_Basic(Y,a,X,str);
2649: }
2650: return(0);
2651: }
2655: PetscErrorCode MatRealPart_SeqBAIJ(Mat A)
2656: {
2657: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2658: PetscInt i,nz = a->bs2*a->i[a->mbs];
2659: MatScalar *aa = a->a;
2662: for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]);
2663: return(0);
2664: }
2668: PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A)
2669: {
2670: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2671: PetscInt i,nz = a->bs2*a->i[a->mbs];
2672: MatScalar *aa = a->a;
2675: for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]);
2676: return(0);
2677: }
2679: extern PetscErrorCode MatFDColoringCreate_SeqAIJ(Mat,ISColoring,MatFDColoring);
2683: /*
2684: Code almost idential to MatGetColumnIJ_SeqAIJ() should share common code
2685: */
2686: PetscErrorCode MatGetColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscBool *done)
2687: {
2688: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2690: PetscInt bs = A->rmap->bs,i,*collengths,*cia,*cja,n = A->cmap->n/bs,m = A->rmap->n/bs;
2691: PetscInt nz = a->i[m],row,*jj,mr,col;
2694: *nn = n;
2695: if (!ia) return(0);
2696: if (symmetric) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not for BAIJ matrices");
2697: else {
2698: PetscMalloc((n+1)*sizeof(PetscInt),&collengths);
2699: PetscMemzero(collengths,n*sizeof(PetscInt));
2700: PetscMalloc((n+1)*sizeof(PetscInt),&cia);
2701: PetscMalloc((nz+1)*sizeof(PetscInt),&cja);
2702: jj = a->j;
2703: for (i=0; i<nz; i++) {
2704: collengths[jj[i]]++;
2705: }
2706: cia[0] = oshift;
2707: for (i=0; i<n; i++) {
2708: cia[i+1] = cia[i] + collengths[i];
2709: }
2710: PetscMemzero(collengths,n*sizeof(PetscInt));
2711: jj = a->j;
2712: for (row=0; row<m; row++) {
2713: mr = a->i[row+1] - a->i[row];
2714: for (i=0; i<mr; i++) {
2715: col = *jj++;
2716: cja[cia[col] + collengths[col]++ - oshift] = row + oshift;
2717: }
2718: }
2719: PetscFree(collengths);
2720: *ia = cia; *ja = cja;
2721: }
2722: return(0);
2723: }
2727: PetscErrorCode MatRestoreColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,PetscInt *ia[],PetscInt *ja[],PetscBool *done)
2728: {
2732: if (!ia) return(0);
2733: PetscFree(*ia);
2734: PetscFree(*ja);
2735: return(0);
2736: }
2740: PetscErrorCode MatFDColoringApply_BAIJ(Mat J,MatFDColoring coloring,Vec x1,MatStructure *flag,void *sctx)
2741: {
2742: PetscErrorCode (*f)(void*,Vec,Vec,void*) = (PetscErrorCode (*)(void*,Vec,Vec,void *))coloring->f;
2744: PetscInt bs = J->rmap->bs,i,j,k,start,end,l,row,col,*srows,**vscaleforrow,m1,m2;
2745: PetscScalar dx,*y,*xx,*w3_array;
2746: PetscScalar *vscale_array;
2747: PetscReal epsilon = coloring->error_rel,umin = coloring->umin,unorm;
2748: Vec w1=coloring->w1,w2=coloring->w2,w3;
2749: void *fctx = coloring->fctx;
2750: PetscBool flg = PETSC_FALSE;
2751: PetscInt ctype=coloring->ctype,N,col_start=0,col_end=0;
2752: Vec x1_tmp;
2758: if (!f) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call MatFDColoringSetFunction()");
2760: PetscLogEventBegin(MAT_FDColoringApply,coloring,J,x1,0);
2761: MatSetUnfactored(J);
2762: PetscOptionsGetBool(PETSC_NULL,"-mat_fd_coloring_dont_rezero",&flg,PETSC_NULL);
2763: if (flg) {
2764: PetscInfo(coloring,"Not calling MatZeroEntries()\n");
2765: } else {
2766: PetscBool assembled;
2767: MatAssembled(J,&assembled);
2768: if (assembled) {
2769: MatZeroEntries(J);
2770: }
2771: }
2773: x1_tmp = x1;
2774: if (!coloring->vscale){
2775: VecDuplicate(x1_tmp,&coloring->vscale);
2776: }
2777:
2778: /*
2779: This is a horrible, horrible, hack.
2780: */
2781: if (coloring->F) {
2782: VecGetLocalSize(coloring->F,&m1);
2783: VecGetLocalSize(w1,&m2);
2784: if (m1 != m2) {
2785: coloring->F = 0;
2786: }
2787: }
2789: if (coloring->htype[0] == 'w') { /* tacky test; need to make systematic if we add other approaches to computing h*/
2790: VecNorm(x1_tmp,NORM_2,&unorm);
2791: }
2792: VecGetOwnershipRange(w1,&start,&end); /* OwnershipRange is used by ghosted x! */
2793:
2794: /* Set w1 = F(x1) */
2795: if (coloring->F) {
2796: w1 = coloring->F; /* use already computed value of function */
2797: coloring->F = 0;
2798: } else {
2799: PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);
2800: (*f)(sctx,x1_tmp,w1,fctx);
2801: PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);
2802: }
2803:
2804: if (!coloring->w3) {
2805: VecDuplicate(x1_tmp,&coloring->w3);
2806: PetscLogObjectParent(coloring,coloring->w3);
2807: }
2808: w3 = coloring->w3;
2810: CHKMEMQ;
2811: /* Compute all the local scale factors, including ghost points */
2812: VecGetLocalSize(x1_tmp,&N);
2813: VecGetArray(x1_tmp,&xx);
2814: VecGetArray(coloring->vscale,&vscale_array);
2815: if (ctype == IS_COLORING_GHOSTED){
2816: col_start = 0; col_end = N;
2817: } else if (ctype == IS_COLORING_GLOBAL){
2818: xx = xx - start;
2819: vscale_array = vscale_array - start;
2820: col_start = start; col_end = N + start;
2821: } CHKMEMQ;
2822: for (col=col_start; col<col_end; col++){
2823: /* Loop over each local column, vscale[col] = 1./(epsilon*dx[col]) */
2824: if (coloring->htype[0] == 'w') {
2825: dx = 1.0 + unorm;
2826: } else {
2827: dx = xx[col];
2828: }
2829: if (dx == (PetscScalar)0.0) dx = 1.0;
2830: #if !defined(PETSC_USE_COMPLEX)
2831: if (dx < umin && dx >= 0.0) dx = umin;
2832: else if (dx < 0.0 && dx > -umin) dx = -umin;
2833: #else
2834: if (PetscAbsScalar(dx) < umin && PetscRealPart(dx) >= 0.0) dx = umin;
2835: else if (PetscRealPart(dx) < 0.0 && PetscAbsScalar(dx) < umin) dx = -umin;
2836: #endif
2837: dx *= epsilon;
2838: vscale_array[col] = (PetscScalar)1.0/dx;
2839: } CHKMEMQ;
2840: if (ctype == IS_COLORING_GLOBAL) vscale_array = vscale_array + start;
2841: VecRestoreArray(coloring->vscale,&vscale_array);
2842: if (ctype == IS_COLORING_GLOBAL){
2843: VecGhostUpdateBegin(coloring->vscale,INSERT_VALUES,SCATTER_FORWARD);
2844: VecGhostUpdateEnd(coloring->vscale,INSERT_VALUES,SCATTER_FORWARD);
2845: }
2846: CHKMEMQ;
2847: if (coloring->vscaleforrow) {
2848: vscaleforrow = coloring->vscaleforrow;
2849: } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_NULL,"Null Object: coloring->vscaleforrow");
2851: PetscMalloc(bs*sizeof(PetscInt),&srows);
2852: /*
2853: Loop over each color
2854: */
2855: VecGetArray(coloring->vscale,&vscale_array);
2856: for (k=0; k<coloring->ncolors; k++) {
2857: coloring->currentcolor = k;
2858: for (i=0; i<bs; i++) {
2859: VecCopy(x1_tmp,w3);
2860: VecGetArray(w3,&w3_array);
2861: if (ctype == IS_COLORING_GLOBAL) w3_array = w3_array - start;
2862: /*
2863: Loop over each column associated with color
2864: adding the perturbation to the vector w3.
2865: */
2866: for (l=0; l<coloring->ncolumns[k]; l++) {
2867: col = i + bs*coloring->columns[k][l]; /* local column of the matrix we are probing for */
2868: if (coloring->htype[0] == 'w') {
2869: dx = 1.0 + unorm;
2870: } else {
2871: dx = xx[col];
2872: }
2873: if (dx == (PetscScalar)0.0) dx = 1.0;
2874: #if !defined(PETSC_USE_COMPLEX)
2875: if (dx < umin && dx >= 0.0) dx = umin;
2876: else if (dx < 0.0 && dx > -umin) dx = -umin;
2877: #else
2878: if (PetscAbsScalar(dx) < umin && PetscRealPart(dx) >= 0.0) dx = umin;
2879: else if (PetscRealPart(dx) < 0.0 && PetscAbsScalar(dx) < umin) dx = -umin;
2880: #endif
2881: dx *= epsilon;
2882: if (!PetscAbsScalar(dx)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Computed 0 differencing parameter");
2883: w3_array[col] += dx;
2884: }
2885: if (ctype == IS_COLORING_GLOBAL) w3_array = w3_array + start;
2886: VecRestoreArray(w3,&w3_array);
2888: /*
2889: Evaluate function at w3 = x1 + dx (here dx is a vector of perturbations)
2890: w2 = F(x1 + dx) - F(x1)
2891: */
2892: PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);
2893: (*f)(sctx,w3,w2,fctx);
2894: PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);
2895: VecAXPY(w2,-1.0,w1);
2896:
2897: /*
2898: Loop over rows of vector, putting results into Jacobian matrix
2899: */
2900: VecGetArray(w2,&y);
2901: for (l=0; l<coloring->nrows[k]; l++) {
2902: row = bs*coloring->rows[k][l]; /* local row index */
2903: col = i + bs*coloring->columnsforrow[k][l]; /* global column index */
2904: for (j=0; j<bs; j++) {
2905: y[row+j] *= vscale_array[j+bs*vscaleforrow[k][l]];
2906: srows[j] = row + start + j;
2907: }
2908: MatSetValues(J,bs,srows,1,&col,y+row,INSERT_VALUES);
2909: }
2910: VecRestoreArray(w2,&y);
2911: }
2912: } /* endof for each color */
2913: if (ctype == IS_COLORING_GLOBAL) xx = xx + start;
2914: VecRestoreArray(coloring->vscale,&vscale_array);
2915: VecRestoreArray(x1_tmp,&xx);
2916: PetscFree(srows);
2917:
2918: coloring->currentcolor = -1;
2919: MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);
2920: MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);
2921: PetscLogEventEnd(MAT_FDColoringApply,coloring,J,x1,0);
2922: return(0);
2923: }
2925: /* -------------------------------------------------------------------*/
2926: static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ,
2927: MatGetRow_SeqBAIJ,
2928: MatRestoreRow_SeqBAIJ,
2929: MatMult_SeqBAIJ_N,
2930: /* 4*/ MatMultAdd_SeqBAIJ_N,
2931: MatMultTranspose_SeqBAIJ,
2932: MatMultTransposeAdd_SeqBAIJ,
2933: 0,
2934: 0,
2935: 0,
2936: /*10*/ 0,
2937: MatLUFactor_SeqBAIJ,
2938: 0,
2939: 0,
2940: MatTranspose_SeqBAIJ,
2941: /*15*/ MatGetInfo_SeqBAIJ,
2942: MatEqual_SeqBAIJ,
2943: MatGetDiagonal_SeqBAIJ,
2944: MatDiagonalScale_SeqBAIJ,
2945: MatNorm_SeqBAIJ,
2946: /*20*/ 0,
2947: MatAssemblyEnd_SeqBAIJ,
2948: MatSetOption_SeqBAIJ,
2949: MatZeroEntries_SeqBAIJ,
2950: /*24*/ MatZeroRows_SeqBAIJ,
2951: 0,
2952: 0,
2953: 0,
2954: 0,
2955: /*29*/ MatSetUp_SeqBAIJ,
2956: 0,
2957: 0,
2958: MatGetArray_SeqBAIJ,
2959: MatRestoreArray_SeqBAIJ,
2960: /*34*/ MatDuplicate_SeqBAIJ,
2961: 0,
2962: 0,
2963: MatILUFactor_SeqBAIJ,
2964: 0,
2965: /*39*/ MatAXPY_SeqBAIJ,
2966: MatGetSubMatrices_SeqBAIJ,
2967: MatIncreaseOverlap_SeqBAIJ,
2968: MatGetValues_SeqBAIJ,
2969: MatCopy_SeqBAIJ,
2970: /*44*/ 0,
2971: MatScale_SeqBAIJ,
2972: 0,
2973: 0,
2974: MatZeroRowsColumns_SeqBAIJ,
2975: /*49*/ 0,
2976: MatGetRowIJ_SeqBAIJ,
2977: MatRestoreRowIJ_SeqBAIJ,
2978: MatGetColumnIJ_SeqBAIJ,
2979: MatRestoreColumnIJ_SeqBAIJ,
2980: /*54*/ MatFDColoringCreate_SeqAIJ,
2981: 0,
2982: 0,
2983: 0,
2984: MatSetValuesBlocked_SeqBAIJ,
2985: /*59*/ MatGetSubMatrix_SeqBAIJ,
2986: MatDestroy_SeqBAIJ,
2987: MatView_SeqBAIJ,
2988: 0,
2989: 0,
2990: /*64*/ 0,
2991: 0,
2992: 0,
2993: 0,
2994: 0,
2995: /*69*/ MatGetRowMaxAbs_SeqBAIJ,
2996: 0,
2997: MatConvert_Basic,
2998: 0,
2999: 0,
3000: /*74*/ 0,
3001: MatFDColoringApply_BAIJ,
3002: 0,
3003: 0,
3004: 0,
3005: /*79*/ 0,
3006: 0,
3007: 0,
3008: 0,
3009: MatLoad_SeqBAIJ,
3010: /*84*/ 0,
3011: 0,
3012: 0,
3013: 0,
3014: 0,
3015: /*89*/ 0,
3016: 0,
3017: 0,
3018: 0,
3019: 0,
3020: /*94*/ 0,
3021: 0,
3022: 0,
3023: 0,
3024: 0,
3025: /*99*/0,
3026: 0,
3027: 0,
3028: 0,
3029: 0,
3030: /*104*/0,
3031: MatRealPart_SeqBAIJ,
3032: MatImaginaryPart_SeqBAIJ,
3033: 0,
3034: 0,
3035: /*109*/0,
3036: 0,
3037: 0,
3038: 0,
3039: MatMissingDiagonal_SeqBAIJ,
3040: /*114*/0,
3041: 0,
3042: 0,
3043: 0,
3044: 0,
3045: /*119*/0,
3046: 0,
3047: MatMultHermitianTranspose_SeqBAIJ,
3048: MatMultHermitianTransposeAdd_SeqBAIJ,
3049: 0,
3050: /*124*/0,
3051: 0,
3052: MatInvertBlockDiagonal_SeqBAIJ
3053: };
3055: EXTERN_C_BEGIN
3058: PetscErrorCode MatStoreValues_SeqBAIJ(Mat mat)
3059: {
3060: Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data;
3061: PetscInt nz = aij->i[aij->mbs]*aij->bs2;
3065: if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
3067: /* allocate space for values if not already there */
3068: if (!aij->saved_values) {
3069: PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);
3070: PetscLogObjectMemory(mat,(nz+1)*sizeof(PetscScalar));
3071: }
3073: /* copy values over */
3074: PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));
3075: return(0);
3076: }
3077: EXTERN_C_END
3079: EXTERN_C_BEGIN
3082: PetscErrorCode MatRetrieveValues_SeqBAIJ(Mat mat)
3083: {
3084: Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data;
3086: PetscInt nz = aij->i[aij->mbs]*aij->bs2;
3089: if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
3090: if (!aij->saved_values) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatStoreValues(A);first");
3092: /* copy values over */
3093: PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));
3094: return(0);
3095: }
3096: EXTERN_C_END
3098: EXTERN_C_BEGIN
3099: extern PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*);
3100: extern PetscErrorCode MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*);
3101: EXTERN_C_END
3103: EXTERN_C_BEGIN
3106: PetscErrorCode MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz)
3107: {
3108: Mat_SeqBAIJ *b;
3110: PetscInt i,mbs,nbs,bs2;
3111: PetscBool flg,skipallocation = PETSC_FALSE,realalloc = PETSC_FALSE;
3114: if (nz >= 0 || nnz) realalloc = PETSC_TRUE;
3115: if (nz == MAT_SKIP_ALLOCATION) {
3116: skipallocation = PETSC_TRUE;
3117: nz = 0;
3118: }
3120: PetscLayoutSetBlockSize(B->rmap,bs);
3121: PetscLayoutSetBlockSize(B->cmap,bs);
3122: PetscLayoutSetUp(B->rmap);
3123: PetscLayoutSetUp(B->cmap);
3124: PetscLayoutGetBlockSize(B->rmap,&bs);
3126: B->preallocated = PETSC_TRUE;
3128: mbs = B->rmap->n/bs;
3129: nbs = B->cmap->n/bs;
3130: bs2 = bs*bs;
3132: if (mbs*bs!=B->rmap->n || nbs*bs!=B->cmap->n) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap->N,B->cmap->n,bs);
3134: if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
3135: if (nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz);
3136: if (nnz) {
3137: for (i=0; i<mbs; i++) {
3138: if (nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]);
3139: if (nnz[i] > nbs) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs);
3140: }
3141: }
3143: b = (Mat_SeqBAIJ*)B->data;
3144: PetscOptionsBegin(((PetscObject)B)->comm,PETSC_NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat");
3145: PetscOptionsBool("-mat_no_unroll","Do not optimize for block size (slow)",PETSC_NULL,PETSC_FALSE,&flg,PETSC_NULL);
3146: PetscOptionsEnd();
3148: if (!flg) {
3149: switch (bs) {
3150: case 1:
3151: B->ops->mult = MatMult_SeqBAIJ_1;
3152: B->ops->multadd = MatMultAdd_SeqBAIJ_1;
3153: B->ops->sor = MatSOR_SeqBAIJ_1;
3154: break;
3155: case 2:
3156: B->ops->mult = MatMult_SeqBAIJ_2;
3157: B->ops->multadd = MatMultAdd_SeqBAIJ_2;
3158: B->ops->sor = MatSOR_SeqBAIJ_2;
3159: break;
3160: case 3:
3161: B->ops->mult = MatMult_SeqBAIJ_3;
3162: B->ops->multadd = MatMultAdd_SeqBAIJ_3;
3163: B->ops->sor = MatSOR_SeqBAIJ_3;
3164: break;
3165: case 4:
3166: B->ops->mult = MatMult_SeqBAIJ_4;
3167: B->ops->multadd = MatMultAdd_SeqBAIJ_4;
3168: B->ops->sor = MatSOR_SeqBAIJ_4;
3169: break;
3170: case 5:
3171: B->ops->mult = MatMult_SeqBAIJ_5;
3172: B->ops->multadd = MatMultAdd_SeqBAIJ_5;
3173: B->ops->sor = MatSOR_SeqBAIJ_5;
3174: break;
3175: case 6:
3176: B->ops->mult = MatMult_SeqBAIJ_6;
3177: B->ops->multadd = MatMultAdd_SeqBAIJ_6;
3178: B->ops->sor = MatSOR_SeqBAIJ_6;
3179: break;
3180: case 7:
3181: B->ops->mult = MatMult_SeqBAIJ_7;
3182: B->ops->multadd = MatMultAdd_SeqBAIJ_7;
3183: B->ops->sor = MatSOR_SeqBAIJ_7;
3184: break;
3185: case 15:
3186: B->ops->mult = MatMult_SeqBAIJ_15_ver1;
3187: B->ops->multadd = MatMultAdd_SeqBAIJ_N;
3188: B->ops->sor = MatSOR_SeqBAIJ_N;
3189: break;
3190: default:
3191: B->ops->mult = MatMult_SeqBAIJ_N;
3192: B->ops->multadd = MatMultAdd_SeqBAIJ_N;
3193: B->ops->sor = MatSOR_SeqBAIJ_N;
3194: break;
3195: }
3196: }
3197: b->mbs = mbs;
3198: b->nbs = nbs;
3199: if (!skipallocation) {
3200: if (!b->imax) {
3201: PetscMalloc2(mbs,PetscInt,&b->imax,mbs,PetscInt,&b->ilen);
3202: PetscLogObjectMemory(B,2*mbs*sizeof(PetscInt));
3203: b->free_imax_ilen = PETSC_TRUE;
3204: }
3205: /* b->ilen will count nonzeros in each block row so far. */
3206: for (i=0; i<mbs; i++) { b->ilen[i] = 0;}
3207: if (!nnz) {
3208: if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
3209: else if (nz < 0) nz = 1;
3210: for (i=0; i<mbs; i++) b->imax[i] = nz;
3211: nz = nz*mbs;
3212: } else {
3213: nz = 0;
3214: for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];}
3215: }
3217: /* allocate the matrix space */
3218: MatSeqXAIJFreeAIJ(B,&b->a,&b->j,&b->i);
3219: PetscMalloc3(bs2*nz,PetscScalar,&b->a,nz,PetscInt,&b->j,B->rmap->N+1,PetscInt,&b->i);
3220: PetscLogObjectMemory(B,(B->rmap->N+1)*sizeof(PetscInt)+nz*(bs2*sizeof(PetscScalar)+sizeof(PetscInt)));
3221: PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));
3222: PetscMemzero(b->j,nz*sizeof(PetscInt));
3223: b->singlemalloc = PETSC_TRUE;
3224: b->i[0] = 0;
3225: for (i=1; i<mbs+1; i++) {
3226: b->i[i] = b->i[i-1] + b->imax[i-1];
3227: }
3228: b->free_a = PETSC_TRUE;
3229: b->free_ij = PETSC_TRUE;
3230: } else {
3231: b->free_a = PETSC_FALSE;
3232: b->free_ij = PETSC_FALSE;
3233: }
3235: b->bs2 = bs2;
3236: b->mbs = mbs;
3237: b->nz = 0;
3238: b->maxnz = nz;
3239: B->info.nz_unneeded = (PetscReal)b->maxnz*bs2;
3240: if (realalloc) {MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);}
3241: return(0);
3242: }
3243: EXTERN_C_END
3245: EXTERN_C_BEGIN
3248: PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
3249: {
3250: PetscInt i,m,nz,nz_max=0,*nnz;
3251: PetscScalar *values=0;
3255: if (bs < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs);
3256: PetscLayoutSetBlockSize(B->rmap,bs);
3257: PetscLayoutSetBlockSize(B->cmap,bs);
3258: PetscLayoutSetUp(B->rmap);
3259: PetscLayoutSetUp(B->cmap);
3260: PetscLayoutGetBlockSize(B->rmap,&bs);
3261: m = B->rmap->n/bs;
3263: if (ii[0] != 0) { SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %D",ii[0]); }
3264: PetscMalloc((m+1) * sizeof(PetscInt), &nnz);
3265: for(i=0; i<m; i++) {
3266: nz = ii[i+1]- ii[i];
3267: if (nz < 0) { SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "Local row %D has a negative number of columns %D",i,nz); }
3268: nz_max = PetscMax(nz_max, nz);
3269: nnz[i] = nz;
3270: }
3271: MatSeqBAIJSetPreallocation(B,bs,0,nnz);
3272: PetscFree(nnz);
3274: values = (PetscScalar*)V;
3275: if (!values) {
3276: PetscMalloc(bs*bs*(nz_max+1)*sizeof(PetscScalar),&values);
3277: PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));
3278: }
3279: for (i=0; i<m; i++) {
3280: PetscInt ncols = ii[i+1] - ii[i];
3281: const PetscInt *icols = jj + ii[i];
3282: const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
3283: MatSetValuesBlocked_SeqBAIJ(B,1,&i,ncols,icols,svals,INSERT_VALUES);
3284: }
3285: if (!V) { PetscFree(values); }
3286: MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
3287: MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
3288: MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);
3289: return(0);
3290: }
3291: EXTERN_C_END
3294: EXTERN_C_BEGIN
3295: extern PetscErrorCode MatGetFactor_seqbaij_petsc(Mat,MatFactorType,Mat*);
3296: extern PetscErrorCode MatGetFactor_seqbaij_bstrm(Mat,MatFactorType,Mat*);
3297: #if defined(PETSC_HAVE_MUMPS)
3298: extern PetscErrorCode MatGetFactor_baij_mumps(Mat,MatFactorType,Mat*);
3299: #endif
3300: extern PetscErrorCode MatGetFactorAvailable_seqbaij_petsc(Mat,MatFactorType,Mat*);
3301: EXTERN_C_END
3303: /*MC
3304: MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on
3305: block sparse compressed row format.
3307: Options Database Keys:
3308: . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions()
3310: Level: beginner
3312: .seealso: MatCreateSeqBAIJ()
3313: M*/
3315: EXTERN_C_BEGIN
3316: extern PetscErrorCode MatConvert_SeqBAIJ_SeqBSTRM(Mat, MatType,MatReuse,Mat*);
3317: EXTERN_C_END
3319: EXTERN_C_BEGIN
3322: PetscErrorCode MatCreate_SeqBAIJ(Mat B)
3323: {
3325: PetscMPIInt size;
3326: Mat_SeqBAIJ *b;
3329: MPI_Comm_size(((PetscObject)B)->comm,&size);
3330: if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Comm must be of size 1");
3332: PetscNewLog(B,Mat_SeqBAIJ,&b);
3333: B->data = (void*)b;
3334: PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));
3335: b->row = 0;
3336: b->col = 0;
3337: b->icol = 0;
3338: b->reallocs = 0;
3339: b->saved_values = 0;
3341: b->roworiented = PETSC_TRUE;
3342: b->nonew = 0;
3343: b->diag = 0;
3344: b->solve_work = 0;
3345: b->mult_work = 0;
3346: B->spptr = 0;
3347: B->info.nz_unneeded = (PetscReal)b->maxnz*b->bs2;
3348: b->keepnonzeropattern = PETSC_FALSE;
3349: b->xtoy = 0;
3350: b->XtoY = 0;
3351: B->same_nonzero = PETSC_FALSE;
3353: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactorAvailable_petsc_C",
3354: "MatGetFactorAvailable_seqbaij_petsc",
3355: MatGetFactorAvailable_seqbaij_petsc);
3356: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_petsc_C",
3357: "MatGetFactor_seqbaij_petsc",
3358: MatGetFactor_seqbaij_petsc);
3359: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_bstrm_C",
3360: "MatGetFactor_seqbaij_bstrm",
3361: MatGetFactor_seqbaij_bstrm);
3362: #if defined(PETSC_HAVE_MUMPS)
3363: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C", "MatGetFactor_baij_mumps", MatGetFactor_baij_mumps);
3364: #endif
3365: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatInvertBlockDiagonal_C",
3366: "MatInvertBlockDiagonal_SeqBAIJ",
3367: MatInvertBlockDiagonal_SeqBAIJ);
3368: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
3369: "MatStoreValues_SeqBAIJ",
3370: MatStoreValues_SeqBAIJ);
3371: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
3372: "MatRetrieveValues_SeqBAIJ",
3373: MatRetrieveValues_SeqBAIJ);
3374: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",
3375: "MatSeqBAIJSetColumnIndices_SeqBAIJ",
3376: MatSeqBAIJSetColumnIndices_SeqBAIJ);
3377: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqaij_C",
3378: "MatConvert_SeqBAIJ_SeqAIJ",
3379: MatConvert_SeqBAIJ_SeqAIJ);
3380: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C",
3381: "MatConvert_SeqBAIJ_SeqSBAIJ",
3382: MatConvert_SeqBAIJ_SeqSBAIJ);
3383: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocation_C",
3384: "MatSeqBAIJSetPreallocation_SeqBAIJ",
3385: MatSeqBAIJSetPreallocation_SeqBAIJ);
3386: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C",
3387: "MatSeqBAIJSetPreallocationCSR_SeqBAIJ",
3388: MatSeqBAIJSetPreallocationCSR_SeqBAIJ);
3389: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqbstrm_C",
3390: "MatConvert_SeqBAIJ_SeqBSTRM",
3391: MatConvert_SeqBAIJ_SeqBSTRM);
3392: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C",
3393: "MatIsTranspose_SeqBAIJ",
3394: MatIsTranspose_SeqBAIJ);
3395: PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);
3396: return(0);
3397: }
3398: EXTERN_C_END
3402: PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C,Mat A,MatDuplicateOption cpvalues,PetscBool mallocmatspace)
3403: {
3404: Mat_SeqBAIJ *c = (Mat_SeqBAIJ*)C->data,*a = (Mat_SeqBAIJ*)A->data;
3406: PetscInt i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2;
3409: if (a->i[mbs] != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupt matrix");
3411: if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3412: c->imax = a->imax;
3413: c->ilen = a->ilen;
3414: c->free_imax_ilen = PETSC_FALSE;
3415: } else {
3416: PetscMalloc2(mbs,PetscInt,&c->imax,mbs,PetscInt,&c->ilen);
3417: PetscLogObjectMemory(C,2*mbs*sizeof(PetscInt));
3418: for (i=0; i<mbs; i++) {
3419: c->imax[i] = a->imax[i];
3420: c->ilen[i] = a->ilen[i];
3421: }
3422: c->free_imax_ilen = PETSC_TRUE;
3423: }
3425: /* allocate the matrix space */
3426: if (mallocmatspace){
3427: if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3428: PetscMalloc(bs2*nz*sizeof(PetscScalar),&c->a);
3429: PetscLogObjectMemory(C,a->i[mbs]*bs2*sizeof(PetscScalar));
3430: PetscMemzero(c->a,bs2*nz*sizeof(PetscScalar));
3431: c->i = a->i;
3432: c->j = a->j;
3433: c->singlemalloc = PETSC_FALSE;
3434: c->free_a = PETSC_TRUE;
3435: c->free_ij = PETSC_FALSE;
3436: c->parent = A;
3437: C->preallocated = PETSC_TRUE;
3438: C->assembled = PETSC_TRUE;
3439: PetscObjectReference((PetscObject)A);
3440: MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);
3441: MatSetOption(C,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);
3442: } else {
3443: PetscMalloc3(bs2*nz,PetscScalar,&c->a,nz,PetscInt,&c->j,mbs+1,PetscInt,&c->i);
3444: PetscLogObjectMemory(C,a->i[mbs]*(bs2*sizeof(PetscScalar)+sizeof(PetscInt))+(mbs+1)*sizeof(PetscInt));
3445: c->singlemalloc = PETSC_TRUE;
3446: c->free_a = PETSC_TRUE;
3447: c->free_ij = PETSC_TRUE;
3448: PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));
3449: if (mbs > 0) {
3450: PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));
3451: if (cpvalues == MAT_COPY_VALUES) {
3452: PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));
3453: } else {
3454: PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));
3455: }
3456: }
3457: C->preallocated = PETSC_TRUE;
3458: C->assembled = PETSC_TRUE;
3459: }
3460: }
3462: c->roworiented = a->roworiented;
3463: c->nonew = a->nonew;
3464: PetscLayoutReference(A->rmap,&C->rmap);
3465: PetscLayoutReference(A->cmap,&C->cmap);
3466: c->bs2 = a->bs2;
3467: c->mbs = a->mbs;
3468: c->nbs = a->nbs;
3470: if (a->diag) {
3471: if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3472: c->diag = a->diag;
3473: c->free_diag = PETSC_FALSE;
3474: } else {
3475: PetscMalloc((mbs+1)*sizeof(PetscInt),&c->diag);
3476: PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt));
3477: for (i=0; i<mbs; i++) {
3478: c->diag[i] = a->diag[i];
3479: }
3480: c->free_diag = PETSC_TRUE;
3481: }
3482: } else c->diag = 0;
3483: c->nz = a->nz;
3484: c->maxnz = a->nz; /* Since we allocate exactly the right amount */
3485: c->solve_work = 0;
3486: c->mult_work = 0;
3488: c->compressedrow.use = a->compressedrow.use;
3489: c->compressedrow.nrows = a->compressedrow.nrows;
3490: c->compressedrow.check = a->compressedrow.check;
3491: if (a->compressedrow.use){
3492: i = a->compressedrow.nrows;
3493: PetscMalloc2(i+1,PetscInt,&c->compressedrow.i,i+1,PetscInt,&c->compressedrow.rindex);
3494: PetscLogObjectMemory(C,(2*i+1)*sizeof(PetscInt));
3495: PetscMemcpy(c->compressedrow.i,a->compressedrow.i,(i+1)*sizeof(PetscInt));
3496: PetscMemcpy(c->compressedrow.rindex,a->compressedrow.rindex,i*sizeof(PetscInt));
3497: } else {
3498: c->compressedrow.use = PETSC_FALSE;
3499: c->compressedrow.i = PETSC_NULL;
3500: c->compressedrow.rindex = PETSC_NULL;
3501: }
3502: C->same_nonzero = A->same_nonzero;
3503: PetscFListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);
3504: PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));
3505: return(0);
3506: }
3510: PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
3511: {
3515: MatCreate(((PetscObject)A)->comm,B);
3516: MatSetSizes(*B,A->rmap->N,A->cmap->n,A->rmap->N,A->cmap->n);
3517: MatSetType(*B,MATSEQBAIJ);
3518: MatDuplicateNoCreate_SeqBAIJ(*B,A,cpvalues,PETSC_TRUE);
3519: return(0);
3520: }
3524: PetscErrorCode MatLoad_SeqBAIJ(Mat newmat,PetscViewer viewer)
3525: {
3526: Mat_SeqBAIJ *a;
3528: PetscInt i,nz,header[4],*rowlengths=0,M,N,bs=1;
3529: PetscInt *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
3530: PetscInt kmax,jcount,block,idx,point,nzcountb,extra_rows,rows,cols;
3531: PetscInt *masked,nmask,tmp,bs2,ishift;
3532: PetscMPIInt size;
3533: int fd;
3534: PetscScalar *aa;
3535: MPI_Comm comm = ((PetscObject)viewer)->comm;
3538: PetscOptionsBegin(comm,PETSC_NULL,"Options for loading SEQBAIJ matrix","Mat");
3539: PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);
3540: PetscOptionsEnd();
3541: bs2 = bs*bs;
3543: MPI_Comm_size(comm,&size);
3544: if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"view must have one processor");
3545: PetscViewerBinaryGetDescriptor(viewer,&fd);
3546: PetscBinaryRead(fd,header,4,PETSC_INT);
3547: if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not Mat object");
3548: M = header[1]; N = header[2]; nz = header[3];
3550: if (header[3] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ");
3551: if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices");
3553: /*
3554: This code adds extra rows to make sure the number of rows is
3555: divisible by the blocksize
3556: */
3557: mbs = M/bs;
3558: extra_rows = bs - M + bs*(mbs);
3559: if (extra_rows == bs) extra_rows = 0;
3560: else mbs++;
3561: if (extra_rows) {
3562: PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");
3563: }
3565: /* Set global sizes if not already set */
3566: if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) {
3567: MatSetSizes(newmat,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows);
3568: } else { /* Check if the matrix global sizes are correct */
3569: MatGetSize(newmat,&rows,&cols);
3570: if (rows < 0 && cols < 0){ /* user might provide local size instead of global size */
3571: MatGetLocalSize(newmat,&rows,&cols);
3572: }
3573: if (M != rows || N != cols) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix in file of different length (%d, %d) than the input matrix (%d, %d)",M,N,rows,cols);
3574: }
3575:
3576: /* read in row lengths */
3577: PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);
3578: PetscBinaryRead(fd,rowlengths,M,PETSC_INT);
3579: for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
3581: /* read in column indices */
3582: PetscMalloc((nz+extra_rows)*sizeof(PetscInt),&jj);
3583: PetscBinaryRead(fd,jj,nz,PETSC_INT);
3584: for (i=0; i<extra_rows; i++) jj[nz+i] = M+i;
3586: /* loop over row lengths determining block row lengths */
3587: PetscMalloc(mbs*sizeof(PetscInt),&browlengths);
3588: PetscMemzero(browlengths,mbs*sizeof(PetscInt));
3589: PetscMalloc2(mbs,PetscInt,&mask,mbs,PetscInt,&masked);
3590: PetscMemzero(mask,mbs*sizeof(PetscInt));
3591: rowcount = 0;
3592: nzcount = 0;
3593: for (i=0; i<mbs; i++) {
3594: nmask = 0;
3595: for (j=0; j<bs; j++) {
3596: kmax = rowlengths[rowcount];
3597: for (k=0; k<kmax; k++) {
3598: tmp = jj[nzcount++]/bs;
3599: if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
3600: }
3601: rowcount++;
3602: }
3603: browlengths[i] += nmask;
3604: /* zero out the mask elements we set */
3605: for (j=0; j<nmask; j++) mask[masked[j]] = 0;
3606: }
3608: /* Do preallocation */
3609: MatSeqBAIJSetPreallocation_SeqBAIJ(newmat,bs,0,browlengths);
3610: a = (Mat_SeqBAIJ*)newmat->data;
3612: /* set matrix "i" values */
3613: a->i[0] = 0;
3614: for (i=1; i<= mbs; i++) {
3615: a->i[i] = a->i[i-1] + browlengths[i-1];
3616: a->ilen[i-1] = browlengths[i-1];
3617: }
3618: a->nz = 0;
3619: for (i=0; i<mbs; i++) a->nz += browlengths[i];
3621: /* read in nonzero values */
3622: PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);
3623: PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);
3624: for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0;
3626: /* set "a" and "j" values into matrix */
3627: nzcount = 0; jcount = 0;
3628: for (i=0; i<mbs; i++) {
3629: nzcountb = nzcount;
3630: nmask = 0;
3631: for (j=0; j<bs; j++) {
3632: kmax = rowlengths[i*bs+j];
3633: for (k=0; k<kmax; k++) {
3634: tmp = jj[nzcount++]/bs;
3635: if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
3636: }
3637: }
3638: /* sort the masked values */
3639: PetscSortInt(nmask,masked);
3641: /* set "j" values into matrix */
3642: maskcount = 1;
3643: for (j=0; j<nmask; j++) {
3644: a->j[jcount++] = masked[j];
3645: mask[masked[j]] = maskcount++;
3646: }
3647: /* set "a" values into matrix */
3648: ishift = bs2*a->i[i];
3649: for (j=0; j<bs; j++) {
3650: kmax = rowlengths[i*bs+j];
3651: for (k=0; k<kmax; k++) {
3652: tmp = jj[nzcountb]/bs ;
3653: block = mask[tmp] - 1;
3654: point = jj[nzcountb] - bs*tmp;
3655: idx = ishift + bs2*block + j + bs*point;
3656: a->a[idx] = (MatScalar)aa[nzcountb++];
3657: }
3658: }
3659: /* zero out the mask elements we set */
3660: for (j=0; j<nmask; j++) mask[masked[j]] = 0;
3661: }
3662: if (jcount != a->nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix");
3664: PetscFree(rowlengths);
3665: PetscFree(browlengths);
3666: PetscFree(aa);
3667: PetscFree(jj);
3668: PetscFree2(mask,masked);
3670: MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);
3671: MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);
3672: MatView_Private(newmat);
3673: return(0);
3674: }
3678: /*@C
3679: MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
3680: compressed row) format. For good matrix assembly performance the
3681: user should preallocate the matrix storage by setting the parameter nz
3682: (or the array nnz). By setting these parameters accurately, performance
3683: during matrix assembly can be increased by more than a factor of 50.
3685: Collective on MPI_Comm
3687: Input Parameters:
3688: + comm - MPI communicator, set to PETSC_COMM_SELF
3689: . bs - size of block
3690: . m - number of rows
3691: . n - number of columns
3692: . nz - number of nonzero blocks per block row (same for all rows)
3693: - nnz - array containing the number of nonzero blocks in the various block rows
3694: (possibly different for each block row) or PETSC_NULL
3696: Output Parameter:
3697: . A - the matrix
3699: It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3700: MatXXXXSetPreallocation() paradgm instead of this routine directly.
3701: [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3703: Options Database Keys:
3704: . -mat_no_unroll - uses code that does not unroll the loops in the
3705: block calculations (much slower)
3706: . -mat_block_size - size of the blocks to use
3708: Level: intermediate
3710: Notes:
3711: The number of rows and columns must be divisible by blocksize.
3713: If the nnz parameter is given then the nz parameter is ignored
3715: A nonzero block is any block that as 1 or more nonzeros in it
3717: The block AIJ format is fully compatible with standard Fortran 77
3718: storage. That is, the stored row and column indices can begin at
3719: either one (as in Fortran) or zero. See the users' manual for details.
3721: Specify the preallocated storage with either nz or nnz (not both).
3722: Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
3723: allocation. See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details.
3724: matrices.
3726: .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ()
3727: @*/
3728: PetscErrorCode MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
3729: {
3731:
3733: MatCreate(comm,A);
3734: MatSetSizes(*A,m,n,m,n);
3735: MatSetType(*A,MATSEQBAIJ);
3736: MatSeqBAIJSetPreallocation_SeqBAIJ(*A,bs,nz,(PetscInt*)nnz);
3737: return(0);
3738: }
3742: /*@C
3743: MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros
3744: per row in the matrix. For good matrix assembly performance the
3745: user should preallocate the matrix storage by setting the parameter nz
3746: (or the array nnz). By setting these parameters accurately, performance
3747: during matrix assembly can be increased by more than a factor of 50.
3749: Collective on MPI_Comm
3751: Input Parameters:
3752: + A - the matrix
3753: . bs - size of block
3754: . nz - number of block nonzeros per block row (same for all rows)
3755: - nnz - array containing the number of block nonzeros in the various block rows
3756: (possibly different for each block row) or PETSC_NULL
3758: Options Database Keys:
3759: . -mat_no_unroll - uses code that does not unroll the loops in the
3760: block calculations (much slower)
3761: . -mat_block_size - size of the blocks to use
3763: Level: intermediate
3765: Notes:
3766: If the nnz parameter is given then the nz parameter is ignored
3768: You can call MatGetInfo() to get information on how effective the preallocation was;
3769: for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3770: You can also run with the option -info and look for messages with the string
3771: malloc in them to see if additional memory allocation was needed.
3773: The block AIJ format is fully compatible with standard Fortran 77
3774: storage. That is, the stored row and column indices can begin at
3775: either one (as in Fortran) or zero. See the users' manual for details.
3777: Specify the preallocated storage with either nz or nnz (not both).
3778: Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
3779: allocation. See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details.
3781: .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ(), MatGetInfo()
3782: @*/
3783: PetscErrorCode MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[])
3784: {
3791: PetscTryMethod(B,"MatSeqBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[]),(B,bs,nz,nnz));
3792: return(0);
3793: }
3797: /*@C
3798: MatSeqBAIJSetPreallocationCSR - Allocates memory for a sparse sequential matrix in AIJ format
3799: (the default sequential PETSc format).
3801: Collective on MPI_Comm
3803: Input Parameters:
3804: + A - the matrix
3805: . i - the indices into j for the start of each local row (starts with zero)
3806: . j - the column indices for each local row (starts with zero) these must be sorted for each row
3807: - v - optional values in the matrix
3809: Level: developer
3811: .keywords: matrix, aij, compressed row, sparse
3813: .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatSeqBAIJSetPreallocation(), MATSEQBAIJ
3814: @*/
3815: PetscErrorCode MatSeqBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3816: {
3823: PetscTryMethod(B,"MatSeqBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));
3824: return(0);
3825: }
3830: /*@
3831: MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements provided by the user.
3833: Collective on MPI_Comm
3835: Input Parameters:
3836: + comm - must be an MPI communicator of size 1
3837: . bs - size of block
3838: . m - number of rows
3839: . n - number of columns
3840: . i - row indices
3841: . j - column indices
3842: - a - matrix values
3844: Output Parameter:
3845: . mat - the matrix
3847: Level: advanced
3849: Notes:
3850: The i, j, and a arrays are not copied by this routine, the user must free these arrays
3851: once the matrix is destroyed
3853: You cannot set new nonzero locations into this matrix, that will generate an error.
3855: The i and j indices are 0 based
3857: When block size is greater than 1 the matrix values must be stored using the BAIJ storage format (see the BAIJ code to determine this).
3860: .seealso: MatCreate(), MatCreateBAIJ(), MatCreateSeqBAIJ()
3862: @*/
3863: PetscErrorCode MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt* i,PetscInt*j,PetscScalar *a,Mat *mat)
3864: {
3866: PetscInt ii;
3867: Mat_SeqBAIJ *baij;
3870: if (bs != 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs);
3871: if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3872:
3873: MatCreate(comm,mat);
3874: MatSetSizes(*mat,m,n,m,n);
3875: MatSetType(*mat,MATSEQBAIJ);
3876: MatSeqBAIJSetPreallocation_SeqBAIJ(*mat,bs,MAT_SKIP_ALLOCATION,0);
3877: baij = (Mat_SeqBAIJ*)(*mat)->data;
3878: PetscMalloc2(m,PetscInt,&baij->imax,m,PetscInt,&baij->ilen);
3879: PetscLogObjectMemory(*mat,2*m*sizeof(PetscInt));
3881: baij->i = i;
3882: baij->j = j;
3883: baij->a = a;
3884: baij->singlemalloc = PETSC_FALSE;
3885: baij->nonew = -1; /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/
3886: baij->free_a = PETSC_FALSE;
3887: baij->free_ij = PETSC_FALSE;
3889: for (ii=0; ii<m; ii++) {
3890: baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii];
3891: #if defined(PETSC_USE_DEBUG)
3892: if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]);
3893: #endif
3894: }
3895: #if defined(PETSC_USE_DEBUG)
3896: for (ii=0; ii<baij->i[m]; ii++) {
3897: if (j[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]);
3898: if (j[ii] > n - 1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]);
3899: }
3900: #endif
3902: MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);
3903: MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);
3904: return(0);
3905: }