Actual source code: baijfact.c
petsc-3.7.3 2016-08-01
2: /*
3: Factorization code for BAIJ format.
4: */
5: #include <../src/mat/impls/baij/seq/baij.h>
6: #include <petsc/private/kernels/blockinvert.h>
10: PetscErrorCode MatLUFactorNumeric_SeqBAIJ_2(Mat B,Mat A,const MatFactorInfo *info)
11: {
12: Mat C =B;
13: Mat_SeqBAIJ *a =(Mat_SeqBAIJ*)A->data,*b=(Mat_SeqBAIJ*)C->data;
14: IS isrow = b->row,isicol = b->icol;
16: const PetscInt *r,*ic;
17: PetscInt i,j,k,nz,nzL,row,*pj;
18: const PetscInt n=a->mbs,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,bs2=a->bs2;
19: const PetscInt *ajtmp,*bjtmp,*bdiag=b->diag;
20: MatScalar *rtmp,*pc,*mwork,*pv;
21: MatScalar *aa=a->a,*v;
22: PetscInt flg;
23: PetscReal shift = info->shiftamount;
24: PetscBool allowzeropivot,zeropivotdetected;
27: ISGetIndices(isrow,&r);
28: ISGetIndices(isicol,&ic);
29: allowzeropivot = PetscNot(A->erroriffailure);
31: /* generate work space needed by the factorization */
32: PetscMalloc2(bs2*n,&rtmp,bs2,&mwork);
33: PetscMemzero(rtmp,bs2*n*sizeof(MatScalar));
35: for (i=0; i<n; i++) {
36: /* zero rtmp */
37: /* L part */
38: nz = bi[i+1] - bi[i];
39: bjtmp = bj + bi[i];
40: for (j=0; j<nz; j++) {
41: PetscMemzero(rtmp+bs2*bjtmp[j],bs2*sizeof(MatScalar));
42: }
44: /* U part */
45: nz = bdiag[i] - bdiag[i+1];
46: bjtmp = bj + bdiag[i+1]+1;
47: for (j=0; j<nz; j++) {
48: PetscMemzero(rtmp+bs2*bjtmp[j],bs2*sizeof(MatScalar));
49: }
51: /* load in initial (unfactored row) */
52: nz = ai[r[i]+1] - ai[r[i]];
53: ajtmp = aj + ai[r[i]];
54: v = aa + bs2*ai[r[i]];
55: for (j=0; j<nz; j++) {
56: PetscMemcpy(rtmp+bs2*ic[ajtmp[j]],v+bs2*j,bs2*sizeof(MatScalar));
57: }
59: /* elimination */
60: bjtmp = bj + bi[i];
61: nzL = bi[i+1] - bi[i];
62: for (k=0; k < nzL; k++) {
63: row = bjtmp[k];
64: pc = rtmp + bs2*row;
65: for (flg=0,j=0; j<bs2; j++) {
66: if (pc[j] != (PetscScalar)0.0) {
67: flg = 1;
68: break;
69: }
70: }
71: if (flg) {
72: pv = b->a + bs2*bdiag[row];
73: /* PetscKernel_A_gets_A_times_B(bs,pc,pv,mwork); *pc = *pc * (*pv); */
74: PetscKernel_A_gets_A_times_B_2(pc,pv,mwork);
76: pj = b->j + bdiag[row+1]+1; /* begining of U(row,:) */
77: pv = b->a + bs2*(bdiag[row+1]+1);
78: nz = bdiag[row] - bdiag[row+1] - 1; /* num of entries inU(row,:), excluding diag */
79: for (j=0; j<nz; j++) {
80: /* PetscKernel_A_gets_A_minus_B_times_C(bs,rtmp+bs2*pj[j],pc,pv+bs2*j); */
81: /* rtmp+bs2*pj[j] = rtmp+bs2*pj[j] - (*pc)*(pv+bs2*j) */
82: v = rtmp + 4*pj[j];
83: PetscKernel_A_gets_A_minus_B_times_C_2(v,pc,pv);
84: pv += 4;
85: }
86: PetscLogFlops(16*nz+12); /* flops = 2*bs^3*nz + 2*bs^3 - bs2) */
87: }
88: }
90: /* finished row so stick it into b->a */
91: /* L part */
92: pv = b->a + bs2*bi[i];
93: pj = b->j + bi[i];
94: nz = bi[i+1] - bi[i];
95: for (j=0; j<nz; j++) {
96: PetscMemcpy(pv+bs2*j,rtmp+bs2*pj[j],bs2*sizeof(MatScalar));
97: }
99: /* Mark diagonal and invert diagonal for simplier triangular solves */
100: pv = b->a + bs2*bdiag[i];
101: pj = b->j + bdiag[i];
102: PetscMemcpy(pv,rtmp+bs2*pj[0],bs2*sizeof(MatScalar));
103:
104: PetscKernel_A_gets_inverse_A_2(pv,shift,allowzeropivot,&zeropivotdetected);
105: if (zeropivotdetected) B->errortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
107: /* U part */
108: pv = b->a + bs2*(bdiag[i+1]+1);
109: pj = b->j + bdiag[i+1]+1;
110: nz = bdiag[i] - bdiag[i+1] - 1;
111: for (j=0; j<nz; j++) {
112: PetscMemcpy(pv+bs2*j,rtmp+bs2*pj[j],bs2*sizeof(MatScalar));
113: }
114: }
116: PetscFree2(rtmp,mwork);
117: ISRestoreIndices(isicol,&ic);
118: ISRestoreIndices(isrow,&r);
120: C->ops->solve = MatSolve_SeqBAIJ_2;
121: C->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_2;
122: C->assembled = PETSC_TRUE;
124: PetscLogFlops(1.333333333333*2*2*2*n); /* from inverting diagonal blocks */
125: return(0);
126: }
130: PetscErrorCode MatLUFactorNumeric_SeqBAIJ_2_NaturalOrdering(Mat B,Mat A,const MatFactorInfo *info)
131: {
132: Mat C =B;
133: Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data,*b=(Mat_SeqBAIJ*)C->data;
135: PetscInt i,j,k,nz,nzL,row,*pj;
136: const PetscInt n=a->mbs,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,bs2=a->bs2;
137: const PetscInt *ajtmp,*bjtmp,*bdiag=b->diag;
138: MatScalar *rtmp,*pc,*mwork,*pv;
139: MatScalar *aa=a->a,*v;
140: PetscInt flg;
141: PetscReal shift = info->shiftamount;
142: PetscBool allowzeropivot,zeropivotdetected;
145: allowzeropivot = PetscNot(A->erroriffailure);
147: /* generate work space needed by the factorization */
148: PetscMalloc2(bs2*n,&rtmp,bs2,&mwork);
149: PetscMemzero(rtmp,bs2*n*sizeof(MatScalar));
151: for (i=0; i<n; i++) {
152: /* zero rtmp */
153: /* L part */
154: nz = bi[i+1] - bi[i];
155: bjtmp = bj + bi[i];
156: for (j=0; j<nz; j++) {
157: PetscMemzero(rtmp+bs2*bjtmp[j],bs2*sizeof(MatScalar));
158: }
160: /* U part */
161: nz = bdiag[i] - bdiag[i+1];
162: bjtmp = bj + bdiag[i+1]+1;
163: for (j=0; j<nz; j++) {
164: PetscMemzero(rtmp+bs2*bjtmp[j],bs2*sizeof(MatScalar));
165: }
167: /* load in initial (unfactored row) */
168: nz = ai[i+1] - ai[i];
169: ajtmp = aj + ai[i];
170: v = aa + bs2*ai[i];
171: for (j=0; j<nz; j++) {
172: PetscMemcpy(rtmp+bs2*ajtmp[j],v+bs2*j,bs2*sizeof(MatScalar));
173: }
175: /* elimination */
176: bjtmp = bj + bi[i];
177: nzL = bi[i+1] - bi[i];
178: for (k=0; k < nzL; k++) {
179: row = bjtmp[k];
180: pc = rtmp + bs2*row;
181: for (flg=0,j=0; j<bs2; j++) {
182: if (pc[j]!=(PetscScalar)0.0) {
183: flg = 1;
184: break;
185: }
186: }
187: if (flg) {
188: pv = b->a + bs2*bdiag[row];
189: /* PetscKernel_A_gets_A_times_B(bs,pc,pv,mwork); *pc = *pc * (*pv); */
190: PetscKernel_A_gets_A_times_B_2(pc,pv,mwork);
192: pj = b->j + bdiag[row+1]+1; /* beginning of U(row,:) */
193: pv = b->a + bs2*(bdiag[row+1]+1);
194: nz = bdiag[row]-bdiag[row+1] - 1; /* num of entries in U(row,:) excluding diag */
195: for (j=0; j<nz; j++) {
196: /* PetscKernel_A_gets_A_minus_B_times_C(bs,rtmp+bs2*pj[j],pc,pv+bs2*j); */
197: /* rtmp+bs2*pj[j] = rtmp+bs2*pj[j] - (*pc)*(pv+bs2*j) */
198: v = rtmp + 4*pj[j];
199: PetscKernel_A_gets_A_minus_B_times_C_2(v,pc,pv);
200: pv += 4;
201: }
202: PetscLogFlops(16*nz+12); /* flops = 2*bs^3*nz + 2*bs^3 - bs2) */
203: }
204: }
206: /* finished row so stick it into b->a */
207: /* L part */
208: pv = b->a + bs2*bi[i];
209: pj = b->j + bi[i];
210: nz = bi[i+1] - bi[i];
211: for (j=0; j<nz; j++) {
212: PetscMemcpy(pv+bs2*j,rtmp+bs2*pj[j],bs2*sizeof(MatScalar));
213: }
215: /* Mark diagonal and invert diagonal for simplier triangular solves */
216: pv = b->a + bs2*bdiag[i];
217: pj = b->j + bdiag[i];
218: PetscMemcpy(pv,rtmp+bs2*pj[0],bs2*sizeof(MatScalar));
219:
220: PetscKernel_A_gets_inverse_A_2(pv,shift,allowzeropivot,&zeropivotdetected);
221: if (zeropivotdetected) B->errortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
223: /* U part */
224: /*
225: pv = b->a + bs2*bi[2*n-i];
226: pj = b->j + bi[2*n-i];
227: nz = bi[2*n-i+1] - bi[2*n-i] - 1;
228: */
229: pv = b->a + bs2*(bdiag[i+1]+1);
230: pj = b->j + bdiag[i+1]+1;
231: nz = bdiag[i] - bdiag[i+1] - 1;
232: for (j=0; j<nz; j++) {
233: PetscMemcpy(pv+bs2*j,rtmp+bs2*pj[j],bs2*sizeof(MatScalar));
234: }
235: }
236: PetscFree2(rtmp,mwork);
238: C->ops->solve = MatSolve_SeqBAIJ_2_NaturalOrdering;
239: C->ops->forwardsolve = MatForwardSolve_SeqBAIJ_2_NaturalOrdering;
240: C->ops->backwardsolve = MatBackwardSolve_SeqBAIJ_2_NaturalOrdering;
241: C->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_2_NaturalOrdering;
242: C->assembled = PETSC_TRUE;
244: PetscLogFlops(1.333333333333*2*2*2*n); /* from inverting diagonal blocks */
245: return(0);
246: }
250: PetscErrorCode MatLUFactorNumeric_SeqBAIJ_2_inplace(Mat B,Mat A,const MatFactorInfo *info)
251: {
252: Mat C = B;
253: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data,*b = (Mat_SeqBAIJ*)C->data;
254: IS isrow = b->row,isicol = b->icol;
256: const PetscInt *r,*ic;
257: PetscInt i,j,n = a->mbs,*bi = b->i,*bj = b->j;
258: PetscInt *ajtmpold,*ajtmp,nz,row;
259: PetscInt *diag_offset=b->diag,idx,*ai=a->i,*aj=a->j,*pj;
260: MatScalar *pv,*v,*rtmp,m1,m2,m3,m4,*pc,*w,*x,x1,x2,x3,x4;
261: MatScalar p1,p2,p3,p4;
262: MatScalar *ba = b->a,*aa = a->a;
263: PetscReal shift = info->shiftamount;
264: PetscBool allowzeropivot,zeropivotdetected;
267: allowzeropivot = PetscNot(A->erroriffailure);
268: ISGetIndices(isrow,&r);
269: ISGetIndices(isicol,&ic);
270: PetscMalloc1(4*(n+1),&rtmp);
272: for (i=0; i<n; i++) {
273: nz = bi[i+1] - bi[i];
274: ajtmp = bj + bi[i];
275: for (j=0; j<nz; j++) {
276: x = rtmp+4*ajtmp[j]; x[0] = x[1] = x[2] = x[3] = 0.0;
277: }
278: /* load in initial (unfactored row) */
279: idx = r[i];
280: nz = ai[idx+1] - ai[idx];
281: ajtmpold = aj + ai[idx];
282: v = aa + 4*ai[idx];
283: for (j=0; j<nz; j++) {
284: x = rtmp+4*ic[ajtmpold[j]];
285: x[0] = v[0]; x[1] = v[1]; x[2] = v[2]; x[3] = v[3];
286: v += 4;
287: }
288: row = *ajtmp++;
289: while (row < i) {
290: pc = rtmp + 4*row;
291: p1 = pc[0]; p2 = pc[1]; p3 = pc[2]; p4 = pc[3];
292: if (p1 != (PetscScalar)0.0 || p2 != (PetscScalar)0.0 || p3 != (PetscScalar)0.0 || p4 != (PetscScalar)0.0) {
293: pv = ba + 4*diag_offset[row];
294: pj = bj + diag_offset[row] + 1;
295: x1 = pv[0]; x2 = pv[1]; x3 = pv[2]; x4 = pv[3];
296: pc[0] = m1 = p1*x1 + p3*x2;
297: pc[1] = m2 = p2*x1 + p4*x2;
298: pc[2] = m3 = p1*x3 + p3*x4;
299: pc[3] = m4 = p2*x3 + p4*x4;
300: nz = bi[row+1] - diag_offset[row] - 1;
301: pv += 4;
302: for (j=0; j<nz; j++) {
303: x1 = pv[0]; x2 = pv[1]; x3 = pv[2]; x4 = pv[3];
304: x = rtmp + 4*pj[j];
305: x[0] -= m1*x1 + m3*x2;
306: x[1] -= m2*x1 + m4*x2;
307: x[2] -= m1*x3 + m3*x4;
308: x[3] -= m2*x3 + m4*x4;
309: pv += 4;
310: }
311: PetscLogFlops(16.0*nz+12.0);
312: }
313: row = *ajtmp++;
314: }
315: /* finished row so stick it into b->a */
316: pv = ba + 4*bi[i];
317: pj = bj + bi[i];
318: nz = bi[i+1] - bi[i];
319: for (j=0; j<nz; j++) {
320: x = rtmp+4*pj[j];
321: pv[0] = x[0]; pv[1] = x[1]; pv[2] = x[2]; pv[3] = x[3];
322: pv += 4;
323: }
324: /* invert diagonal block */
325: w = ba + 4*diag_offset[i];
326: PetscKernel_A_gets_inverse_A_2(w,shift,allowzeropivot,&zeropivotdetected);
327: if (zeropivotdetected) C->errortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
328: }
330: PetscFree(rtmp);
331: ISRestoreIndices(isicol,&ic);
332: ISRestoreIndices(isrow,&r);
334: C->ops->solve = MatSolve_SeqBAIJ_2_inplace;
335: C->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_2_inplace;
336: C->assembled = PETSC_TRUE;
338: PetscLogFlops(1.333333333333*8*b->mbs); /* from inverting diagonal blocks */
339: return(0);
340: }
341: /*
342: Version for when blocks are 2 by 2 Using natural ordering
343: */
346: PetscErrorCode MatLUFactorNumeric_SeqBAIJ_2_NaturalOrdering_inplace(Mat C,Mat A,const MatFactorInfo *info)
347: {
348: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data,*b = (Mat_SeqBAIJ*)C->data;
350: PetscInt i,j,n = a->mbs,*bi = b->i,*bj = b->j;
351: PetscInt *ajtmpold,*ajtmp,nz,row;
352: PetscInt *diag_offset = b->diag,*ai=a->i,*aj=a->j,*pj;
353: MatScalar *pv,*v,*rtmp,*pc,*w,*x;
354: MatScalar p1,p2,p3,p4,m1,m2,m3,m4,x1,x2,x3,x4;
355: MatScalar *ba = b->a,*aa = a->a;
356: PetscReal shift = info->shiftamount;
357: PetscBool allowzeropivot,zeropivotdetected;
360: allowzeropivot = PetscNot(A->erroriffailure);
361: PetscMalloc1(4*(n+1),&rtmp);
362: for (i=0; i<n; i++) {
363: nz = bi[i+1] - bi[i];
364: ajtmp = bj + bi[i];
365: for (j=0; j<nz; j++) {
366: x = rtmp+4*ajtmp[j];
367: x[0] = x[1] = x[2] = x[3] = 0.0;
368: }
369: /* load in initial (unfactored row) */
370: nz = ai[i+1] - ai[i];
371: ajtmpold = aj + ai[i];
372: v = aa + 4*ai[i];
373: for (j=0; j<nz; j++) {
374: x = rtmp+4*ajtmpold[j];
375: x[0] = v[0]; x[1] = v[1]; x[2] = v[2]; x[3] = v[3];
376: v += 4;
377: }
378: row = *ajtmp++;
379: while (row < i) {
380: pc = rtmp + 4*row;
381: p1 = pc[0]; p2 = pc[1]; p3 = pc[2]; p4 = pc[3];
382: if (p1 != (PetscScalar)0.0 || p2 != (PetscScalar)0.0 || p3 != (PetscScalar)0.0 || p4 != (PetscScalar)0.0) {
383: pv = ba + 4*diag_offset[row];
384: pj = bj + diag_offset[row] + 1;
385: x1 = pv[0]; x2 = pv[1]; x3 = pv[2]; x4 = pv[3];
386: pc[0] = m1 = p1*x1 + p3*x2;
387: pc[1] = m2 = p2*x1 + p4*x2;
388: pc[2] = m3 = p1*x3 + p3*x4;
389: pc[3] = m4 = p2*x3 + p4*x4;
390: nz = bi[row+1] - diag_offset[row] - 1;
391: pv += 4;
392: for (j=0; j<nz; j++) {
393: x1 = pv[0]; x2 = pv[1]; x3 = pv[2]; x4 = pv[3];
394: x = rtmp + 4*pj[j];
395: x[0] -= m1*x1 + m3*x2;
396: x[1] -= m2*x1 + m4*x2;
397: x[2] -= m1*x3 + m3*x4;
398: x[3] -= m2*x3 + m4*x4;
399: pv += 4;
400: }
401: PetscLogFlops(16.0*nz+12.0);
402: }
403: row = *ajtmp++;
404: }
405: /* finished row so stick it into b->a */
406: pv = ba + 4*bi[i];
407: pj = bj + bi[i];
408: nz = bi[i+1] - bi[i];
409: for (j=0; j<nz; j++) {
410: x = rtmp+4*pj[j];
411: pv[0] = x[0]; pv[1] = x[1]; pv[2] = x[2]; pv[3] = x[3];
412: /*
413: printf(" col %d:",pj[j]);
414: PetscInt j1;
415: for (j1=0; j1<4; j1++) printf(" %g,",*(pv+j1));
416: printf("\n");
417: */
418: pv += 4;
419: }
420: /* invert diagonal block */
421: w = ba + 4*diag_offset[i];
422: PetscKernel_A_gets_inverse_A_2(w,shift, allowzeropivot,&zeropivotdetected);
423: if (zeropivotdetected) C->errortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
424: }
426: PetscFree(rtmp);
428: C->ops->solve = MatSolve_SeqBAIJ_2_NaturalOrdering_inplace;
429: C->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_2_NaturalOrdering_inplace;
430: C->assembled = PETSC_TRUE;
432: PetscLogFlops(1.333333333333*8*b->mbs); /* from inverting diagonal blocks */
433: return(0);
434: }
436: /* ----------------------------------------------------------- */
437: /*
438: Version for when blocks are 1 by 1.
439: */
442: PetscErrorCode MatLUFactorNumeric_SeqBAIJ_1(Mat B,Mat A,const MatFactorInfo *info)
443: {
444: Mat C =B;
445: Mat_SeqBAIJ *a =(Mat_SeqBAIJ*)A->data,*b=(Mat_SeqBAIJ*)C->data;
446: IS isrow = b->row,isicol = b->icol;
447: PetscErrorCode ierr;
448: const PetscInt *r,*ic,*ics;
449: const PetscInt n=a->mbs,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*bdiag=b->diag;
450: PetscInt i,j,k,nz,nzL,row,*pj;
451: const PetscInt *ajtmp,*bjtmp;
452: MatScalar *rtmp,*pc,multiplier,*pv;
453: const MatScalar *aa=a->a,*v;
454: PetscBool row_identity,col_identity;
455: FactorShiftCtx sctx;
456: const PetscInt *ddiag;
457: PetscReal rs;
458: MatScalar d;
461: /* MatPivotSetUp(): initialize shift context sctx */
462: PetscMemzero(&sctx,sizeof(FactorShiftCtx));
464: if (info->shifttype == (PetscReal) MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
465: ddiag = a->diag;
466: sctx.shift_top = info->zeropivot;
467: for (i=0; i<n; i++) {
468: /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
469: d = (aa)[ddiag[i]];
470: rs = -PetscAbsScalar(d) - PetscRealPart(d);
471: v = aa+ai[i];
472: nz = ai[i+1] - ai[i];
473: for (j=0; j<nz; j++) rs += PetscAbsScalar(v[j]);
474: if (rs>sctx.shift_top) sctx.shift_top = rs;
475: }
476: sctx.shift_top *= 1.1;
477: sctx.nshift_max = 5;
478: sctx.shift_lo = 0.;
479: sctx.shift_hi = 1.;
480: }
482: ISGetIndices(isrow,&r);
483: ISGetIndices(isicol,&ic);
484: PetscMalloc1(n+1,&rtmp);
485: ics = ic;
487: do {
488: sctx.newshift = PETSC_FALSE;
489: for (i=0; i<n; i++) {
490: /* zero rtmp */
491: /* L part */
492: nz = bi[i+1] - bi[i];
493: bjtmp = bj + bi[i];
494: for (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0;
496: /* U part */
497: nz = bdiag[i]-bdiag[i+1];
498: bjtmp = bj + bdiag[i+1]+1;
499: for (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0;
501: /* load in initial (unfactored row) */
502: nz = ai[r[i]+1] - ai[r[i]];
503: ajtmp = aj + ai[r[i]];
504: v = aa + ai[r[i]];
505: for (j=0; j<nz; j++) rtmp[ics[ajtmp[j]]] = v[j];
507: /* ZeropivotApply() */
508: rtmp[i] += sctx.shift_amount; /* shift the diagonal of the matrix */
510: /* elimination */
511: bjtmp = bj + bi[i];
512: row = *bjtmp++;
513: nzL = bi[i+1] - bi[i];
514: for (k=0; k < nzL; k++) {
515: pc = rtmp + row;
516: if (*pc != (PetscScalar)0.0) {
517: pv = b->a + bdiag[row];
518: multiplier = *pc * (*pv);
519: *pc = multiplier;
521: pj = b->j + bdiag[row+1]+1; /* beginning of U(row,:) */
522: pv = b->a + bdiag[row+1]+1;
523: nz = bdiag[row]-bdiag[row+1]-1; /* num of entries in U(row,:) excluding diag */
524: for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j];
525: PetscLogFlops(2.0*nz);
526: }
527: row = *bjtmp++;
528: }
530: /* finished row so stick it into b->a */
531: rs = 0.0;
532: /* L part */
533: pv = b->a + bi[i];
534: pj = b->j + bi[i];
535: nz = bi[i+1] - bi[i];
536: for (j=0; j<nz; j++) {
537: pv[j] = rtmp[pj[j]]; rs += PetscAbsScalar(pv[j]);
538: }
540: /* U part */
541: pv = b->a + bdiag[i+1]+1;
542: pj = b->j + bdiag[i+1]+1;
543: nz = bdiag[i] - bdiag[i+1]-1;
544: for (j=0; j<nz; j++) {
545: pv[j] = rtmp[pj[j]]; rs += PetscAbsScalar(pv[j]);
546: }
548: sctx.rs = rs;
549: sctx.pv = rtmp[i];
550: MatPivotCheck(B,A,info,&sctx,i);
551: if (sctx.newshift) break; /* break for-loop */
552: rtmp[i] = sctx.pv; /* sctx.pv might be updated in the case of MAT_SHIFT_INBLOCKS */
554: /* Mark diagonal and invert diagonal for simplier triangular solves */
555: pv = b->a + bdiag[i];
556: *pv = (PetscScalar)1.0/rtmp[i];
558: } /* endof for (i=0; i<n; i++) { */
560: /* MatPivotRefine() */
561: if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction>0 && sctx.nshift<sctx.nshift_max) {
562: /*
563: * if no shift in this attempt & shifting & started shifting & can refine,
564: * then try lower shift
565: */
566: sctx.shift_hi = sctx.shift_fraction;
567: sctx.shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.;
568: sctx.shift_amount = sctx.shift_fraction * sctx.shift_top;
569: sctx.newshift = PETSC_TRUE;
570: sctx.nshift++;
571: }
572: } while (sctx.newshift);
574: PetscFree(rtmp);
575: ISRestoreIndices(isicol,&ic);
576: ISRestoreIndices(isrow,&r);
578: ISIdentity(isrow,&row_identity);
579: ISIdentity(isicol,&col_identity);
580: if (row_identity && col_identity) {
581: C->ops->solve = MatSolve_SeqBAIJ_1_NaturalOrdering;
582: C->ops->forwardsolve = MatForwardSolve_SeqBAIJ_1_NaturalOrdering;
583: C->ops->backwardsolve = MatBackwardSolve_SeqBAIJ_1_NaturalOrdering;
584: C->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_1_NaturalOrdering;
585: } else {
586: C->ops->solve = MatSolve_SeqBAIJ_1;
587: C->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_1;
588: }
589: C->assembled = PETSC_TRUE;
590: PetscLogFlops(C->cmap->n);
592: /* MatShiftView(A,info,&sctx) */
593: if (sctx.nshift) {
594: if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
595: PetscInfo4(A,"number of shift_pd tries %D, shift_amount %g, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,(double)sctx.shift_amount,(double)sctx.shift_fraction,(double)sctx.shift_top);
596: } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
597: PetscInfo2(A,"number of shift_nz tries %D, shift_amount %g\n",sctx.nshift,(double)sctx.shift_amount);
598: } else if (info->shifttype == (PetscReal)MAT_SHIFT_INBLOCKS) {
599: PetscInfo2(A,"number of shift_inblocks applied %D, each shift_amount %g\n",sctx.nshift,(double)info->shiftamount);
600: }
601: }
602: return(0);
603: }
607: PetscErrorCode MatLUFactorNumeric_SeqBAIJ_1_inplace(Mat C,Mat A,const MatFactorInfo *info)
608: {
609: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data,*b = (Mat_SeqBAIJ*)C->data;
610: IS isrow = b->row,isicol = b->icol;
612: const PetscInt *r,*ic;
613: PetscInt i,j,n = a->mbs,*bi = b->i,*bj = b->j;
614: PetscInt *ajtmpold,*ajtmp,nz,row,*ai = a->i,*aj = a->j;
615: PetscInt *diag_offset = b->diag,diag,*pj;
616: MatScalar *pv,*v,*rtmp,multiplier,*pc;
617: MatScalar *ba = b->a,*aa = a->a;
618: PetscBool row_identity, col_identity;
621: ISGetIndices(isrow,&r);
622: ISGetIndices(isicol,&ic);
623: PetscMalloc1(n+1,&rtmp);
625: for (i=0; i<n; i++) {
626: nz = bi[i+1] - bi[i];
627: ajtmp = bj + bi[i];
628: for (j=0; j<nz; j++) rtmp[ajtmp[j]] = 0.0;
630: /* load in initial (unfactored row) */
631: nz = ai[r[i]+1] - ai[r[i]];
632: ajtmpold = aj + ai[r[i]];
633: v = aa + ai[r[i]];
634: for (j=0; j<nz; j++) rtmp[ic[ajtmpold[j]]] = v[j];
636: row = *ajtmp++;
637: while (row < i) {
638: pc = rtmp + row;
639: if (*pc != 0.0) {
640: pv = ba + diag_offset[row];
641: pj = bj + diag_offset[row] + 1;
642: multiplier = *pc * *pv++;
643: *pc = multiplier;
644: nz = bi[row+1] - diag_offset[row] - 1;
645: for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j];
646: PetscLogFlops(1.0+2.0*nz);
647: }
648: row = *ajtmp++;
649: }
650: /* finished row so stick it into b->a */
651: pv = ba + bi[i];
652: pj = bj + bi[i];
653: nz = bi[i+1] - bi[i];
654: for (j=0; j<nz; j++) pv[j] = rtmp[pj[j]];
655: diag = diag_offset[i] - bi[i];
656: /* check pivot entry for current row */
657: if (pv[diag] == 0.0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot: row in original ordering %D in permuted ordering %D",r[i],i);
658: pv[diag] = 1.0/pv[diag];
659: }
661: PetscFree(rtmp);
662: ISRestoreIndices(isicol,&ic);
663: ISRestoreIndices(isrow,&r);
664: ISIdentity(isrow,&row_identity);
665: ISIdentity(isicol,&col_identity);
666: if (row_identity && col_identity) {
667: C->ops->solve = MatSolve_SeqBAIJ_1_NaturalOrdering_inplace;
668: C->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_1_NaturalOrdering_inplace;
669: } else {
670: C->ops->solve = MatSolve_SeqBAIJ_1_inplace;
671: C->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_1_inplace;
672: }
673: C->assembled = PETSC_TRUE;
674: PetscLogFlops(C->cmap->n);
675: return(0);
676: }
680: PETSC_INTERN PetscErrorCode MatGetFactor_seqbaij_petsc(Mat A,const MatFactorType ftype,Mat *B)
681: {
682: PetscInt n = A->rmap->n;
686: #if defined(PETSC_USE_COMPLEX)
687: if (A->hermitian && (ftype == MAT_FACTOR_CHOLESKY || ftype == MAT_FACTOR_ICC)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Hermitian Factor is not supported");
688: #endif
689: MatCreate(PetscObjectComm((PetscObject)A),B);
690: MatSetSizes(*B,n,n,n,n);
691: if (ftype == MAT_FACTOR_LU || ftype == MAT_FACTOR_ILU || ftype == MAT_FACTOR_ILUDT) {
692: MatSetType(*B,MATSEQBAIJ);
694: (*B)->ops->lufactorsymbolic = MatLUFactorSymbolic_SeqBAIJ;
695: (*B)->ops->ilufactorsymbolic = MatILUFactorSymbolic_SeqBAIJ;
696: } else if (ftype == MAT_FACTOR_CHOLESKY || ftype == MAT_FACTOR_ICC) {
697: MatSetType(*B,MATSEQSBAIJ);
698: MatSeqSBAIJSetPreallocation(*B,A->rmap->bs,MAT_SKIP_ALLOCATION,NULL);
700: (*B)->ops->iccfactorsymbolic = MatICCFactorSymbolic_SeqBAIJ;
701: (*B)->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_SeqBAIJ;
702: } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Factor type not supported");
703: (*B)->factortype = ftype;
705: PetscFree((*B)->solvertype);
706: PetscStrallocpy(MATSOLVERPETSC,&(*B)->solvertype);
707: return(0);
708: }
710: /* ----------------------------------------------------------- */
713: PetscErrorCode MatLUFactor_SeqBAIJ(Mat A,IS row,IS col,const MatFactorInfo *info)
714: {
716: Mat C;
719: MatGetFactor(A,MATSOLVERPETSC,MAT_FACTOR_LU,&C);
720: MatLUFactorSymbolic(C,A,row,col,info);
721: MatLUFactorNumeric(C,A,info);
723: A->ops->solve = C->ops->solve;
724: A->ops->solvetranspose = C->ops->solvetranspose;
726: MatHeaderMerge(A,&C);
727: PetscLogObjectParent((PetscObject)A,(PetscObject)((Mat_SeqBAIJ*)(A->data))->icol);
728: return(0);
729: }
731: #include <../src/mat/impls/sbaij/seq/sbaij.h>
734: PetscErrorCode MatCholeskyFactorNumeric_SeqBAIJ_N(Mat C,Mat A,const MatFactorInfo *info)
735: {
737: Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data;
738: Mat_SeqSBAIJ *b=(Mat_SeqSBAIJ*)C->data;
739: IS ip=b->row;
740: const PetscInt *rip;
741: PetscInt i,j,mbs=a->mbs,bs=A->rmap->bs,*bi=b->i,*bj=b->j,*bcol;
742: PetscInt *ai=a->i,*aj=a->j;
743: PetscInt k,jmin,jmax,*jl,*il,col,nexti,ili,nz;
744: MatScalar *rtmp,*ba=b->a,*bval,*aa=a->a,dk,uikdi;
745: PetscReal rs;
746: FactorShiftCtx sctx;
749: if (bs > 1) { /* convert A to a SBAIJ matrix and apply Cholesky factorization from it */
750: if (!a->sbaijMat) {
751: MatConvert(A,MATSEQSBAIJ,MAT_INITIAL_MATRIX,&a->sbaijMat);
752: }
753: (a->sbaijMat)->ops->choleskyfactornumeric(C,a->sbaijMat,info);
754: MatDestroy(&a->sbaijMat);
755: return(0);
756: }
758: /* MatPivotSetUp(): initialize shift context sctx */
759: PetscMemzero(&sctx,sizeof(FactorShiftCtx));
761: ISGetIndices(ip,&rip);
762: PetscMalloc3(mbs,&rtmp,mbs,&il,mbs,&jl);
764: sctx.shift_amount = 0.;
765: sctx.nshift = 0;
766: do {
767: sctx.newshift = PETSC_FALSE;
768: for (i=0; i<mbs; i++) {
769: rtmp[i] = 0.0; jl[i] = mbs; il[0] = 0;
770: }
772: for (k = 0; k<mbs; k++) {
773: bval = ba + bi[k];
774: /* initialize k-th row by the perm[k]-th row of A */
775: jmin = ai[rip[k]]; jmax = ai[rip[k]+1];
776: for (j = jmin; j < jmax; j++) {
777: col = rip[aj[j]];
778: if (col >= k) { /* only take upper triangular entry */
779: rtmp[col] = aa[j];
780: *bval++ = 0.0; /* for in-place factorization */
781: }
782: }
784: /* shift the diagonal of the matrix */
785: if (sctx.nshift) rtmp[k] += sctx.shift_amount;
787: /* modify k-th row by adding in those rows i with U(i,k)!=0 */
788: dk = rtmp[k];
789: i = jl[k]; /* first row to be added to k_th row */
791: while (i < k) {
792: nexti = jl[i]; /* next row to be added to k_th row */
794: /* compute multiplier, update diag(k) and U(i,k) */
795: ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */
796: uikdi = -ba[ili]*ba[bi[i]]; /* diagonal(k) */
797: dk += uikdi*ba[ili];
798: ba[ili] = uikdi; /* -U(i,k) */
800: /* add multiple of row i to k-th row */
801: jmin = ili + 1; jmax = bi[i+1];
802: if (jmin < jmax) {
803: for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j];
804: /* update il and jl for row i */
805: il[i] = jmin;
806: j = bj[jmin]; jl[i] = jl[j]; jl[j] = i;
807: }
808: i = nexti;
809: }
811: /* shift the diagonals when zero pivot is detected */
812: /* compute rs=sum of abs(off-diagonal) */
813: rs = 0.0;
814: jmin = bi[k]+1;
815: nz = bi[k+1] - jmin;
816: if (nz) {
817: bcol = bj + jmin;
818: while (nz--) {
819: rs += PetscAbsScalar(rtmp[*bcol]);
820: bcol++;
821: }
822: }
824: sctx.rs = rs;
825: sctx.pv = dk;
826: MatPivotCheck(C,A,info,&sctx,k);
827: if (sctx.newshift) break;
828: dk = sctx.pv;
830: /* copy data into U(k,:) */
831: ba[bi[k]] = 1.0/dk; /* U(k,k) */
832: jmin = bi[k]+1; jmax = bi[k+1];
833: if (jmin < jmax) {
834: for (j=jmin; j<jmax; j++) {
835: col = bj[j]; ba[j] = rtmp[col]; rtmp[col] = 0.0;
836: }
837: /* add the k-th row into il and jl */
838: il[k] = jmin;
839: i = bj[jmin]; jl[k] = jl[i]; jl[i] = k;
840: }
841: }
842: } while (sctx.newshift);
843: PetscFree3(rtmp,il,jl);
845: ISRestoreIndices(ip,&rip);
847: C->assembled = PETSC_TRUE;
848: C->preallocated = PETSC_TRUE;
850: PetscLogFlops(C->rmap->N);
851: if (sctx.nshift) {
852: if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
853: PetscInfo2(A,"number of shiftpd tries %D, shift_amount %g\n",sctx.nshift,(double)sctx.shift_amount);
854: } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
855: PetscInfo2(A,"number of shiftnz tries %D, shift_amount %g\n",sctx.nshift,(double)sctx.shift_amount);
856: }
857: }
858: return(0);
859: }
863: PetscErrorCode MatCholeskyFactorNumeric_SeqBAIJ_N_NaturalOrdering(Mat C,Mat A,const MatFactorInfo *info)
864: {
865: Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data;
866: Mat_SeqSBAIJ *b=(Mat_SeqSBAIJ*)C->data;
868: PetscInt i,j,am=a->mbs;
869: PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
870: PetscInt k,jmin,*jl,*il,nexti,ili,*acol,*bcol,nz;
871: MatScalar *rtmp,*ba=b->a,*aa=a->a,dk,uikdi,*aval,*bval;
872: PetscReal rs;
873: FactorShiftCtx sctx;
876: /* MatPivotSetUp(): initialize shift context sctx */
877: PetscMemzero(&sctx,sizeof(FactorShiftCtx));
879: PetscMalloc3(am,&rtmp,am,&il,am,&jl);
881: do {
882: sctx.newshift = PETSC_FALSE;
883: for (i=0; i<am; i++) {
884: rtmp[i] = 0.0; jl[i] = am; il[0] = 0;
885: }
887: for (k = 0; k<am; k++) {
888: /* initialize k-th row with elements nonzero in row perm(k) of A */
889: nz = ai[k+1] - ai[k];
890: acol = aj + ai[k];
891: aval = aa + ai[k];
892: bval = ba + bi[k];
893: while (nz--) {
894: if (*acol < k) { /* skip lower triangular entries */
895: acol++; aval++;
896: } else {
897: rtmp[*acol++] = *aval++;
898: *bval++ = 0.0; /* for in-place factorization */
899: }
900: }
902: /* shift the diagonal of the matrix */
903: if (sctx.nshift) rtmp[k] += sctx.shift_amount;
905: /* modify k-th row by adding in those rows i with U(i,k)!=0 */
906: dk = rtmp[k];
907: i = jl[k]; /* first row to be added to k_th row */
909: while (i < k) {
910: nexti = jl[i]; /* next row to be added to k_th row */
911: /* compute multiplier, update D(k) and U(i,k) */
912: ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */
913: uikdi = -ba[ili]*ba[bi[i]];
914: dk += uikdi*ba[ili];
915: ba[ili] = uikdi; /* -U(i,k) */
917: /* add multiple of row i to k-th row ... */
918: jmin = ili + 1;
919: nz = bi[i+1] - jmin;
920: if (nz > 0) {
921: bcol = bj + jmin;
922: bval = ba + jmin;
923: while (nz--) rtmp[*bcol++] += uikdi*(*bval++);
924: /* update il and jl for i-th row */
925: il[i] = jmin;
926: j = bj[jmin]; jl[i] = jl[j]; jl[j] = i;
927: }
928: i = nexti;
929: }
931: /* shift the diagonals when zero pivot is detected */
932: /* compute rs=sum of abs(off-diagonal) */
933: rs = 0.0;
934: jmin = bi[k]+1;
935: nz = bi[k+1] - jmin;
936: if (nz) {
937: bcol = bj + jmin;
938: while (nz--) {
939: rs += PetscAbsScalar(rtmp[*bcol]);
940: bcol++;
941: }
942: }
944: sctx.rs = rs;
945: sctx.pv = dk;
946: MatPivotCheck(C,A,info,&sctx,k);
947: if (sctx.newshift) break; /* sctx.shift_amount is updated */
948: dk = sctx.pv;
950: /* copy data into U(k,:) */
951: ba[bi[k]] = 1.0/dk;
952: jmin = bi[k]+1;
953: nz = bi[k+1] - jmin;
954: if (nz) {
955: bcol = bj + jmin;
956: bval = ba + jmin;
957: while (nz--) {
958: *bval++ = rtmp[*bcol];
959: rtmp[*bcol++] = 0.0;
960: }
961: /* add k-th row into il and jl */
962: il[k] = jmin;
963: i = bj[jmin]; jl[k] = jl[i]; jl[i] = k;
964: }
965: }
966: } while (sctx.newshift);
967: PetscFree3(rtmp,il,jl);
969: C->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
970: C->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
971: C->assembled = PETSC_TRUE;
972: C->preallocated = PETSC_TRUE;
974: PetscLogFlops(C->rmap->N);
975: if (sctx.nshift) {
976: if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
977: PetscInfo2(A,"number of shiftnz tries %D, shift_amount %g\n",sctx.nshift,(double)sctx.shift_amount);
978: } else if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
979: PetscInfo2(A,"number of shiftpd tries %D, shift_amount %g\n",sctx.nshift,(double)sctx.shift_amount);
980: }
981: }
982: return(0);
983: }
985: #include <petscbt.h>
986: #include <../src/mat/utils/freespace.h>
989: PetscErrorCode MatICCFactorSymbolic_SeqBAIJ(Mat fact,Mat A,IS perm,const MatFactorInfo *info)
990: {
991: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
992: Mat_SeqSBAIJ *b;
993: Mat B;
994: PetscErrorCode ierr;
995: PetscBool perm_identity,missing;
996: PetscInt reallocs=0,i,*ai=a->i,*aj=a->j,am=a->mbs,bs=A->rmap->bs,*ui;
997: const PetscInt *rip;
998: PetscInt jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow;
999: PetscInt nlnk,*lnk,*lnk_lvl=NULL,ncols,ncols_upper,*cols,*cols_lvl,*uj,**uj_ptr,**uj_lvl_ptr;
1000: PetscReal fill =info->fill,levels=info->levels;
1001: PetscFreeSpaceList free_space =NULL,current_space=NULL;
1002: PetscFreeSpaceList free_space_lvl=NULL,current_space_lvl=NULL;
1003: PetscBT lnkbt;
1006: MatMissingDiagonal(A,&missing,&i);
1007: if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",i);
1009: if (bs > 1) {
1010: if (!a->sbaijMat) {
1011: MatConvert(A,MATSEQSBAIJ,MAT_INITIAL_MATRIX,&a->sbaijMat);
1012: }
1013: (fact)->ops->iccfactorsymbolic = MatICCFactorSymbolic_SeqSBAIJ; /* undue the change made in MatGetFactor_seqbaij_petsc */
1015: MatICCFactorSymbolic(fact,a->sbaijMat,perm,info);
1016: return(0);
1017: }
1019: ISIdentity(perm,&perm_identity);
1020: ISGetIndices(perm,&rip);
1022: /* special case that simply copies fill pattern */
1023: if (!levels && perm_identity) {
1024: PetscMalloc1(am+1,&ui);
1025: for (i=0; i<am; i++) ui[i] = ai[i+1] - a->diag[i]; /* ui: rowlengths - changes when !perm_identity */
1026: B = fact;
1027: MatSeqSBAIJSetPreallocation(B,1,0,ui);
1030: b = (Mat_SeqSBAIJ*)B->data;
1031: uj = b->j;
1032: for (i=0; i<am; i++) {
1033: aj = a->j + a->diag[i];
1034: for (j=0; j<ui[i]; j++) *uj++ = *aj++;
1035: b->ilen[i] = ui[i];
1036: }
1037: PetscFree(ui);
1039: B->factortype = MAT_FACTOR_NONE;
1041: MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
1042: MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
1043: B->factortype = MAT_FACTOR_ICC;
1045: B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqBAIJ_N_NaturalOrdering;
1046: return(0);
1047: }
1049: /* initialization */
1050: PetscMalloc1(am+1,&ui);
1051: ui[0] = 0;
1052: PetscMalloc1(2*am+1,&cols_lvl);
1054: /* jl: linked list for storing indices of the pivot rows
1055: il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
1056: PetscMalloc4(am,&uj_ptr,am,&uj_lvl_ptr,am,&il,am,&jl);
1057: for (i=0; i<am; i++) {
1058: jl[i] = am; il[i] = 0;
1059: }
1061: /* create and initialize a linked list for storing column indices of the active row k */
1062: nlnk = am + 1;
1063: PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt);
1065: /* initial FreeSpace size is fill*(ai[am]+am)/2 */
1066: PetscFreeSpaceGet(PetscRealIntMultTruncate(fill,PetscIntSumTruncate(ai[am]/2,am/2)),&free_space);
1068: current_space = free_space;
1070: PetscFreeSpaceGet(PetscRealIntMultTruncate(fill,PetscIntSumTruncate(ai[am]/2,am/2)),&free_space_lvl);
1071: current_space_lvl = free_space_lvl;
1073: for (k=0; k<am; k++) { /* for each active row k */
1074: /* initialize lnk by the column indices of row rip[k] of A */
1075: nzk = 0;
1076: ncols = ai[rip[k]+1] - ai[rip[k]];
1077: ncols_upper = 0;
1078: cols = cols_lvl + am;
1079: for (j=0; j<ncols; j++) {
1080: i = rip[*(aj + ai[rip[k]] + j)];
1081: if (i >= k) { /* only take upper triangular entry */
1082: cols[ncols_upper] = i;
1083: cols_lvl[ncols_upper] = -1; /* initialize level for nonzero entries */
1084: ncols_upper++;
1085: }
1086: }
1087: PetscIncompleteLLAdd(ncols_upper,cols,levels,cols_lvl,am,nlnk,lnk,lnk_lvl,lnkbt);
1088: nzk += nlnk;
1090: /* update lnk by computing fill-in for each pivot row to be merged in */
1091: prow = jl[k]; /* 1st pivot row */
1093: while (prow < k) {
1094: nextprow = jl[prow];
1096: /* merge prow into k-th row */
1097: jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */
1098: jmax = ui[prow+1];
1099: ncols = jmax-jmin;
1100: i = jmin - ui[prow];
1101: cols = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */
1102: for (j=0; j<ncols; j++) cols_lvl[j] = *(uj_lvl_ptr[prow] + i + j);
1103: PetscIncompleteLLAddSorted(ncols,cols,levels,cols_lvl,am,nlnk,lnk,lnk_lvl,lnkbt);
1104: nzk += nlnk;
1106: /* update il and jl for prow */
1107: if (jmin < jmax) {
1108: il[prow] = jmin;
1110: j = *cols; jl[prow] = jl[j]; jl[j] = prow;
1111: }
1112: prow = nextprow;
1113: }
1115: /* if free space is not available, make more free space */
1116: if (current_space->local_remaining<nzk) {
1117: i = am - k + 1; /* num of unfactored rows */
1118: i = PetscMin(PetscIntMultTruncate(i,nzk), PetscIntMultTruncate(i,i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
1119: PetscFreeSpaceGet(i,¤t_space);
1120: PetscFreeSpaceGet(i,¤t_space_lvl);
1121: reallocs++;
1122: }
1124: /* copy data into free_space and free_space_lvl, then initialize lnk */
1125: PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);
1127: /* add the k-th row into il and jl */
1128: if (nzk-1 > 0) {
1129: i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
1130: jl[k] = jl[i]; jl[i] = k;
1131: il[k] = ui[k] + 1;
1132: }
1133: uj_ptr[k] = current_space->array;
1134: uj_lvl_ptr[k] = current_space_lvl->array;
1136: current_space->array += nzk;
1137: current_space->local_used += nzk;
1138: current_space->local_remaining -= nzk;
1140: current_space_lvl->array += nzk;
1141: current_space_lvl->local_used += nzk;
1142: current_space_lvl->local_remaining -= nzk;
1144: ui[k+1] = ui[k] + nzk;
1145: }
1147: ISRestoreIndices(perm,&rip);
1148: PetscFree4(uj_ptr,uj_lvl_ptr,il,jl);
1149: PetscFree(cols_lvl);
1151: /* copy free_space into uj and free free_space; set uj in new datastructure; */
1152: PetscMalloc1(ui[am]+1,&uj);
1153: PetscFreeSpaceContiguous(&free_space,uj);
1154: PetscIncompleteLLDestroy(lnk,lnkbt);
1155: PetscFreeSpaceDestroy(free_space_lvl);
1157: /* put together the new matrix in MATSEQSBAIJ format */
1158: B = fact;
1159: MatSeqSBAIJSetPreallocation(B,1,MAT_SKIP_ALLOCATION,NULL);
1161: b = (Mat_SeqSBAIJ*)B->data;
1162: b->singlemalloc = PETSC_FALSE;
1163: b->free_a = PETSC_TRUE;
1164: b->free_ij = PETSC_TRUE;
1166: PetscMalloc1(ui[am]+1,&b->a);
1168: b->j = uj;
1169: b->i = ui;
1170: b->diag = 0;
1171: b->ilen = 0;
1172: b->imax = 0;
1173: b->row = perm;
1174: b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
1176: PetscObjectReference((PetscObject)perm);
1178: b->icol = perm;
1180: PetscObjectReference((PetscObject)perm);
1181: PetscMalloc1(am+1,&b->solve_work);
1182: PetscLogObjectMemory((PetscObject)B,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));
1184: b->maxnz = b->nz = ui[am];
1186: B->info.factor_mallocs = reallocs;
1187: B->info.fill_ratio_given = fill;
1188: if (ai[am] != 0.) {
1189: /* nonzeros in lower triangular part of A (includign diagonals)= (ai[am]+am)/2 */
1190: B->info.fill_ratio_needed = ((PetscReal)2*ui[am])/(ai[am]+am);
1191: } else {
1192: B->info.fill_ratio_needed = 0.0;
1193: }
1194: #if defined(PETSC_USE_INFO)
1195: if (ai[am] != 0) {
1196: PetscReal af = B->info.fill_ratio_needed;
1197: PetscInfo3(A,"Reallocs %D Fill ratio:given %g needed %g\n",reallocs,(double)fill,(double)af);
1198: PetscInfo1(A,"Run with -pc_factor_fill %g or use \n",(double)af);
1199: PetscInfo1(A,"PCFactorSetFill(pc,%g) for best performance.\n",(double)af);
1200: } else {
1201: PetscInfo(A,"Empty matrix.\n");
1202: }
1203: #endif
1204: if (perm_identity) {
1205: B->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
1206: B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
1207: B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqBAIJ_N_NaturalOrdering;
1208: } else {
1209: (fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqBAIJ_N;
1210: }
1211: return(0);
1212: }
1216: PetscErrorCode MatCholeskyFactorSymbolic_SeqBAIJ(Mat fact,Mat A,IS perm,const MatFactorInfo *info)
1217: {
1218: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1219: Mat_SeqSBAIJ *b;
1220: Mat B;
1221: PetscErrorCode ierr;
1222: PetscBool perm_identity,missing;
1223: PetscReal fill = info->fill;
1224: const PetscInt *rip;
1225: PetscInt i,mbs=a->mbs,bs=A->rmap->bs,*ai=a->i,*aj=a->j,reallocs=0,prow;
1226: PetscInt *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow;
1227: PetscInt nlnk,*lnk,ncols,ncols_upper,*cols,*uj,**ui_ptr,*uj_ptr;
1228: PetscFreeSpaceList free_space=NULL,current_space=NULL;
1229: PetscBT lnkbt;
1232: if (bs > 1) { /* convert to seqsbaij */
1233: if (!a->sbaijMat) {
1234: MatConvert(A,MATSEQSBAIJ,MAT_INITIAL_MATRIX,&a->sbaijMat);
1235: }
1236: (fact)->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_SeqSBAIJ; /* undue the change made in MatGetFactor_seqbaij_petsc */
1238: MatCholeskyFactorSymbolic(fact,a->sbaijMat,perm,info);
1239: return(0);
1240: }
1242: MatMissingDiagonal(A,&missing,&i);
1243: if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",i);
1245: /* check whether perm is the identity mapping */
1246: ISIdentity(perm,&perm_identity);
1247: if (!perm_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Matrix reordering is not supported");
1248: ISGetIndices(perm,&rip);
1250: /* initialization */
1251: PetscMalloc1(mbs+1,&ui);
1252: ui[0] = 0;
1254: /* jl: linked list for storing indices of the pivot rows
1255: il: il[i] points to the 1st nonzero entry of U(i,k:mbs-1) */
1256: PetscMalloc4(mbs,&ui_ptr,mbs,&il,mbs,&jl,mbs,&cols);
1257: for (i=0; i<mbs; i++) {
1258: jl[i] = mbs; il[i] = 0;
1259: }
1261: /* create and initialize a linked list for storing column indices of the active row k */
1262: nlnk = mbs + 1;
1263: PetscLLCreate(mbs,mbs,nlnk,lnk,lnkbt);
1265: /* initial FreeSpace size is fill* (ai[mbs]+mbs)/2 */
1266: PetscFreeSpaceGet(PetscRealIntMultTruncate(fill,PetscIntSumTruncate(ai[mbs]/2,mbs/2)),&free_space);
1268: current_space = free_space;
1270: for (k=0; k<mbs; k++) { /* for each active row k */
1271: /* initialize lnk by the column indices of row rip[k] of A */
1272: nzk = 0;
1273: ncols = ai[rip[k]+1] - ai[rip[k]];
1274: ncols_upper = 0;
1275: for (j=0; j<ncols; j++) {
1276: i = rip[*(aj + ai[rip[k]] + j)];
1277: if (i >= k) { /* only take upper triangular entry */
1278: cols[ncols_upper] = i;
1279: ncols_upper++;
1280: }
1281: }
1282: PetscLLAdd(ncols_upper,cols,mbs,nlnk,lnk,lnkbt);
1283: nzk += nlnk;
1285: /* update lnk by computing fill-in for each pivot row to be merged in */
1286: prow = jl[k]; /* 1st pivot row */
1288: while (prow < k) {
1289: nextprow = jl[prow];
1290: /* merge prow into k-th row */
1291: jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:mbs-1) */
1292: jmax = ui[prow+1];
1293: ncols = jmax-jmin;
1294: uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:mbs-1) */
1295: PetscLLAddSorted(ncols,uj_ptr,mbs,nlnk,lnk,lnkbt);
1296: nzk += nlnk;
1298: /* update il and jl for prow */
1299: if (jmin < jmax) {
1300: il[prow] = jmin;
1301: j = *uj_ptr;
1302: jl[prow] = jl[j];
1303: jl[j] = prow;
1304: }
1305: prow = nextprow;
1306: }
1308: /* if free space is not available, make more free space */
1309: if (current_space->local_remaining<nzk) {
1310: i = mbs - k + 1; /* num of unfactored rows */
1311: i = PetscMin(PetscIntMultTruncate(i,nzk), PetscIntMultTruncate(i,i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
1312: PetscFreeSpaceGet(i,¤t_space);
1313: reallocs++;
1314: }
1316: /* copy data into free space, then initialize lnk */
1317: PetscLLClean(mbs,mbs,nzk,lnk,current_space->array,lnkbt);
1319: /* add the k-th row into il and jl */
1320: if (nzk-1 > 0) {
1321: i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:mbs-1) */
1322: jl[k] = jl[i]; jl[i] = k;
1323: il[k] = ui[k] + 1;
1324: }
1325: ui_ptr[k] = current_space->array;
1326: current_space->array += nzk;
1327: current_space->local_used += nzk;
1328: current_space->local_remaining -= nzk;
1330: ui[k+1] = ui[k] + nzk;
1331: }
1333: ISRestoreIndices(perm,&rip);
1334: PetscFree4(ui_ptr,il,jl,cols);
1336: /* copy free_space into uj and free free_space; set uj in new datastructure; */
1337: PetscMalloc1(ui[mbs]+1,&uj);
1338: PetscFreeSpaceContiguous(&free_space,uj);
1339: PetscLLDestroy(lnk,lnkbt);
1341: /* put together the new matrix in MATSEQSBAIJ format */
1342: B = fact;
1343: MatSeqSBAIJSetPreallocation(B,bs,MAT_SKIP_ALLOCATION,NULL);
1345: b = (Mat_SeqSBAIJ*)B->data;
1346: b->singlemalloc = PETSC_FALSE;
1347: b->free_a = PETSC_TRUE;
1348: b->free_ij = PETSC_TRUE;
1350: PetscMalloc1(ui[mbs]+1,&b->a);
1352: b->j = uj;
1353: b->i = ui;
1354: b->diag = 0;
1355: b->ilen = 0;
1356: b->imax = 0;
1357: b->row = perm;
1358: b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
1360: PetscObjectReference((PetscObject)perm);
1361: b->icol = perm;
1362: PetscObjectReference((PetscObject)perm);
1363: PetscMalloc1(mbs+1,&b->solve_work);
1364: PetscLogObjectMemory((PetscObject)B,(ui[mbs]-mbs)*(sizeof(PetscInt)+sizeof(MatScalar)));
1365: b->maxnz = b->nz = ui[mbs];
1367: B->info.factor_mallocs = reallocs;
1368: B->info.fill_ratio_given = fill;
1369: if (ai[mbs] != 0.) {
1370: /* nonzeros in lower triangular part of A = (ai[mbs]+mbs)/2 */
1371: B->info.fill_ratio_needed = ((PetscReal)2*ui[mbs])/(ai[mbs]+mbs);
1372: } else {
1373: B->info.fill_ratio_needed = 0.0;
1374: }
1375: #if defined(PETSC_USE_INFO)
1376: if (ai[mbs] != 0.) {
1377: PetscReal af = B->info.fill_ratio_needed;
1378: PetscInfo3(A,"Reallocs %D Fill ratio:given %g needed %g\n",reallocs,(double)fill,(double)af);
1379: PetscInfo1(A,"Run with -pc_factor_fill %g or use \n",(double)af);
1380: PetscInfo1(A,"PCFactorSetFill(pc,%g) for best performance.\n",(double)af);
1381: } else {
1382: PetscInfo(A,"Empty matrix.\n");
1383: }
1384: #endif
1385: if (perm_identity) {
1386: B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqBAIJ_N_NaturalOrdering;
1387: } else {
1388: B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqBAIJ_N;
1389: }
1390: return(0);
1391: }
1395: PetscErrorCode MatSolve_SeqBAIJ_N_NaturalOrdering(Mat A,Vec bb,Vec xx)
1396: {
1397: Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data;
1398: PetscErrorCode ierr;
1399: const PetscInt *ai=a->i,*aj=a->j,*adiag=a->diag,*vi;
1400: PetscInt i,k,n=a->mbs;
1401: PetscInt nz,bs=A->rmap->bs,bs2=a->bs2;
1402: const MatScalar *aa=a->a,*v;
1403: PetscScalar *x,*s,*t,*ls;
1404: const PetscScalar *b;
1407: VecGetArrayRead(bb,&b);
1408: VecGetArray(xx,&x);
1409: t = a->solve_work;
1411: /* forward solve the lower triangular */
1412: PetscMemcpy(t,b,bs*sizeof(PetscScalar)); /* copy 1st block of b to t */
1414: for (i=1; i<n; i++) {
1415: v = aa + bs2*ai[i];
1416: vi = aj + ai[i];
1417: nz = ai[i+1] - ai[i];
1418: s = t + bs*i;
1419: PetscMemcpy(s,b+bs*i,bs*sizeof(PetscScalar)); /* copy i_th block of b to t */
1420: for (k=0;k<nz;k++) {
1421: PetscKernel_v_gets_v_minus_A_times_w(bs,s,v,t+bs*vi[k]);
1422: v += bs2;
1423: }
1424: }
1426: /* backward solve the upper triangular */
1427: ls = a->solve_work + A->cmap->n;
1428: for (i=n-1; i>=0; i--) {
1429: v = aa + bs2*(adiag[i+1]+1);
1430: vi = aj + adiag[i+1]+1;
1431: nz = adiag[i] - adiag[i+1]-1;
1432: PetscMemcpy(ls,t+i*bs,bs*sizeof(PetscScalar));
1433: for (k=0; k<nz; k++) {
1434: PetscKernel_v_gets_v_minus_A_times_w(bs,ls,v,t+bs*vi[k]);
1435: v += bs2;
1436: }
1437: PetscKernel_w_gets_A_times_v(bs,ls,aa+bs2*adiag[i],t+i*bs); /* *inv(diagonal[i]) */
1438: PetscMemcpy(x+i*bs,t+i*bs,bs*sizeof(PetscScalar));
1439: }
1441: VecRestoreArrayRead(bb,&b);
1442: VecRestoreArray(xx,&x);
1443: PetscLogFlops(2.0*(a->bs2)*(a->nz) - A->rmap->bs*A->cmap->n);
1444: return(0);
1445: }
1449: PetscErrorCode MatSolve_SeqBAIJ_N(Mat A,Vec bb,Vec xx)
1450: {
1451: Mat_SeqBAIJ *a =(Mat_SeqBAIJ*)A->data;
1452: IS iscol=a->col,isrow=a->row;
1453: PetscErrorCode ierr;
1454: const PetscInt *r,*c,*rout,*cout,*ai=a->i,*aj=a->j,*adiag=a->diag,*vi;
1455: PetscInt i,m,n=a->mbs;
1456: PetscInt nz,bs=A->rmap->bs,bs2=a->bs2;
1457: const MatScalar *aa=a->a,*v;
1458: PetscScalar *x,*s,*t,*ls;
1459: const PetscScalar *b;
1462: VecGetArrayRead(bb,&b);
1463: VecGetArray(xx,&x);
1464: t = a->solve_work;
1466: ISGetIndices(isrow,&rout); r = rout;
1467: ISGetIndices(iscol,&cout); c = cout;
1469: /* forward solve the lower triangular */
1470: PetscMemcpy(t,b+bs*r[0],bs*sizeof(PetscScalar));
1471: for (i=1; i<n; i++) {
1472: v = aa + bs2*ai[i];
1473: vi = aj + ai[i];
1474: nz = ai[i+1] - ai[i];
1475: s = t + bs*i;
1476: PetscMemcpy(s,b+bs*r[i],bs*sizeof(PetscScalar));
1477: for (m=0; m<nz; m++) {
1478: PetscKernel_v_gets_v_minus_A_times_w(bs,s,v,t+bs*vi[m]);
1479: v += bs2;
1480: }
1481: }
1483: /* backward solve the upper triangular */
1484: ls = a->solve_work + A->cmap->n;
1485: for (i=n-1; i>=0; i--) {
1486: v = aa + bs2*(adiag[i+1]+1);
1487: vi = aj + adiag[i+1]+1;
1488: nz = adiag[i] - adiag[i+1] - 1;
1489: PetscMemcpy(ls,t+i*bs,bs*sizeof(PetscScalar));
1490: for (m=0; m<nz; m++) {
1491: PetscKernel_v_gets_v_minus_A_times_w(bs,ls,v,t+bs*vi[m]);
1492: v += bs2;
1493: }
1494: PetscKernel_w_gets_A_times_v(bs,ls,v,t+i*bs); /* *inv(diagonal[i]) */
1495: PetscMemcpy(x + bs*c[i],t+i*bs,bs*sizeof(PetscScalar));
1496: }
1497: ISRestoreIndices(isrow,&rout);
1498: ISRestoreIndices(iscol,&cout);
1499: VecRestoreArrayRead(bb,&b);
1500: VecRestoreArray(xx,&x);
1501: PetscLogFlops(2.0*(a->bs2)*(a->nz) - A->rmap->bs*A->cmap->n);
1502: return(0);
1503: }
1507: /*
1508: For each block in an block array saves the largest absolute value in the block into another array
1509: */
1510: static PetscErrorCode MatBlockAbs_private(PetscInt nbs,PetscInt bs2,PetscScalar *blockarray,PetscReal *absarray)
1511: {
1513: PetscInt i,j;
1516: PetscMemzero(absarray,(nbs+1)*sizeof(PetscReal));
1517: for (i=0; i<nbs; i++) {
1518: for (j=0; j<bs2; j++) {
1519: if (absarray[i] < PetscAbsScalar(blockarray[i*nbs+j])) absarray[i] = PetscAbsScalar(blockarray[i*nbs+j]);
1520: }
1521: }
1522: return(0);
1523: }
1527: /*
1528: This needs to be renamed and called by the regular MatILUFactor_SeqBAIJ when drop tolerance is used
1529: */
1530: PetscErrorCode MatILUDTFactor_SeqBAIJ(Mat A,IS isrow,IS iscol,const MatFactorInfo *info,Mat *fact)
1531: {
1532: Mat B = *fact;
1533: Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data,*b;
1534: IS isicol;
1536: const PetscInt *r,*ic;
1537: PetscInt i,mbs=a->mbs,bs=A->rmap->bs,bs2=a->bs2,*ai=a->i,*aj=a->j,*ajtmp,*adiag;
1538: PetscInt *bi,*bj,*bdiag;
1540: PetscInt row,nzi,nzi_bl,nzi_bu,*im,dtcount,nzi_al,nzi_au;
1541: PetscInt nlnk,*lnk;
1542: PetscBT lnkbt;
1543: PetscBool row_identity,icol_identity;
1544: MatScalar *aatmp,*pv,*batmp,*ba,*rtmp,*pc,*multiplier,*vtmp;
1545: PetscInt j,nz,*pj,*bjtmp,k,ncut,*jtmp;
1547: PetscReal dt=info->dt; /* shift=info->shiftamount; */
1548: PetscInt nnz_max;
1549: PetscBool missing;
1550: PetscReal *vtmp_abs;
1551: MatScalar *v_work;
1552: PetscInt *v_pivots;
1553: PetscBool allowzeropivot,zeropivotdetected=PETSC_FALSE;
1556: /* ------- symbolic factorization, can be reused ---------*/
1557: MatMissingDiagonal(A,&missing,&i);
1558: if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",i);
1559: adiag=a->diag;
1561: ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);
1563: /* bdiag is location of diagonal in factor */
1564: PetscMalloc1(mbs+1,&bdiag);
1566: /* allocate row pointers bi */
1567: PetscMalloc1(2*mbs+2,&bi);
1569: /* allocate bj and ba; max num of nonzero entries is (ai[n]+2*n*dtcount+2) */
1570: dtcount = (PetscInt)info->dtcount;
1571: if (dtcount > mbs-1) dtcount = mbs-1;
1572: nnz_max = ai[mbs]+2*mbs*dtcount +2;
1573: /* printf("MatILUDTFactor_SeqBAIJ, bs %d, ai[mbs] %d, nnz_max %d, dtcount %d\n",bs,ai[mbs],nnz_max,dtcount); */
1574: PetscMalloc1(nnz_max,&bj);
1575: nnz_max = nnz_max*bs2;
1576: PetscMalloc1(nnz_max,&ba);
1578: /* put together the new matrix */
1579: MatSeqBAIJSetPreallocation_SeqBAIJ(B,bs,MAT_SKIP_ALLOCATION,NULL);
1580: PetscLogObjectParent((PetscObject)B,(PetscObject)isicol);
1582: b = (Mat_SeqBAIJ*)(B)->data;
1583: b->free_a = PETSC_TRUE;
1584: b->free_ij = PETSC_TRUE;
1585: b->singlemalloc = PETSC_FALSE;
1587: b->a = ba;
1588: b->j = bj;
1589: b->i = bi;
1590: b->diag = bdiag;
1591: b->ilen = 0;
1592: b->imax = 0;
1593: b->row = isrow;
1594: b->col = iscol;
1596: PetscObjectReference((PetscObject)isrow);
1597: PetscObjectReference((PetscObject)iscol);
1599: b->icol = isicol;
1600: PetscMalloc1(bs*(mbs+1),&b->solve_work);
1601: PetscLogObjectMemory((PetscObject)B,nnz_max*(sizeof(PetscInt)+sizeof(MatScalar)));
1602: b->maxnz = nnz_max/bs2;
1604: (B)->factortype = MAT_FACTOR_ILUDT;
1605: (B)->info.factor_mallocs = 0;
1606: (B)->info.fill_ratio_given = ((PetscReal)nnz_max)/((PetscReal)(ai[mbs]*bs2));
1607: /* ------- end of symbolic factorization ---------*/
1608: ISGetIndices(isrow,&r);
1609: ISGetIndices(isicol,&ic);
1611: /* linked list for storing column indices of the active row */
1612: nlnk = mbs + 1;
1613: PetscLLCreate(mbs,mbs,nlnk,lnk,lnkbt);
1615: /* im: used by PetscLLAddSortedLU(); jtmp: working array for column indices of active row */
1616: PetscMalloc2(mbs,&im,mbs,&jtmp);
1617: /* rtmp, vtmp: working arrays for sparse and contiguous row entries of active row */
1618: PetscMalloc2(mbs*bs2,&rtmp,mbs*bs2,&vtmp);
1619: PetscMalloc1(mbs+1,&vtmp_abs);
1620: PetscMalloc3(bs,&v_work,bs2,&multiplier,bs,&v_pivots);
1622: allowzeropivot = PetscNot(A->erroriffailure);
1623: bi[0] = 0;
1624: bdiag[0] = (nnz_max/bs2)-1; /* location of diagonal in factor B */
1625: bi[2*mbs+1] = bdiag[0]+1; /* endof bj and ba array */
1626: for (i=0; i<mbs; i++) {
1627: /* copy initial fill into linked list */
1628: nzi = ai[r[i]+1] - ai[r[i]];
1629: if (!nzi) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i);
1630: nzi_al = adiag[r[i]] - ai[r[i]];
1631: nzi_au = ai[r[i]+1] - adiag[r[i]] -1;
1633: /* load in initial unfactored row */
1634: ajtmp = aj + ai[r[i]];
1635: PetscLLAddPerm(nzi,ajtmp,ic,mbs,nlnk,lnk,lnkbt);
1636: PetscMemzero(rtmp,mbs*bs2*sizeof(PetscScalar));
1637: aatmp = a->a + bs2*ai[r[i]];
1638: for (j=0; j<nzi; j++) {
1639: PetscMemcpy(rtmp+bs2*ic[ajtmp[j]],aatmp+bs2*j,bs2*sizeof(MatScalar));
1640: }
1642: /* add pivot rows into linked list */
1643: row = lnk[mbs];
1644: while (row < i) {
1645: nzi_bl = bi[row+1] - bi[row] + 1;
1646: bjtmp = bj + bdiag[row+1]+1; /* points to 1st column next to the diagonal in U */
1647: PetscLLAddSortedLU(bjtmp,row,nlnk,lnk,lnkbt,i,nzi_bl,im);
1648: nzi += nlnk;
1649: row = lnk[row];
1650: }
1652: /* copy data from lnk into jtmp, then initialize lnk */
1653: PetscLLClean(mbs,mbs,nzi,lnk,jtmp,lnkbt);
1655: /* numerical factorization */
1656: bjtmp = jtmp;
1657: row = *bjtmp++; /* 1st pivot row */
1659: while (row < i) {
1660: pc = rtmp + bs2*row;
1661: pv = ba + bs2*bdiag[row]; /* inv(diag) of the pivot row */
1662: PetscKernel_A_gets_A_times_B(bs,pc,pv,multiplier); /* pc= multiplier = pc*inv(diag[row]) */
1663: MatBlockAbs_private(1,bs2,pc,vtmp_abs);
1664: if (vtmp_abs[0] > dt) { /* apply tolerance dropping rule */
1665: pj = bj + bdiag[row+1] + 1; /* point to 1st entry of U(row,:) */
1666: pv = ba + bs2*(bdiag[row+1] + 1);
1667: nz = bdiag[row] - bdiag[row+1] - 1; /* num of entries in U(row,:), excluding diagonal */
1668: for (j=0; j<nz; j++) {
1669: PetscKernel_A_gets_A_minus_B_times_C(bs,rtmp+bs2*pj[j],pc,pv+bs2*j);
1670: }
1671: /* PetscLogFlops(bslog*(nz+1.0)-bs); */
1672: }
1673: row = *bjtmp++;
1674: }
1676: /* copy sparse rtmp into contiguous vtmp; separate L and U part */
1677: nzi_bl = 0; j = 0;
1678: while (jtmp[j] < i) { /* L-part. Note: jtmp is sorted */
1679: PetscMemcpy(vtmp+bs2*j,rtmp+bs2*jtmp[j],bs2*sizeof(MatScalar));
1680: nzi_bl++; j++;
1681: }
1682: nzi_bu = nzi - nzi_bl -1;
1684: while (j < nzi) { /* U-part */
1685: PetscMemcpy(vtmp+bs2*j,rtmp+bs2*jtmp[j],bs2*sizeof(MatScalar));
1686: j++;
1687: }
1689: MatBlockAbs_private(nzi,bs2,vtmp,vtmp_abs);
1690:
1691: bjtmp = bj + bi[i];
1692: batmp = ba + bs2*bi[i];
1693: /* apply level dropping rule to L part */
1694: ncut = nzi_al + dtcount;
1695: if (ncut < nzi_bl) {
1696: PetscSortSplitReal(ncut,nzi_bl,vtmp_abs,jtmp);
1697: PetscSortIntWithScalarArray(ncut,jtmp,vtmp);
1698: } else {
1699: ncut = nzi_bl;
1700: }
1701: for (j=0; j<ncut; j++) {
1702: bjtmp[j] = jtmp[j];
1703: PetscMemcpy(batmp+bs2*j,rtmp+bs2*bjtmp[j],bs2*sizeof(MatScalar));
1704: }
1705: bi[i+1] = bi[i] + ncut;
1706: nzi = ncut + 1;
1708: /* apply level dropping rule to U part */
1709: ncut = nzi_au + dtcount;
1710: if (ncut < nzi_bu) {
1711: PetscSortSplitReal(ncut,nzi_bu,vtmp_abs+nzi_bl+1,jtmp+nzi_bl+1);
1712: PetscSortIntWithScalarArray(ncut,jtmp+nzi_bl+1,vtmp+nzi_bl+1);
1713: } else {
1714: ncut = nzi_bu;
1715: }
1716: nzi += ncut;
1718: /* mark bdiagonal */
1719: bdiag[i+1] = bdiag[i] - (ncut + 1);
1720: bi[2*mbs - i] = bi[2*mbs - i +1] - (ncut + 1);
1722: bjtmp = bj + bdiag[i];
1723: batmp = ba + bs2*bdiag[i];
1724: PetscMemcpy(batmp,rtmp+bs2*i,bs2*sizeof(MatScalar));
1725: *bjtmp = i;
1726:
1727: bjtmp = bj + bdiag[i+1]+1;
1728: batmp = ba + (bdiag[i+1]+1)*bs2;
1730: for (k=0; k<ncut; k++) {
1731: bjtmp[k] = jtmp[nzi_bl+1+k];
1732: PetscMemcpy(batmp+bs2*k,rtmp+bs2*bjtmp[k],bs2*sizeof(MatScalar));
1733: }
1735: im[i] = nzi; /* used by PetscLLAddSortedLU() */
1737: /* invert diagonal block for simplier triangular solves - add shift??? */
1738: batmp = ba + bs2*bdiag[i];
1740: PetscKernel_A_gets_inverse_A(bs,batmp,v_pivots,v_work,allowzeropivot,&zeropivotdetected);
1741: if (zeropivotdetected) B->errortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
1742: } /* for (i=0; i<mbs; i++) */
1743: PetscFree3(v_work,multiplier,v_pivots);
1745: /* printf("end of L %d, beginning of U %d\n",bi[mbs],bdiag[mbs]); */
1746: if (bi[mbs] >= bdiag[mbs]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"end of L array %d cannot >= the beginning of U array %d",bi[mbs],bdiag[mbs]);
1748: ISRestoreIndices(isrow,&r);
1749: ISRestoreIndices(isicol,&ic);
1751: PetscLLDestroy(lnk,lnkbt);
1753: PetscFree2(im,jtmp);
1754: PetscFree2(rtmp,vtmp);
1756: PetscLogFlops(bs2*B->cmap->n);
1757: b->maxnz = b->nz = bi[mbs] + bdiag[0] - bdiag[mbs];
1759: ISIdentity(isrow,&row_identity);
1760: ISIdentity(isicol,&icol_identity);
1761: if (row_identity && icol_identity) {
1762: B->ops->solve = MatSolve_SeqBAIJ_N_NaturalOrdering;
1763: } else {
1764: B->ops->solve = MatSolve_SeqBAIJ_N;
1765: }
1767: B->ops->solveadd = 0;
1768: B->ops->solvetranspose = 0;
1769: B->ops->solvetransposeadd = 0;
1770: B->ops->matsolve = 0;
1771: B->assembled = PETSC_TRUE;
1772: B->preallocated = PETSC_TRUE;
1773: return(0);
1774: }