Actual source code: sbaijfact.c
2: #include src/mat/impls/baij/seq/baij.h
3: #include src/mat/impls/sbaij/seq/sbaij.h
4: #include src/inline/ilu.h
5: #include include/petscis.h
7: #if !defined(PETSC_USE_COMPLEX)
8: /*
9: input:
10: F -- numeric factor
11: output:
12: nneg, nzero, npos: matrix inertia
13: */
17: PetscErrorCode MatGetInertia_SeqSBAIJ(Mat F,PetscInt *nneig,PetscInt *nzero,PetscInt *npos)
18: {
19: Mat_SeqSBAIJ *fact_ptr = (Mat_SeqSBAIJ*)F->data;
20: PetscScalar *dd = fact_ptr->a;
21: PetscInt mbs=fact_ptr->mbs,bs=F->bs,i,nneig_tmp,npos_tmp,*fi = fact_ptr->i;
24: if (bs != 1) SETERRQ1(PETSC_ERR_SUP,"No support for bs: %D >1 yet",bs);
25: nneig_tmp = 0; npos_tmp = 0;
26: for (i=0; i<mbs; i++){
27: if (PetscRealPart(dd[*fi]) > 0.0){
28: npos_tmp++;
29: } else if (PetscRealPart(dd[*fi]) < 0.0){
30: nneig_tmp++;
31: }
32: fi++;
33: }
34: if (nneig) *nneig = nneig_tmp;
35: if (npos) *npos = npos_tmp;
36: if (nzero) *nzero = mbs - nneig_tmp - npos_tmp;
38: return(0);
39: }
40: #endif /* !defined(PETSC_USE_COMPLEX) */
42: /* Using Modified Sparse Row (MSR) storage.
43: See page 85, "Iterative Methods ..." by Saad. */
44: /*
45: Symbolic U^T*D*U factorization for SBAIJ format. Modified from SSF of YSMP.
46: */
47: /* Use Modified Sparse Row storage for u and ju, see Saad pp.85 */
50: PetscErrorCode MatCholeskyFactorSymbolic_SeqSBAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *B)
51: {
52: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data,*b;
54: PetscInt *rip,i,mbs = a->mbs,*ai,*aj;
55: PetscInt *jutmp,bs = A->bs,bs2=a->bs2;
56: PetscInt m,reallocs = 0,prow;
57: PetscInt *jl,*q,jmin,jmax,juidx,nzk,qm,*iu,*ju,k,j,vj,umax,maxadd;
58: PetscInt *il,ili,nextprow;
59: PetscReal f = info->fill;
60: PetscTruth perm_identity;
63: /* check whether perm is the identity mapping */
64: ISIdentity(perm,&perm_identity);
66: /* -- inplace factorization, i.e., use sbaij for *B -- */
67: if (perm_identity && bs==1 ){
68: if (!perm_identity) a->permute = PETSC_TRUE;
69:
70: ISGetIndices(perm,&rip);
71:
72: if (perm_identity){ /* without permutation */
73: ai = a->i; aj = a->j;
74: } else { /* non-trivial permutation */
75: MatReorderingSeqSBAIJ(A,perm);
76: ai = a->inew; aj = a->jnew;
77: }
78:
79: /* initialization */
80: PetscMalloc((mbs+1)*sizeof(PetscInt),&iu);
81: umax = (PetscInt)(f*ai[mbs] + 1);
82: PetscMalloc(umax*sizeof(PetscInt),&ju);
83: iu[0] = 0;
84: juidx = 0; /* index for ju */
85: PetscMalloc((3*mbs+1)*sizeof(PetscInt),&jl); /* linked list for getting pivot row */
86: q = jl + mbs; /* linked list for col index of active row */
87: il = q + mbs;
88: for (i=0; i<mbs; i++){
89: jl[i] = mbs;
90: q[i] = 0;
91: il[i] = 0;
92: }
94: /* for each row k */
95: for (k=0; k<mbs; k++){
96: nzk = 0; /* num. of nz blocks in k-th block row with diagonal block excluded */
97: q[k] = mbs;
98: /* initialize nonzero structure of k-th row to row rip[k] of A */
99: jmin = ai[rip[k]] +1; /* exclude diag[k] */
100: jmax = ai[rip[k]+1];
101: for (j=jmin; j<jmax; j++){
102: vj = rip[aj[j]]; /* col. value */
103: if(vj > k){
104: qm = k;
105: do {
106: m = qm; qm = q[m];
107: } while(qm < vj);
108: if (qm == vj) {
109: SETERRQ(PETSC_ERR_PLIB,"Duplicate entry in A\n");
110: }
111: nzk++;
112: q[m] = vj;
113: q[vj] = qm;
114: } /* if(vj > k) */
115: } /* for (j=jmin; j<jmax; j++) */
117: /* modify nonzero structure of k-th row by computing fill-in
118: for each row i to be merged in */
119: prow = k;
120: prow = jl[prow]; /* next pivot row (== mbs for symbolic factorization) */
121:
122: while (prow < k){
123: nextprow = jl[prow];
124:
125: /* merge row prow into k-th row */
126: ili = il[prow];
127: jmin = ili + 1; /* points to 2nd nzero entry in U(prow,k:mbs-1) */
128: jmax = iu[prow+1];
129: qm = k;
130: for (j=jmin; j<jmax; j++){
131: vj = ju[j];
132: do {
133: m = qm; qm = q[m];
134: } while (qm < vj);
135: if (qm != vj){ /* a fill */
136: nzk++; q[m] = vj; q[vj] = qm; qm = vj;
137: }
138: } /* end of for (j=jmin; j<jmax; j++) */
139: if (jmin < jmax){
140: il[prow] = jmin;
141: j = ju[jmin];
142: jl[prow] = jl[j]; jl[j] = prow; /* update jl */
143: }
144: prow = nextprow;
145: }
146:
147: /* update il and jl */
148: if (nzk > 0){
149: i = q[k]; /* col value of the first nonzero element in U(k, k+1:mbs-1) */
150: jl[k] = jl[i]; jl[i] = k;
151: il[k] = iu[k] + 1;
152: }
153: iu[k+1] = iu[k] + nzk + 1; /* include diag[k] */
155: /* allocate more space to ju if needed */
156: if (iu[k+1] > umax) {
157: /* estimate how much additional space we will need */
158: /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */
159: /* just double the memory each time */
160: maxadd = umax;
161: if (maxadd < nzk) maxadd = (mbs-k)*(nzk+1)/2;
162: umax += maxadd;
164: /* allocate a longer ju */
165: PetscMalloc(umax*sizeof(PetscInt),&jutmp);
166: PetscMemcpy(jutmp,ju,iu[k]*sizeof(PetscInt));
167: PetscFree(ju);
168: ju = jutmp;
169: reallocs++; /* count how many times we realloc */
170: }
172: /* save nonzero structure of k-th row in ju */
173: ju[juidx++] = k; /* diag[k] */
174: i = k;
175: while (nzk --) {
176: i = q[i];
177: ju[juidx++] = i;
178: }
179: }
181: if (ai[mbs] != 0) {
182: PetscReal af = ((PetscReal)iu[mbs])/((PetscReal)ai[mbs]);
183: PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,f,af);
184: PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:Run with -pc_cholesky_fill %g or use \n",af);
185: PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:PCCholeskySetFill(pc,%g);\n",af);
186: PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:for best performance.\n");
187: } else {
188: PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:Empty matrix.\n");
189: }
191: ISRestoreIndices(perm,&rip);
192: /* PetscFree(q); */
193: PetscFree(jl);
195: /* put together the new matrix */
196: MatCreate(A->comm,bs*mbs,bs*mbs,bs*mbs,bs*mbs,B);
197: MatSetType(*B,A->type_name);
198: MatSeqSBAIJSetPreallocation(*B,bs,0,PETSC_NULL);
200: /* PetscLogObjectParent(*B,iperm); */
201: b = (Mat_SeqSBAIJ*)(*B)->data;
202: PetscFree(b->imax);
203: b->singlemalloc = PETSC_FALSE;
204: /* the next line frees the default space generated by the Create() */
205: PetscFree(b->a);
206: PetscFree(b->ilen);
207: PetscMalloc((iu[mbs]+1)*sizeof(MatScalar)*bs2,&b->a);
208: b->j = ju;
209: b->i = iu;
210: b->diag = 0;
211: b->ilen = 0;
212: b->imax = 0;
213: b->row = perm;
214: b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
215: PetscObjectReference((PetscObject)perm);
216: b->icol = perm;
217: PetscObjectReference((PetscObject)perm);
218: PetscMalloc((bs*mbs+bs)*sizeof(PetscScalar),&b->solve_work);
219: /* In b structure: Free imax, ilen, old a, old j.
220: Allocate idnew, solve_work, new a, new j */
221: PetscLogObjectMemory(*B,(iu[mbs]-mbs)*(sizeof(PetscInt)+sizeof(MatScalar)));
222: b->maxnz = b->nz = iu[mbs];
223:
224: (*B)->factor = FACTOR_CHOLESKY;
225: (*B)->info.factor_mallocs = reallocs;
226: (*B)->info.fill_ratio_given = f;
227: if (ai[mbs] != 0) {
228: (*B)->info.fill_ratio_needed = ((PetscReal)iu[mbs])/((PetscReal)ai[mbs]);
229: } else {
230: (*B)->info.fill_ratio_needed = 0.0;
231: }
234: (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering;
235: (*B)->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering;
236: (*B)->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering;
237: PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=1\n");
238:
239: return(0);
240: }
241: /* ----------- end of new code --------------------*/
244: if (!perm_identity) a->permute = PETSC_TRUE;
245:
246: ISGetIndices(perm,&rip);
247:
248: if (perm_identity){ /* without permutation */
249: ai = a->i; aj = a->j;
250: } else { /* non-trivial permutation */
251: MatReorderingSeqSBAIJ(A,perm);
252: ai = a->inew; aj = a->jnew;
253: }
254:
255: /* initialization */
256: PetscMalloc((mbs+1)*sizeof(PetscInt),&iu);
257: umax = (PetscInt)(f*ai[mbs] + 1); umax += mbs + 1;
258: PetscMalloc(umax*sizeof(PetscInt),&ju);
259: iu[0] = mbs+1;
260: juidx = mbs + 1; /* index for ju */
261: PetscMalloc(2*mbs*sizeof(PetscInt),&jl); /* linked list for pivot row */
262: q = jl + mbs; /* linked list for col index */
263: for (i=0; i<mbs; i++){
264: jl[i] = mbs;
265: q[i] = 0;
266: }
268: /* for each row k */
269: for (k=0; k<mbs; k++){
270: for (i=0; i<mbs; i++) q[i] = 0; /* to be removed! */
271: nzk = 0; /* num. of nz blocks in k-th block row with diagonal block excluded */
272: q[k] = mbs;
273: /* initialize nonzero structure of k-th row to row rip[k] of A */
274: jmin = ai[rip[k]] +1; /* exclude diag[k] */
275: jmax = ai[rip[k]+1];
276: for (j=jmin; j<jmax; j++){
277: vj = rip[aj[j]]; /* col. value */
278: if(vj > k){
279: qm = k;
280: do {
281: m = qm; qm = q[m];
282: } while(qm < vj);
283: if (qm == vj) {
284: SETERRQ(PETSC_ERR_PLIB,"Duplicate entry in A\n");
285: }
286: nzk++;
287: q[m] = vj;
288: q[vj] = qm;
289: } /* if(vj > k) */
290: } /* for (j=jmin; j<jmax; j++) */
292: /* modify nonzero structure of k-th row by computing fill-in
293: for each row i to be merged in */
294: prow = k;
295: prow = jl[prow]; /* next pivot row (== mbs for symbolic factorization) */
296:
297: while (prow < k){
298: /* merge row prow into k-th row */
299: jmin = iu[prow] + 1; jmax = iu[prow+1];
300: qm = k;
301: for (j=jmin; j<jmax; j++){
302: vj = ju[j];
303: do {
304: m = qm; qm = q[m];
305: } while (qm < vj);
306: if (qm != vj){
307: nzk++; q[m] = vj; q[vj] = qm; qm = vj;
308: }
309: }
310: prow = jl[prow]; /* next pivot row */
311: }
312:
313: /* add k to row list for first nonzero element in k-th row */
314: if (nzk > 0){
315: i = q[k]; /* col value of first nonzero element in U(k, k+1:mbs-1) */
316: jl[k] = jl[i]; jl[i] = k;
317: }
318: iu[k+1] = iu[k] + nzk;
320: /* allocate more space to ju if needed */
321: if (iu[k+1] > umax) {
322: /* estimate how much additional space we will need */
323: /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */
324: /* just double the memory each time */
325: maxadd = umax;
326: if (maxadd < nzk) maxadd = (mbs-k)*(nzk+1)/2;
327: umax += maxadd;
329: /* allocate a longer ju */
330: PetscMalloc(umax*sizeof(PetscInt),&jutmp);
331: PetscMemcpy(jutmp,ju,iu[k]*sizeof(PetscInt));
332: PetscFree(ju);
333: ju = jutmp;
334: reallocs++; /* count how many times we realloc */
335: }
337: /* save nonzero structure of k-th row in ju */
338: i=k;
339: while (nzk --) {
340: i = q[i];
341: ju[juidx++] = i;
342: }
343: }
345: if (ai[mbs] != 0) {
346: PetscReal af = ((PetscReal)iu[mbs])/((PetscReal)ai[mbs]);
347: PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,f,af);
348: PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:Run with -pc_cholesky_fill %g or use \n",af);
349: PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:PCCholeskySetFill(pc,%g);\n",af);
350: PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:for best performance.\n");
351: } else {
352: PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:Empty matrix.\n");
353: }
355: ISRestoreIndices(perm,&rip);
356: /* PetscFree(q); */
357: PetscFree(jl);
359: /* put together the new matrix */
360: MatCreate(A->comm,bs*mbs,bs*mbs,bs*mbs,bs*mbs,B);
361: MatSetType(*B,A->type_name);
362: MatSeqSBAIJSetPreallocation(*B,bs,0,PETSC_NULL);
364: /* PetscLogObjectParent(*B,iperm); */
365: b = (Mat_SeqSBAIJ*)(*B)->data;
366: PetscFree(b->imax);
367: b->singlemalloc = PETSC_FALSE;
368: /* the next line frees the default space generated by the Create() */
369: PetscFree(b->a);
370: PetscFree(b->ilen);
371: PetscMalloc((iu[mbs]+1)*sizeof(MatScalar)*bs2,&b->a);
372: b->j = ju;
373: b->i = iu;
374: b->diag = 0;
375: b->ilen = 0;
376: b->imax = 0;
377: b->row = perm;
378: b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
379: PetscObjectReference((PetscObject)perm);
380: b->icol = perm;
381: PetscObjectReference((PetscObject)perm);
382: PetscMalloc((bs*mbs+bs)*sizeof(PetscScalar),&b->solve_work);
383: /* In b structure: Free imax, ilen, old a, old j.
384: Allocate idnew, solve_work, new a, new j */
385: PetscLogObjectMemory(*B,(iu[mbs]-mbs)*(sizeof(PetscInt)+sizeof(MatScalar)));
386: b->maxnz = b->nz = iu[mbs];
387:
388: (*B)->factor = FACTOR_CHOLESKY;
389: (*B)->info.factor_mallocs = reallocs;
390: (*B)->info.fill_ratio_given = f;
391: if (ai[mbs] != 0) {
392: (*B)->info.fill_ratio_needed = ((PetscReal)iu[mbs])/((PetscReal)ai[mbs]);
393: } else {
394: (*B)->info.fill_ratio_needed = 0.0;
395: }
397: if (perm_identity){
398: switch (bs) {
399: case 1:
400: (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering;
401: (*B)->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering;
402: PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=1\n");
403: break;
404: case 2:
405: (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering;
406: (*B)->ops->solve = MatSolve_SeqSBAIJ_2_NaturalOrdering;
407: PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=2\n");
408: break;
409: case 3:
410: (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3_NaturalOrdering;
411: (*B)->ops->solve = MatSolve_SeqSBAIJ_3_NaturalOrdering;
412: PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:sing special in-place natural ordering factor and solve BS=3\n");
413: break;
414: case 4:
415: (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4_NaturalOrdering;
416: (*B)->ops->solve = MatSolve_SeqSBAIJ_4_NaturalOrdering;
417: PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=4\n");
418: break;
419: case 5:
420: (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5_NaturalOrdering;
421: (*B)->ops->solve = MatSolve_SeqSBAIJ_5_NaturalOrdering;
422: PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=5\n");
423: break;
424: case 6:
425: (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6_NaturalOrdering;
426: (*B)->ops->solve = MatSolve_SeqSBAIJ_6_NaturalOrdering;
427: PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=6\n");
428: break;
429: case 7:
430: (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7_NaturalOrdering;
431: (*B)->ops->solve = MatSolve_SeqSBAIJ_7_NaturalOrdering;
432: PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=7\n");
433: break;
434: default:
435: (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_N_NaturalOrdering;
436: (*B)->ops->solve = MatSolve_SeqSBAIJ_N_NaturalOrdering;
437: PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS>7\n");
438: break;
439: }
440: }
442: return(0);
443: }
448: PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_N(Mat A,Mat *B)
449: {
450: Mat C = *B;
451: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data,*b = (Mat_SeqSBAIJ *)C->data;
452: IS perm = b->row;
454: PetscInt *perm_ptr,i,j,mbs=a->mbs,*bi=b->i,*bj=b->j;
455: PetscInt *ai,*aj,*a2anew,k,k1,jmin,jmax,*jl,*il,vj,nexti,ili;
456: PetscInt bs=A->bs,bs2 = a->bs2;
457: MatScalar *ba = b->a,*aa,*ap,*dk,*uik;
458: MatScalar *u,*diag,*rtmp,*rtmp_ptr;
459: MatScalar *work;
460: PetscInt *pivots;
463: /* initialization */
464: PetscMalloc(bs2*mbs*sizeof(MatScalar),&rtmp);
465: PetscMemzero(rtmp,bs2*mbs*sizeof(MatScalar));
466: PetscMalloc(2*mbs*sizeof(PetscInt),&il);
467: jl = il + mbs;
468: for (i=0; i<mbs; i++) {
469: jl[i] = mbs; il[0] = 0;
470: }
471: PetscMalloc((2*bs2+bs)*sizeof(MatScalar),&dk);
472: uik = dk + bs2;
473: work = uik + bs2;
474: PetscMalloc(bs*sizeof(PetscInt),&pivots);
475:
476: ISGetIndices(perm,&perm_ptr);
477:
478: /* check permutation */
479: if (!a->permute){
480: ai = a->i; aj = a->j; aa = a->a;
481: } else {
482: ai = a->inew; aj = a->jnew;
483: PetscMalloc(bs2*ai[mbs]*sizeof(MatScalar),&aa);
484: PetscMemcpy(aa,a->a,bs2*ai[mbs]*sizeof(MatScalar));
485: PetscMalloc(ai[mbs]*sizeof(PetscInt),&a2anew);
486: PetscMemcpy(a2anew,a->a2anew,(ai[mbs])*sizeof(PetscInt));
488: for (i=0; i<mbs; i++){
489: jmin = ai[i]; jmax = ai[i+1];
490: for (j=jmin; j<jmax; j++){
491: while (a2anew[j] != j){
492: k = a2anew[j]; a2anew[j] = a2anew[k]; a2anew[k] = k;
493: for (k1=0; k1<bs2; k1++){
494: dk[k1] = aa[k*bs2+k1];
495: aa[k*bs2+k1] = aa[j*bs2+k1];
496: aa[j*bs2+k1] = dk[k1];
497: }
498: }
499: /* transform columnoriented blocks that lie in the lower triangle to roworiented blocks */
500: if (i > aj[j]){
501: /* printf("change orientation, row: %d, col: %d\n",i,aj[j]); */
502: ap = aa + j*bs2; /* ptr to the beginning of j-th block of aa */
503: for (k=0; k<bs2; k++) dk[k] = ap[k]; /* dk <- j-th block of aa */
504: for (k=0; k<bs; k++){ /* j-th block of aa <- dk^T */
505: for (k1=0; k1<bs; k1++) *ap++ = dk[k + bs*k1];
506: }
507: }
508: }
509: }
510: PetscFree(a2anew);
511: }
512:
513: /* for each row k */
514: for (k = 0; k<mbs; k++){
516: /*initialize k-th row with elements nonzero in row perm(k) of A */
517: jmin = ai[perm_ptr[k]]; jmax = ai[perm_ptr[k]+1];
518:
519: ap = aa + jmin*bs2;
520: for (j = jmin; j < jmax; j++){
521: vj = perm_ptr[aj[j]]; /* block col. index */
522: rtmp_ptr = rtmp + vj*bs2;
523: for (i=0; i<bs2; i++) *rtmp_ptr++ = *ap++;
524: }
526: /* modify k-th row by adding in those rows i with U(i,k) != 0 */
527: PetscMemcpy(dk,rtmp+k*bs2,bs2*sizeof(MatScalar));
528: i = jl[k]; /* first row to be added to k_th row */
530: while (i < k){
531: nexti = jl[i]; /* next row to be added to k_th row */
533: /* compute multiplier */
534: ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */
536: /* uik = -inv(Di)*U_bar(i,k) */
537: diag = ba + i*bs2;
538: u = ba + ili*bs2;
539: PetscMemzero(uik,bs2*sizeof(MatScalar));
540: Kernel_A_gets_A_minus_B_times_C(bs,uik,diag,u);
541:
542: /* update D(k) += -U(i,k)^T * U_bar(i,k) */
543: Kernel_A_gets_A_plus_Btranspose_times_C(bs,dk,uik,u);
544:
545: /* update -U(i,k) */
546: PetscMemcpy(ba+ili*bs2,uik,bs2*sizeof(MatScalar));
548: /* add multiple of row i to k-th row ... */
549: jmin = ili + 1; jmax = bi[i+1];
550: if (jmin < jmax){
551: for (j=jmin; j<jmax; j++) {
552: /* rtmp += -U(i,k)^T * U_bar(i,j) */
553: rtmp_ptr = rtmp + bj[j]*bs2;
554: u = ba + j*bs2;
555: Kernel_A_gets_A_plus_Btranspose_times_C(bs,rtmp_ptr,uik,u);
556: }
557:
558: /* ... add i to row list for next nonzero entry */
559: il[i] = jmin; /* update il(i) in column k+1, ... mbs-1 */
560: j = bj[jmin];
561: jl[i] = jl[j]; jl[j] = i; /* update jl */
562: }
563: i = nexti;
564: }
566: /* save nonzero entries in k-th row of U ... */
568: /* invert diagonal block */
569: diag = ba+k*bs2;
570: PetscMemcpy(diag,dk,bs2*sizeof(MatScalar));
571: Kernel_A_gets_inverse_A(bs,diag,pivots,work);
572:
573: jmin = bi[k]; jmax = bi[k+1];
574: if (jmin < jmax) {
575: for (j=jmin; j<jmax; j++){
576: vj = bj[j]; /* block col. index of U */
577: u = ba + j*bs2;
578: rtmp_ptr = rtmp + vj*bs2;
579: for (k1=0; k1<bs2; k1++){
580: *u++ = *rtmp_ptr;
581: *rtmp_ptr++ = 0.0;
582: }
583: }
584:
585: /* ... add k to row list for first nonzero entry in k-th row */
586: il[k] = jmin;
587: i = bj[jmin];
588: jl[k] = jl[i]; jl[i] = k;
589: }
590: }
592: PetscFree(rtmp);
593: PetscFree(il);
594: PetscFree(dk);
595: PetscFree(pivots);
596: if (a->permute){
597: PetscFree(aa);
598: }
600: ISRestoreIndices(perm,&perm_ptr);
601: C->factor = FACTOR_CHOLESKY;
602: C->assembled = PETSC_TRUE;
603: C->preallocated = PETSC_TRUE;
604: PetscLogFlops(1.3333*bs*bs2*b->mbs); /* from inverting diagonal blocks */
605: return(0);
606: }
610: PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_N_NaturalOrdering(Mat A,Mat *B)
611: {
612: Mat C = *B;
613: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data,*b = (Mat_SeqSBAIJ *)C->data;
615: PetscInt i,j,mbs=a->mbs,*bi=b->i,*bj=b->j;
616: PetscInt *ai,*aj,k,k1,jmin,jmax,*jl,*il,vj,nexti,ili;
617: PetscInt bs=A->bs,bs2 = a->bs2;
618: MatScalar *ba = b->a,*aa,*ap,*dk,*uik;
619: MatScalar *u,*diag,*rtmp,*rtmp_ptr;
620: MatScalar *work;
621: PetscInt *pivots;
624: /* initialization */
625:
626: PetscMalloc(bs2*mbs*sizeof(MatScalar),&rtmp);
627: PetscMemzero(rtmp,bs2*mbs*sizeof(MatScalar));
628: PetscMalloc(2*mbs*sizeof(PetscInt),&il);
629: jl = il + mbs;
630: for (i=0; i<mbs; i++) {
631: jl[i] = mbs; il[0] = 0;
632: }
633: PetscMalloc((2*bs2+bs)*sizeof(MatScalar),&dk);
634: uik = dk + bs2;
635: work = uik + bs2;
636: PetscMalloc(bs*sizeof(PetscInt),&pivots);
637:
638: ai = a->i; aj = a->j; aa = a->a;
639:
640: /* for each row k */
641: for (k = 0; k<mbs; k++){
643: /*initialize k-th row with elements nonzero in row k of A */
644: jmin = ai[k]; jmax = ai[k+1];
645: ap = aa + jmin*bs2;
646: for (j = jmin; j < jmax; j++){
647: vj = aj[j]; /* block col. index */
648: rtmp_ptr = rtmp + vj*bs2;
649: for (i=0; i<bs2; i++) *rtmp_ptr++ = *ap++;
650: }
652: /* modify k-th row by adding in those rows i with U(i,k) != 0 */
653: PetscMemcpy(dk,rtmp+k*bs2,bs2*sizeof(MatScalar));
654: i = jl[k]; /* first row to be added to k_th row */
656: while (i < k){
657: nexti = jl[i]; /* next row to be added to k_th row */
659: /* compute multiplier */
660: ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */
662: /* uik = -inv(Di)*U_bar(i,k) */
663: diag = ba + i*bs2;
664: u = ba + ili*bs2;
665: PetscMemzero(uik,bs2*sizeof(MatScalar));
666: Kernel_A_gets_A_minus_B_times_C(bs,uik,diag,u);
667:
668: /* update D(k) += -U(i,k)^T * U_bar(i,k) */
669: Kernel_A_gets_A_plus_Btranspose_times_C(bs,dk,uik,u);
670:
671: /* update -U(i,k) */
672: PetscMemcpy(ba+ili*bs2,uik,bs2*sizeof(MatScalar));
674: /* add multiple of row i to k-th row ... */
675: jmin = ili + 1; jmax = bi[i+1];
676: if (jmin < jmax){
677: for (j=jmin; j<jmax; j++) {
678: /* rtmp += -U(i,k)^T * U_bar(i,j) */
679: rtmp_ptr = rtmp + bj[j]*bs2;
680: u = ba + j*bs2;
681: Kernel_A_gets_A_plus_Btranspose_times_C(bs,rtmp_ptr,uik,u);
682: }
683:
684: /* ... add i to row list for next nonzero entry */
685: il[i] = jmin; /* update il(i) in column k+1, ... mbs-1 */
686: j = bj[jmin];
687: jl[i] = jl[j]; jl[j] = i; /* update jl */
688: }
689: i = nexti;
690: }
692: /* save nonzero entries in k-th row of U ... */
694: /* invert diagonal block */
695: diag = ba+k*bs2;
696: PetscMemcpy(diag,dk,bs2*sizeof(MatScalar));
697: Kernel_A_gets_inverse_A(bs,diag,pivots,work);
698:
699: jmin = bi[k]; jmax = bi[k+1];
700: if (jmin < jmax) {
701: for (j=jmin; j<jmax; j++){
702: vj = bj[j]; /* block col. index of U */
703: u = ba + j*bs2;
704: rtmp_ptr = rtmp + vj*bs2;
705: for (k1=0; k1<bs2; k1++){
706: *u++ = *rtmp_ptr;
707: *rtmp_ptr++ = 0.0;
708: }
709: }
710:
711: /* ... add k to row list for first nonzero entry in k-th row */
712: il[k] = jmin;
713: i = bj[jmin];
714: jl[k] = jl[i]; jl[i] = k;
715: }
716: }
718: PetscFree(rtmp);
719: PetscFree(il);
720: PetscFree(dk);
721: PetscFree(pivots);
723: C->factor = FACTOR_CHOLESKY;
724: C->assembled = PETSC_TRUE;
725: C->preallocated = PETSC_TRUE;
726: PetscLogFlops(1.3333*bs*bs2*b->mbs); /* from inverting diagonal blocks */
727: return(0);
728: }
730: /*
731: Numeric U^T*D*U factorization for SBAIJ format. Modified from SNF of YSMP.
732: Version for blocks 2 by 2.
733: */
736: PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_2(Mat A,Mat *B)
737: {
738: Mat C = *B;
739: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data,*b = (Mat_SeqSBAIJ *)C->data;
740: IS perm = b->row;
742: PetscInt *perm_ptr,i,j,mbs=a->mbs,*bi=b->i,*bj=b->j;
743: PetscInt *ai,*aj,*a2anew,k,k1,jmin,jmax,*jl,*il,vj,nexti,ili;
744: MatScalar *ba = b->a,*aa,*ap,*dk,*uik;
745: MatScalar *u,*diag,*rtmp,*rtmp_ptr;
748: /* initialization */
749: /* il and jl record the first nonzero element in each row of the accessing
750: window U(0:k, k:mbs-1).
751: jl: list of rows to be added to uneliminated rows
752: i>= k: jl(i) is the first row to be added to row i
753: i< k: jl(i) is the row following row i in some list of rows
754: jl(i) = mbs indicates the end of a list
755: il(i): points to the first nonzero element in columns k,...,mbs-1 of
756: row i of U */
757: PetscMalloc(4*mbs*sizeof(MatScalar),&rtmp);
758: PetscMemzero(rtmp,4*mbs*sizeof(MatScalar));
759: PetscMalloc(2*mbs*sizeof(PetscInt),&il);
760: jl = il + mbs;
761: for (i=0; i<mbs; i++) {
762: jl[i] = mbs; il[0] = 0;
763: }
764: PetscMalloc(8*sizeof(MatScalar),&dk);
765: uik = dk + 4;
766: ISGetIndices(perm,&perm_ptr);
768: /* check permutation */
769: if (!a->permute){
770: ai = a->i; aj = a->j; aa = a->a;
771: } else {
772: ai = a->inew; aj = a->jnew;
773: PetscMalloc(4*ai[mbs]*sizeof(MatScalar),&aa);
774: PetscMemcpy(aa,a->a,4*ai[mbs]*sizeof(MatScalar));
775: PetscMalloc(ai[mbs]*sizeof(PetscInt),&a2anew);
776: PetscMemcpy(a2anew,a->a2anew,(ai[mbs])*sizeof(PetscInt));
778: for (i=0; i<mbs; i++){
779: jmin = ai[i]; jmax = ai[i+1];
780: for (j=jmin; j<jmax; j++){
781: while (a2anew[j] != j){
782: k = a2anew[j]; a2anew[j] = a2anew[k]; a2anew[k] = k;
783: for (k1=0; k1<4; k1++){
784: dk[k1] = aa[k*4+k1];
785: aa[k*4+k1] = aa[j*4+k1];
786: aa[j*4+k1] = dk[k1];
787: }
788: }
789: /* transform columnoriented blocks that lie in the lower triangle to roworiented blocks */
790: if (i > aj[j]){
791: /* printf("change orientation, row: %d, col: %d\n",i,aj[j]); */
792: ap = aa + j*4; /* ptr to the beginning of the block */
793: dk[1] = ap[1]; /* swap ap[1] and ap[2] */
794: ap[1] = ap[2];
795: ap[2] = dk[1];
796: }
797: }
798: }
799: PetscFree(a2anew);
800: }
802: /* for each row k */
803: for (k = 0; k<mbs; k++){
805: /*initialize k-th row with elements nonzero in row perm(k) of A */
806: jmin = ai[perm_ptr[k]]; jmax = ai[perm_ptr[k]+1];
807: ap = aa + jmin*4;
808: for (j = jmin; j < jmax; j++){
809: vj = perm_ptr[aj[j]]; /* block col. index */
810: rtmp_ptr = rtmp + vj*4;
811: for (i=0; i<4; i++) *rtmp_ptr++ = *ap++;
812: }
814: /* modify k-th row by adding in those rows i with U(i,k) != 0 */
815: PetscMemcpy(dk,rtmp+k*4,4*sizeof(MatScalar));
816: i = jl[k]; /* first row to be added to k_th row */
818: while (i < k){
819: nexti = jl[i]; /* next row to be added to k_th row */
821: /* compute multiplier */
822: ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */
824: /* uik = -inv(Di)*U_bar(i,k): - ba[ili]*ba[i] */
825: diag = ba + i*4;
826: u = ba + ili*4;
827: uik[0] = -(diag[0]*u[0] + diag[2]*u[1]);
828: uik[1] = -(diag[1]*u[0] + diag[3]*u[1]);
829: uik[2] = -(diag[0]*u[2] + diag[2]*u[3]);
830: uik[3] = -(diag[1]*u[2] + diag[3]*u[3]);
831:
832: /* update D(k) += -U(i,k)^T * U_bar(i,k): dk += uik*ba[ili] */
833: dk[0] += uik[0]*u[0] + uik[1]*u[1];
834: dk[1] += uik[2]*u[0] + uik[3]*u[1];
835: dk[2] += uik[0]*u[2] + uik[1]*u[3];
836: dk[3] += uik[2]*u[2] + uik[3]*u[3];
838: /* update -U(i,k): ba[ili] = uik */
839: PetscMemcpy(ba+ili*4,uik,4*sizeof(MatScalar));
841: /* add multiple of row i to k-th row ... */
842: jmin = ili + 1; jmax = bi[i+1];
843: if (jmin < jmax){
844: for (j=jmin; j<jmax; j++) {
845: /* rtmp += -U(i,k)^T * U_bar(i,j): rtmp[bj[j]] += uik*ba[j]; */
846: rtmp_ptr = rtmp + bj[j]*4;
847: u = ba + j*4;
848: rtmp_ptr[0] += uik[0]*u[0] + uik[1]*u[1];
849: rtmp_ptr[1] += uik[2]*u[0] + uik[3]*u[1];
850: rtmp_ptr[2] += uik[0]*u[2] + uik[1]*u[3];
851: rtmp_ptr[3] += uik[2]*u[2] + uik[3]*u[3];
852: }
853:
854: /* ... add i to row list for next nonzero entry */
855: il[i] = jmin; /* update il(i) in column k+1, ... mbs-1 */
856: j = bj[jmin];
857: jl[i] = jl[j]; jl[j] = i; /* update jl */
858: }
859: i = nexti;
860: }
862: /* save nonzero entries in k-th row of U ... */
864: /* invert diagonal block */
865: diag = ba+k*4;
866: PetscMemcpy(diag,dk,4*sizeof(MatScalar));
867: Kernel_A_gets_inverse_A_2(diag);
868:
869: jmin = bi[k]; jmax = bi[k+1];
870: if (jmin < jmax) {
871: for (j=jmin; j<jmax; j++){
872: vj = bj[j]; /* block col. index of U */
873: u = ba + j*4;
874: rtmp_ptr = rtmp + vj*4;
875: for (k1=0; k1<4; k1++){
876: *u++ = *rtmp_ptr;
877: *rtmp_ptr++ = 0.0;
878: }
879: }
880:
881: /* ... add k to row list for first nonzero entry in k-th row */
882: il[k] = jmin;
883: i = bj[jmin];
884: jl[k] = jl[i]; jl[i] = k;
885: }
886: }
888: PetscFree(rtmp);
889: PetscFree(il);
890: PetscFree(dk);
891: if (a->permute) {
892: PetscFree(aa);
893: }
894: ISRestoreIndices(perm,&perm_ptr);
895: C->factor = FACTOR_CHOLESKY;
896: C->assembled = PETSC_TRUE;
897: C->preallocated = PETSC_TRUE;
898: PetscLogFlops(1.3333*8*b->mbs); /* from inverting diagonal blocks */
899: return(0);
900: }
902: /*
903: Version for when blocks are 2 by 2 Using natural ordering
904: */
907: PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering(Mat A,Mat *B)
908: {
909: Mat C = *B;
910: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data,*b = (Mat_SeqSBAIJ *)C->data;
912: PetscInt i,j,mbs=a->mbs,*bi=b->i,*bj=b->j;
913: PetscInt *ai,*aj,k,k1,jmin,jmax,*jl,*il,vj,nexti,ili;
914: MatScalar *ba = b->a,*aa,*ap,*dk,*uik;
915: MatScalar *u,*diag,*rtmp,*rtmp_ptr;
918: /* initialization */
919: /* il and jl record the first nonzero element in each row of the accessing
920: window U(0:k, k:mbs-1).
921: jl: list of rows to be added to uneliminated rows
922: i>= k: jl(i) is the first row to be added to row i
923: i< k: jl(i) is the row following row i in some list of rows
924: jl(i) = mbs indicates the end of a list
925: il(i): points to the first nonzero element in columns k,...,mbs-1 of
926: row i of U */
927: PetscMalloc(4*mbs*sizeof(MatScalar),&rtmp);
928: PetscMemzero(rtmp,4*mbs*sizeof(MatScalar));
929: PetscMalloc(2*mbs*sizeof(PetscInt),&il);
930: jl = il + mbs;
931: for (i=0; i<mbs; i++) {
932: jl[i] = mbs; il[0] = 0;
933: }
934: PetscMalloc(8*sizeof(MatScalar),&dk);
935: uik = dk + 4;
936:
937: ai = a->i; aj = a->j; aa = a->a;
939: /* for each row k */
940: for (k = 0; k<mbs; k++){
942: /*initialize k-th row with elements nonzero in row k of A */
943: jmin = ai[k]; jmax = ai[k+1];
944: ap = aa + jmin*4;
945: for (j = jmin; j < jmax; j++){
946: vj = aj[j]; /* block col. index */
947: rtmp_ptr = rtmp + vj*4;
948: for (i=0; i<4; i++) *rtmp_ptr++ = *ap++;
949: }
950:
951: /* modify k-th row by adding in those rows i with U(i,k) != 0 */
952: PetscMemcpy(dk,rtmp+k*4,4*sizeof(MatScalar));
953: i = jl[k]; /* first row to be added to k_th row */
955: while (i < k){
956: nexti = jl[i]; /* next row to be added to k_th row */
958: /* compute multiplier */
959: ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */
961: /* uik = -inv(Di)*U_bar(i,k): - ba[ili]*ba[i] */
962: diag = ba + i*4;
963: u = ba + ili*4;
964: uik[0] = -(diag[0]*u[0] + diag[2]*u[1]);
965: uik[1] = -(diag[1]*u[0] + diag[3]*u[1]);
966: uik[2] = -(diag[0]*u[2] + diag[2]*u[3]);
967: uik[3] = -(diag[1]*u[2] + diag[3]*u[3]);
968:
969: /* update D(k) += -U(i,k)^T * U_bar(i,k): dk += uik*ba[ili] */
970: dk[0] += uik[0]*u[0] + uik[1]*u[1];
971: dk[1] += uik[2]*u[0] + uik[3]*u[1];
972: dk[2] += uik[0]*u[2] + uik[1]*u[3];
973: dk[3] += uik[2]*u[2] + uik[3]*u[3];
975: /* update -U(i,k): ba[ili] = uik */
976: PetscMemcpy(ba+ili*4,uik,4*sizeof(MatScalar));
978: /* add multiple of row i to k-th row ... */
979: jmin = ili + 1; jmax = bi[i+1];
980: if (jmin < jmax){
981: for (j=jmin; j<jmax; j++) {
982: /* rtmp += -U(i,k)^T * U_bar(i,j): rtmp[bj[j]] += uik*ba[j]; */
983: rtmp_ptr = rtmp + bj[j]*4;
984: u = ba + j*4;
985: rtmp_ptr[0] += uik[0]*u[0] + uik[1]*u[1];
986: rtmp_ptr[1] += uik[2]*u[0] + uik[3]*u[1];
987: rtmp_ptr[2] += uik[0]*u[2] + uik[1]*u[3];
988: rtmp_ptr[3] += uik[2]*u[2] + uik[3]*u[3];
989: }
990:
991: /* ... add i to row list for next nonzero entry */
992: il[i] = jmin; /* update il(i) in column k+1, ... mbs-1 */
993: j = bj[jmin];
994: jl[i] = jl[j]; jl[j] = i; /* update jl */
995: }
996: i = nexti;
997: }
999: /* save nonzero entries in k-th row of U ... */
1001: /* invert diagonal block */
1002: diag = ba+k*4;
1003: PetscMemcpy(diag,dk,4*sizeof(MatScalar));
1004: Kernel_A_gets_inverse_A_2(diag);
1005:
1006: jmin = bi[k]; jmax = bi[k+1];
1007: if (jmin < jmax) {
1008: for (j=jmin; j<jmax; j++){
1009: vj = bj[j]; /* block col. index of U */
1010: u = ba + j*4;
1011: rtmp_ptr = rtmp + vj*4;
1012: for (k1=0; k1<4; k1++){
1013: *u++ = *rtmp_ptr;
1014: *rtmp_ptr++ = 0.0;
1015: }
1016: }
1017:
1018: /* ... add k to row list for first nonzero entry in k-th row */
1019: il[k] = jmin;
1020: i = bj[jmin];
1021: jl[k] = jl[i]; jl[i] = k;
1022: }
1023: }
1025: PetscFree(rtmp);
1026: PetscFree(il);
1027: PetscFree(dk);
1029: C->factor = FACTOR_CHOLESKY;
1030: C->assembled = PETSC_TRUE;
1031: C->preallocated = PETSC_TRUE;
1032: PetscLogFlops(1.3333*8*b->mbs); /* from inverting diagonal blocks */
1033: return(0);
1034: }
1036: /*
1037: Numeric U^T*D*U factorization for SBAIJ format. Modified from SNF of YSMP.
1038: Version for blocks are 1 by 1.
1039: */
1042: PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_1(Mat A,Mat *B)
1043: {
1044: Mat C = *B;
1045: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data,*b = (Mat_SeqSBAIJ *)C->data;
1046: IS ip = b->row;
1048: PetscInt *rip,i,j,mbs = a->mbs,*bi = b->i,*bj = b->j;
1049: PetscInt *ai,*aj,*r;
1050: PetscInt k,jmin,jmax,*jl,*il,vj,nexti,ili;
1051: MatScalar *rtmp;
1052: MatScalar *ba = b->a,*aa,ak;
1053: MatScalar dk,uikdi;
1056: ISGetIndices(ip,&rip);
1057: if (!a->permute){
1058: ai = a->i; aj = a->j; aa = a->a;
1059: } else {
1060: ai = a->inew; aj = a->jnew;
1061: PetscMalloc(ai[mbs]*sizeof(MatScalar),&aa);
1062: PetscMemcpy(aa,a->a,ai[mbs]*sizeof(MatScalar));
1063: PetscMalloc(ai[mbs]*sizeof(PetscInt),&r);
1064: ierr= PetscMemcpy(r,a->a2anew,(ai[mbs])*sizeof(PetscInt));
1066: jmin = ai[0]; jmax = ai[mbs];
1067: for (j=jmin; j<jmax; j++){
1068: while (r[j] != j){
1069: k = r[j]; r[j] = r[k]; r[k] = k;
1070: ak = aa[k]; aa[k] = aa[j]; aa[j] = ak;
1071: }
1072: }
1073: PetscFree(r);
1074: }
1075:
1076: /* initialization */
1077: /* il and jl record the first nonzero element in each row of the accessing
1078: window U(0:k, k:mbs-1).
1079: jl: list of rows to be added to uneliminated rows
1080: i>= k: jl(i) is the first row to be added to row i
1081: i< k: jl(i) is the row following row i in some list of rows
1082: jl(i) = mbs indicates the end of a list
1083: il(i): points to the first nonzero element in columns k,...,mbs-1 of
1084: row i of U */
1085: PetscMalloc(mbs*sizeof(MatScalar),&rtmp);
1086: PetscMalloc(2*mbs*sizeof(PetscInt),&il);
1087: jl = il + mbs;
1088: for (i=0; i<mbs; i++) {
1089: rtmp[i] = 0.0; jl[i] = mbs; il[0] = 0;
1090: }
1092: /* for each row k */
1093: for (k = 0; k<mbs; k++){
1095: /*initialize k-th row with elements nonzero in row perm(k) of A */
1096: jmin = ai[rip[k]]; jmax = ai[rip[k]+1];
1097:
1098: for (j = jmin; j < jmax; j++){
1099: vj = rip[aj[j]];
1100: rtmp[vj] = aa[j];
1101: }
1103: /* modify k-th row by adding in those rows i with U(i,k) != 0 */
1104: dk = rtmp[k];
1105: i = jl[k]; /* first row to be added to k_th row */
1107: while (i < k){
1108: nexti = jl[i]; /* next row to be added to k_th row */
1110: /* compute multiplier, update D(k) and U(i,k) */
1111: ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */
1112: uikdi = - ba[ili]*ba[i];
1113: dk += uikdi*ba[ili];
1114: ba[ili] = uikdi; /* -U(i,k) */
1116: /* add multiple of row i to k-th row ... */
1117: jmin = ili + 1; jmax = bi[i+1];
1118: if (jmin < jmax){
1119: for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j];
1120: /* ... add i to row list for next nonzero entry */
1121: il[i] = jmin; /* update il(i) in column k+1, ... mbs-1 */
1122: j = bj[jmin];
1123: jl[i] = jl[j]; jl[j] = i; /* update jl */
1124: }
1125: i = nexti;
1126: }
1128: /* check for zero pivot and save diagoanl element */
1129: if (dk == 0.0){
1130: SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot");
1131: /*
1132: } else if (PetscRealPart(dk) < 0.0){
1133: SETERRQ2(PETSC_ERR_MAT_LU_ZRPVT,"Negative pivot: d[%d] = %g\n",k,dk);
1134: */
1135: }
1137: /* save nonzero entries in k-th row of U ... */
1138: ba[k] = 1.0/dk;
1139: jmin = bi[k]; jmax = bi[k+1];
1140: if (jmin < jmax) {
1141: for (j=jmin; j<jmax; j++){
1142: vj = bj[j]; ba[j] = rtmp[vj]; rtmp[vj] = 0.0;
1143: }
1144: /* ... add k to row list for first nonzero entry in k-th row */
1145: il[k] = jmin;
1146: i = bj[jmin];
1147: jl[k] = jl[i]; jl[i] = k;
1148: }
1149: }
1150:
1151: PetscFree(rtmp);
1152: PetscFree(il);
1153: if (a->permute){
1154: PetscFree(aa);
1155: }
1157: ISRestoreIndices(ip,&rip);
1158: C->factor = FACTOR_CHOLESKY;
1159: C->assembled = PETSC_TRUE;
1160: C->preallocated = PETSC_TRUE;
1161: PetscLogFlops(b->mbs);
1162: return(0);
1163: }
1165: /*
1166: Version for when blocks are 1 by 1 Using natural ordering
1167: */
1170: PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering(Mat A,Mat *B)
1171: {
1172: Mat C = *B;
1173: Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data,*b=(Mat_SeqSBAIJ *)C->data;
1175: PetscInt i,j,mbs = a->mbs;
1176: PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
1177: PetscInt k,jmin,*jl,*il,nexti,ili,*acol,*bcol,nz,ndamp = 0;
1178: MatScalar *rtmp,*ba=b->a,*aa=a->a,dk,uikdi,*aval,*bval;
1179: PetscReal damping=b->factor_damping, zeropivot=b->factor_zeropivot,shift_amount;
1180: PetscTruth damp,chshift;
1181: PetscInt nshift=0;
1184: /* initialization */
1185: /* il and jl record the first nonzero element in each row of the accessing
1186: window U(0:k, k:mbs-1).
1187: jl: list of rows to be added to uneliminated rows
1188: i>= k: jl(i) is the first row to be added to row i
1189: i< k: jl(i) is the row following row i in some list of rows
1190: jl(i) = mbs indicates the end of a list
1191: il(i): points to the first nonzero element in U(i,k:mbs-1)
1192: */
1193: PetscMalloc(mbs*sizeof(MatScalar),&rtmp);
1194: PetscMalloc(2*mbs*sizeof(PetscInt),&il);
1195: jl = il + mbs;
1197: shift_amount = 0;
1198: do {
1199: damp = PETSC_FALSE;
1200: chshift = PETSC_FALSE;
1201: for (i=0; i<mbs; i++) {
1202: rtmp[i] = 0.0; jl[i] = mbs; il[0] = 0;
1203: }
1205: for (k = 0; k<mbs; k++){ /* row k */
1206: /*initialize k-th row with elements nonzero in row perm(k) of A */
1207: nz = ai[k+1] - ai[k];
1208: acol = aj + ai[k];
1209: aval = aa + ai[k];
1210: bval = ba + bi[k];
1211: while (nz -- ){
1212: rtmp[*acol++] = *aval++;
1213: *bval++ = 0.0; /* for in-place factorization */
1214: }
1215: /* damp the diagonal of the matrix */
1216: if (ndamp||nshift) rtmp[k] += damping+shift_amount;
1217:
1218: /* modify k-th row by adding in those rows i with U(i,k) != 0 */
1219: dk = rtmp[k];
1220: i = jl[k]; /* first row to be added to k_th row */
1222: while (i < k){
1223: nexti = jl[i]; /* next row to be added to k_th row */
1224:
1225: /* compute multiplier, update D(k) and U(i,k) */
1226: ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */
1227: uikdi = - ba[ili]*ba[bi[i]];
1228: dk += uikdi*ba[ili];
1229: ba[ili] = uikdi; /* -U(i,k) */
1231: /* add multiple of row i to k-th row ... */
1232: jmin = ili + 1;
1233: nz = bi[i+1] - jmin;
1234: if (nz > 0){
1235: bcol = bj + jmin;
1236: bval = ba + jmin;
1237: while (nz --) rtmp[*bcol++] += uikdi*(*bval++);
1238: /* ... add i to row list for next nonzero entry */
1239: il[i] = jmin; /* update il(i) in column k+1, ... mbs-1 */
1240: j = bj[jmin];
1241: jl[i] = jl[j]; jl[j] = i; /* update jl */
1242: }
1243: i = nexti;
1244: }
1246: if (PetscRealPart(dk) < zeropivot && b->factor_shift){
1247: /* calculate a shift that would make this row diagonally dominant */
1248: PetscReal rs = PetscAbs(PetscRealPart(dk));
1249: jmin = bi[k]+1;
1250: nz = bi[k+1] - jmin;
1251: if (nz){
1252: bcol = bj + jmin;
1253: bval = ba + jmin;
1254: while (nz--){
1255: rs += PetscAbsScalar(rtmp[*bcol++]);
1256: }
1257: }
1258: /* if this shift is less than the previous, just up the previous
1259: one by a bit */
1260: shift_amount = PetscMax(rs,1.1*shift_amount);
1261: chshift = PETSC_TRUE;
1262: /* Unlike in the ILU case there is no exit condition on nshift:
1263: we increase the shift until it converges. There is no guarantee that
1264: this algorithm converges faster or slower, or is better or worse
1265: than the ILU algorithm. */
1266: nshift++;
1267: break;
1268: }
1269: if (PetscRealPart(dk) < zeropivot){
1270: if (damping == (PetscReal) PETSC_DECIDE) damping = -PetscRealPart(dk)/(k+1);
1271: if (damping > 0.0) {
1272: if (ndamp) damping *= 2.0;
1273: damp = PETSC_TRUE;
1274: ndamp++;
1275: break;
1276: } else if (PetscAbsScalar(dk) < zeropivot){
1277: SETERRQ3(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot row %D value %g tolerance %g",k,PetscRealPart(dk),zeropivot);
1278: } else {
1279: PetscLogInfo((PetscObject)A,"Negative pivot %g in row %D of Cholesky factorization\n",PetscRealPart(dk),k);
1280: }
1281: }
1282:
1283: /* save nonzero entries in k-th row of U ... */
1284: /* printf("%D, dk: %g, 1/dk: %g\n",k,dk,1/dk); */
1285: ba[bi[k]] = 1.0/dk;
1286: jmin = bi[k]+1;
1287: nz = bi[k+1] - jmin;
1288: if (nz){
1289: bcol = bj + jmin;
1290: bval = ba + jmin;
1291: while (nz--){
1292: *bval++ = rtmp[*bcol];
1293: rtmp[*bcol++] = 0.0;
1294: }
1295: /* ... add k to row list for first nonzero entry in k-th row */
1296: il[k] = jmin;
1297: i = bj[jmin];
1298: jl[k] = jl[i]; jl[i] = k;
1299: }
1300: } /* end of for (k = 0; k<mbs; k++) */
1301: } while (damp||chshift);
1302: PetscFree(rtmp);
1303: PetscFree(il);
1304:
1305: C->factor = FACTOR_CHOLESKY;
1306: C->assembled = PETSC_TRUE;
1307: C->preallocated = PETSC_TRUE;
1308: PetscLogFlops(b->mbs);
1309: if (ndamp) {
1310: PetscLogInfo(0,"MatCholeskyFactorNumerical_SeqSBAIJ_1_NaturalOrdering: number of damping tries %D damping value %g\n",ndamp,damping);
1311: }
1312: if (nshift) {
1313: PetscLogInfo(0,"MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering diagonal shifted %D shifts\n",nshift);
1314: }
1315:
1316: return(0);
1317: }
1321: PetscErrorCode MatCholeskyFactor_SeqSBAIJ(Mat A,IS perm,MatFactorInfo *info)
1322: {
1324: Mat C;
1327: MatCholeskyFactorSymbolic(A,perm,info,&C);
1328: MatCholeskyFactorNumeric(A,&C);
1329: MatHeaderCopy(A,C);
1330: return(0);
1331: }