Actual source code: sbaijfact.c
1: #include <../src/mat/impls/baij/seq/baij.h>
2: #include <../src/mat/impls/sbaij/seq/sbaij.h>
3: #include <petsc/private/kernels/blockinvert.h>
4: #include <petscis.h>
6: PetscErrorCode MatGetInertia_SeqSBAIJ(Mat F, PetscInt *nneg, PetscInt *nzero, PetscInt *npos)
7: {
8: Mat_SeqSBAIJ *fact = (Mat_SeqSBAIJ *)F->data;
9: MatScalar *dd = fact->a;
10: PetscInt mbs = fact->mbs, bs = F->rmap->bs, i, nneg_tmp, npos_tmp, *fi = fact->diag;
12: PetscFunctionBegin;
13: PetscCheck(bs == 1, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for bs: %" PetscInt_FMT " >1 yet", bs);
15: nneg_tmp = 0;
16: npos_tmp = 0;
17: if (fi) {
18: for (i = 0; i < mbs; i++) {
19: if (PetscRealPart(dd[*fi]) > 0.0) npos_tmp++;
20: else if (PetscRealPart(dd[*fi]) < 0.0) nneg_tmp++;
21: fi++;
22: }
23: } else {
24: for (i = 0; i < mbs; i++) {
25: if (PetscRealPart(dd[fact->i[i]]) > 0.0) npos_tmp++;
26: else if (PetscRealPart(dd[fact->i[i]]) < 0.0) nneg_tmp++;
27: }
28: }
29: if (nneg) *nneg = nneg_tmp;
30: if (npos) *npos = npos_tmp;
31: if (nzero) *nzero = mbs - nneg_tmp - npos_tmp;
32: PetscFunctionReturn(PETSC_SUCCESS);
33: }
35: /*
36: Symbolic U^T*D*U factorization for SBAIJ format. Modified from SSF of YSMP.
37: Use Modified Sparse Row (MSR) storage for u and ju. See page 85, "Iterative Methods ..." by Saad.
38: */
39: static PetscErrorCode MatCholeskyFactorSymbolic_SeqSBAIJ_MSR(Mat F, Mat A, IS perm, const MatFactorInfo *info)
40: {
41: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data, *b;
42: const PetscInt *rip, *ai, *aj;
43: PetscInt i, mbs = a->mbs, *jutmp, bs = A->rmap->bs, bs2 = a->bs2;
44: PetscInt m, reallocs = 0, prow;
45: PetscInt *jl, *q, jmin, jmax, juidx, nzk, qm, *iu, *ju, k, j, vj, umax, maxadd;
46: PetscReal f = info->fill;
47: PetscBool perm_identity;
49: PetscFunctionBegin;
50: /* check whether perm is the identity mapping */
51: PetscCall(ISIdentity(perm, &perm_identity));
52: PetscCall(ISGetIndices(perm, &rip));
54: if (perm_identity) { /* without permutation */
55: a->permute = PETSC_FALSE;
57: ai = a->i;
58: aj = a->j;
59: } else { /* non-trivial permutation */
60: a->permute = PETSC_TRUE;
62: PetscCall(MatReorderingSeqSBAIJ(A, perm));
64: ai = a->inew;
65: aj = a->jnew;
66: }
68: /* initialization */
69: PetscCall(PetscMalloc1(mbs + 1, &iu));
70: umax = (PetscInt)(f * ai[mbs] + 1);
71: umax += mbs + 1;
72: PetscCall(PetscMalloc1(umax, &ju));
73: iu[0] = mbs + 1;
74: juidx = mbs + 1; /* index for ju */
75: /* jl linked list for pivot row -- linked list for col index */
76: PetscCall(PetscMalloc2(mbs, &jl, mbs, &q));
77: for (i = 0; i < mbs; i++) {
78: jl[i] = mbs;
79: q[i] = 0;
80: }
82: /* for each row k */
83: for (k = 0; k < mbs; k++) {
84: for (i = 0; i < mbs; i++) q[i] = 0; /* to be removed! */
85: nzk = 0; /* num. of nz blocks in k-th block row with diagonal block excluded */
86: q[k] = mbs;
87: /* initialize nonzero structure of k-th row to row rip[k] of A */
88: jmin = ai[rip[k]] + 1; /* exclude diag[k] */
89: jmax = ai[rip[k] + 1];
90: for (j = jmin; j < jmax; j++) {
91: vj = rip[aj[j]]; /* col. value */
92: if (vj > k) {
93: qm = k;
94: do {
95: m = qm;
96: qm = q[m];
97: } while (qm < vj);
98: PetscCheck(qm != vj, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Duplicate entry in A");
99: nzk++;
100: q[m] = vj;
101: q[vj] = qm;
102: } /* if (vj > k) */
103: } /* for (j=jmin; j<jmax; j++) */
105: /* modify nonzero structure of k-th row by computing fill-in
106: for each row i to be merged in */
107: prow = k;
108: prow = jl[prow]; /* next pivot row (== mbs for symbolic factorization) */
110: while (prow < k) {
111: /* merge row prow into k-th row */
112: jmin = iu[prow] + 1;
113: jmax = iu[prow + 1];
114: qm = k;
115: for (j = jmin; j < jmax; j++) {
116: vj = ju[j];
117: do {
118: m = qm;
119: qm = q[m];
120: } while (qm < vj);
121: if (qm != vj) {
122: nzk++;
123: q[m] = vj;
124: q[vj] = qm;
125: qm = vj;
126: }
127: }
128: prow = jl[prow]; /* next pivot row */
129: }
131: /* add k to row list for first nonzero element in k-th row */
132: if (nzk > 0) {
133: i = q[k]; /* col value of first nonzero element in U(k, k+1:mbs-1) */
134: jl[k] = jl[i];
135: jl[i] = k;
136: }
137: iu[k + 1] = iu[k] + nzk;
139: /* allocate more space to ju if needed */
140: if (iu[k + 1] > umax) {
141: /* estimate how much additional space we will need */
142: /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */
143: /* just double the memory each time */
144: maxadd = umax;
145: if (maxadd < nzk) maxadd = (mbs - k) * (nzk + 1) / 2;
146: umax += maxadd;
148: /* allocate a longer ju */
149: PetscCall(PetscMalloc1(umax, &jutmp));
150: PetscCall(PetscArraycpy(jutmp, ju, iu[k]));
151: PetscCall(PetscFree(ju));
152: ju = jutmp;
153: reallocs++; /* count how many times we realloc */
154: }
156: /* save nonzero structure of k-th row in ju */
157: i = k;
158: while (nzk--) {
159: i = q[i];
160: ju[juidx++] = i;
161: }
162: }
164: #if defined(PETSC_USE_INFO)
165: if (ai[mbs] != 0) {
166: PetscReal af = ((PetscReal)iu[mbs]) / ((PetscReal)ai[mbs]);
167: PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)f, (double)af));
168: PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af));
169: PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g);\n", (double)af));
170: PetscCall(PetscInfo(A, "for best performance.\n"));
171: } else {
172: PetscCall(PetscInfo(A, "Empty matrix\n"));
173: }
174: #endif
176: PetscCall(ISRestoreIndices(perm, &rip));
177: PetscCall(PetscFree2(jl, q));
179: /* put together the new matrix */
180: PetscCall(MatSeqSBAIJSetPreallocation(F, bs, MAT_SKIP_ALLOCATION, NULL));
182: b = (Mat_SeqSBAIJ *)(F)->data;
183: b->singlemalloc = PETSC_FALSE;
184: b->free_a = PETSC_TRUE;
185: b->free_ij = PETSC_TRUE;
187: PetscCall(PetscMalloc1((iu[mbs] + 1) * bs2, &b->a));
188: b->j = ju;
189: b->i = iu;
190: b->diag = NULL;
191: b->ilen = NULL;
192: b->imax = NULL;
193: b->row = perm;
195: b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
197: PetscCall(PetscObjectReference((PetscObject)perm));
199: b->icol = perm;
200: PetscCall(PetscObjectReference((PetscObject)perm));
201: PetscCall(PetscMalloc1(bs * mbs + bs, &b->solve_work));
202: /* In b structure: Free imax, ilen, old a, old j.
203: Allocate idnew, solve_work, new a, new j */
204: b->maxnz = b->nz = iu[mbs];
206: (F)->info.factor_mallocs = reallocs;
207: (F)->info.fill_ratio_given = f;
208: if (ai[mbs] != 0) {
209: (F)->info.fill_ratio_needed = ((PetscReal)iu[mbs]) / ((PetscReal)ai[mbs]);
210: } else {
211: (F)->info.fill_ratio_needed = 0.0;
212: }
213: PetscCall(MatSeqSBAIJSetNumericFactorization_inplace(F, perm_identity));
214: PetscFunctionReturn(PETSC_SUCCESS);
215: }
216: /*
217: Symbolic U^T*D*U factorization for SBAIJ format.
218: See MatICCFactorSymbolic_SeqAIJ() for description of its data structure.
219: */
220: #include <petscbt.h>
221: #include <../src/mat/utils/freespace.h>
222: PetscErrorCode MatCholeskyFactorSymbolic_SeqSBAIJ(Mat fact, Mat A, IS perm, const MatFactorInfo *info)
223: {
224: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data;
225: Mat_SeqSBAIJ *b;
226: PetscBool perm_identity, missing;
227: PetscReal fill = info->fill;
228: const PetscInt *rip, *ai = a->i, *aj = a->j;
229: PetscInt i, mbs = a->mbs, bs = A->rmap->bs, reallocs = 0, prow;
230: PetscInt *jl, jmin, jmax, nzk, *ui, k, j, *il, nextprow;
231: PetscInt nlnk, *lnk, ncols, *cols, *uj, **ui_ptr, *uj_ptr, *udiag;
232: PetscFreeSpaceList free_space = NULL, current_space = NULL;
233: PetscBT lnkbt;
235: PetscFunctionBegin;
236: PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT, A->rmap->n, A->cmap->n);
237: PetscCall(MatMissingDiagonal(A, &missing, &i));
238: PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i);
239: if (bs > 1) {
240: PetscCall(MatCholeskyFactorSymbolic_SeqSBAIJ_inplace(fact, A, perm, info));
241: PetscFunctionReturn(PETSC_SUCCESS);
242: }
244: /* check whether perm is the identity mapping */
245: PetscCall(ISIdentity(perm, &perm_identity));
246: PetscCheck(perm_identity, PETSC_COMM_SELF, PETSC_ERR_SUP, "Matrix reordering is not supported for sbaij matrix. Use aij format");
247: a->permute = PETSC_FALSE;
248: PetscCall(ISGetIndices(perm, &rip));
250: /* initialization */
251: PetscCall(PetscMalloc1(mbs + 1, &ui));
252: PetscCall(PetscMalloc1(mbs + 1, &udiag));
253: ui[0] = 0;
255: /* jl: linked list for storing indices of the pivot rows
256: il: il[i] points to the 1st nonzero entry of U(i,k:mbs-1) */
257: PetscCall(PetscMalloc4(mbs, &ui_ptr, mbs, &il, mbs, &jl, mbs, &cols));
258: for (i = 0; i < mbs; i++) {
259: jl[i] = mbs;
260: il[i] = 0;
261: }
263: /* create and initialize a linked list for storing column indices of the active row k */
264: nlnk = mbs + 1;
265: PetscCall(PetscLLCreate(mbs, mbs, nlnk, lnk, lnkbt));
267: /* initial FreeSpace size is fill*(ai[mbs]+1) */
268: PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, ai[mbs] + 1), &free_space));
269: current_space = free_space;
271: for (k = 0; k < mbs; k++) { /* for each active row k */
272: /* initialize lnk by the column indices of row rip[k] of A */
273: nzk = 0;
274: ncols = ai[k + 1] - ai[k];
275: PetscCheck(ncols, PETSC_COMM_SELF, PETSC_ERR_MAT_CH_ZRPVT, "Empty row %" PetscInt_FMT " in matrix ", k);
276: for (j = 0; j < ncols; j++) {
277: i = *(aj + ai[k] + j);
278: cols[j] = i;
279: }
280: PetscCall(PetscLLAdd(ncols, cols, mbs, &nlnk, lnk, lnkbt));
281: nzk += nlnk;
283: /* update lnk by computing fill-in for each pivot row to be merged in */
284: prow = jl[k]; /* 1st pivot row */
286: while (prow < k) {
287: nextprow = jl[prow];
288: /* merge prow into k-th row */
289: jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:mbs-1) */
290: jmax = ui[prow + 1];
291: ncols = jmax - jmin;
292: uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:mbs-1) */
293: PetscCall(PetscLLAddSorted(ncols, uj_ptr, mbs, &nlnk, lnk, lnkbt));
294: nzk += nlnk;
296: /* update il and jl for prow */
297: if (jmin < jmax) {
298: il[prow] = jmin;
299: j = *uj_ptr;
300: jl[prow] = jl[j];
301: jl[j] = prow;
302: }
303: prow = nextprow;
304: }
306: /* if free space is not available, make more free space */
307: if (current_space->local_remaining < nzk) {
308: i = mbs - k + 1; /* num of unfactored rows */
309: i = PetscIntMultTruncate(i, PetscMin(nzk, i - 1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
310: PetscCall(PetscFreeSpaceGet(i, ¤t_space));
311: reallocs++;
312: }
314: /* copy data into free space, then initialize lnk */
315: PetscCall(PetscLLClean(mbs, mbs, nzk, lnk, current_space->array, lnkbt));
317: /* add the k-th row into il and jl */
318: if (nzk > 1) {
319: i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:mbs-1) */
320: jl[k] = jl[i];
321: jl[i] = k;
322: il[k] = ui[k] + 1;
323: }
324: ui_ptr[k] = current_space->array;
326: current_space->array += nzk;
327: current_space->local_used += nzk;
328: current_space->local_remaining -= nzk;
330: ui[k + 1] = ui[k] + nzk;
331: }
333: PetscCall(ISRestoreIndices(perm, &rip));
334: PetscCall(PetscFree4(ui_ptr, il, jl, cols));
336: /* destroy list of free space and other temporary array(s) */
337: PetscCall(PetscMalloc1(ui[mbs] + 1, &uj));
338: PetscCall(PetscFreeSpaceContiguous_Cholesky(&free_space, uj, mbs, ui, udiag)); /* store matrix factor */
339: PetscCall(PetscLLDestroy(lnk, lnkbt));
341: /* put together the new matrix in MATSEQSBAIJ format */
342: PetscCall(MatSeqSBAIJSetPreallocation(fact, bs, MAT_SKIP_ALLOCATION, NULL));
344: b = (Mat_SeqSBAIJ *)fact->data;
345: b->singlemalloc = PETSC_FALSE;
346: b->free_a = PETSC_TRUE;
347: b->free_ij = PETSC_TRUE;
349: PetscCall(PetscMalloc1(ui[mbs] + 1, &b->a));
351: b->j = uj;
352: b->i = ui;
353: b->diag = udiag;
354: b->free_diag = PETSC_TRUE;
355: b->ilen = NULL;
356: b->imax = NULL;
357: b->row = perm;
358: b->icol = perm;
360: PetscCall(PetscObjectReference((PetscObject)perm));
361: PetscCall(PetscObjectReference((PetscObject)perm));
363: b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
365: PetscCall(PetscMalloc1(mbs + 1, &b->solve_work));
367: b->maxnz = b->nz = ui[mbs];
369: fact->info.factor_mallocs = reallocs;
370: fact->info.fill_ratio_given = fill;
371: if (ai[mbs] != 0) {
372: fact->info.fill_ratio_needed = ((PetscReal)ui[mbs]) / ai[mbs];
373: } else {
374: fact->info.fill_ratio_needed = 0.0;
375: }
376: #if defined(PETSC_USE_INFO)
377: if (ai[mbs] != 0) {
378: PetscReal af = fact->info.fill_ratio_needed;
379: PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)fill, (double)af));
380: PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af));
381: PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g) for best performance.\n", (double)af));
382: } else {
383: PetscCall(PetscInfo(A, "Empty matrix\n"));
384: }
385: #endif
386: fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering;
387: PetscFunctionReturn(PETSC_SUCCESS);
388: }
390: PetscErrorCode MatCholeskyFactorSymbolic_SeqSBAIJ_inplace(Mat fact, Mat A, IS perm, const MatFactorInfo *info)
391: {
392: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data;
393: Mat_SeqSBAIJ *b;
394: PetscBool perm_identity, missing;
395: PetscReal fill = info->fill;
396: const PetscInt *rip, *ai, *aj;
397: PetscInt i, mbs = a->mbs, bs = A->rmap->bs, reallocs = 0, prow, d;
398: PetscInt *jl, jmin, jmax, nzk, *ui, k, j, *il, nextprow;
399: PetscInt nlnk, *lnk, ncols, *cols, *uj, **ui_ptr, *uj_ptr;
400: PetscFreeSpaceList free_space = NULL, current_space = NULL;
401: PetscBT lnkbt;
403: PetscFunctionBegin;
404: PetscCall(MatMissingDiagonal(A, &missing, &d));
405: PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, d);
407: /*
408: This code originally uses Modified Sparse Row (MSR) storage
409: (see page 85, "Iterative Methods ..." by Saad) for the output matrix B - bad choice!
410: Then it is rewritten so the factor B takes seqsbaij format. However the associated
411: MatCholeskyFactorNumeric_() have not been modified for the cases of bs>1 or !perm_identity,
412: thus the original code in MSR format is still used for these cases.
413: The code below should replace MatCholeskyFactorSymbolic_SeqSBAIJ_MSR() whenever
414: MatCholeskyFactorNumeric_() is modified for using sbaij symbolic factor.
415: */
416: if (bs > 1) {
417: PetscCall(MatCholeskyFactorSymbolic_SeqSBAIJ_MSR(fact, A, perm, info));
418: PetscFunctionReturn(PETSC_SUCCESS);
419: }
421: /* check whether perm is the identity mapping */
422: PetscCall(ISIdentity(perm, &perm_identity));
423: PetscCheck(perm_identity, PETSC_COMM_SELF, PETSC_ERR_SUP, "Matrix reordering is not supported for sbaij matrix. Use aij format");
424: a->permute = PETSC_FALSE;
425: ai = a->i;
426: aj = a->j;
427: PetscCall(ISGetIndices(perm, &rip));
429: /* initialization */
430: PetscCall(PetscMalloc1(mbs + 1, &ui));
431: ui[0] = 0;
433: /* jl: linked list for storing indices of the pivot rows
434: il: il[i] points to the 1st nonzero entry of U(i,k:mbs-1) */
435: PetscCall(PetscMalloc4(mbs, &ui_ptr, mbs, &il, mbs, &jl, mbs, &cols));
436: for (i = 0; i < mbs; i++) {
437: jl[i] = mbs;
438: il[i] = 0;
439: }
441: /* create and initialize a linked list for storing column indices of the active row k */
442: nlnk = mbs + 1;
443: PetscCall(PetscLLCreate(mbs, mbs, nlnk, lnk, lnkbt));
445: /* initial FreeSpace size is fill*(ai[mbs]+1) */
446: PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, ai[mbs] + 1), &free_space));
447: current_space = free_space;
449: for (k = 0; k < mbs; k++) { /* for each active row k */
450: /* initialize lnk by the column indices of row rip[k] of A */
451: nzk = 0;
452: ncols = ai[rip[k] + 1] - ai[rip[k]];
453: for (j = 0; j < ncols; j++) {
454: i = *(aj + ai[rip[k]] + j);
455: cols[j] = rip[i];
456: }
457: PetscCall(PetscLLAdd(ncols, cols, mbs, &nlnk, lnk, lnkbt));
458: nzk += nlnk;
460: /* update lnk by computing fill-in for each pivot row to be merged in */
461: prow = jl[k]; /* 1st pivot row */
463: while (prow < k) {
464: nextprow = jl[prow];
465: /* merge prow into k-th row */
466: jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:mbs-1) */
467: jmax = ui[prow + 1];
468: ncols = jmax - jmin;
469: uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:mbs-1) */
470: PetscCall(PetscLLAddSorted(ncols, uj_ptr, mbs, &nlnk, lnk, lnkbt));
471: nzk += nlnk;
473: /* update il and jl for prow */
474: if (jmin < jmax) {
475: il[prow] = jmin;
477: j = *uj_ptr;
478: jl[prow] = jl[j];
479: jl[j] = prow;
480: }
481: prow = nextprow;
482: }
484: /* if free space is not available, make more free space */
485: if (current_space->local_remaining < nzk) {
486: i = mbs - k + 1; /* num of unfactored rows */
487: i = PetscMin(PetscIntMultTruncate(i, nzk), PetscIntMultTruncate(i, i - 1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
488: PetscCall(PetscFreeSpaceGet(i, ¤t_space));
489: reallocs++;
490: }
492: /* copy data into free space, then initialize lnk */
493: PetscCall(PetscLLClean(mbs, mbs, nzk, lnk, current_space->array, lnkbt));
495: /* add the k-th row into il and jl */
496: if (nzk - 1 > 0) {
497: i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:mbs-1) */
498: jl[k] = jl[i];
499: jl[i] = k;
500: il[k] = ui[k] + 1;
501: }
502: ui_ptr[k] = current_space->array;
504: current_space->array += nzk;
505: current_space->local_used += nzk;
506: current_space->local_remaining -= nzk;
508: ui[k + 1] = ui[k] + nzk;
509: }
511: PetscCall(ISRestoreIndices(perm, &rip));
512: PetscCall(PetscFree4(ui_ptr, il, jl, cols));
514: /* destroy list of free space and other temporary array(s) */
515: PetscCall(PetscMalloc1(ui[mbs] + 1, &uj));
516: PetscCall(PetscFreeSpaceContiguous(&free_space, uj));
517: PetscCall(PetscLLDestroy(lnk, lnkbt));
519: /* put together the new matrix in MATSEQSBAIJ format */
520: PetscCall(MatSeqSBAIJSetPreallocation(fact, bs, MAT_SKIP_ALLOCATION, NULL));
522: b = (Mat_SeqSBAIJ *)fact->data;
523: b->singlemalloc = PETSC_FALSE;
524: b->free_a = PETSC_TRUE;
525: b->free_ij = PETSC_TRUE;
527: PetscCall(PetscMalloc1(ui[mbs] + 1, &b->a));
529: b->j = uj;
530: b->i = ui;
531: b->diag = NULL;
532: b->ilen = NULL;
533: b->imax = NULL;
534: b->row = perm;
536: b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
538: PetscCall(PetscObjectReference((PetscObject)perm));
539: b->icol = perm;
540: PetscCall(PetscObjectReference((PetscObject)perm));
541: PetscCall(PetscMalloc1(mbs + 1, &b->solve_work));
542: b->maxnz = b->nz = ui[mbs];
544: fact->info.factor_mallocs = reallocs;
545: fact->info.fill_ratio_given = fill;
546: if (ai[mbs] != 0) {
547: fact->info.fill_ratio_needed = ((PetscReal)ui[mbs]) / ai[mbs];
548: } else {
549: fact->info.fill_ratio_needed = 0.0;
550: }
551: #if defined(PETSC_USE_INFO)
552: if (ai[mbs] != 0) {
553: PetscReal af = fact->info.fill_ratio_needed;
554: PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)fill, (double)af));
555: PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af));
556: PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g) for best performance.\n", (double)af));
557: } else {
558: PetscCall(PetscInfo(A, "Empty matrix\n"));
559: }
560: #endif
561: PetscCall(MatSeqSBAIJSetNumericFactorization_inplace(fact, perm_identity));
562: PetscFunctionReturn(PETSC_SUCCESS);
563: }
565: PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_N(Mat C, Mat A, const MatFactorInfo *info)
566: {
567: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data, *b = (Mat_SeqSBAIJ *)C->data;
568: IS perm = b->row;
569: const PetscInt *ai, *aj, *perm_ptr, mbs = a->mbs, *bi = b->i, *bj = b->j;
570: PetscInt i, j;
571: PetscInt *a2anew, k, k1, jmin, jmax, *jl, *il, vj, nexti, ili;
572: PetscInt bs = A->rmap->bs, bs2 = a->bs2;
573: MatScalar *ba = b->a, *aa, *ap, *dk, *uik;
574: MatScalar *u, *diag, *rtmp, *rtmp_ptr;
575: MatScalar *work;
576: PetscInt *pivots;
577: PetscBool allowzeropivot, zeropivotdetected;
579: PetscFunctionBegin;
580: /* initialization */
581: PetscCall(PetscCalloc1(bs2 * mbs, &rtmp));
582: PetscCall(PetscMalloc2(mbs, &il, mbs, &jl));
583: allowzeropivot = PetscNot(A->erroriffailure);
585: il[0] = 0;
586: for (i = 0; i < mbs; i++) jl[i] = mbs;
588: PetscCall(PetscMalloc3(bs2, &dk, bs2, &uik, bs, &work));
589: PetscCall(PetscMalloc1(bs, &pivots));
591: PetscCall(ISGetIndices(perm, &perm_ptr));
593: /* check permutation */
594: if (!a->permute) {
595: ai = a->i;
596: aj = a->j;
597: aa = a->a;
598: } else {
599: ai = a->inew;
600: aj = a->jnew;
601: PetscCall(PetscMalloc1(bs2 * ai[mbs], &aa));
602: PetscCall(PetscArraycpy(aa, a->a, bs2 * ai[mbs]));
603: PetscCall(PetscMalloc1(ai[mbs], &a2anew));
604: PetscCall(PetscArraycpy(a2anew, a->a2anew, ai[mbs]));
606: for (i = 0; i < mbs; i++) {
607: jmin = ai[i];
608: jmax = ai[i + 1];
609: for (j = jmin; j < jmax; j++) {
610: while (a2anew[j] != j) {
611: k = a2anew[j];
612: a2anew[j] = a2anew[k];
613: a2anew[k] = k;
614: for (k1 = 0; k1 < bs2; k1++) {
615: dk[k1] = aa[k * bs2 + k1];
616: aa[k * bs2 + k1] = aa[j * bs2 + k1];
617: aa[j * bs2 + k1] = dk[k1];
618: }
619: }
620: /* transform column-oriented blocks that lie in the lower triangle to row-oriented blocks */
621: if (i > aj[j]) {
622: ap = aa + j * bs2; /* ptr to the beginning of j-th block of aa */
623: for (k = 0; k < bs2; k++) dk[k] = ap[k]; /* dk <- j-th block of aa */
624: for (k = 0; k < bs; k++) { /* j-th block of aa <- dk^T */
625: for (k1 = 0; k1 < bs; k1++) *ap++ = dk[k + bs * k1];
626: }
627: }
628: }
629: }
630: PetscCall(PetscFree(a2anew));
631: }
633: /* for each row k */
634: for (k = 0; k < mbs; k++) {
635: /*initialize k-th row with elements nonzero in row perm(k) of A */
636: jmin = ai[perm_ptr[k]];
637: jmax = ai[perm_ptr[k] + 1];
639: ap = aa + jmin * bs2;
640: for (j = jmin; j < jmax; j++) {
641: vj = perm_ptr[aj[j]]; /* block col. index */
642: rtmp_ptr = rtmp + vj * bs2;
643: for (i = 0; i < bs2; i++) *rtmp_ptr++ = *ap++;
644: }
646: /* modify k-th row by adding in those rows i with U(i,k) != 0 */
647: PetscCall(PetscArraycpy(dk, rtmp + k * bs2, bs2));
648: i = jl[k]; /* first row to be added to k_th row */
650: while (i < k) {
651: nexti = jl[i]; /* next row to be added to k_th row */
653: /* compute multiplier */
654: ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */
656: /* uik = -inv(Di)*U_bar(i,k) */
657: diag = ba + i * bs2;
658: u = ba + ili * bs2;
659: PetscCall(PetscArrayzero(uik, bs2));
660: PetscKernel_A_gets_A_minus_B_times_C(bs, uik, diag, u);
662: /* update D(k) += -U(i,k)^T * U_bar(i,k) */
663: PetscKernel_A_gets_A_plus_Btranspose_times_C(bs, dk, uik, u);
664: PetscCall(PetscLogFlops(4.0 * bs * bs2));
666: /* update -U(i,k) */
667: PetscCall(PetscArraycpy(ba + ili * bs2, uik, bs2));
669: /* add multiple of row i to k-th row ... */
670: jmin = ili + 1;
671: jmax = bi[i + 1];
672: if (jmin < jmax) {
673: for (j = jmin; j < jmax; j++) {
674: /* rtmp += -U(i,k)^T * U_bar(i,j) */
675: rtmp_ptr = rtmp + bj[j] * bs2;
676: u = ba + j * bs2;
677: PetscKernel_A_gets_A_plus_Btranspose_times_C(bs, rtmp_ptr, uik, u);
678: }
679: PetscCall(PetscLogFlops(2.0 * bs * bs2 * (jmax - jmin)));
681: /* ... add i to row list for next nonzero entry */
682: il[i] = jmin; /* update il(i) in column k+1, ... mbs-1 */
683: j = bj[jmin];
684: jl[i] = jl[j];
685: jl[j] = i; /* update jl */
686: }
687: i = nexti;
688: }
690: /* save nonzero entries in k-th row of U ... */
692: /* invert diagonal block */
693: diag = ba + k * bs2;
694: PetscCall(PetscArraycpy(diag, dk, bs2));
696: PetscCall(PetscKernel_A_gets_inverse_A(bs, diag, pivots, work, allowzeropivot, &zeropivotdetected));
697: if (zeropivotdetected) C->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
699: jmin = bi[k];
700: jmax = bi[k + 1];
701: if (jmin < jmax) {
702: for (j = jmin; j < jmax; j++) {
703: vj = bj[j]; /* block col. index of U */
704: u = ba + j * bs2;
705: rtmp_ptr = rtmp + vj * bs2;
706: for (k1 = 0; k1 < bs2; k1++) {
707: *u++ = *rtmp_ptr;
708: *rtmp_ptr++ = 0.0;
709: }
710: }
712: /* ... add k to row list for first nonzero entry in k-th row */
713: il[k] = jmin;
714: i = bj[jmin];
715: jl[k] = jl[i];
716: jl[i] = k;
717: }
718: }
720: PetscCall(PetscFree(rtmp));
721: PetscCall(PetscFree2(il, jl));
722: PetscCall(PetscFree3(dk, uik, work));
723: PetscCall(PetscFree(pivots));
724: if (a->permute) PetscCall(PetscFree(aa));
726: PetscCall(ISRestoreIndices(perm, &perm_ptr));
728: C->ops->solve = MatSolve_SeqSBAIJ_N_inplace;
729: C->ops->solvetranspose = MatSolve_SeqSBAIJ_N_inplace;
730: C->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_N_inplace;
731: C->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_N_inplace;
733: C->assembled = PETSC_TRUE;
734: C->preallocated = PETSC_TRUE;
736: PetscCall(PetscLogFlops(1.3333 * bs * bs2 * b->mbs)); /* from inverting diagonal blocks */
737: PetscFunctionReturn(PETSC_SUCCESS);
738: }
740: PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_N_NaturalOrdering(Mat C, Mat A, const MatFactorInfo *info)
741: {
742: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data, *b = (Mat_SeqSBAIJ *)C->data;
743: PetscInt i, j, mbs = a->mbs, *bi = b->i, *bj = b->j;
744: PetscInt *ai, *aj, k, k1, jmin, jmax, *jl, *il, vj, nexti, ili;
745: PetscInt bs = A->rmap->bs, bs2 = a->bs2;
746: MatScalar *ba = b->a, *aa, *ap, *dk, *uik;
747: MatScalar *u, *diag, *rtmp, *rtmp_ptr;
748: MatScalar *work;
749: PetscInt *pivots;
750: PetscBool allowzeropivot, zeropivotdetected;
752: PetscFunctionBegin;
753: PetscCall(PetscCalloc1(bs2 * mbs, &rtmp));
754: PetscCall(PetscMalloc2(mbs, &il, mbs, &jl));
755: il[0] = 0;
756: for (i = 0; i < mbs; i++) jl[i] = mbs;
758: PetscCall(PetscMalloc3(bs2, &dk, bs2, &uik, bs, &work));
759: PetscCall(PetscMalloc1(bs, &pivots));
760: allowzeropivot = PetscNot(A->erroriffailure);
762: ai = a->i;
763: aj = a->j;
764: aa = a->a;
766: /* for each row k */
767: for (k = 0; k < mbs; k++) {
768: /*initialize k-th row with elements nonzero in row k of A */
769: jmin = ai[k];
770: jmax = ai[k + 1];
771: ap = aa + jmin * bs2;
772: for (j = jmin; j < jmax; j++) {
773: vj = aj[j]; /* block col. index */
774: rtmp_ptr = rtmp + vj * bs2;
775: for (i = 0; i < bs2; i++) *rtmp_ptr++ = *ap++;
776: }
778: /* modify k-th row by adding in those rows i with U(i,k) != 0 */
779: PetscCall(PetscArraycpy(dk, rtmp + k * bs2, bs2));
780: i = jl[k]; /* first row to be added to k_th row */
782: while (i < k) {
783: nexti = jl[i]; /* next row to be added to k_th row */
785: /* compute multiplier */
786: ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */
788: /* uik = -inv(Di)*U_bar(i,k) */
789: diag = ba + i * bs2;
790: u = ba + ili * bs2;
791: PetscCall(PetscArrayzero(uik, bs2));
792: PetscKernel_A_gets_A_minus_B_times_C(bs, uik, diag, u);
794: /* update D(k) += -U(i,k)^T * U_bar(i,k) */
795: PetscKernel_A_gets_A_plus_Btranspose_times_C(bs, dk, uik, u);
796: PetscCall(PetscLogFlops(2.0 * bs * bs2));
798: /* update -U(i,k) */
799: PetscCall(PetscArraycpy(ba + ili * bs2, uik, bs2));
801: /* add multiple of row i to k-th row ... */
802: jmin = ili + 1;
803: jmax = bi[i + 1];
804: if (jmin < jmax) {
805: for (j = jmin; j < jmax; j++) {
806: /* rtmp += -U(i,k)^T * U_bar(i,j) */
807: rtmp_ptr = rtmp + bj[j] * bs2;
808: u = ba + j * bs2;
809: PetscKernel_A_gets_A_plus_Btranspose_times_C(bs, rtmp_ptr, uik, u);
810: }
811: PetscCall(PetscLogFlops(2.0 * bs * bs2 * (jmax - jmin)));
813: /* ... add i to row list for next nonzero entry */
814: il[i] = jmin; /* update il(i) in column k+1, ... mbs-1 */
815: j = bj[jmin];
816: jl[i] = jl[j];
817: jl[j] = i; /* update jl */
818: }
819: i = nexti;
820: }
822: /* save nonzero entries in k-th row of U ... */
824: /* invert diagonal block */
825: diag = ba + k * bs2;
826: PetscCall(PetscArraycpy(diag, dk, bs2));
828: PetscCall(PetscKernel_A_gets_inverse_A(bs, diag, pivots, work, allowzeropivot, &zeropivotdetected));
829: if (zeropivotdetected) C->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
831: jmin = bi[k];
832: jmax = bi[k + 1];
833: if (jmin < jmax) {
834: for (j = jmin; j < jmax; j++) {
835: vj = bj[j]; /* block col. index of U */
836: u = ba + j * bs2;
837: rtmp_ptr = rtmp + vj * bs2;
838: for (k1 = 0; k1 < bs2; k1++) {
839: *u++ = *rtmp_ptr;
840: *rtmp_ptr++ = 0.0;
841: }
842: }
844: /* ... add k to row list for first nonzero entry in k-th row */
845: il[k] = jmin;
846: i = bj[jmin];
847: jl[k] = jl[i];
848: jl[i] = k;
849: }
850: }
852: PetscCall(PetscFree(rtmp));
853: PetscCall(PetscFree2(il, jl));
854: PetscCall(PetscFree3(dk, uik, work));
855: PetscCall(PetscFree(pivots));
857: C->ops->solve = MatSolve_SeqSBAIJ_N_NaturalOrdering_inplace;
858: C->ops->solvetranspose = MatSolve_SeqSBAIJ_N_NaturalOrdering_inplace;
859: C->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_N_NaturalOrdering_inplace;
860: C->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_N_NaturalOrdering_inplace;
861: C->assembled = PETSC_TRUE;
862: C->preallocated = PETSC_TRUE;
864: PetscCall(PetscLogFlops(1.3333 * bs * bs2 * b->mbs)); /* from inverting diagonal blocks */
865: PetscFunctionReturn(PETSC_SUCCESS);
866: }
868: /*
869: Numeric U^T*D*U factorization for SBAIJ format. Modified from SNF of YSMP.
870: Version for blocks 2 by 2.
871: */
872: PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_2(Mat C, Mat A, const MatFactorInfo *info)
873: {
874: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data, *b = (Mat_SeqSBAIJ *)C->data;
875: IS perm = b->row;
876: const PetscInt *ai, *aj, *perm_ptr;
877: PetscInt i, j, mbs = a->mbs, *bi = b->i, *bj = b->j;
878: PetscInt *a2anew, k, k1, jmin, jmax, *jl, *il, vj, nexti, ili;
879: MatScalar *ba = b->a, *aa, *ap;
880: MatScalar *u, *diag, *rtmp, *rtmp_ptr, dk[4], uik[4];
881: PetscReal shift = info->shiftamount;
882: PetscBool allowzeropivot, zeropivotdetected;
884: PetscFunctionBegin;
885: allowzeropivot = PetscNot(A->erroriffailure);
887: /* initialization */
888: /* il and jl record the first nonzero element in each row of the accessing
889: window U(0:k, k:mbs-1).
890: jl: list of rows to be added to uneliminated rows
891: i>= k: jl(i) is the first row to be added to row i
892: i< k: jl(i) is the row following row i in some list of rows
893: jl(i) = mbs indicates the end of a list
894: il(i): points to the first nonzero element in columns k,...,mbs-1 of
895: row i of U */
896: PetscCall(PetscCalloc1(4 * mbs, &rtmp));
897: PetscCall(PetscMalloc2(mbs, &il, mbs, &jl));
898: il[0] = 0;
899: for (i = 0; i < mbs; i++) jl[i] = mbs;
901: PetscCall(ISGetIndices(perm, &perm_ptr));
903: /* check permutation */
904: if (!a->permute) {
905: ai = a->i;
906: aj = a->j;
907: aa = a->a;
908: } else {
909: ai = a->inew;
910: aj = a->jnew;
911: PetscCall(PetscMalloc1(4 * ai[mbs], &aa));
912: PetscCall(PetscArraycpy(aa, a->a, 4 * ai[mbs]));
913: PetscCall(PetscMalloc1(ai[mbs], &a2anew));
914: PetscCall(PetscArraycpy(a2anew, a->a2anew, ai[mbs]));
916: for (i = 0; i < mbs; i++) {
917: jmin = ai[i];
918: jmax = ai[i + 1];
919: for (j = jmin; j < jmax; j++) {
920: while (a2anew[j] != j) {
921: k = a2anew[j];
922: a2anew[j] = a2anew[k];
923: a2anew[k] = k;
924: for (k1 = 0; k1 < 4; k1++) {
925: dk[k1] = aa[k * 4 + k1];
926: aa[k * 4 + k1] = aa[j * 4 + k1];
927: aa[j * 4 + k1] = dk[k1];
928: }
929: }
930: /* transform column-oriented blocks that lie in the lower triangle to row-oriented blocks */
931: if (i > aj[j]) {
932: ap = aa + j * 4; /* ptr to the beginning of the block */
933: dk[1] = ap[1]; /* swap ap[1] and ap[2] */
934: ap[1] = ap[2];
935: ap[2] = dk[1];
936: }
937: }
938: }
939: PetscCall(PetscFree(a2anew));
940: }
942: /* for each row k */
943: for (k = 0; k < mbs; k++) {
944: /*initialize k-th row with elements nonzero in row perm(k) of A */
945: jmin = ai[perm_ptr[k]];
946: jmax = ai[perm_ptr[k] + 1];
947: ap = aa + jmin * 4;
948: for (j = jmin; j < jmax; j++) {
949: vj = perm_ptr[aj[j]]; /* block col. index */
950: rtmp_ptr = rtmp + vj * 4;
951: for (i = 0; i < 4; i++) *rtmp_ptr++ = *ap++;
952: }
954: /* modify k-th row by adding in those rows i with U(i,k) != 0 */
955: PetscCall(PetscArraycpy(dk, rtmp + k * 4, 4));
956: i = jl[k]; /* first row to be added to k_th row */
958: while (i < k) {
959: nexti = jl[i]; /* next row to be added to k_th row */
961: /* compute multiplier */
962: ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */
964: /* uik = -inv(Di)*U_bar(i,k): - ba[ili]*ba[i] */
965: diag = ba + i * 4;
966: u = ba + ili * 4;
967: uik[0] = -(diag[0] * u[0] + diag[2] * u[1]);
968: uik[1] = -(diag[1] * u[0] + diag[3] * u[1]);
969: uik[2] = -(diag[0] * u[2] + diag[2] * u[3]);
970: uik[3] = -(diag[1] * u[2] + diag[3] * u[3]);
972: /* update D(k) += -U(i,k)^T * U_bar(i,k): dk += uik*ba[ili] */
973: dk[0] += uik[0] * u[0] + uik[1] * u[1];
974: dk[1] += uik[2] * u[0] + uik[3] * u[1];
975: dk[2] += uik[0] * u[2] + uik[1] * u[3];
976: dk[3] += uik[2] * u[2] + uik[3] * u[3];
978: PetscCall(PetscLogFlops(16.0 * 2.0));
980: /* update -U(i,k): ba[ili] = uik */
981: PetscCall(PetscArraycpy(ba + ili * 4, uik, 4));
983: /* add multiple of row i to k-th row ... */
984: jmin = ili + 1;
985: jmax = bi[i + 1];
986: if (jmin < jmax) {
987: for (j = jmin; j < jmax; j++) {
988: /* rtmp += -U(i,k)^T * U_bar(i,j): rtmp[bj[j]] += uik*ba[j]; */
989: rtmp_ptr = rtmp + bj[j] * 4;
990: u = ba + j * 4;
991: rtmp_ptr[0] += uik[0] * u[0] + uik[1] * u[1];
992: rtmp_ptr[1] += uik[2] * u[0] + uik[3] * u[1];
993: rtmp_ptr[2] += uik[0] * u[2] + uik[1] * u[3];
994: rtmp_ptr[3] += uik[2] * u[2] + uik[3] * u[3];
995: }
996: PetscCall(PetscLogFlops(16.0 * (jmax - jmin)));
998: /* ... add i to row list for next nonzero entry */
999: il[i] = jmin; /* update il(i) in column k+1, ... mbs-1 */
1000: j = bj[jmin];
1001: jl[i] = jl[j];
1002: jl[j] = i; /* update jl */
1003: }
1004: i = nexti;
1005: }
1007: /* save nonzero entries in k-th row of U ... */
1009: /* invert diagonal block */
1010: diag = ba + k * 4;
1011: PetscCall(PetscArraycpy(diag, dk, 4));
1012: PetscCall(PetscKernel_A_gets_inverse_A_2(diag, shift, allowzeropivot, &zeropivotdetected));
1013: if (zeropivotdetected) C->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
1015: jmin = bi[k];
1016: jmax = bi[k + 1];
1017: if (jmin < jmax) {
1018: for (j = jmin; j < jmax; j++) {
1019: vj = bj[j]; /* block col. index of U */
1020: u = ba + j * 4;
1021: rtmp_ptr = rtmp + vj * 4;
1022: for (k1 = 0; k1 < 4; k1++) {
1023: *u++ = *rtmp_ptr;
1024: *rtmp_ptr++ = 0.0;
1025: }
1026: }
1028: /* ... add k to row list for first nonzero entry in k-th row */
1029: il[k] = jmin;
1030: i = bj[jmin];
1031: jl[k] = jl[i];
1032: jl[i] = k;
1033: }
1034: }
1036: PetscCall(PetscFree(rtmp));
1037: PetscCall(PetscFree2(il, jl));
1038: if (a->permute) PetscCall(PetscFree(aa));
1039: PetscCall(ISRestoreIndices(perm, &perm_ptr));
1041: C->ops->solve = MatSolve_SeqSBAIJ_2_inplace;
1042: C->ops->solvetranspose = MatSolve_SeqSBAIJ_2_inplace;
1043: C->assembled = PETSC_TRUE;
1044: C->preallocated = PETSC_TRUE;
1046: PetscCall(PetscLogFlops(1.3333 * 8 * b->mbs)); /* from inverting diagonal blocks */
1047: PetscFunctionReturn(PETSC_SUCCESS);
1048: }
1050: /*
1051: Version for when blocks are 2 by 2 Using natural ordering
1052: */
1053: PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering(Mat C, Mat A, const MatFactorInfo *info)
1054: {
1055: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data, *b = (Mat_SeqSBAIJ *)C->data;
1056: PetscInt i, j, mbs = a->mbs, *bi = b->i, *bj = b->j;
1057: PetscInt *ai, *aj, k, k1, jmin, jmax, *jl, *il, vj, nexti, ili;
1058: MatScalar *ba = b->a, *aa, *ap, dk[8], uik[8];
1059: MatScalar *u, *diag, *rtmp, *rtmp_ptr;
1060: PetscReal shift = info->shiftamount;
1061: PetscBool allowzeropivot, zeropivotdetected;
1063: PetscFunctionBegin;
1064: allowzeropivot = PetscNot(A->erroriffailure);
1066: /* initialization */
1067: /* il and jl record the first nonzero element in each row of the accessing
1068: window U(0:k, k:mbs-1).
1069: jl: list of rows to be added to uneliminated rows
1070: i>= k: jl(i) is the first row to be added to row i
1071: i< k: jl(i) is the row following row i in some list of rows
1072: jl(i) = mbs indicates the end of a list
1073: il(i): points to the first nonzero element in columns k,...,mbs-1 of
1074: row i of U */
1075: PetscCall(PetscCalloc1(4 * mbs, &rtmp));
1076: PetscCall(PetscMalloc2(mbs, &il, mbs, &jl));
1077: il[0] = 0;
1078: for (i = 0; i < mbs; i++) jl[i] = mbs;
1080: ai = a->i;
1081: aj = a->j;
1082: aa = a->a;
1084: /* for each row k */
1085: for (k = 0; k < mbs; k++) {
1086: /*initialize k-th row with elements nonzero in row k of A */
1087: jmin = ai[k];
1088: jmax = ai[k + 1];
1089: ap = aa + jmin * 4;
1090: for (j = jmin; j < jmax; j++) {
1091: vj = aj[j]; /* block col. index */
1092: rtmp_ptr = rtmp + vj * 4;
1093: for (i = 0; i < 4; i++) *rtmp_ptr++ = *ap++;
1094: }
1096: /* modify k-th row by adding in those rows i with U(i,k) != 0 */
1097: PetscCall(PetscArraycpy(dk, rtmp + k * 4, 4));
1098: i = jl[k]; /* first row to be added to k_th row */
1100: while (i < k) {
1101: nexti = jl[i]; /* next row to be added to k_th row */
1103: /* compute multiplier */
1104: ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */
1106: /* uik = -inv(Di)*U_bar(i,k): - ba[ili]*ba[i] */
1107: diag = ba + i * 4;
1108: u = ba + ili * 4;
1109: uik[0] = -(diag[0] * u[0] + diag[2] * u[1]);
1110: uik[1] = -(diag[1] * u[0] + diag[3] * u[1]);
1111: uik[2] = -(diag[0] * u[2] + diag[2] * u[3]);
1112: uik[3] = -(diag[1] * u[2] + diag[3] * u[3]);
1114: /* update D(k) += -U(i,k)^T * U_bar(i,k): dk += uik*ba[ili] */
1115: dk[0] += uik[0] * u[0] + uik[1] * u[1];
1116: dk[1] += uik[2] * u[0] + uik[3] * u[1];
1117: dk[2] += uik[0] * u[2] + uik[1] * u[3];
1118: dk[3] += uik[2] * u[2] + uik[3] * u[3];
1120: PetscCall(PetscLogFlops(16.0 * 2.0));
1122: /* update -U(i,k): ba[ili] = uik */
1123: PetscCall(PetscArraycpy(ba + ili * 4, uik, 4));
1125: /* add multiple of row i to k-th row ... */
1126: jmin = ili + 1;
1127: jmax = bi[i + 1];
1128: if (jmin < jmax) {
1129: for (j = jmin; j < jmax; j++) {
1130: /* rtmp += -U(i,k)^T * U_bar(i,j): rtmp[bj[j]] += uik*ba[j]; */
1131: rtmp_ptr = rtmp + bj[j] * 4;
1132: u = ba + j * 4;
1133: rtmp_ptr[0] += uik[0] * u[0] + uik[1] * u[1];
1134: rtmp_ptr[1] += uik[2] * u[0] + uik[3] * u[1];
1135: rtmp_ptr[2] += uik[0] * u[2] + uik[1] * u[3];
1136: rtmp_ptr[3] += uik[2] * u[2] + uik[3] * u[3];
1137: }
1138: PetscCall(PetscLogFlops(16.0 * (jmax - jmin)));
1140: /* ... add i to row list for next nonzero entry */
1141: il[i] = jmin; /* update il(i) in column k+1, ... mbs-1 */
1142: j = bj[jmin];
1143: jl[i] = jl[j];
1144: jl[j] = i; /* update jl */
1145: }
1146: i = nexti;
1147: }
1149: /* save nonzero entries in k-th row of U ... */
1151: /* invert diagonal block */
1152: diag = ba + k * 4;
1153: PetscCall(PetscArraycpy(diag, dk, 4));
1154: PetscCall(PetscKernel_A_gets_inverse_A_2(diag, shift, allowzeropivot, &zeropivotdetected));
1155: if (zeropivotdetected) C->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
1157: jmin = bi[k];
1158: jmax = bi[k + 1];
1159: if (jmin < jmax) {
1160: for (j = jmin; j < jmax; j++) {
1161: vj = bj[j]; /* block col. index of U */
1162: u = ba + j * 4;
1163: rtmp_ptr = rtmp + vj * 4;
1164: for (k1 = 0; k1 < 4; k1++) {
1165: *u++ = *rtmp_ptr;
1166: *rtmp_ptr++ = 0.0;
1167: }
1168: }
1170: /* ... add k to row list for first nonzero entry in k-th row */
1171: il[k] = jmin;
1172: i = bj[jmin];
1173: jl[k] = jl[i];
1174: jl[i] = k;
1175: }
1176: }
1178: PetscCall(PetscFree(rtmp));
1179: PetscCall(PetscFree2(il, jl));
1181: C->ops->solve = MatSolve_SeqSBAIJ_2_NaturalOrdering_inplace;
1182: C->ops->solvetranspose = MatSolve_SeqSBAIJ_2_NaturalOrdering_inplace;
1183: C->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_2_NaturalOrdering_inplace;
1184: C->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_2_NaturalOrdering_inplace;
1185: C->assembled = PETSC_TRUE;
1186: C->preallocated = PETSC_TRUE;
1188: PetscCall(PetscLogFlops(1.3333 * 8 * b->mbs)); /* from inverting diagonal blocks */
1189: PetscFunctionReturn(PETSC_SUCCESS);
1190: }
1192: /*
1193: Numeric U^T*D*U factorization for SBAIJ format.
1194: Version for blocks are 1 by 1.
1195: */
1196: PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_1_inplace(Mat C, Mat A, const MatFactorInfo *info)
1197: {
1198: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data, *b = (Mat_SeqSBAIJ *)C->data;
1199: IS ip = b->row;
1200: const PetscInt *ai, *aj, *rip;
1201: PetscInt *a2anew, i, j, mbs = a->mbs, *bi = b->i, *bj = b->j, *bcol;
1202: PetscInt k, jmin, jmax, *jl, *il, col, nexti, ili, nz;
1203: MatScalar *rtmp, *ba = b->a, *bval, *aa, dk, uikdi;
1204: PetscReal rs;
1205: FactorShiftCtx sctx;
1207: PetscFunctionBegin;
1208: /* MatPivotSetUp(): initialize shift context sctx */
1209: PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx)));
1211: PetscCall(ISGetIndices(ip, &rip));
1212: if (!a->permute) {
1213: ai = a->i;
1214: aj = a->j;
1215: aa = a->a;
1216: } else {
1217: ai = a->inew;
1218: aj = a->jnew;
1219: nz = ai[mbs];
1220: PetscCall(PetscMalloc1(nz, &aa));
1221: a2anew = a->a2anew;
1222: bval = a->a;
1223: for (j = 0; j < nz; j++) aa[a2anew[j]] = *(bval++);
1224: }
1226: /* initialization */
1227: /* il and jl record the first nonzero element in each row of the accessing
1228: window U(0:k, k:mbs-1).
1229: jl: list of rows to be added to uneliminated rows
1230: i>= k: jl(i) is the first row to be added to row i
1231: i< k: jl(i) is the row following row i in some list of rows
1232: jl(i) = mbs indicates the end of a list
1233: il(i): points to the first nonzero element in columns k,...,mbs-1 of
1234: row i of U */
1235: PetscCall(PetscMalloc3(mbs, &rtmp, mbs, &il, mbs, &jl));
1237: do {
1238: sctx.newshift = PETSC_FALSE;
1239: il[0] = 0;
1240: for (i = 0; i < mbs; i++) {
1241: rtmp[i] = 0.0;
1242: jl[i] = mbs;
1243: }
1245: for (k = 0; k < mbs; k++) {
1246: /*initialize k-th row by the perm[k]-th row of A */
1247: jmin = ai[rip[k]];
1248: jmax = ai[rip[k] + 1];
1249: bval = ba + bi[k];
1250: for (j = jmin; j < jmax; j++) {
1251: col = rip[aj[j]];
1252: rtmp[col] = aa[j];
1253: *bval++ = 0.0; /* for in-place factorization */
1254: }
1256: /* shift the diagonal of the matrix */
1257: if (sctx.nshift) rtmp[k] += sctx.shift_amount;
1259: /* modify k-th row by adding in those rows i with U(i,k)!=0 */
1260: dk = rtmp[k];
1261: i = jl[k]; /* first row to be added to k_th row */
1263: while (i < k) {
1264: nexti = jl[i]; /* next row to be added to k_th row */
1266: /* compute multiplier, update diag(k) and U(i,k) */
1267: ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */
1268: uikdi = -ba[ili] * ba[bi[i]]; /* diagonal(k) */
1269: dk += uikdi * ba[ili];
1270: ba[ili] = uikdi; /* -U(i,k) */
1272: /* add multiple of row i to k-th row */
1273: jmin = ili + 1;
1274: jmax = bi[i + 1];
1275: if (jmin < jmax) {
1276: for (j = jmin; j < jmax; j++) rtmp[bj[j]] += uikdi * ba[j];
1277: PetscCall(PetscLogFlops(2.0 * (jmax - jmin)));
1279: /* update il and jl for row i */
1280: il[i] = jmin;
1281: j = bj[jmin];
1282: jl[i] = jl[j];
1283: jl[j] = i;
1284: }
1285: i = nexti;
1286: }
1288: /* shift the diagonals when zero pivot is detected */
1289: /* compute rs=sum of abs(off-diagonal) */
1290: rs = 0.0;
1291: jmin = bi[k] + 1;
1292: nz = bi[k + 1] - jmin;
1293: if (nz) {
1294: bcol = bj + jmin;
1295: while (nz--) {
1296: rs += PetscAbsScalar(rtmp[*bcol]);
1297: bcol++;
1298: }
1299: }
1301: sctx.rs = rs;
1302: sctx.pv = dk;
1303: PetscCall(MatPivotCheck(C, A, info, &sctx, k));
1304: if (sctx.newshift) break; /* sctx.shift_amount is updated */
1305: dk = sctx.pv;
1307: /* copy data into U(k,:) */
1308: ba[bi[k]] = 1.0 / dk; /* U(k,k) */
1309: jmin = bi[k] + 1;
1310: jmax = bi[k + 1];
1311: if (jmin < jmax) {
1312: for (j = jmin; j < jmax; j++) {
1313: col = bj[j];
1314: ba[j] = rtmp[col];
1315: rtmp[col] = 0.0;
1316: }
1317: /* add the k-th row into il and jl */
1318: il[k] = jmin;
1319: i = bj[jmin];
1320: jl[k] = jl[i];
1321: jl[i] = k;
1322: }
1323: }
1324: } while (sctx.newshift);
1325: PetscCall(PetscFree3(rtmp, il, jl));
1326: if (a->permute) PetscCall(PetscFree(aa));
1328: PetscCall(ISRestoreIndices(ip, &rip));
1330: C->ops->solve = MatSolve_SeqSBAIJ_1_inplace;
1331: C->ops->solves = MatSolves_SeqSBAIJ_1_inplace;
1332: C->ops->solvetranspose = MatSolve_SeqSBAIJ_1_inplace;
1333: C->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1_inplace;
1334: C->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1_inplace;
1335: C->assembled = PETSC_TRUE;
1336: C->preallocated = PETSC_TRUE;
1338: PetscCall(PetscLogFlops(C->rmap->N));
1339: if (sctx.nshift) {
1340: if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
1341: PetscCall(PetscInfo(A, "number of shiftnz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount));
1342: } else if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
1343: PetscCall(PetscInfo(A, "number of shiftpd tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount));
1344: }
1345: }
1346: PetscFunctionReturn(PETSC_SUCCESS);
1347: }
1349: /*
1350: Version for when blocks are 1 by 1 Using natural ordering under new datastructure
1351: Modified from MatCholeskyFactorNumeric_SeqAIJ()
1352: */
1353: PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering(Mat B, Mat A, const MatFactorInfo *info)
1354: {
1355: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data;
1356: Mat_SeqSBAIJ *b = (Mat_SeqSBAIJ *)B->data;
1357: PetscInt i, j, mbs = A->rmap->n, *bi = b->i, *bj = b->j, *bdiag = b->diag, *bjtmp;
1358: PetscInt *ai = a->i, *aj = a->j, *ajtmp;
1359: PetscInt k, jmin, jmax, *c2r, *il, col, nexti, ili, nz;
1360: MatScalar *rtmp, *ba = b->a, *bval, *aa = a->a, dk, uikdi;
1361: FactorShiftCtx sctx;
1362: PetscReal rs;
1363: MatScalar d, *v;
1365: PetscFunctionBegin;
1366: PetscCall(PetscMalloc3(mbs, &rtmp, mbs, &il, mbs, &c2r));
1368: /* MatPivotSetUp(): initialize shift context sctx */
1369: PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx)));
1371: if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
1372: sctx.shift_top = info->zeropivot;
1374: PetscCall(PetscArrayzero(rtmp, mbs));
1376: for (i = 0; i < mbs; i++) {
1377: /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
1378: d = (aa)[a->diag[i]];
1379: rtmp[i] += -PetscRealPart(d); /* diagonal entry */
1380: ajtmp = aj + ai[i] + 1; /* exclude diagonal */
1381: v = aa + ai[i] + 1;
1382: nz = ai[i + 1] - ai[i] - 1;
1383: for (j = 0; j < nz; j++) {
1384: rtmp[i] += PetscAbsScalar(v[j]);
1385: rtmp[ajtmp[j]] += PetscAbsScalar(v[j]);
1386: }
1387: if (PetscRealPart(rtmp[i]) > sctx.shift_top) sctx.shift_top = PetscRealPart(rtmp[i]);
1388: }
1389: sctx.shift_top *= 1.1;
1390: sctx.nshift_max = 5;
1391: sctx.shift_lo = 0.;
1392: sctx.shift_hi = 1.;
1393: }
1395: /* allocate working arrays
1396: c2r: linked list, keep track of pivot rows for a given column. c2r[col]: head of the list for a given col
1397: il: for active k row, il[i] gives the index of the 1st nonzero entry in U[i,k:n-1] in bj and ba arrays
1398: */
1399: do {
1400: sctx.newshift = PETSC_FALSE;
1402: for (i = 0; i < mbs; i++) c2r[i] = mbs;
1403: if (mbs) il[0] = 0;
1405: for (k = 0; k < mbs; k++) {
1406: /* zero rtmp */
1407: nz = bi[k + 1] - bi[k];
1408: bjtmp = bj + bi[k];
1409: for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0;
1411: /* load in initial unfactored row */
1412: bval = ba + bi[k];
1413: jmin = ai[k];
1414: jmax = ai[k + 1];
1415: for (j = jmin; j < jmax; j++) {
1416: col = aj[j];
1417: rtmp[col] = aa[j];
1418: *bval++ = 0.0; /* for in-place factorization */
1419: }
1420: /* shift the diagonal of the matrix: ZeropivotApply() */
1421: rtmp[k] += sctx.shift_amount; /* shift the diagonal of the matrix */
1423: /* modify k-th row by adding in those rows i with U(i,k)!=0 */
1424: dk = rtmp[k];
1425: i = c2r[k]; /* first row to be added to k_th row */
1427: while (i < k) {
1428: nexti = c2r[i]; /* next row to be added to k_th row */
1430: /* compute multiplier, update diag(k) and U(i,k) */
1431: ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */
1432: uikdi = -ba[ili] * ba[bdiag[i]]; /* diagonal(k) */
1433: dk += uikdi * ba[ili]; /* update diag[k] */
1434: ba[ili] = uikdi; /* -U(i,k) */
1436: /* add multiple of row i to k-th row */
1437: jmin = ili + 1;
1438: jmax = bi[i + 1];
1439: if (jmin < jmax) {
1440: for (j = jmin; j < jmax; j++) rtmp[bj[j]] += uikdi * ba[j];
1441: /* update il and c2r for row i */
1442: il[i] = jmin;
1443: j = bj[jmin];
1444: c2r[i] = c2r[j];
1445: c2r[j] = i;
1446: }
1447: i = nexti;
1448: }
1450: /* copy data into U(k,:) */
1451: rs = 0.0;
1452: jmin = bi[k];
1453: jmax = bi[k + 1] - 1;
1454: if (jmin < jmax) {
1455: for (j = jmin; j < jmax; j++) {
1456: col = bj[j];
1457: ba[j] = rtmp[col];
1458: rs += PetscAbsScalar(ba[j]);
1459: }
1460: /* add the k-th row into il and c2r */
1461: il[k] = jmin;
1462: i = bj[jmin];
1463: c2r[k] = c2r[i];
1464: c2r[i] = k;
1465: }
1467: sctx.rs = rs;
1468: sctx.pv = dk;
1469: PetscCall(MatPivotCheck(B, A, info, &sctx, k));
1470: if (sctx.newshift) break;
1471: dk = sctx.pv;
1473: ba[bdiag[k]] = 1.0 / dk; /* U(k,k) */
1474: }
1475: } while (sctx.newshift);
1477: PetscCall(PetscFree3(rtmp, il, c2r));
1479: B->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1480: B->ops->solves = MatSolves_SeqSBAIJ_1;
1481: B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1482: B->ops->matsolve = MatMatSolve_SeqSBAIJ_1_NaturalOrdering;
1483: B->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering;
1484: B->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering;
1486: B->assembled = PETSC_TRUE;
1487: B->preallocated = PETSC_TRUE;
1489: PetscCall(PetscLogFlops(B->rmap->n));
1491: /* MatPivotView() */
1492: if (sctx.nshift) {
1493: if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
1494: PetscCall(PetscInfo(A, "number of shift_pd tries %" PetscInt_FMT ", shift_amount %g, diagonal shifted up by %e fraction top_value %e\n", sctx.nshift, (double)sctx.shift_amount, (double)sctx.shift_fraction, (double)sctx.shift_top));
1495: } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
1496: PetscCall(PetscInfo(A, "number of shift_nz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount));
1497: } else if (info->shifttype == (PetscReal)MAT_SHIFT_INBLOCKS) {
1498: PetscCall(PetscInfo(A, "number of shift_inblocks applied %" PetscInt_FMT ", each shift_amount %g\n", sctx.nshift, (double)info->shiftamount));
1499: }
1500: }
1501: PetscFunctionReturn(PETSC_SUCCESS);
1502: }
1504: PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering_inplace(Mat C, Mat A, const MatFactorInfo *info)
1505: {
1506: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data, *b = (Mat_SeqSBAIJ *)C->data;
1507: PetscInt i, j, mbs = a->mbs;
1508: PetscInt *ai = a->i, *aj = a->j, *bi = b->i, *bj = b->j;
1509: PetscInt k, jmin, *jl, *il, nexti, ili, *acol, *bcol, nz;
1510: MatScalar *rtmp, *ba = b->a, *aa = a->a, dk, uikdi, *aval, *bval;
1511: PetscReal rs;
1512: FactorShiftCtx sctx;
1514: PetscFunctionBegin;
1515: /* MatPivotSetUp(): initialize shift context sctx */
1516: PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx)));
1518: /* initialization */
1519: /* il and jl record the first nonzero element in each row of the accessing
1520: window U(0:k, k:mbs-1).
1521: jl: list of rows to be added to uneliminated rows
1522: i>= k: jl(i) is the first row to be added to row i
1523: i< k: jl(i) is the row following row i in some list of rows
1524: jl(i) = mbs indicates the end of a list
1525: il(i): points to the first nonzero element in U(i,k:mbs-1)
1526: */
1527: PetscCall(PetscMalloc1(mbs, &rtmp));
1528: PetscCall(PetscMalloc2(mbs, &il, mbs, &jl));
1530: do {
1531: sctx.newshift = PETSC_FALSE;
1532: il[0] = 0;
1533: for (i = 0; i < mbs; i++) {
1534: rtmp[i] = 0.0;
1535: jl[i] = mbs;
1536: }
1538: for (k = 0; k < mbs; k++) {
1539: /*initialize k-th row with elements nonzero in row perm(k) of A */
1540: nz = ai[k + 1] - ai[k];
1541: acol = aj + ai[k];
1542: aval = aa + ai[k];
1543: bval = ba + bi[k];
1544: while (nz--) {
1545: rtmp[*acol++] = *aval++;
1546: *bval++ = 0.0; /* for in-place factorization */
1547: }
1549: /* shift the diagonal of the matrix */
1550: if (sctx.nshift) rtmp[k] += sctx.shift_amount;
1552: /* modify k-th row by adding in those rows i with U(i,k)!=0 */
1553: dk = rtmp[k];
1554: i = jl[k]; /* first row to be added to k_th row */
1556: while (i < k) {
1557: nexti = jl[i]; /* next row to be added to k_th row */
1558: /* compute multiplier, update D(k) and U(i,k) */
1559: ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */
1560: uikdi = -ba[ili] * ba[bi[i]];
1561: dk += uikdi * ba[ili];
1562: ba[ili] = uikdi; /* -U(i,k) */
1564: /* add multiple of row i to k-th row ... */
1565: jmin = ili + 1;
1566: nz = bi[i + 1] - jmin;
1567: if (nz > 0) {
1568: bcol = bj + jmin;
1569: bval = ba + jmin;
1570: PetscCall(PetscLogFlops(2.0 * nz));
1571: while (nz--) rtmp[*bcol++] += uikdi * (*bval++);
1573: /* update il and jl for i-th row */
1574: il[i] = jmin;
1575: j = bj[jmin];
1576: jl[i] = jl[j];
1577: jl[j] = i;
1578: }
1579: i = nexti;
1580: }
1582: /* shift the diagonals when zero pivot is detected */
1583: /* compute rs=sum of abs(off-diagonal) */
1584: rs = 0.0;
1585: jmin = bi[k] + 1;
1586: nz = bi[k + 1] - jmin;
1587: if (nz) {
1588: bcol = bj + jmin;
1589: while (nz--) {
1590: rs += PetscAbsScalar(rtmp[*bcol]);
1591: bcol++;
1592: }
1593: }
1595: sctx.rs = rs;
1596: sctx.pv = dk;
1597: PetscCall(MatPivotCheck(C, A, info, &sctx, k));
1598: if (sctx.newshift) break; /* sctx.shift_amount is updated */
1599: dk = sctx.pv;
1601: /* copy data into U(k,:) */
1602: ba[bi[k]] = 1.0 / dk;
1603: jmin = bi[k] + 1;
1604: nz = bi[k + 1] - jmin;
1605: if (nz) {
1606: bcol = bj + jmin;
1607: bval = ba + jmin;
1608: while (nz--) {
1609: *bval++ = rtmp[*bcol];
1610: rtmp[*bcol++] = 0.0;
1611: }
1612: /* add k-th row into il and jl */
1613: il[k] = jmin;
1614: i = bj[jmin];
1615: jl[k] = jl[i];
1616: jl[i] = k;
1617: }
1618: } /* end of for (k = 0; k<mbs; k++) */
1619: } while (sctx.newshift);
1620: PetscCall(PetscFree(rtmp));
1621: PetscCall(PetscFree2(il, jl));
1623: C->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
1624: C->ops->solves = MatSolves_SeqSBAIJ_1_inplace;
1625: C->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
1626: C->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
1627: C->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
1629: C->assembled = PETSC_TRUE;
1630: C->preallocated = PETSC_TRUE;
1632: PetscCall(PetscLogFlops(C->rmap->N));
1633: if (sctx.nshift) {
1634: if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
1635: PetscCall(PetscInfo(A, "number of shiftnz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount));
1636: } else if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
1637: PetscCall(PetscInfo(A, "number of shiftpd tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount));
1638: }
1639: }
1640: PetscFunctionReturn(PETSC_SUCCESS);
1641: }
1643: PetscErrorCode MatCholeskyFactor_SeqSBAIJ(Mat A, IS perm, const MatFactorInfo *info)
1644: {
1645: Mat C;
1647: PetscFunctionBegin;
1648: PetscCall(MatGetFactor(A, "petsc", MAT_FACTOR_CHOLESKY, &C));
1649: PetscCall(MatCholeskyFactorSymbolic(C, A, perm, info));
1650: PetscCall(MatCholeskyFactorNumeric(C, A, info));
1652: A->ops->solve = C->ops->solve;
1653: A->ops->solvetranspose = C->ops->solvetranspose;
1655: PetscCall(MatHeaderMerge(A, &C));
1656: PetscFunctionReturn(PETSC_SUCCESS);
1657: }