Actual source code: aijfact.c
1: #include <../src/mat/impls/aij/seq/aij.h>
2: #include <../src/mat/impls/sbaij/seq/sbaij.h>
3: #include <petscbt.h>
4: #include <../src/mat/utils/freespace.h>
6: /*
7: Computes an ordering to get most of the large numerical values in the lower triangular part of the matrix
9: This code does not work and is not called anywhere. It would be registered with MatOrderingRegisterAll()
10: */
11: #if 0
12: PetscErrorCode MatGetOrdering_Flow_SeqAIJ(Mat mat, MatOrderingType type, IS *irow, IS *icol)
13: {
14: Mat_SeqAIJ *a = (Mat_SeqAIJ *)mat->data;
15: PetscInt i, j, jj, k, kk, n = mat->rmap->n, current = 0, newcurrent = 0, *order;
16: const PetscInt *ai = a->i, *aj = a->j;
17: const PetscScalar *aa = a->a;
18: PetscBool *done;
19: PetscReal best, past = 0, future;
21: PetscFunctionBegin;
22: /* pick initial row */
23: best = -1;
24: for (i = 0; i < n; i++) {
25: future = 0.0;
26: for (j = ai[i]; j < ai[i + 1]; j++) {
27: if (aj[j] != i) future += PetscAbsScalar(aa[j]);
28: else past = PetscAbsScalar(aa[j]);
29: }
30: if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */
31: if (past / future > best) {
32: best = past / future;
33: current = i;
34: }
35: }
37: PetscCall(PetscMalloc1(n, &done));
38: PetscCall(PetscArrayzero(done, n));
39: PetscCall(PetscMalloc1(n, &order));
40: order[0] = current;
41: for (i = 0; i < n - 1; i++) {
42: done[current] = PETSC_TRUE;
43: best = -1;
44: /* loop over all neighbors of current pivot */
45: for (j = ai[current]; j < ai[current + 1]; j++) {
46: jj = aj[j];
47: if (done[jj]) continue;
48: /* loop over columns of potential next row computing weights for below and above diagonal */
49: past = future = 0.0;
50: for (k = ai[jj]; k < ai[jj + 1]; k++) {
51: kk = aj[k];
52: if (done[kk]) past += PetscAbsScalar(aa[k]);
53: else if (kk != jj) future += PetscAbsScalar(aa[k]);
54: }
55: if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */
56: if (past / future > best) {
57: best = past / future;
58: newcurrent = jj;
59: }
60: }
61: if (best == -1) { /* no neighbors to select from so select best of all that remain */
62: best = -1;
63: for (k = 0; k < n; k++) {
64: if (done[k]) continue;
65: future = 0.0;
66: past = 0.0;
67: for (j = ai[k]; j < ai[k + 1]; j++) {
68: kk = aj[j];
69: if (done[kk]) past += PetscAbsScalar(aa[j]);
70: else if (kk != k) future += PetscAbsScalar(aa[j]);
71: }
72: if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */
73: if (past / future > best) {
74: best = past / future;
75: newcurrent = k;
76: }
77: }
78: }
79: PetscCheck(current != newcurrent, PETSC_COMM_SELF, PETSC_ERR_PLIB, "newcurrent cannot be current");
80: current = newcurrent;
81: order[i + 1] = current;
82: }
83: PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n, order, PETSC_COPY_VALUES, irow));
84: *icol = *irow;
85: PetscCall(PetscObjectReference((PetscObject)*irow));
86: PetscCall(PetscFree(done));
87: PetscCall(PetscFree(order));
88: PetscFunctionReturn(PETSC_SUCCESS);
89: }
90: #endif
92: static PetscErrorCode MatFactorGetSolverType_petsc(Mat A, MatSolverType *type)
93: {
94: PetscFunctionBegin;
95: *type = MATSOLVERPETSC;
96: PetscFunctionReturn(PETSC_SUCCESS);
97: }
99: PETSC_INTERN PetscErrorCode MatGetFactor_seqaij_petsc(Mat A, MatFactorType ftype, Mat *B)
100: {
101: PetscInt n = A->rmap->n;
103: PetscFunctionBegin;
104: #if defined(PETSC_USE_COMPLEX)
105: if ((ftype == MAT_FACTOR_CHOLESKY || ftype == MAT_FACTOR_ICC) && A->hermitian == PETSC_BOOL3_TRUE && A->symmetric != PETSC_BOOL3_TRUE) {
106: PetscCall(PetscInfo(A, "Hermitian MAT_FACTOR_CHOLESKY or MAT_FACTOR_ICC are not supported. Use MAT_FACTOR_LU instead.\n"));
107: *B = NULL;
108: PetscFunctionReturn(PETSC_SUCCESS);
109: }
110: #endif
112: PetscCall(MatCreate(PetscObjectComm((PetscObject)A), B));
113: PetscCall(MatSetSizes(*B, n, n, n, n));
114: if (ftype == MAT_FACTOR_LU || ftype == MAT_FACTOR_ILU || ftype == MAT_FACTOR_ILUDT) {
115: PetscCall(MatSetType(*B, MATSEQAIJ));
117: (*B)->ops->ilufactorsymbolic = MatILUFactorSymbolic_SeqAIJ;
118: (*B)->ops->lufactorsymbolic = MatLUFactorSymbolic_SeqAIJ;
120: PetscCall(MatSetBlockSizesFromMats(*B, A, A));
121: PetscCall(PetscStrallocpy(MATORDERINGND, (char **)&(*B)->preferredordering[MAT_FACTOR_LU]));
122: PetscCall(PetscStrallocpy(MATORDERINGNATURAL, (char **)&(*B)->preferredordering[MAT_FACTOR_ILU]));
123: PetscCall(PetscStrallocpy(MATORDERINGNATURAL, (char **)&(*B)->preferredordering[MAT_FACTOR_ILUDT]));
124: } else if (ftype == MAT_FACTOR_CHOLESKY || ftype == MAT_FACTOR_ICC) {
125: PetscCall(MatSetType(*B, MATSEQSBAIJ));
126: PetscCall(MatSeqSBAIJSetPreallocation(*B, 1, MAT_SKIP_ALLOCATION, NULL));
128: (*B)->ops->iccfactorsymbolic = MatICCFactorSymbolic_SeqAIJ;
129: (*B)->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_SeqAIJ;
130: PetscCall(PetscStrallocpy(MATORDERINGND, (char **)&(*B)->preferredordering[MAT_FACTOR_CHOLESKY]));
131: PetscCall(PetscStrallocpy(MATORDERINGNATURAL, (char **)&(*B)->preferredordering[MAT_FACTOR_ICC]));
132: } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Factor type not supported");
133: (*B)->factortype = ftype;
135: PetscCall(PetscFree((*B)->solvertype));
136: PetscCall(PetscStrallocpy(MATSOLVERPETSC, &(*B)->solvertype));
137: (*B)->canuseordering = PETSC_TRUE;
138: PetscCall(PetscObjectComposeFunction((PetscObject)*B, "MatFactorGetSolverType_C", MatFactorGetSolverType_petsc));
139: PetscFunctionReturn(PETSC_SUCCESS);
140: }
142: #if 0
143: // currently unused
144: static PetscErrorCode MatLUFactorSymbolic_SeqAIJ_inplace(Mat B, Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
145: {
146: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data, *b;
147: IS isicol;
148: const PetscInt *r, *ic;
149: PetscInt i, n = A->rmap->n, *ai = a->i, *aj = a->j;
150: PetscInt *bi, *bj, *ajtmp;
151: PetscInt *bdiag, row, nnz, nzi, reallocs = 0, nzbd, *im;
152: PetscReal f;
153: PetscInt nlnk, *lnk, k, **bi_ptr;
154: PetscFreeSpaceList free_space = NULL, current_space = NULL;
155: PetscBT lnkbt;
156: PetscBool missing;
158: PetscFunctionBegin;
159: PetscCheck(A->rmap->N == A->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "matrix must be square");
160: PetscCall(MatMissingDiagonal(A, &missing, &i));
161: PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i);
163: PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol));
164: PetscCall(ISGetIndices(isrow, &r));
165: PetscCall(ISGetIndices(isicol, &ic));
167: /* get new row pointers */
168: PetscCall(PetscMalloc1(n + 1, &bi));
169: bi[0] = 0;
171: /* bdiag is location of diagonal in factor */
172: PetscCall(PetscMalloc1(n + 1, &bdiag));
173: bdiag[0] = 0;
175: /* linked list for storing column indices of the active row */
176: nlnk = n + 1;
177: PetscCall(PetscLLCreate(n, n, nlnk, lnk, lnkbt));
179: PetscCall(PetscMalloc2(n + 1, &bi_ptr, n + 1, &im));
181: /* initial FreeSpace size is f*(ai[n]+1) */
182: f = info->fill;
183: if (n == 1) f = 1; /* prevent failure in corner case of 1x1 matrix with fill < 0.5 */
184: PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space));
185: current_space = free_space;
187: for (i = 0; i < n; i++) {
188: /* copy previous fill into linked list */
189: nzi = 0;
190: nnz = ai[r[i] + 1] - ai[r[i]];
191: ajtmp = aj + ai[r[i]];
192: PetscCall(PetscLLAddPerm(nnz, ajtmp, ic, n, &nlnk, lnk, lnkbt));
193: nzi += nlnk;
195: /* add pivot rows into linked list */
196: row = lnk[n];
197: while (row < i) {
198: nzbd = bdiag[row] - bi[row] + 1; /* num of entries in the row with column index <= row */
199: ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */
200: PetscCall(PetscLLAddSortedLU(ajtmp, row, &nlnk, lnk, lnkbt, i, nzbd, im));
201: nzi += nlnk;
202: row = lnk[row];
203: }
204: bi[i + 1] = bi[i] + nzi;
205: im[i] = nzi;
207: /* mark bdiag */
208: nzbd = 0;
209: nnz = nzi;
210: k = lnk[n];
211: while (nnz-- && k < i) {
212: nzbd++;
213: k = lnk[k];
214: }
215: bdiag[i] = bi[i] + nzbd;
217: /* if free space is not available, make more free space */
218: if (current_space->local_remaining < nzi) {
219: nnz = PetscIntMultTruncate(n - i, nzi); /* estimated and max additional space needed */
220: PetscCall(PetscFreeSpaceGet(nnz, ¤t_space));
221: reallocs++;
222: }
224: /* copy data into free space, then initialize lnk */
225: PetscCall(PetscLLClean(n, n, nzi, lnk, current_space->array, lnkbt));
227: bi_ptr[i] = current_space->array;
228: current_space->array += nzi;
229: current_space->local_used += nzi;
230: current_space->local_remaining -= nzi;
231: }
232: #if defined(PETSC_USE_INFO)
233: if (ai[n] != 0) {
234: PetscReal af = ((PetscReal)bi[n]) / ((PetscReal)ai[n]);
235: PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)f, (double)af));
236: PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af));
237: PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g);\n", (double)af));
238: PetscCall(PetscInfo(A, "for best performance.\n"));
239: } else {
240: PetscCall(PetscInfo(A, "Empty matrix\n"));
241: }
242: #endif
244: PetscCall(ISRestoreIndices(isrow, &r));
245: PetscCall(ISRestoreIndices(isicol, &ic));
247: /* destroy list of free space and other temporary array(s) */
248: PetscCall(PetscMalloc1(bi[n] + 1, &bj));
249: PetscCall(PetscFreeSpaceContiguous(&free_space, bj));
250: PetscCall(PetscLLDestroy(lnk, lnkbt));
251: PetscCall(PetscFree2(bi_ptr, im));
253: /* put together the new matrix */
254: PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(B, MAT_SKIP_ALLOCATION, NULL));
255: b = (Mat_SeqAIJ *)(B)->data;
257: b->free_a = PETSC_TRUE;
258: b->free_ij = PETSC_TRUE;
259: b->singlemalloc = PETSC_FALSE;
261: PetscCall(PetscMalloc1(bi[n] + 1, &b->a));
262: b->j = bj;
263: b->i = bi;
264: b->diag = bdiag;
265: b->ilen = NULL;
266: b->imax = NULL;
267: b->row = isrow;
268: b->col = iscol;
269: PetscCall(PetscObjectReference((PetscObject)isrow));
270: PetscCall(PetscObjectReference((PetscObject)iscol));
271: b->icol = isicol;
272: PetscCall(PetscMalloc1(n + 1, &b->solve_work));
274: /* In b structure: Free imax, ilen, old a, old j. Allocate solve_work, new a, new j */
275: b->maxnz = b->nz = bi[n];
277: (B)->factortype = MAT_FACTOR_LU;
278: (B)->info.factor_mallocs = reallocs;
279: (B)->info.fill_ratio_given = f;
281: if (ai[n]) {
282: (B)->info.fill_ratio_needed = ((PetscReal)bi[n]) / ((PetscReal)ai[n]);
283: } else {
284: (B)->info.fill_ratio_needed = 0.0;
285: }
286: (B)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_inplace;
287: if (a->inode.size) (B)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode_inplace;
288: PetscFunctionReturn(PETSC_SUCCESS);
289: }
290: #endif
292: PetscErrorCode MatLUFactorSymbolic_SeqAIJ(Mat B, Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
293: {
294: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data, *b;
295: IS isicol;
296: const PetscInt *r, *ic, *ai = a->i, *aj = a->j, *ajtmp;
297: PetscInt i, n = A->rmap->n;
298: PetscInt *bi, *bj;
299: PetscInt *bdiag, row, nnz, nzi, reallocs = 0, nzbd, *im;
300: PetscReal f;
301: PetscInt nlnk, *lnk, k, **bi_ptr;
302: PetscFreeSpaceList free_space = NULL, current_space = NULL;
303: PetscBT lnkbt;
304: PetscBool missing;
306: PetscFunctionBegin;
307: PetscCheck(A->rmap->N == A->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "matrix must be square");
308: PetscCall(MatMissingDiagonal(A, &missing, &i));
309: PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i);
311: PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol));
312: PetscCall(ISGetIndices(isrow, &r));
313: PetscCall(ISGetIndices(isicol, &ic));
315: /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */
316: PetscCall(PetscMalloc1(n + 1, &bi));
317: PetscCall(PetscMalloc1(n + 1, &bdiag));
318: bi[0] = bdiag[0] = 0;
320: /* linked list for storing column indices of the active row */
321: nlnk = n + 1;
322: PetscCall(PetscLLCreate(n, n, nlnk, lnk, lnkbt));
324: PetscCall(PetscMalloc2(n + 1, &bi_ptr, n + 1, &im));
326: /* initial FreeSpace size is f*(ai[n]+1) */
327: f = info->fill;
328: if (n == 1) f = 1; /* prevent failure in corner case of 1x1 matrix with fill < 0.5 */
329: PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space));
330: current_space = free_space;
332: for (i = 0; i < n; i++) {
333: /* copy previous fill into linked list */
334: nzi = 0;
335: nnz = ai[r[i] + 1] - ai[r[i]];
336: ajtmp = aj + ai[r[i]];
337: PetscCall(PetscLLAddPerm(nnz, ajtmp, ic, n, &nlnk, lnk, lnkbt));
338: nzi += nlnk;
340: /* add pivot rows into linked list */
341: row = lnk[n];
342: while (row < i) {
343: nzbd = bdiag[row] + 1; /* num of entries in the row with column index <= row */
344: ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */
345: PetscCall(PetscLLAddSortedLU(ajtmp, row, &nlnk, lnk, lnkbt, i, nzbd, im));
346: nzi += nlnk;
347: row = lnk[row];
348: }
349: bi[i + 1] = bi[i] + nzi;
350: im[i] = nzi;
352: /* mark bdiag */
353: nzbd = 0;
354: nnz = nzi;
355: k = lnk[n];
356: while (nnz-- && k < i) {
357: nzbd++;
358: k = lnk[k];
359: }
360: bdiag[i] = nzbd; /* note: bdiag[i] = nnzL as input for PetscFreeSpaceContiguous_LU() */
362: /* if free space is not available, make more free space */
363: if (current_space->local_remaining < nzi) {
364: /* estimated additional space needed */
365: nnz = PetscIntMultTruncate(2, PetscIntMultTruncate(n - 1, nzi));
366: PetscCall(PetscFreeSpaceGet(nnz, ¤t_space));
367: reallocs++;
368: }
370: /* copy data into free space, then initialize lnk */
371: PetscCall(PetscLLClean(n, n, nzi, lnk, current_space->array, lnkbt));
373: bi_ptr[i] = current_space->array;
374: current_space->array += nzi;
375: current_space->local_used += nzi;
376: current_space->local_remaining -= nzi;
377: }
379: PetscCall(ISRestoreIndices(isrow, &r));
380: PetscCall(ISRestoreIndices(isicol, &ic));
382: /* copy free_space into bj and free free_space; set bi, bj, bdiag in new datastructure; */
383: PetscCall(PetscMalloc1(bi[n] + 1, &bj));
384: PetscCall(PetscFreeSpaceContiguous_LU(&free_space, bj, n, bi, bdiag));
385: PetscCall(PetscLLDestroy(lnk, lnkbt));
386: PetscCall(PetscFree2(bi_ptr, im));
388: /* put together the new matrix */
389: PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(B, MAT_SKIP_ALLOCATION, NULL));
390: b = (Mat_SeqAIJ *)(B)->data;
392: b->free_a = PETSC_TRUE;
393: b->free_ij = PETSC_TRUE;
394: b->singlemalloc = PETSC_FALSE;
396: PetscCall(PetscMalloc1(bdiag[0] + 1, &b->a));
398: b->j = bj;
399: b->i = bi;
400: b->diag = bdiag;
401: b->ilen = NULL;
402: b->imax = NULL;
403: b->row = isrow;
404: b->col = iscol;
405: PetscCall(PetscObjectReference((PetscObject)isrow));
406: PetscCall(PetscObjectReference((PetscObject)iscol));
407: b->icol = isicol;
408: PetscCall(PetscMalloc1(n + 1, &b->solve_work));
410: /* In b structure: Free imax, ilen, old a, old j. Allocate solve_work, new a, new j */
411: b->maxnz = b->nz = bdiag[0] + 1;
413: B->factortype = MAT_FACTOR_LU;
414: B->info.factor_mallocs = reallocs;
415: B->info.fill_ratio_given = f;
417: if (ai[n]) {
418: B->info.fill_ratio_needed = ((PetscReal)(bdiag[0] + 1)) / ((PetscReal)ai[n]);
419: } else {
420: B->info.fill_ratio_needed = 0.0;
421: }
422: #if defined(PETSC_USE_INFO)
423: if (ai[n] != 0) {
424: PetscReal af = B->info.fill_ratio_needed;
425: PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)f, (double)af));
426: PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af));
427: PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g);\n", (double)af));
428: PetscCall(PetscInfo(A, "for best performance.\n"));
429: } else {
430: PetscCall(PetscInfo(A, "Empty matrix\n"));
431: }
432: #endif
433: B->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ;
434: if (a->inode.size) B->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode;
435: PetscCall(MatSeqAIJCheckInode_FactorLU(B));
436: PetscFunctionReturn(PETSC_SUCCESS);
437: }
439: /*
440: Trouble in factorization, should we dump the original matrix?
441: */
442: PetscErrorCode MatFactorDumpMatrix(Mat A)
443: {
444: PetscBool flg = PETSC_FALSE;
446: PetscFunctionBegin;
447: PetscCall(PetscOptionsGetBool(((PetscObject)A)->options, NULL, "-mat_factor_dump_on_error", &flg, NULL));
448: if (flg) {
449: PetscViewer viewer;
450: char filename[PETSC_MAX_PATH_LEN];
452: PetscCall(PetscSNPrintf(filename, PETSC_MAX_PATH_LEN, "matrix_factor_error.%d", PetscGlobalRank));
453: PetscCall(PetscViewerBinaryOpen(PetscObjectComm((PetscObject)A), filename, FILE_MODE_WRITE, &viewer));
454: PetscCall(MatView(A, viewer));
455: PetscCall(PetscViewerDestroy(&viewer));
456: }
457: PetscFunctionReturn(PETSC_SUCCESS);
458: }
460: PetscErrorCode MatLUFactorNumeric_SeqAIJ(Mat B, Mat A, const MatFactorInfo *info)
461: {
462: Mat C = B;
463: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data, *b = (Mat_SeqAIJ *)C->data;
464: IS isrow = b->row, isicol = b->icol;
465: const PetscInt *r, *ic, *ics;
466: const PetscInt n = A->rmap->n, *ai = a->i, *aj = a->j, *bi = b->i, *bj = b->j, *bdiag = b->diag;
467: PetscInt i, j, k, nz, nzL, row, *pj;
468: const PetscInt *ajtmp, *bjtmp;
469: MatScalar *rtmp, *pc, multiplier, *pv;
470: const MatScalar *aa = a->a, *v;
471: PetscBool row_identity, col_identity;
472: FactorShiftCtx sctx;
473: const PetscInt *ddiag;
474: PetscReal rs;
475: MatScalar d;
477: PetscFunctionBegin;
478: /* MatPivotSetUp(): initialize shift context sctx */
479: PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx)));
481: if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
482: ddiag = a->diag;
483: sctx.shift_top = info->zeropivot;
484: for (i = 0; i < n; i++) {
485: /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
486: d = (aa)[ddiag[i]];
487: rs = -PetscAbsScalar(d) - PetscRealPart(d);
488: v = aa + ai[i];
489: nz = ai[i + 1] - ai[i];
490: for (j = 0; j < nz; j++) rs += PetscAbsScalar(v[j]);
491: if (rs > sctx.shift_top) sctx.shift_top = rs;
492: }
493: sctx.shift_top *= 1.1;
494: sctx.nshift_max = 5;
495: sctx.shift_lo = 0.;
496: sctx.shift_hi = 1.;
497: }
499: PetscCall(ISGetIndices(isrow, &r));
500: PetscCall(ISGetIndices(isicol, &ic));
501: PetscCall(PetscMalloc1(n + 1, &rtmp));
502: ics = ic;
504: do {
505: sctx.newshift = PETSC_FALSE;
506: for (i = 0; i < n; i++) {
507: /* zero rtmp */
508: /* L part */
509: nz = bi[i + 1] - bi[i];
510: bjtmp = bj + bi[i];
511: for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0;
513: /* U part */
514: nz = bdiag[i] - bdiag[i + 1];
515: bjtmp = bj + bdiag[i + 1] + 1;
516: for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0;
518: /* load in initial (unfactored row) */
519: nz = ai[r[i] + 1] - ai[r[i]];
520: ajtmp = aj + ai[r[i]];
521: v = aa + ai[r[i]];
522: for (j = 0; j < nz; j++) rtmp[ics[ajtmp[j]]] = v[j];
523: /* ZeropivotApply() */
524: rtmp[i] += sctx.shift_amount; /* shift the diagonal of the matrix */
526: /* elimination */
527: bjtmp = bj + bi[i];
528: row = *bjtmp++;
529: nzL = bi[i + 1] - bi[i];
530: for (k = 0; k < nzL; k++) {
531: pc = rtmp + row;
532: if (*pc != 0.0) {
533: pv = b->a + bdiag[row];
534: multiplier = *pc * (*pv);
535: *pc = multiplier;
537: pj = b->j + bdiag[row + 1] + 1; /* beginning of U(row,:) */
538: pv = b->a + bdiag[row + 1] + 1;
539: nz = bdiag[row] - bdiag[row + 1] - 1; /* num of entries in U(row,:) excluding diag */
541: for (j = 0; j < nz; j++) rtmp[pj[j]] -= multiplier * pv[j];
542: PetscCall(PetscLogFlops(1 + 2.0 * nz));
543: }
544: row = *bjtmp++;
545: }
547: /* finished row so stick it into b->a */
548: rs = 0.0;
549: /* L part */
550: pv = b->a + bi[i];
551: pj = b->j + bi[i];
552: nz = bi[i + 1] - bi[i];
553: for (j = 0; j < nz; j++) {
554: pv[j] = rtmp[pj[j]];
555: rs += PetscAbsScalar(pv[j]);
556: }
558: /* U part */
559: pv = b->a + bdiag[i + 1] + 1;
560: pj = b->j + bdiag[i + 1] + 1;
561: nz = bdiag[i] - bdiag[i + 1] - 1;
562: for (j = 0; j < nz; j++) {
563: pv[j] = rtmp[pj[j]];
564: rs += PetscAbsScalar(pv[j]);
565: }
567: sctx.rs = rs;
568: sctx.pv = rtmp[i];
569: PetscCall(MatPivotCheck(B, A, info, &sctx, i));
570: if (sctx.newshift) break; /* break for-loop */
571: rtmp[i] = sctx.pv; /* sctx.pv might be updated in the case of MAT_SHIFT_INBLOCKS */
573: /* Mark diagonal and invert diagonal for simpler triangular solves */
574: pv = b->a + bdiag[i];
575: *pv = 1.0 / rtmp[i];
577: } /* endof for (i=0; i<n; i++) { */
579: /* MatPivotRefine() */
580: if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction > 0 && sctx.nshift < sctx.nshift_max) {
581: /*
582: * if no shift in this attempt & shifting & started shifting & can refine,
583: * then try lower shift
584: */
585: sctx.shift_hi = sctx.shift_fraction;
586: sctx.shift_fraction = (sctx.shift_hi + sctx.shift_lo) / 2.;
587: sctx.shift_amount = sctx.shift_fraction * sctx.shift_top;
588: sctx.newshift = PETSC_TRUE;
589: sctx.nshift++;
590: }
591: } while (sctx.newshift);
593: PetscCall(PetscFree(rtmp));
594: PetscCall(ISRestoreIndices(isicol, &ic));
595: PetscCall(ISRestoreIndices(isrow, &r));
597: PetscCall(ISIdentity(isrow, &row_identity));
598: PetscCall(ISIdentity(isicol, &col_identity));
599: if (b->inode.size) {
600: C->ops->solve = MatSolve_SeqAIJ_Inode;
601: } else if (row_identity && col_identity) {
602: C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering;
603: } else {
604: C->ops->solve = MatSolve_SeqAIJ;
605: }
606: C->ops->solveadd = MatSolveAdd_SeqAIJ;
607: C->ops->solvetranspose = MatSolveTranspose_SeqAIJ;
608: C->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ;
609: C->ops->matsolve = MatMatSolve_SeqAIJ;
610: C->ops->matsolvetranspose = MatMatSolveTranspose_SeqAIJ;
611: C->assembled = PETSC_TRUE;
612: C->preallocated = PETSC_TRUE;
614: PetscCall(PetscLogFlops(C->cmap->n));
616: /* MatShiftView(A,info,&sctx) */
617: if (sctx.nshift) {
618: if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
619: PetscCall(PetscInfo(A, "number of shift_pd tries %" PetscInt_FMT ", shift_amount %g, diagonal shifted up by %e fraction top_value %e\n", sctx.nshift, (double)sctx.shift_amount, (double)sctx.shift_fraction, (double)sctx.shift_top));
620: } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
621: PetscCall(PetscInfo(A, "number of shift_nz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount));
622: } else if (info->shifttype == (PetscReal)MAT_SHIFT_INBLOCKS) {
623: PetscCall(PetscInfo(A, "number of shift_inblocks applied %" PetscInt_FMT ", each shift_amount %g\n", sctx.nshift, (double)info->shiftamount));
624: }
625: }
626: PetscFunctionReturn(PETSC_SUCCESS);
627: }
629: static PetscErrorCode MatMatSolve_SeqAIJ_inplace(Mat, Mat, Mat);
630: static PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering_inplace(Mat, Vec, Vec);
631: static PetscErrorCode MatSolveAdd_SeqAIJ_inplace(Mat, Vec, Vec, Vec);
633: PetscErrorCode MatLUFactorNumeric_SeqAIJ_inplace(Mat B, Mat A, const MatFactorInfo *info)
634: {
635: Mat C = B;
636: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data, *b = (Mat_SeqAIJ *)C->data;
637: IS isrow = b->row, isicol = b->icol;
638: const PetscInt *r, *ic, *ics;
639: PetscInt nz, row, i, j, n = A->rmap->n, diag;
640: const PetscInt *ai = a->i, *aj = a->j, *bi = b->i, *bj = b->j;
641: const PetscInt *ajtmp, *bjtmp, *diag_offset = b->diag, *pj;
642: MatScalar *pv, *rtmp, *pc, multiplier, d;
643: const MatScalar *v, *aa = a->a;
644: PetscReal rs = 0.0;
645: FactorShiftCtx sctx;
646: const PetscInt *ddiag;
647: PetscBool row_identity, col_identity;
649: PetscFunctionBegin;
650: /* MatPivotSetUp(): initialize shift context sctx */
651: PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx)));
653: if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
654: ddiag = a->diag;
655: sctx.shift_top = info->zeropivot;
656: for (i = 0; i < n; i++) {
657: /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
658: d = (aa)[ddiag[i]];
659: rs = -PetscAbsScalar(d) - PetscRealPart(d);
660: v = aa + ai[i];
661: nz = ai[i + 1] - ai[i];
662: for (j = 0; j < nz; j++) rs += PetscAbsScalar(v[j]);
663: if (rs > sctx.shift_top) sctx.shift_top = rs;
664: }
665: sctx.shift_top *= 1.1;
666: sctx.nshift_max = 5;
667: sctx.shift_lo = 0.;
668: sctx.shift_hi = 1.;
669: }
671: PetscCall(ISGetIndices(isrow, &r));
672: PetscCall(ISGetIndices(isicol, &ic));
673: PetscCall(PetscMalloc1(n + 1, &rtmp));
674: ics = ic;
676: do {
677: sctx.newshift = PETSC_FALSE;
678: for (i = 0; i < n; i++) {
679: nz = bi[i + 1] - bi[i];
680: bjtmp = bj + bi[i];
681: for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0;
683: /* load in initial (unfactored row) */
684: nz = ai[r[i] + 1] - ai[r[i]];
685: ajtmp = aj + ai[r[i]];
686: v = aa + ai[r[i]];
687: for (j = 0; j < nz; j++) rtmp[ics[ajtmp[j]]] = v[j];
688: rtmp[ics[r[i]]] += sctx.shift_amount; /* shift the diagonal of the matrix */
690: row = *bjtmp++;
691: while (row < i) {
692: pc = rtmp + row;
693: if (*pc != 0.0) {
694: pv = b->a + diag_offset[row];
695: pj = b->j + diag_offset[row] + 1;
696: multiplier = *pc / *pv++;
697: *pc = multiplier;
698: nz = bi[row + 1] - diag_offset[row] - 1;
699: for (j = 0; j < nz; j++) rtmp[pj[j]] -= multiplier * pv[j];
700: PetscCall(PetscLogFlops(1 + 2.0 * nz));
701: }
702: row = *bjtmp++;
703: }
704: /* finished row so stick it into b->a */
705: pv = b->a + bi[i];
706: pj = b->j + bi[i];
707: nz = bi[i + 1] - bi[i];
708: diag = diag_offset[i] - bi[i];
709: rs = 0.0;
710: for (j = 0; j < nz; j++) {
711: pv[j] = rtmp[pj[j]];
712: rs += PetscAbsScalar(pv[j]);
713: }
714: rs -= PetscAbsScalar(pv[diag]);
716: sctx.rs = rs;
717: sctx.pv = pv[diag];
718: PetscCall(MatPivotCheck(B, A, info, &sctx, i));
719: if (sctx.newshift) break;
720: pv[diag] = sctx.pv;
721: }
723: if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction > 0 && sctx.nshift < sctx.nshift_max) {
724: /*
725: * if no shift in this attempt & shifting & started shifting & can refine,
726: * then try lower shift
727: */
728: sctx.shift_hi = sctx.shift_fraction;
729: sctx.shift_fraction = (sctx.shift_hi + sctx.shift_lo) / 2.;
730: sctx.shift_amount = sctx.shift_fraction * sctx.shift_top;
731: sctx.newshift = PETSC_TRUE;
732: sctx.nshift++;
733: }
734: } while (sctx.newshift);
736: /* invert diagonal entries for simpler triangular solves */
737: for (i = 0; i < n; i++) b->a[diag_offset[i]] = 1.0 / b->a[diag_offset[i]];
738: PetscCall(PetscFree(rtmp));
739: PetscCall(ISRestoreIndices(isicol, &ic));
740: PetscCall(ISRestoreIndices(isrow, &r));
742: PetscCall(ISIdentity(isrow, &row_identity));
743: PetscCall(ISIdentity(isicol, &col_identity));
744: if (row_identity && col_identity) {
745: C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering_inplace;
746: } else {
747: C->ops->solve = MatSolve_SeqAIJ_inplace;
748: }
749: C->ops->solveadd = MatSolveAdd_SeqAIJ_inplace;
750: C->ops->solvetranspose = MatSolveTranspose_SeqAIJ_inplace;
751: C->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ_inplace;
752: C->ops->matsolve = MatMatSolve_SeqAIJ_inplace;
753: C->ops->matsolvetranspose = NULL;
755: C->assembled = PETSC_TRUE;
756: C->preallocated = PETSC_TRUE;
758: PetscCall(PetscLogFlops(C->cmap->n));
759: if (sctx.nshift) {
760: if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
761: PetscCall(PetscInfo(A, "number of shift_pd tries %" PetscInt_FMT ", shift_amount %g, diagonal shifted up by %e fraction top_value %e\n", sctx.nshift, (double)sctx.shift_amount, (double)sctx.shift_fraction, (double)sctx.shift_top));
762: } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
763: PetscCall(PetscInfo(A, "number of shift_nz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount));
764: }
765: }
766: (C)->ops->solve = MatSolve_SeqAIJ_inplace;
767: (C)->ops->solvetranspose = MatSolveTranspose_SeqAIJ_inplace;
769: PetscCall(MatSeqAIJCheckInode(C));
770: PetscFunctionReturn(PETSC_SUCCESS);
771: }
773: static PetscErrorCode MatSolve_SeqAIJ_InplaceWithPerm(Mat, Vec, Vec);
775: /*
776: This routine implements inplace ILU(0) with row or/and column permutations.
777: Input:
778: A - original matrix
779: Output;
780: A - a->i (rowptr) is same as original rowptr, but factored i-the row is stored in rowperm[i]
781: a->j (col index) is permuted by the inverse of colperm, then sorted
782: a->a reordered accordingly with a->j
783: a->diag (ptr to diagonal elements) is updated.
784: */
785: PetscErrorCode MatLUFactorNumeric_SeqAIJ_InplaceWithPerm(Mat B, Mat A, const MatFactorInfo *info)
786: {
787: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
788: IS isrow = a->row, isicol = a->icol;
789: const PetscInt *r, *ic, *ics;
790: PetscInt i, j, n = A->rmap->n, *ai = a->i, *aj = a->j;
791: PetscInt *ajtmp, nz, row;
792: PetscInt *diag = a->diag, nbdiag, *pj;
793: PetscScalar *rtmp, *pc, multiplier, d;
794: MatScalar *pv, *v;
795: PetscReal rs;
796: FactorShiftCtx sctx;
797: const MatScalar *aa = a->a, *vtmp;
799: PetscFunctionBegin;
800: PetscCheck(A == B, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "input and output matrix must have same address");
802: /* MatPivotSetUp(): initialize shift context sctx */
803: PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx)));
805: if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
806: const PetscInt *ddiag = a->diag;
807: sctx.shift_top = info->zeropivot;
808: for (i = 0; i < n; i++) {
809: /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
810: d = (aa)[ddiag[i]];
811: rs = -PetscAbsScalar(d) - PetscRealPart(d);
812: vtmp = aa + ai[i];
813: nz = ai[i + 1] - ai[i];
814: for (j = 0; j < nz; j++) rs += PetscAbsScalar(vtmp[j]);
815: if (rs > sctx.shift_top) sctx.shift_top = rs;
816: }
817: sctx.shift_top *= 1.1;
818: sctx.nshift_max = 5;
819: sctx.shift_lo = 0.;
820: sctx.shift_hi = 1.;
821: }
823: PetscCall(ISGetIndices(isrow, &r));
824: PetscCall(ISGetIndices(isicol, &ic));
825: PetscCall(PetscMalloc1(n + 1, &rtmp));
826: PetscCall(PetscArrayzero(rtmp, n + 1));
827: ics = ic;
829: #if defined(MV)
830: sctx.shift_top = 0.;
831: sctx.nshift_max = 0;
832: sctx.shift_lo = 0.;
833: sctx.shift_hi = 0.;
834: sctx.shift_fraction = 0.;
836: if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
837: sctx.shift_top = 0.;
838: for (i = 0; i < n; i++) {
839: /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
840: d = (a->a)[diag[i]];
841: rs = -PetscAbsScalar(d) - PetscRealPart(d);
842: v = a->a + ai[i];
843: nz = ai[i + 1] - ai[i];
844: for (j = 0; j < nz; j++) rs += PetscAbsScalar(v[j]);
845: if (rs > sctx.shift_top) sctx.shift_top = rs;
846: }
847: if (sctx.shift_top < info->zeropivot) sctx.shift_top = info->zeropivot;
848: sctx.shift_top *= 1.1;
849: sctx.nshift_max = 5;
850: sctx.shift_lo = 0.;
851: sctx.shift_hi = 1.;
852: }
854: sctx.shift_amount = 0.;
855: sctx.nshift = 0;
856: #endif
858: do {
859: sctx.newshift = PETSC_FALSE;
860: for (i = 0; i < n; i++) {
861: /* load in initial unfactored row */
862: nz = ai[r[i] + 1] - ai[r[i]];
863: ajtmp = aj + ai[r[i]];
864: v = a->a + ai[r[i]];
865: /* sort permuted ajtmp and values v accordingly */
866: for (j = 0; j < nz; j++) ajtmp[j] = ics[ajtmp[j]];
867: PetscCall(PetscSortIntWithScalarArray(nz, ajtmp, v));
869: diag[r[i]] = ai[r[i]];
870: for (j = 0; j < nz; j++) {
871: rtmp[ajtmp[j]] = v[j];
872: if (ajtmp[j] < i) diag[r[i]]++; /* update a->diag */
873: }
874: rtmp[r[i]] += sctx.shift_amount; /* shift the diagonal of the matrix */
876: row = *ajtmp++;
877: while (row < i) {
878: pc = rtmp + row;
879: if (*pc != 0.0) {
880: pv = a->a + diag[r[row]];
881: pj = aj + diag[r[row]] + 1;
883: multiplier = *pc / *pv++;
884: *pc = multiplier;
885: nz = ai[r[row] + 1] - diag[r[row]] - 1;
886: for (j = 0; j < nz; j++) rtmp[pj[j]] -= multiplier * pv[j];
887: PetscCall(PetscLogFlops(1 + 2.0 * nz));
888: }
889: row = *ajtmp++;
890: }
891: /* finished row so overwrite it onto a->a */
892: pv = a->a + ai[r[i]];
893: pj = aj + ai[r[i]];
894: nz = ai[r[i] + 1] - ai[r[i]];
895: nbdiag = diag[r[i]] - ai[r[i]]; /* num of entries before the diagonal */
897: rs = 0.0;
898: for (j = 0; j < nz; j++) {
899: pv[j] = rtmp[pj[j]];
900: if (j != nbdiag) rs += PetscAbsScalar(pv[j]);
901: }
903: sctx.rs = rs;
904: sctx.pv = pv[nbdiag];
905: PetscCall(MatPivotCheck(B, A, info, &sctx, i));
906: if (sctx.newshift) break;
907: pv[nbdiag] = sctx.pv;
908: }
910: if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction > 0 && sctx.nshift < sctx.nshift_max) {
911: /*
912: * if no shift in this attempt & shifting & started shifting & can refine,
913: * then try lower shift
914: */
915: sctx.shift_hi = sctx.shift_fraction;
916: sctx.shift_fraction = (sctx.shift_hi + sctx.shift_lo) / 2.;
917: sctx.shift_amount = sctx.shift_fraction * sctx.shift_top;
918: sctx.newshift = PETSC_TRUE;
919: sctx.nshift++;
920: }
921: } while (sctx.newshift);
923: /* invert diagonal entries for simpler triangular solves */
924: for (i = 0; i < n; i++) a->a[diag[r[i]]] = 1.0 / a->a[diag[r[i]]];
926: PetscCall(PetscFree(rtmp));
927: PetscCall(ISRestoreIndices(isicol, &ic));
928: PetscCall(ISRestoreIndices(isrow, &r));
930: A->ops->solve = MatSolve_SeqAIJ_InplaceWithPerm;
931: A->ops->solveadd = MatSolveAdd_SeqAIJ_inplace;
932: A->ops->solvetranspose = MatSolveTranspose_SeqAIJ_inplace;
933: A->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ_inplace;
935: A->assembled = PETSC_TRUE;
936: A->preallocated = PETSC_TRUE;
938: PetscCall(PetscLogFlops(A->cmap->n));
939: if (sctx.nshift) {
940: if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
941: PetscCall(PetscInfo(A, "number of shift_pd tries %" PetscInt_FMT ", shift_amount %g, diagonal shifted up by %e fraction top_value %e\n", sctx.nshift, (double)sctx.shift_amount, (double)sctx.shift_fraction, (double)sctx.shift_top));
942: } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
943: PetscCall(PetscInfo(A, "number of shift_nz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount));
944: }
945: }
946: PetscFunctionReturn(PETSC_SUCCESS);
947: }
949: PetscErrorCode MatLUFactor_SeqAIJ(Mat A, IS row, IS col, const MatFactorInfo *info)
950: {
951: Mat C;
953: PetscFunctionBegin;
954: PetscCall(MatGetFactor(A, MATSOLVERPETSC, MAT_FACTOR_LU, &C));
955: PetscCall(MatLUFactorSymbolic(C, A, row, col, info));
956: PetscCall(MatLUFactorNumeric(C, A, info));
958: A->ops->solve = C->ops->solve;
959: A->ops->solvetranspose = C->ops->solvetranspose;
961: PetscCall(MatHeaderMerge(A, &C));
962: PetscFunctionReturn(PETSC_SUCCESS);
963: }
965: PetscErrorCode MatSolve_SeqAIJ_inplace(Mat A, Vec bb, Vec xx)
966: {
967: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
968: IS iscol = a->col, isrow = a->row;
969: PetscInt i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j;
970: PetscInt nz;
971: const PetscInt *rout, *cout, *r, *c;
972: PetscScalar *x, *tmp, *tmps, sum;
973: const PetscScalar *b;
974: const MatScalar *aa = a->a, *v;
976: PetscFunctionBegin;
977: if (!n) PetscFunctionReturn(PETSC_SUCCESS);
979: PetscCall(VecGetArrayRead(bb, &b));
980: PetscCall(VecGetArrayWrite(xx, &x));
981: tmp = a->solve_work;
983: PetscCall(ISGetIndices(isrow, &rout));
984: r = rout;
985: PetscCall(ISGetIndices(iscol, &cout));
986: c = cout + (n - 1);
988: /* forward solve the lower triangular */
989: tmp[0] = b[*r++];
990: tmps = tmp;
991: for (i = 1; i < n; i++) {
992: v = aa + ai[i];
993: vi = aj + ai[i];
994: nz = a->diag[i] - ai[i];
995: sum = b[*r++];
996: PetscSparseDenseMinusDot(sum, tmps, v, vi, nz);
997: tmp[i] = sum;
998: }
1000: /* backward solve the upper triangular */
1001: for (i = n - 1; i >= 0; i--) {
1002: v = aa + a->diag[i] + 1;
1003: vi = aj + a->diag[i] + 1;
1004: nz = ai[i + 1] - a->diag[i] - 1;
1005: sum = tmp[i];
1006: PetscSparseDenseMinusDot(sum, tmps, v, vi, nz);
1007: x[*c--] = tmp[i] = sum * aa[a->diag[i]];
1008: }
1010: PetscCall(ISRestoreIndices(isrow, &rout));
1011: PetscCall(ISRestoreIndices(iscol, &cout));
1012: PetscCall(VecRestoreArrayRead(bb, &b));
1013: PetscCall(VecRestoreArrayWrite(xx, &x));
1014: PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
1015: PetscFunctionReturn(PETSC_SUCCESS);
1016: }
1018: static PetscErrorCode MatMatSolve_SeqAIJ_inplace(Mat A, Mat B, Mat X)
1019: {
1020: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
1021: IS iscol = a->col, isrow = a->row;
1022: PetscInt i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j;
1023: PetscInt nz, neq, ldb, ldx;
1024: const PetscInt *rout, *cout, *r, *c;
1025: PetscScalar *x, *tmp = a->solve_work, *tmps, sum;
1026: const PetscScalar *b, *aa = a->a, *v;
1027: PetscBool isdense;
1029: PetscFunctionBegin;
1030: if (!n) PetscFunctionReturn(PETSC_SUCCESS);
1031: PetscCall(PetscObjectTypeCompare((PetscObject)B, MATSEQDENSE, &isdense));
1032: PetscCheck(isdense, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "B matrix must be a SeqDense matrix");
1033: if (X != B) {
1034: PetscCall(PetscObjectTypeCompare((PetscObject)X, MATSEQDENSE, &isdense));
1035: PetscCheck(isdense, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "X matrix must be a SeqDense matrix");
1036: }
1037: PetscCall(MatDenseGetArrayRead(B, &b));
1038: PetscCall(MatDenseGetLDA(B, &ldb));
1039: PetscCall(MatDenseGetArray(X, &x));
1040: PetscCall(MatDenseGetLDA(X, &ldx));
1041: PetscCall(ISGetIndices(isrow, &rout));
1042: r = rout;
1043: PetscCall(ISGetIndices(iscol, &cout));
1044: c = cout;
1045: for (neq = 0; neq < B->cmap->n; neq++) {
1046: /* forward solve the lower triangular */
1047: tmp[0] = b[r[0]];
1048: tmps = tmp;
1049: for (i = 1; i < n; i++) {
1050: v = aa + ai[i];
1051: vi = aj + ai[i];
1052: nz = a->diag[i] - ai[i];
1053: sum = b[r[i]];
1054: PetscSparseDenseMinusDot(sum, tmps, v, vi, nz);
1055: tmp[i] = sum;
1056: }
1057: /* backward solve the upper triangular */
1058: for (i = n - 1; i >= 0; i--) {
1059: v = aa + a->diag[i] + 1;
1060: vi = aj + a->diag[i] + 1;
1061: nz = ai[i + 1] - a->diag[i] - 1;
1062: sum = tmp[i];
1063: PetscSparseDenseMinusDot(sum, tmps, v, vi, nz);
1064: x[c[i]] = tmp[i] = sum * aa[a->diag[i]];
1065: }
1066: b += ldb;
1067: x += ldx;
1068: }
1069: PetscCall(ISRestoreIndices(isrow, &rout));
1070: PetscCall(ISRestoreIndices(iscol, &cout));
1071: PetscCall(MatDenseRestoreArrayRead(B, &b));
1072: PetscCall(MatDenseRestoreArray(X, &x));
1073: PetscCall(PetscLogFlops(B->cmap->n * (2.0 * a->nz - n)));
1074: PetscFunctionReturn(PETSC_SUCCESS);
1075: }
1077: PetscErrorCode MatMatSolve_SeqAIJ(Mat A, Mat B, Mat X)
1078: {
1079: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
1080: IS iscol = a->col, isrow = a->row;
1081: PetscInt i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j, *adiag = a->diag;
1082: PetscInt nz, neq, ldb, ldx;
1083: const PetscInt *rout, *cout, *r, *c;
1084: PetscScalar *x, *tmp = a->solve_work, sum;
1085: const PetscScalar *b, *aa = a->a, *v;
1086: PetscBool isdense;
1088: PetscFunctionBegin;
1089: if (!n) PetscFunctionReturn(PETSC_SUCCESS);
1090: PetscCall(PetscObjectTypeCompare((PetscObject)B, MATSEQDENSE, &isdense));
1091: PetscCheck(isdense, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "B matrix must be a SeqDense matrix");
1092: if (X != B) {
1093: PetscCall(PetscObjectTypeCompare((PetscObject)X, MATSEQDENSE, &isdense));
1094: PetscCheck(isdense, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "X matrix must be a SeqDense matrix");
1095: }
1096: PetscCall(MatDenseGetArrayRead(B, &b));
1097: PetscCall(MatDenseGetLDA(B, &ldb));
1098: PetscCall(MatDenseGetArray(X, &x));
1099: PetscCall(MatDenseGetLDA(X, &ldx));
1100: PetscCall(ISGetIndices(isrow, &rout));
1101: r = rout;
1102: PetscCall(ISGetIndices(iscol, &cout));
1103: c = cout;
1104: for (neq = 0; neq < B->cmap->n; neq++) {
1105: /* forward solve the lower triangular */
1106: tmp[0] = b[r[0]];
1107: v = aa;
1108: vi = aj;
1109: for (i = 1; i < n; i++) {
1110: nz = ai[i + 1] - ai[i];
1111: sum = b[r[i]];
1112: PetscSparseDenseMinusDot(sum, tmp, v, vi, nz);
1113: tmp[i] = sum;
1114: v += nz;
1115: vi += nz;
1116: }
1117: /* backward solve the upper triangular */
1118: for (i = n - 1; i >= 0; i--) {
1119: v = aa + adiag[i + 1] + 1;
1120: vi = aj + adiag[i + 1] + 1;
1121: nz = adiag[i] - adiag[i + 1] - 1;
1122: sum = tmp[i];
1123: PetscSparseDenseMinusDot(sum, tmp, v, vi, nz);
1124: x[c[i]] = tmp[i] = sum * v[nz]; /* v[nz] = aa[adiag[i]] */
1125: }
1126: b += ldb;
1127: x += ldx;
1128: }
1129: PetscCall(ISRestoreIndices(isrow, &rout));
1130: PetscCall(ISRestoreIndices(iscol, &cout));
1131: PetscCall(MatDenseRestoreArrayRead(B, &b));
1132: PetscCall(MatDenseRestoreArray(X, &x));
1133: PetscCall(PetscLogFlops(B->cmap->n * (2.0 * a->nz - n)));
1134: PetscFunctionReturn(PETSC_SUCCESS);
1135: }
1137: PetscErrorCode MatMatSolveTranspose_SeqAIJ(Mat A, Mat B, Mat X)
1138: {
1139: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
1140: IS iscol = a->col, isrow = a->row;
1141: PetscInt i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j, *adiag = a->diag, j;
1142: PetscInt nz, neq, ldb, ldx;
1143: const PetscInt *rout, *cout, *r, *c;
1144: PetscScalar *x, *tmp = a->solve_work, s1;
1145: const PetscScalar *b, *aa = a->a, *v;
1146: PetscBool isdense;
1148: PetscFunctionBegin;
1149: if (!n) PetscFunctionReturn(PETSC_SUCCESS);
1150: PetscCall(PetscObjectTypeCompare((PetscObject)B, MATSEQDENSE, &isdense));
1151: PetscCheck(isdense, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "B matrix must be a SeqDense matrix");
1152: if (X != B) {
1153: PetscCall(PetscObjectTypeCompare((PetscObject)X, MATSEQDENSE, &isdense));
1154: PetscCheck(isdense, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "X matrix must be a SeqDense matrix");
1155: }
1156: PetscCall(MatDenseGetArrayRead(B, &b));
1157: PetscCall(MatDenseGetLDA(B, &ldb));
1158: PetscCall(MatDenseGetArray(X, &x));
1159: PetscCall(MatDenseGetLDA(X, &ldx));
1160: tmp = a->solve_work;
1161: PetscCall(ISGetIndices(isrow, &rout));
1162: r = rout;
1163: PetscCall(ISGetIndices(iscol, &cout));
1164: c = cout;
1165: for (neq = 0; neq < B->cmap->n; neq++) {
1166: /* copy the b into temp work space according to permutation */
1167: for (i = 0; i < n; i++) tmp[i] = b[c[i]];
1169: /* forward solve the U^T */
1170: for (i = 0; i < n; i++) {
1171: v = aa + adiag[i + 1] + 1;
1172: vi = aj + adiag[i + 1] + 1;
1173: nz = adiag[i] - adiag[i + 1] - 1;
1174: s1 = tmp[i];
1175: s1 *= v[nz]; /* multiply by inverse of diagonal entry */
1176: for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j];
1177: tmp[i] = s1;
1178: }
1180: /* backward solve the L^T */
1181: for (i = n - 1; i >= 0; i--) {
1182: v = aa + ai[i];
1183: vi = aj + ai[i];
1184: nz = ai[i + 1] - ai[i];
1185: s1 = tmp[i];
1186: for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j];
1187: }
1189: /* copy tmp into x according to permutation */
1190: for (i = 0; i < n; i++) x[r[i]] = tmp[i];
1191: b += ldb;
1192: x += ldx;
1193: }
1194: PetscCall(ISRestoreIndices(isrow, &rout));
1195: PetscCall(ISRestoreIndices(iscol, &cout));
1196: PetscCall(MatDenseRestoreArrayRead(B, &b));
1197: PetscCall(MatDenseRestoreArray(X, &x));
1198: PetscCall(PetscLogFlops(B->cmap->n * (2.0 * a->nz - n)));
1199: PetscFunctionReturn(PETSC_SUCCESS);
1200: }
1202: static PetscErrorCode MatSolve_SeqAIJ_InplaceWithPerm(Mat A, Vec bb, Vec xx)
1203: {
1204: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
1205: IS iscol = a->col, isrow = a->row;
1206: const PetscInt *r, *c, *rout, *cout;
1207: PetscInt i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j;
1208: PetscInt nz, row;
1209: PetscScalar *x, *tmp, *tmps, sum;
1210: const PetscScalar *b;
1211: const MatScalar *aa = a->a, *v;
1213: PetscFunctionBegin;
1214: if (!n) PetscFunctionReturn(PETSC_SUCCESS);
1216: PetscCall(VecGetArrayRead(bb, &b));
1217: PetscCall(VecGetArrayWrite(xx, &x));
1218: tmp = a->solve_work;
1220: PetscCall(ISGetIndices(isrow, &rout));
1221: r = rout;
1222: PetscCall(ISGetIndices(iscol, &cout));
1223: c = cout + (n - 1);
1225: /* forward solve the lower triangular */
1226: tmp[0] = b[*r++];
1227: tmps = tmp;
1228: for (row = 1; row < n; row++) {
1229: i = rout[row]; /* permuted row */
1230: v = aa + ai[i];
1231: vi = aj + ai[i];
1232: nz = a->diag[i] - ai[i];
1233: sum = b[*r++];
1234: PetscSparseDenseMinusDot(sum, tmps, v, vi, nz);
1235: tmp[row] = sum;
1236: }
1238: /* backward solve the upper triangular */
1239: for (row = n - 1; row >= 0; row--) {
1240: i = rout[row]; /* permuted row */
1241: v = aa + a->diag[i] + 1;
1242: vi = aj + a->diag[i] + 1;
1243: nz = ai[i + 1] - a->diag[i] - 1;
1244: sum = tmp[row];
1245: PetscSparseDenseMinusDot(sum, tmps, v, vi, nz);
1246: x[*c--] = tmp[row] = sum * aa[a->diag[i]];
1247: }
1249: PetscCall(ISRestoreIndices(isrow, &rout));
1250: PetscCall(ISRestoreIndices(iscol, &cout));
1251: PetscCall(VecRestoreArrayRead(bb, &b));
1252: PetscCall(VecRestoreArrayWrite(xx, &x));
1253: PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
1254: PetscFunctionReturn(PETSC_SUCCESS);
1255: }
1257: #include <../src/mat/impls/aij/seq/ftn-kernels/fsolve.h>
1258: static PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering_inplace(Mat A, Vec bb, Vec xx)
1259: {
1260: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
1261: PetscInt n = A->rmap->n;
1262: const PetscInt *ai = a->i, *aj = a->j, *adiag = a->diag;
1263: PetscScalar *x;
1264: const PetscScalar *b;
1265: const MatScalar *aa = a->a;
1266: #if !defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
1267: PetscInt adiag_i, i, nz, ai_i;
1268: const PetscInt *vi;
1269: const MatScalar *v;
1270: PetscScalar sum;
1271: #endif
1273: PetscFunctionBegin;
1274: if (!n) PetscFunctionReturn(PETSC_SUCCESS);
1276: PetscCall(VecGetArrayRead(bb, &b));
1277: PetscCall(VecGetArrayWrite(xx, &x));
1279: #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
1280: fortransolveaij_(&n, x, ai, aj, adiag, aa, b);
1281: #else
1282: /* forward solve the lower triangular */
1283: x[0] = b[0];
1284: for (i = 1; i < n; i++) {
1285: ai_i = ai[i];
1286: v = aa + ai_i;
1287: vi = aj + ai_i;
1288: nz = adiag[i] - ai_i;
1289: sum = b[i];
1290: PetscSparseDenseMinusDot(sum, x, v, vi, nz);
1291: x[i] = sum;
1292: }
1294: /* backward solve the upper triangular */
1295: for (i = n - 1; i >= 0; i--) {
1296: adiag_i = adiag[i];
1297: v = aa + adiag_i + 1;
1298: vi = aj + adiag_i + 1;
1299: nz = ai[i + 1] - adiag_i - 1;
1300: sum = x[i];
1301: PetscSparseDenseMinusDot(sum, x, v, vi, nz);
1302: x[i] = sum * aa[adiag_i];
1303: }
1304: #endif
1305: PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
1306: PetscCall(VecRestoreArrayRead(bb, &b));
1307: PetscCall(VecRestoreArrayWrite(xx, &x));
1308: PetscFunctionReturn(PETSC_SUCCESS);
1309: }
1311: static PetscErrorCode MatSolveAdd_SeqAIJ_inplace(Mat A, Vec bb, Vec yy, Vec xx)
1312: {
1313: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
1314: IS iscol = a->col, isrow = a->row;
1315: PetscInt i, n = A->rmap->n, j;
1316: PetscInt nz;
1317: const PetscInt *rout, *cout, *r, *c, *vi, *ai = a->i, *aj = a->j;
1318: PetscScalar *x, *tmp, sum;
1319: const PetscScalar *b;
1320: const MatScalar *aa = a->a, *v;
1322: PetscFunctionBegin;
1323: if (yy != xx) PetscCall(VecCopy(yy, xx));
1325: PetscCall(VecGetArrayRead(bb, &b));
1326: PetscCall(VecGetArray(xx, &x));
1327: tmp = a->solve_work;
1329: PetscCall(ISGetIndices(isrow, &rout));
1330: r = rout;
1331: PetscCall(ISGetIndices(iscol, &cout));
1332: c = cout + (n - 1);
1334: /* forward solve the lower triangular */
1335: tmp[0] = b[*r++];
1336: for (i = 1; i < n; i++) {
1337: v = aa + ai[i];
1338: vi = aj + ai[i];
1339: nz = a->diag[i] - ai[i];
1340: sum = b[*r++];
1341: for (j = 0; j < nz; j++) sum -= v[j] * tmp[vi[j]];
1342: tmp[i] = sum;
1343: }
1345: /* backward solve the upper triangular */
1346: for (i = n - 1; i >= 0; i--) {
1347: v = aa + a->diag[i] + 1;
1348: vi = aj + a->diag[i] + 1;
1349: nz = ai[i + 1] - a->diag[i] - 1;
1350: sum = tmp[i];
1351: for (j = 0; j < nz; j++) sum -= v[j] * tmp[vi[j]];
1352: tmp[i] = sum * aa[a->diag[i]];
1353: x[*c--] += tmp[i];
1354: }
1356: PetscCall(ISRestoreIndices(isrow, &rout));
1357: PetscCall(ISRestoreIndices(iscol, &cout));
1358: PetscCall(VecRestoreArrayRead(bb, &b));
1359: PetscCall(VecRestoreArray(xx, &x));
1360: PetscCall(PetscLogFlops(2.0 * a->nz));
1361: PetscFunctionReturn(PETSC_SUCCESS);
1362: }
1364: PetscErrorCode MatSolveAdd_SeqAIJ(Mat A, Vec bb, Vec yy, Vec xx)
1365: {
1366: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
1367: IS iscol = a->col, isrow = a->row;
1368: PetscInt i, n = A->rmap->n, j;
1369: PetscInt nz;
1370: const PetscInt *rout, *cout, *r, *c, *vi, *ai = a->i, *aj = a->j, *adiag = a->diag;
1371: PetscScalar *x, *tmp, sum;
1372: const PetscScalar *b;
1373: const MatScalar *aa = a->a, *v;
1375: PetscFunctionBegin;
1376: if (yy != xx) PetscCall(VecCopy(yy, xx));
1378: PetscCall(VecGetArrayRead(bb, &b));
1379: PetscCall(VecGetArray(xx, &x));
1380: tmp = a->solve_work;
1382: PetscCall(ISGetIndices(isrow, &rout));
1383: r = rout;
1384: PetscCall(ISGetIndices(iscol, &cout));
1385: c = cout;
1387: /* forward solve the lower triangular */
1388: tmp[0] = b[r[0]];
1389: v = aa;
1390: vi = aj;
1391: for (i = 1; i < n; i++) {
1392: nz = ai[i + 1] - ai[i];
1393: sum = b[r[i]];
1394: for (j = 0; j < nz; j++) sum -= v[j] * tmp[vi[j]];
1395: tmp[i] = sum;
1396: v += nz;
1397: vi += nz;
1398: }
1400: /* backward solve the upper triangular */
1401: v = aa + adiag[n - 1];
1402: vi = aj + adiag[n - 1];
1403: for (i = n - 1; i >= 0; i--) {
1404: nz = adiag[i] - adiag[i + 1] - 1;
1405: sum = tmp[i];
1406: for (j = 0; j < nz; j++) sum -= v[j] * tmp[vi[j]];
1407: tmp[i] = sum * v[nz];
1408: x[c[i]] += tmp[i];
1409: v += nz + 1;
1410: vi += nz + 1;
1411: }
1413: PetscCall(ISRestoreIndices(isrow, &rout));
1414: PetscCall(ISRestoreIndices(iscol, &cout));
1415: PetscCall(VecRestoreArrayRead(bb, &b));
1416: PetscCall(VecRestoreArray(xx, &x));
1417: PetscCall(PetscLogFlops(2.0 * a->nz));
1418: PetscFunctionReturn(PETSC_SUCCESS);
1419: }
1421: PetscErrorCode MatSolveTranspose_SeqAIJ_inplace(Mat A, Vec bb, Vec xx)
1422: {
1423: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
1424: IS iscol = a->col, isrow = a->row;
1425: const PetscInt *rout, *cout, *r, *c, *diag = a->diag, *ai = a->i, *aj = a->j, *vi;
1426: PetscInt i, n = A->rmap->n, j;
1427: PetscInt nz;
1428: PetscScalar *x, *tmp, s1;
1429: const MatScalar *aa = a->a, *v;
1430: const PetscScalar *b;
1432: PetscFunctionBegin;
1433: PetscCall(VecGetArrayRead(bb, &b));
1434: PetscCall(VecGetArrayWrite(xx, &x));
1435: tmp = a->solve_work;
1437: PetscCall(ISGetIndices(isrow, &rout));
1438: r = rout;
1439: PetscCall(ISGetIndices(iscol, &cout));
1440: c = cout;
1442: /* copy the b into temp work space according to permutation */
1443: for (i = 0; i < n; i++) tmp[i] = b[c[i]];
1445: /* forward solve the U^T */
1446: for (i = 0; i < n; i++) {
1447: v = aa + diag[i];
1448: vi = aj + diag[i] + 1;
1449: nz = ai[i + 1] - diag[i] - 1;
1450: s1 = tmp[i];
1451: s1 *= (*v++); /* multiply by inverse of diagonal entry */
1452: for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j];
1453: tmp[i] = s1;
1454: }
1456: /* backward solve the L^T */
1457: for (i = n - 1; i >= 0; i--) {
1458: v = aa + diag[i] - 1;
1459: vi = aj + diag[i] - 1;
1460: nz = diag[i] - ai[i];
1461: s1 = tmp[i];
1462: for (j = 0; j > -nz; j--) tmp[vi[j]] -= s1 * v[j];
1463: }
1465: /* copy tmp into x according to permutation */
1466: for (i = 0; i < n; i++) x[r[i]] = tmp[i];
1468: PetscCall(ISRestoreIndices(isrow, &rout));
1469: PetscCall(ISRestoreIndices(iscol, &cout));
1470: PetscCall(VecRestoreArrayRead(bb, &b));
1471: PetscCall(VecRestoreArrayWrite(xx, &x));
1473: PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
1474: PetscFunctionReturn(PETSC_SUCCESS);
1475: }
1477: PetscErrorCode MatSolveTranspose_SeqAIJ(Mat A, Vec bb, Vec xx)
1478: {
1479: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
1480: IS iscol = a->col, isrow = a->row;
1481: const PetscInt *rout, *cout, *r, *c, *adiag = a->diag, *ai = a->i, *aj = a->j, *vi;
1482: PetscInt i, n = A->rmap->n, j;
1483: PetscInt nz;
1484: PetscScalar *x, *tmp, s1;
1485: const MatScalar *aa = a->a, *v;
1486: const PetscScalar *b;
1488: PetscFunctionBegin;
1489: PetscCall(VecGetArrayRead(bb, &b));
1490: PetscCall(VecGetArrayWrite(xx, &x));
1491: tmp = a->solve_work;
1493: PetscCall(ISGetIndices(isrow, &rout));
1494: r = rout;
1495: PetscCall(ISGetIndices(iscol, &cout));
1496: c = cout;
1498: /* copy the b into temp work space according to permutation */
1499: for (i = 0; i < n; i++) tmp[i] = b[c[i]];
1501: /* forward solve the U^T */
1502: for (i = 0; i < n; i++) {
1503: v = aa + adiag[i + 1] + 1;
1504: vi = aj + adiag[i + 1] + 1;
1505: nz = adiag[i] - adiag[i + 1] - 1;
1506: s1 = tmp[i];
1507: s1 *= v[nz]; /* multiply by inverse of diagonal entry */
1508: for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j];
1509: tmp[i] = s1;
1510: }
1512: /* backward solve the L^T */
1513: for (i = n - 1; i >= 0; i--) {
1514: v = aa + ai[i];
1515: vi = aj + ai[i];
1516: nz = ai[i + 1] - ai[i];
1517: s1 = tmp[i];
1518: for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j];
1519: }
1521: /* copy tmp into x according to permutation */
1522: for (i = 0; i < n; i++) x[r[i]] = tmp[i];
1524: PetscCall(ISRestoreIndices(isrow, &rout));
1525: PetscCall(ISRestoreIndices(iscol, &cout));
1526: PetscCall(VecRestoreArrayRead(bb, &b));
1527: PetscCall(VecRestoreArrayWrite(xx, &x));
1529: PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
1530: PetscFunctionReturn(PETSC_SUCCESS);
1531: }
1533: PetscErrorCode MatSolveTransposeAdd_SeqAIJ_inplace(Mat A, Vec bb, Vec zz, Vec xx)
1534: {
1535: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
1536: IS iscol = a->col, isrow = a->row;
1537: const PetscInt *rout, *cout, *r, *c, *diag = a->diag, *ai = a->i, *aj = a->j, *vi;
1538: PetscInt i, n = A->rmap->n, j;
1539: PetscInt nz;
1540: PetscScalar *x, *tmp, s1;
1541: const MatScalar *aa = a->a, *v;
1542: const PetscScalar *b;
1544: PetscFunctionBegin;
1545: if (zz != xx) PetscCall(VecCopy(zz, xx));
1546: PetscCall(VecGetArrayRead(bb, &b));
1547: PetscCall(VecGetArray(xx, &x));
1548: tmp = a->solve_work;
1550: PetscCall(ISGetIndices(isrow, &rout));
1551: r = rout;
1552: PetscCall(ISGetIndices(iscol, &cout));
1553: c = cout;
1555: /* copy the b into temp work space according to permutation */
1556: for (i = 0; i < n; i++) tmp[i] = b[c[i]];
1558: /* forward solve the U^T */
1559: for (i = 0; i < n; i++) {
1560: v = aa + diag[i];
1561: vi = aj + diag[i] + 1;
1562: nz = ai[i + 1] - diag[i] - 1;
1563: s1 = tmp[i];
1564: s1 *= (*v++); /* multiply by inverse of diagonal entry */
1565: for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j];
1566: tmp[i] = s1;
1567: }
1569: /* backward solve the L^T */
1570: for (i = n - 1; i >= 0; i--) {
1571: v = aa + diag[i] - 1;
1572: vi = aj + diag[i] - 1;
1573: nz = diag[i] - ai[i];
1574: s1 = tmp[i];
1575: for (j = 0; j > -nz; j--) tmp[vi[j]] -= s1 * v[j];
1576: }
1578: /* copy tmp into x according to permutation */
1579: for (i = 0; i < n; i++) x[r[i]] += tmp[i];
1581: PetscCall(ISRestoreIndices(isrow, &rout));
1582: PetscCall(ISRestoreIndices(iscol, &cout));
1583: PetscCall(VecRestoreArrayRead(bb, &b));
1584: PetscCall(VecRestoreArray(xx, &x));
1586: PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
1587: PetscFunctionReturn(PETSC_SUCCESS);
1588: }
1590: PetscErrorCode MatSolveTransposeAdd_SeqAIJ(Mat A, Vec bb, Vec zz, Vec xx)
1591: {
1592: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
1593: IS iscol = a->col, isrow = a->row;
1594: const PetscInt *rout, *cout, *r, *c, *adiag = a->diag, *ai = a->i, *aj = a->j, *vi;
1595: PetscInt i, n = A->rmap->n, j;
1596: PetscInt nz;
1597: PetscScalar *x, *tmp, s1;
1598: const MatScalar *aa = a->a, *v;
1599: const PetscScalar *b;
1601: PetscFunctionBegin;
1602: if (zz != xx) PetscCall(VecCopy(zz, xx));
1603: PetscCall(VecGetArrayRead(bb, &b));
1604: PetscCall(VecGetArray(xx, &x));
1605: tmp = a->solve_work;
1607: PetscCall(ISGetIndices(isrow, &rout));
1608: r = rout;
1609: PetscCall(ISGetIndices(iscol, &cout));
1610: c = cout;
1612: /* copy the b into temp work space according to permutation */
1613: for (i = 0; i < n; i++) tmp[i] = b[c[i]];
1615: /* forward solve the U^T */
1616: for (i = 0; i < n; i++) {
1617: v = aa + adiag[i + 1] + 1;
1618: vi = aj + adiag[i + 1] + 1;
1619: nz = adiag[i] - adiag[i + 1] - 1;
1620: s1 = tmp[i];
1621: s1 *= v[nz]; /* multiply by inverse of diagonal entry */
1622: for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j];
1623: tmp[i] = s1;
1624: }
1626: /* backward solve the L^T */
1627: for (i = n - 1; i >= 0; i--) {
1628: v = aa + ai[i];
1629: vi = aj + ai[i];
1630: nz = ai[i + 1] - ai[i];
1631: s1 = tmp[i];
1632: for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j];
1633: }
1635: /* copy tmp into x according to permutation */
1636: for (i = 0; i < n; i++) x[r[i]] += tmp[i];
1638: PetscCall(ISRestoreIndices(isrow, &rout));
1639: PetscCall(ISRestoreIndices(iscol, &cout));
1640: PetscCall(VecRestoreArrayRead(bb, &b));
1641: PetscCall(VecRestoreArray(xx, &x));
1643: PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
1644: PetscFunctionReturn(PETSC_SUCCESS);
1645: }
1647: /*
1648: ilu() under revised new data structure.
1649: Factored arrays bj and ba are stored as
1650: L(0,:), L(1,:), ...,L(n-1,:), U(n-1,:),...,U(i,:),U(i-1,:),...,U(0,:)
1652: bi=fact->i is an array of size n+1, in which
1653: bi[i]: points to 1st entry of L(i,:),i=0,...,n-1
1654: bi[n]: points to L(n-1,n-1)+1
1656: bdiag=fact->diag is an array of size n+1,in which
1657: bdiag[i]: points to diagonal of U(i,:), i=0,...,n-1
1658: bdiag[n]: points to entry of U(n-1,0)-1
1660: U(i,:) contains bdiag[i] as its last entry, i.e.,
1661: U(i,:) = (u[i,i+1],...,u[i,n-1],diag[i])
1662: */
1663: PetscErrorCode MatILUFactorSymbolic_SeqAIJ_ilu0(Mat fact, Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
1664: {
1665: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data, *b;
1666: const PetscInt n = A->rmap->n, *ai = a->i, *aj, *adiag = a->diag;
1667: PetscInt i, j, k = 0, nz, *bi, *bj, *bdiag;
1668: IS isicol;
1670: PetscFunctionBegin;
1671: PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol));
1672: PetscCall(MatDuplicateNoCreate_SeqAIJ(fact, A, MAT_DO_NOT_COPY_VALUES, PETSC_FALSE));
1673: b = (Mat_SeqAIJ *)(fact)->data;
1675: /* allocate matrix arrays for new data structure */
1676: PetscCall(PetscMalloc3(ai[n] + 1, &b->a, ai[n] + 1, &b->j, n + 1, &b->i));
1678: b->singlemalloc = PETSC_TRUE;
1679: if (!b->diag) PetscCall(PetscMalloc1(n + 1, &b->diag));
1680: bdiag = b->diag;
1682: if (n > 0) PetscCall(PetscArrayzero(b->a, ai[n]));
1684: /* set bi and bj with new data structure */
1685: bi = b->i;
1686: bj = b->j;
1688: /* L part */
1689: bi[0] = 0;
1690: for (i = 0; i < n; i++) {
1691: nz = adiag[i] - ai[i];
1692: bi[i + 1] = bi[i] + nz;
1693: aj = a->j + ai[i];
1694: for (j = 0; j < nz; j++) bj[k++] = aj[j];
1695: }
1697: /* U part */
1698: bdiag[n] = bi[n] - 1;
1699: for (i = n - 1; i >= 0; i--) {
1700: nz = ai[i + 1] - adiag[i] - 1;
1701: aj = a->j + adiag[i] + 1;
1702: for (j = 0; j < nz; j++) bj[k++] = aj[j];
1703: /* diag[i] */
1704: bj[k++] = i;
1705: bdiag[i] = bdiag[i + 1] + nz + 1;
1706: }
1708: fact->factortype = MAT_FACTOR_ILU;
1709: fact->info.factor_mallocs = 0;
1710: fact->info.fill_ratio_given = info->fill;
1711: fact->info.fill_ratio_needed = 1.0;
1712: fact->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ;
1713: PetscCall(MatSeqAIJCheckInode_FactorLU(fact));
1715: b = (Mat_SeqAIJ *)(fact)->data;
1716: b->row = isrow;
1717: b->col = iscol;
1718: b->icol = isicol;
1719: PetscCall(PetscMalloc1(fact->rmap->n + 1, &b->solve_work));
1720: PetscCall(PetscObjectReference((PetscObject)isrow));
1721: PetscCall(PetscObjectReference((PetscObject)iscol));
1722: PetscFunctionReturn(PETSC_SUCCESS);
1723: }
1725: PetscErrorCode MatILUFactorSymbolic_SeqAIJ(Mat fact, Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
1726: {
1727: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data, *b;
1728: IS isicol;
1729: const PetscInt *r, *ic;
1730: PetscInt n = A->rmap->n, *ai = a->i, *aj = a->j;
1731: PetscInt *bi, *cols, nnz, *cols_lvl;
1732: PetscInt *bdiag, prow, fm, nzbd, reallocs = 0, dcount = 0;
1733: PetscInt i, levels, diagonal_fill;
1734: PetscBool col_identity, row_identity, missing;
1735: PetscReal f;
1736: PetscInt nlnk, *lnk, *lnk_lvl = NULL;
1737: PetscBT lnkbt;
1738: PetscInt nzi, *bj, **bj_ptr, **bjlvl_ptr;
1739: PetscFreeSpaceList free_space = NULL, current_space = NULL;
1740: PetscFreeSpaceList free_space_lvl = NULL, current_space_lvl = NULL;
1742: PetscFunctionBegin;
1743: PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT, A->rmap->n, A->cmap->n);
1744: PetscCall(MatMissingDiagonal(A, &missing, &i));
1745: PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i);
1747: levels = (PetscInt)info->levels;
1748: PetscCall(ISIdentity(isrow, &row_identity));
1749: PetscCall(ISIdentity(iscol, &col_identity));
1750: if (!levels && row_identity && col_identity) {
1751: /* special case: ilu(0) with natural ordering */
1752: PetscCall(MatILUFactorSymbolic_SeqAIJ_ilu0(fact, A, isrow, iscol, info));
1753: if (a->inode.size) fact->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode;
1754: PetscFunctionReturn(PETSC_SUCCESS);
1755: }
1757: PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol));
1758: PetscCall(ISGetIndices(isrow, &r));
1759: PetscCall(ISGetIndices(isicol, &ic));
1761: /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */
1762: PetscCall(PetscMalloc1(n + 1, &bi));
1763: PetscCall(PetscMalloc1(n + 1, &bdiag));
1764: bi[0] = bdiag[0] = 0;
1765: PetscCall(PetscMalloc2(n, &bj_ptr, n, &bjlvl_ptr));
1767: /* create a linked list for storing column indices of the active row */
1768: nlnk = n + 1;
1769: PetscCall(PetscIncompleteLLCreate(n, n, nlnk, lnk, lnk_lvl, lnkbt));
1771: /* initial FreeSpace size is f*(ai[n]+1) */
1772: f = info->fill;
1773: diagonal_fill = (PetscInt)info->diagonal_fill;
1774: PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space));
1775: current_space = free_space;
1776: PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space_lvl));
1777: current_space_lvl = free_space_lvl;
1778: for (i = 0; i < n; i++) {
1779: nzi = 0;
1780: /* copy current row into linked list */
1781: nnz = ai[r[i] + 1] - ai[r[i]];
1782: PetscCheck(nnz, PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, r[i], i);
1783: cols = aj + ai[r[i]];
1784: lnk[i] = -1; /* marker to indicate if diagonal exists */
1785: PetscCall(PetscIncompleteLLInit(nnz, cols, n, ic, &nlnk, lnk, lnk_lvl, lnkbt));
1786: nzi += nlnk;
1788: /* make sure diagonal entry is included */
1789: if (diagonal_fill && lnk[i] == -1) {
1790: fm = n;
1791: while (lnk[fm] < i) fm = lnk[fm];
1792: lnk[i] = lnk[fm]; /* insert diagonal into linked list */
1793: lnk[fm] = i;
1794: lnk_lvl[i] = 0;
1795: nzi++;
1796: dcount++;
1797: }
1799: /* add pivot rows into the active row */
1800: nzbd = 0;
1801: prow = lnk[n];
1802: while (prow < i) {
1803: nnz = bdiag[prow];
1804: cols = bj_ptr[prow] + nnz + 1;
1805: cols_lvl = bjlvl_ptr[prow] + nnz + 1;
1806: nnz = bi[prow + 1] - bi[prow] - nnz - 1;
1807: PetscCall(PetscILULLAddSorted(nnz, cols, levels, cols_lvl, prow, &nlnk, lnk, lnk_lvl, lnkbt, prow));
1808: nzi += nlnk;
1809: prow = lnk[prow];
1810: nzbd++;
1811: }
1812: bdiag[i] = nzbd;
1813: bi[i + 1] = bi[i] + nzi;
1814: /* if free space is not available, make more free space */
1815: if (current_space->local_remaining < nzi) {
1816: nnz = PetscIntMultTruncate(2, PetscIntMultTruncate(nzi, n - i)); /* estimated and max additional space needed */
1817: PetscCall(PetscFreeSpaceGet(nnz, ¤t_space));
1818: PetscCall(PetscFreeSpaceGet(nnz, ¤t_space_lvl));
1819: reallocs++;
1820: }
1822: /* copy data into free_space and free_space_lvl, then initialize lnk */
1823: PetscCall(PetscIncompleteLLClean(n, n, nzi, lnk, lnk_lvl, current_space->array, current_space_lvl->array, lnkbt));
1824: bj_ptr[i] = current_space->array;
1825: bjlvl_ptr[i] = current_space_lvl->array;
1827: /* make sure the active row i has diagonal entry */
1828: PetscCheck(*(bj_ptr[i] + bdiag[i]) == i, PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Row %" PetscInt_FMT " has missing diagonal in factored matrix, try running with -pc_factor_nonzeros_along_diagonal or -pc_factor_diagonal_fill", i);
1830: current_space->array += nzi;
1831: current_space->local_used += nzi;
1832: current_space->local_remaining -= nzi;
1833: current_space_lvl->array += nzi;
1834: current_space_lvl->local_used += nzi;
1835: current_space_lvl->local_remaining -= nzi;
1836: }
1838: PetscCall(ISRestoreIndices(isrow, &r));
1839: PetscCall(ISRestoreIndices(isicol, &ic));
1840: /* copy free_space into bj and free free_space; set bi, bj, bdiag in new datastructure; */
1841: PetscCall(PetscMalloc1(bi[n] + 1, &bj));
1842: PetscCall(PetscFreeSpaceContiguous_LU(&free_space, bj, n, bi, bdiag));
1844: PetscCall(PetscIncompleteLLDestroy(lnk, lnkbt));
1845: PetscCall(PetscFreeSpaceDestroy(free_space_lvl));
1846: PetscCall(PetscFree2(bj_ptr, bjlvl_ptr));
1848: #if defined(PETSC_USE_INFO)
1849: {
1850: PetscReal af = ((PetscReal)(bdiag[0] + 1)) / ((PetscReal)ai[n]);
1851: PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)f, (double)af));
1852: PetscCall(PetscInfo(A, "Run with -[sub_]pc_factor_fill %g or use \n", (double)af));
1853: PetscCall(PetscInfo(A, "PCFactorSetFill([sub]pc,%g);\n", (double)af));
1854: PetscCall(PetscInfo(A, "for best performance.\n"));
1855: if (diagonal_fill) PetscCall(PetscInfo(A, "Detected and replaced %" PetscInt_FMT " missing diagonals\n", dcount));
1856: }
1857: #endif
1858: /* put together the new matrix */
1859: PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(fact, MAT_SKIP_ALLOCATION, NULL));
1860: b = (Mat_SeqAIJ *)(fact)->data;
1862: b->free_a = PETSC_TRUE;
1863: b->free_ij = PETSC_TRUE;
1864: b->singlemalloc = PETSC_FALSE;
1866: PetscCall(PetscMalloc1(bdiag[0] + 1, &b->a));
1868: b->j = bj;
1869: b->i = bi;
1870: b->diag = bdiag;
1871: b->ilen = NULL;
1872: b->imax = NULL;
1873: b->row = isrow;
1874: b->col = iscol;
1875: PetscCall(PetscObjectReference((PetscObject)isrow));
1876: PetscCall(PetscObjectReference((PetscObject)iscol));
1877: b->icol = isicol;
1879: PetscCall(PetscMalloc1(n + 1, &b->solve_work));
1880: /* In b structure: Free imax, ilen, old a, old j.
1881: Allocate bdiag, solve_work, new a, new j */
1882: b->maxnz = b->nz = bdiag[0] + 1;
1884: (fact)->info.factor_mallocs = reallocs;
1885: (fact)->info.fill_ratio_given = f;
1886: (fact)->info.fill_ratio_needed = ((PetscReal)(bdiag[0] + 1)) / ((PetscReal)ai[n]);
1887: (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ;
1888: if (a->inode.size) (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode;
1889: PetscCall(MatSeqAIJCheckInode_FactorLU(fact));
1890: PetscFunctionReturn(PETSC_SUCCESS);
1891: }
1893: #if 0
1894: // unused
1895: static PetscErrorCode MatILUFactorSymbolic_SeqAIJ_inplace(Mat fact, Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
1896: {
1897: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data, *b;
1898: IS isicol;
1899: const PetscInt *r, *ic;
1900: PetscInt n = A->rmap->n, *ai = a->i, *aj = a->j;
1901: PetscInt *bi, *cols, nnz, *cols_lvl;
1902: PetscInt *bdiag, prow, fm, nzbd, reallocs = 0, dcount = 0;
1903: PetscInt i, levels, diagonal_fill;
1904: PetscBool col_identity, row_identity;
1905: PetscReal f;
1906: PetscInt nlnk, *lnk, *lnk_lvl = NULL;
1907: PetscBT lnkbt;
1908: PetscInt nzi, *bj, **bj_ptr, **bjlvl_ptr;
1909: PetscFreeSpaceList free_space = NULL, current_space = NULL;
1910: PetscFreeSpaceList free_space_lvl = NULL, current_space_lvl = NULL;
1911: PetscBool missing;
1913: PetscFunctionBegin;
1914: PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT, A->rmap->n, A->cmap->n);
1915: PetscCall(MatMissingDiagonal(A, &missing, &i));
1916: PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i);
1918: f = info->fill;
1919: levels = (PetscInt)info->levels;
1920: diagonal_fill = (PetscInt)info->diagonal_fill;
1922: PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol));
1924: PetscCall(ISIdentity(isrow, &row_identity));
1925: PetscCall(ISIdentity(iscol, &col_identity));
1926: if (!levels && row_identity && col_identity) { /* special case: ilu(0) with natural ordering */
1927: PetscCall(MatDuplicateNoCreate_SeqAIJ(fact, A, MAT_DO_NOT_COPY_VALUES, PETSC_TRUE));
1929: (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_inplace;
1930: if (a->inode.size) (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode_inplace;
1931: fact->factortype = MAT_FACTOR_ILU;
1932: (fact)->info.factor_mallocs = 0;
1933: (fact)->info.fill_ratio_given = info->fill;
1934: (fact)->info.fill_ratio_needed = 1.0;
1936: b = (Mat_SeqAIJ *)(fact)->data;
1937: b->row = isrow;
1938: b->col = iscol;
1939: b->icol = isicol;
1940: PetscCall(PetscMalloc1((fact)->rmap->n + 1, &b->solve_work));
1941: PetscCall(PetscObjectReference((PetscObject)isrow));
1942: PetscCall(PetscObjectReference((PetscObject)iscol));
1943: PetscFunctionReturn(PETSC_SUCCESS);
1944: }
1946: PetscCall(ISGetIndices(isrow, &r));
1947: PetscCall(ISGetIndices(isicol, &ic));
1949: /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */
1950: PetscCall(PetscMalloc1(n + 1, &bi));
1951: PetscCall(PetscMalloc1(n + 1, &bdiag));
1952: bi[0] = bdiag[0] = 0;
1954: PetscCall(PetscMalloc2(n, &bj_ptr, n, &bjlvl_ptr));
1956: /* create a linked list for storing column indices of the active row */
1957: nlnk = n + 1;
1958: PetscCall(PetscIncompleteLLCreate(n, n, nlnk, lnk, lnk_lvl, lnkbt));
1960: /* initial FreeSpace size is f*(ai[n]+1) */
1961: PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space));
1962: current_space = free_space;
1963: PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space_lvl));
1964: current_space_lvl = free_space_lvl;
1966: for (i = 0; i < n; i++) {
1967: nzi = 0;
1968: /* copy current row into linked list */
1969: nnz = ai[r[i] + 1] - ai[r[i]];
1970: PetscCheck(nnz, PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, r[i], i);
1971: cols = aj + ai[r[i]];
1972: lnk[i] = -1; /* marker to indicate if diagonal exists */
1973: PetscCall(PetscIncompleteLLInit(nnz, cols, n, ic, &nlnk, lnk, lnk_lvl, lnkbt));
1974: nzi += nlnk;
1976: /* make sure diagonal entry is included */
1977: if (diagonal_fill && lnk[i] == -1) {
1978: fm = n;
1979: while (lnk[fm] < i) fm = lnk[fm];
1980: lnk[i] = lnk[fm]; /* insert diagonal into linked list */
1981: lnk[fm] = i;
1982: lnk_lvl[i] = 0;
1983: nzi++;
1984: dcount++;
1985: }
1987: /* add pivot rows into the active row */
1988: nzbd = 0;
1989: prow = lnk[n];
1990: while (prow < i) {
1991: nnz = bdiag[prow];
1992: cols = bj_ptr[prow] + nnz + 1;
1993: cols_lvl = bjlvl_ptr[prow] + nnz + 1;
1994: nnz = bi[prow + 1] - bi[prow] - nnz - 1;
1995: PetscCall(PetscILULLAddSorted(nnz, cols, levels, cols_lvl, prow, &nlnk, lnk, lnk_lvl, lnkbt, prow));
1996: nzi += nlnk;
1997: prow = lnk[prow];
1998: nzbd++;
1999: }
2000: bdiag[i] = nzbd;
2001: bi[i + 1] = bi[i] + nzi;
2003: /* if free space is not available, make more free space */
2004: if (current_space->local_remaining < nzi) {
2005: nnz = PetscIntMultTruncate(nzi, n - i); /* estimated and max additional space needed */
2006: PetscCall(PetscFreeSpaceGet(nnz, ¤t_space));
2007: PetscCall(PetscFreeSpaceGet(nnz, ¤t_space_lvl));
2008: reallocs++;
2009: }
2011: /* copy data into free_space and free_space_lvl, then initialize lnk */
2012: PetscCall(PetscIncompleteLLClean(n, n, nzi, lnk, lnk_lvl, current_space->array, current_space_lvl->array, lnkbt));
2013: bj_ptr[i] = current_space->array;
2014: bjlvl_ptr[i] = current_space_lvl->array;
2016: /* make sure the active row i has diagonal entry */
2017: PetscCheck(*(bj_ptr[i] + bdiag[i]) == i, PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Row %" PetscInt_FMT " has missing diagonal in factored matrix, try running with -pc_factor_nonzeros_along_diagonal or -pc_factor_diagonal_fill", i);
2019: current_space->array += nzi;
2020: current_space->local_used += nzi;
2021: current_space->local_remaining -= nzi;
2022: current_space_lvl->array += nzi;
2023: current_space_lvl->local_used += nzi;
2024: current_space_lvl->local_remaining -= nzi;
2025: }
2027: PetscCall(ISRestoreIndices(isrow, &r));
2028: PetscCall(ISRestoreIndices(isicol, &ic));
2030: /* destroy list of free space and other temporary arrays */
2031: PetscCall(PetscMalloc1(bi[n] + 1, &bj));
2032: PetscCall(PetscFreeSpaceContiguous(&free_space, bj)); /* copy free_space -> bj */
2033: PetscCall(PetscIncompleteLLDestroy(lnk, lnkbt));
2034: PetscCall(PetscFreeSpaceDestroy(free_space_lvl));
2035: PetscCall(PetscFree2(bj_ptr, bjlvl_ptr));
2037: #if defined(PETSC_USE_INFO)
2038: {
2039: PetscReal af = ((PetscReal)bi[n]) / ((PetscReal)ai[n]);
2040: PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)f, (double)af));
2041: PetscCall(PetscInfo(A, "Run with -[sub_]pc_factor_fill %g or use \n", (double)af));
2042: PetscCall(PetscInfo(A, "PCFactorSetFill([sub]pc,%g);\n", (double)af));
2043: PetscCall(PetscInfo(A, "for best performance.\n"));
2044: if (diagonal_fill) PetscCall(PetscInfo(A, "Detected and replaced %" PetscInt_FMT " missing diagonals\n", dcount));
2045: }
2046: #endif
2048: /* put together the new matrix */
2049: PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(fact, MAT_SKIP_ALLOCATION, NULL));
2050: b = (Mat_SeqAIJ *)(fact)->data;
2052: b->free_a = PETSC_TRUE;
2053: b->free_ij = PETSC_TRUE;
2054: b->singlemalloc = PETSC_FALSE;
2056: PetscCall(PetscMalloc1(bi[n], &b->a));
2057: b->j = bj;
2058: b->i = bi;
2059: for (i = 0; i < n; i++) bdiag[i] += bi[i];
2060: b->diag = bdiag;
2061: b->ilen = NULL;
2062: b->imax = NULL;
2063: b->row = isrow;
2064: b->col = iscol;
2065: PetscCall(PetscObjectReference((PetscObject)isrow));
2066: PetscCall(PetscObjectReference((PetscObject)iscol));
2067: b->icol = isicol;
2068: PetscCall(PetscMalloc1(n + 1, &b->solve_work));
2069: /* In b structure: Free imax, ilen, old a, old j.
2070: Allocate bdiag, solve_work, new a, new j */
2071: b->maxnz = b->nz = bi[n];
2073: (fact)->info.factor_mallocs = reallocs;
2074: (fact)->info.fill_ratio_given = f;
2075: (fact)->info.fill_ratio_needed = ((PetscReal)bi[n]) / ((PetscReal)ai[n]);
2076: (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_inplace;
2077: if (a->inode.size) (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode_inplace;
2078: PetscFunctionReturn(PETSC_SUCCESS);
2079: }
2080: #endif
2082: PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ(Mat B, Mat A, const MatFactorInfo *info)
2083: {
2084: Mat C = B;
2085: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
2086: Mat_SeqSBAIJ *b = (Mat_SeqSBAIJ *)C->data;
2087: IS ip = b->row, iip = b->icol;
2088: const PetscInt *rip, *riip;
2089: PetscInt i, j, mbs = A->rmap->n, *bi = b->i, *bj = b->j, *bdiag = b->diag, *bjtmp;
2090: PetscInt *ai = a->i, *aj = a->j;
2091: PetscInt k, jmin, jmax, *c2r, *il, col, nexti, ili, nz;
2092: MatScalar *rtmp, *ba = b->a, *bval, *aa = a->a, dk, uikdi;
2093: PetscBool perm_identity;
2094: FactorShiftCtx sctx;
2095: PetscReal rs;
2096: MatScalar d, *v;
2098: PetscFunctionBegin;
2099: /* MatPivotSetUp(): initialize shift context sctx */
2100: PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx)));
2102: if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
2103: sctx.shift_top = info->zeropivot;
2104: for (i = 0; i < mbs; i++) {
2105: /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
2106: d = (aa)[a->diag[i]];
2107: rs = -PetscAbsScalar(d) - PetscRealPart(d);
2108: v = aa + ai[i];
2109: nz = ai[i + 1] - ai[i];
2110: for (j = 0; j < nz; j++) rs += PetscAbsScalar(v[j]);
2111: if (rs > sctx.shift_top) sctx.shift_top = rs;
2112: }
2113: sctx.shift_top *= 1.1;
2114: sctx.nshift_max = 5;
2115: sctx.shift_lo = 0.;
2116: sctx.shift_hi = 1.;
2117: }
2119: PetscCall(ISGetIndices(ip, &rip));
2120: PetscCall(ISGetIndices(iip, &riip));
2122: /* allocate working arrays
2123: c2r: linked list, keep track of pivot rows for a given column. c2r[col]: head of the list for a given col
2124: il: for active k row, il[i] gives the index of the 1st nonzero entry in U[i,k:n-1] in bj and ba arrays
2125: */
2126: PetscCall(PetscMalloc3(mbs, &rtmp, mbs, &il, mbs, &c2r));
2128: do {
2129: sctx.newshift = PETSC_FALSE;
2131: for (i = 0; i < mbs; i++) c2r[i] = mbs;
2132: if (mbs) il[0] = 0;
2134: for (k = 0; k < mbs; k++) {
2135: /* zero rtmp */
2136: nz = bi[k + 1] - bi[k];
2137: bjtmp = bj + bi[k];
2138: for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0;
2140: /* load in initial unfactored row */
2141: bval = ba + bi[k];
2142: jmin = ai[rip[k]];
2143: jmax = ai[rip[k] + 1];
2144: for (j = jmin; j < jmax; j++) {
2145: col = riip[aj[j]];
2146: if (col >= k) { /* only take upper triangular entry */
2147: rtmp[col] = aa[j];
2148: *bval++ = 0.0; /* for in-place factorization */
2149: }
2150: }
2151: /* shift the diagonal of the matrix: ZeropivotApply() */
2152: rtmp[k] += sctx.shift_amount; /* shift the diagonal of the matrix */
2154: /* modify k-th row by adding in those rows i with U(i,k)!=0 */
2155: dk = rtmp[k];
2156: i = c2r[k]; /* first row to be added to k_th row */
2158: while (i < k) {
2159: nexti = c2r[i]; /* next row to be added to k_th row */
2161: /* compute multiplier, update diag(k) and U(i,k) */
2162: ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */
2163: uikdi = -ba[ili] * ba[bdiag[i]]; /* diagonal(k) */
2164: dk += uikdi * ba[ili]; /* update diag[k] */
2165: ba[ili] = uikdi; /* -U(i,k) */
2167: /* add multiple of row i to k-th row */
2168: jmin = ili + 1;
2169: jmax = bi[i + 1];
2170: if (jmin < jmax) {
2171: for (j = jmin; j < jmax; j++) rtmp[bj[j]] += uikdi * ba[j];
2172: /* update il and c2r for row i */
2173: il[i] = jmin;
2174: j = bj[jmin];
2175: c2r[i] = c2r[j];
2176: c2r[j] = i;
2177: }
2178: i = nexti;
2179: }
2181: /* copy data into U(k,:) */
2182: rs = 0.0;
2183: jmin = bi[k];
2184: jmax = bi[k + 1] - 1;
2185: if (jmin < jmax) {
2186: for (j = jmin; j < jmax; j++) {
2187: col = bj[j];
2188: ba[j] = rtmp[col];
2189: rs += PetscAbsScalar(ba[j]);
2190: }
2191: /* add the k-th row into il and c2r */
2192: il[k] = jmin;
2193: i = bj[jmin];
2194: c2r[k] = c2r[i];
2195: c2r[i] = k;
2196: }
2198: /* MatPivotCheck() */
2199: sctx.rs = rs;
2200: sctx.pv = dk;
2201: PetscCall(MatPivotCheck(B, A, info, &sctx, i));
2202: if (sctx.newshift) break;
2203: dk = sctx.pv;
2205: ba[bdiag[k]] = 1.0 / dk; /* U(k,k) */
2206: }
2207: } while (sctx.newshift);
2209: PetscCall(PetscFree3(rtmp, il, c2r));
2210: PetscCall(ISRestoreIndices(ip, &rip));
2211: PetscCall(ISRestoreIndices(iip, &riip));
2213: PetscCall(ISIdentity(ip, &perm_identity));
2214: if (perm_identity) {
2215: B->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering;
2216: B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering;
2217: B->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering;
2218: B->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering;
2219: } else {
2220: B->ops->solve = MatSolve_SeqSBAIJ_1;
2221: B->ops->solvetranspose = MatSolve_SeqSBAIJ_1;
2222: B->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1;
2223: B->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1;
2224: }
2226: C->assembled = PETSC_TRUE;
2227: C->preallocated = PETSC_TRUE;
2229: PetscCall(PetscLogFlops(C->rmap->n));
2231: /* MatPivotView() */
2232: if (sctx.nshift) {
2233: if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
2234: PetscCall(PetscInfo(A, "number of shift_pd tries %" PetscInt_FMT ", shift_amount %g, diagonal shifted up by %e fraction top_value %e\n", sctx.nshift, (double)sctx.shift_amount, (double)sctx.shift_fraction, (double)sctx.shift_top));
2235: } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
2236: PetscCall(PetscInfo(A, "number of shift_nz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount));
2237: } else if (info->shifttype == (PetscReal)MAT_SHIFT_INBLOCKS) {
2238: PetscCall(PetscInfo(A, "number of shift_inblocks applied %" PetscInt_FMT ", each shift_amount %g\n", sctx.nshift, (double)info->shiftamount));
2239: }
2240: }
2241: PetscFunctionReturn(PETSC_SUCCESS);
2242: }
2244: PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ_inplace(Mat B, Mat A, const MatFactorInfo *info)
2245: {
2246: Mat C = B;
2247: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
2248: Mat_SeqSBAIJ *b = (Mat_SeqSBAIJ *)C->data;
2249: IS ip = b->row, iip = b->icol;
2250: const PetscInt *rip, *riip;
2251: PetscInt i, j, mbs = A->rmap->n, *bi = b->i, *bj = b->j, *bcol, *bjtmp;
2252: PetscInt *ai = a->i, *aj = a->j;
2253: PetscInt k, jmin, jmax, *jl, *il, col, nexti, ili, nz;
2254: MatScalar *rtmp, *ba = b->a, *bval, *aa = a->a, dk, uikdi;
2255: PetscBool perm_identity;
2256: FactorShiftCtx sctx;
2257: PetscReal rs;
2258: MatScalar d, *v;
2260: PetscFunctionBegin;
2261: /* MatPivotSetUp(): initialize shift context sctx */
2262: PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx)));
2264: if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
2265: sctx.shift_top = info->zeropivot;
2266: for (i = 0; i < mbs; i++) {
2267: /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
2268: d = (aa)[a->diag[i]];
2269: rs = -PetscAbsScalar(d) - PetscRealPart(d);
2270: v = aa + ai[i];
2271: nz = ai[i + 1] - ai[i];
2272: for (j = 0; j < nz; j++) rs += PetscAbsScalar(v[j]);
2273: if (rs > sctx.shift_top) sctx.shift_top = rs;
2274: }
2275: sctx.shift_top *= 1.1;
2276: sctx.nshift_max = 5;
2277: sctx.shift_lo = 0.;
2278: sctx.shift_hi = 1.;
2279: }
2281: PetscCall(ISGetIndices(ip, &rip));
2282: PetscCall(ISGetIndices(iip, &riip));
2284: /* initialization */
2285: PetscCall(PetscMalloc3(mbs, &rtmp, mbs, &il, mbs, &jl));
2287: do {
2288: sctx.newshift = PETSC_FALSE;
2290: for (i = 0; i < mbs; i++) jl[i] = mbs;
2291: il[0] = 0;
2293: for (k = 0; k < mbs; k++) {
2294: /* zero rtmp */
2295: nz = bi[k + 1] - bi[k];
2296: bjtmp = bj + bi[k];
2297: for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0;
2299: bval = ba + bi[k];
2300: /* initialize k-th row by the perm[k]-th row of A */
2301: jmin = ai[rip[k]];
2302: jmax = ai[rip[k] + 1];
2303: for (j = jmin; j < jmax; j++) {
2304: col = riip[aj[j]];
2305: if (col >= k) { /* only take upper triangular entry */
2306: rtmp[col] = aa[j];
2307: *bval++ = 0.0; /* for in-place factorization */
2308: }
2309: }
2310: /* shift the diagonal of the matrix */
2311: if (sctx.nshift) rtmp[k] += sctx.shift_amount;
2313: /* modify k-th row by adding in those rows i with U(i,k)!=0 */
2314: dk = rtmp[k];
2315: i = jl[k]; /* first row to be added to k_th row */
2317: while (i < k) {
2318: nexti = jl[i]; /* next row to be added to k_th row */
2320: /* compute multiplier, update diag(k) and U(i,k) */
2321: ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */
2322: uikdi = -ba[ili] * ba[bi[i]]; /* diagonal(k) */
2323: dk += uikdi * ba[ili];
2324: ba[ili] = uikdi; /* -U(i,k) */
2326: /* add multiple of row i to k-th row */
2327: jmin = ili + 1;
2328: jmax = bi[i + 1];
2329: if (jmin < jmax) {
2330: for (j = jmin; j < jmax; j++) rtmp[bj[j]] += uikdi * ba[j];
2331: /* update il and jl for row i */
2332: il[i] = jmin;
2333: j = bj[jmin];
2334: jl[i] = jl[j];
2335: jl[j] = i;
2336: }
2337: i = nexti;
2338: }
2340: /* shift the diagonals when zero pivot is detected */
2341: /* compute rs=sum of abs(off-diagonal) */
2342: rs = 0.0;
2343: jmin = bi[k] + 1;
2344: nz = bi[k + 1] - jmin;
2345: bcol = bj + jmin;
2346: for (j = 0; j < nz; j++) rs += PetscAbsScalar(rtmp[bcol[j]]);
2348: sctx.rs = rs;
2349: sctx.pv = dk;
2350: PetscCall(MatPivotCheck(B, A, info, &sctx, k));
2351: if (sctx.newshift) break;
2352: dk = sctx.pv;
2354: /* copy data into U(k,:) */
2355: ba[bi[k]] = 1.0 / dk; /* U(k,k) */
2356: jmin = bi[k] + 1;
2357: jmax = bi[k + 1];
2358: if (jmin < jmax) {
2359: for (j = jmin; j < jmax; j++) {
2360: col = bj[j];
2361: ba[j] = rtmp[col];
2362: }
2363: /* add the k-th row into il and jl */
2364: il[k] = jmin;
2365: i = bj[jmin];
2366: jl[k] = jl[i];
2367: jl[i] = k;
2368: }
2369: }
2370: } while (sctx.newshift);
2372: PetscCall(PetscFree3(rtmp, il, jl));
2373: PetscCall(ISRestoreIndices(ip, &rip));
2374: PetscCall(ISRestoreIndices(iip, &riip));
2376: PetscCall(ISIdentity(ip, &perm_identity));
2377: if (perm_identity) {
2378: B->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
2379: B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
2380: B->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
2381: B->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
2382: } else {
2383: B->ops->solve = MatSolve_SeqSBAIJ_1_inplace;
2384: B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_inplace;
2385: B->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1_inplace;
2386: B->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1_inplace;
2387: }
2389: C->assembled = PETSC_TRUE;
2390: C->preallocated = PETSC_TRUE;
2392: PetscCall(PetscLogFlops(C->rmap->n));
2393: if (sctx.nshift) {
2394: if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
2395: PetscCall(PetscInfo(A, "number of shiftnz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount));
2396: } else if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
2397: PetscCall(PetscInfo(A, "number of shiftpd tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount));
2398: }
2399: }
2400: PetscFunctionReturn(PETSC_SUCCESS);
2401: }
2403: /*
2404: icc() under revised new data structure.
2405: Factored arrays bj and ba are stored as
2406: U(0,:),...,U(i,:),U(n-1,:)
2408: ui=fact->i is an array of size n+1, in which
2409: ui+
2410: ui[i]: points to 1st entry of U(i,:),i=0,...,n-1
2411: ui[n]: points to U(n-1,n-1)+1
2413: udiag=fact->diag is an array of size n,in which
2414: udiag[i]: points to diagonal of U(i,:), i=0,...,n-1
2416: U(i,:) contains udiag[i] as its last entry, i.e.,
2417: U(i,:) = (u[i,i+1],...,u[i,n-1],diag[i])
2418: */
2420: PetscErrorCode MatICCFactorSymbolic_SeqAIJ(Mat fact, Mat A, IS perm, const MatFactorInfo *info)
2421: {
2422: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
2423: Mat_SeqSBAIJ *b;
2424: PetscBool perm_identity, missing;
2425: PetscInt reallocs = 0, i, *ai = a->i, *aj = a->j, am = A->rmap->n, *ui, *udiag;
2426: const PetscInt *rip, *riip;
2427: PetscInt jmin, jmax, nzk, k, j, *jl, prow, *il, nextprow;
2428: PetscInt nlnk, *lnk, *lnk_lvl = NULL, d;
2429: PetscInt ncols, ncols_upper, *cols, *ajtmp, *uj, **uj_ptr, **uj_lvl_ptr;
2430: PetscReal fill = info->fill, levels = info->levels;
2431: PetscFreeSpaceList free_space = NULL, current_space = NULL;
2432: PetscFreeSpaceList free_space_lvl = NULL, current_space_lvl = NULL;
2433: PetscBT lnkbt;
2434: IS iperm;
2436: PetscFunctionBegin;
2437: PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT, A->rmap->n, A->cmap->n);
2438: PetscCall(MatMissingDiagonal(A, &missing, &d));
2439: PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, d);
2440: PetscCall(ISIdentity(perm, &perm_identity));
2441: PetscCall(ISInvertPermutation(perm, PETSC_DECIDE, &iperm));
2443: PetscCall(PetscMalloc1(am + 1, &ui));
2444: PetscCall(PetscMalloc1(am + 1, &udiag));
2445: ui[0] = 0;
2447: /* ICC(0) without matrix ordering: simply rearrange column indices */
2448: if (!levels && perm_identity) {
2449: for (i = 0; i < am; i++) {
2450: ncols = ai[i + 1] - a->diag[i];
2451: ui[i + 1] = ui[i] + ncols;
2452: udiag[i] = ui[i + 1] - 1; /* points to the last entry of U(i,:) */
2453: }
2454: PetscCall(PetscMalloc1(ui[am] + 1, &uj));
2455: cols = uj;
2456: for (i = 0; i < am; i++) {
2457: aj = a->j + a->diag[i] + 1; /* 1st entry of U(i,:) without diagonal */
2458: ncols = ai[i + 1] - a->diag[i] - 1;
2459: for (j = 0; j < ncols; j++) *cols++ = aj[j];
2460: *cols++ = i; /* diagonal is located as the last entry of U(i,:) */
2461: }
2462: } else { /* case: levels>0 || (levels=0 && !perm_identity) */
2463: PetscCall(ISGetIndices(iperm, &riip));
2464: PetscCall(ISGetIndices(perm, &rip));
2466: /* initialization */
2467: PetscCall(PetscMalloc1(am + 1, &ajtmp));
2469: /* jl: linked list for storing indices of the pivot rows
2470: il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
2471: PetscCall(PetscMalloc4(am, &uj_ptr, am, &uj_lvl_ptr, am, &jl, am, &il));
2472: for (i = 0; i < am; i++) {
2473: jl[i] = am;
2474: il[i] = 0;
2475: }
2477: /* create and initialize a linked list for storing column indices of the active row k */
2478: nlnk = am + 1;
2479: PetscCall(PetscIncompleteLLCreate(am, am, nlnk, lnk, lnk_lvl, lnkbt));
2481: /* initial FreeSpace size is fill*(ai[am]+am)/2 */
2482: PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, (ai[am] + am) / 2), &free_space));
2483: current_space = free_space;
2484: PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, (ai[am] + am) / 2), &free_space_lvl));
2485: current_space_lvl = free_space_lvl;
2487: for (k = 0; k < am; k++) { /* for each active row k */
2488: /* initialize lnk by the column indices of row rip[k] of A */
2489: nzk = 0;
2490: ncols = ai[rip[k] + 1] - ai[rip[k]];
2491: PetscCheck(ncols, PETSC_COMM_SELF, PETSC_ERR_MAT_CH_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, rip[k], k);
2492: ncols_upper = 0;
2493: for (j = 0; j < ncols; j++) {
2494: i = *(aj + ai[rip[k]] + j); /* unpermuted column index */
2495: if (riip[i] >= k) { /* only take upper triangular entry */
2496: ajtmp[ncols_upper] = i;
2497: ncols_upper++;
2498: }
2499: }
2500: PetscCall(PetscIncompleteLLInit(ncols_upper, ajtmp, am, riip, &nlnk, lnk, lnk_lvl, lnkbt));
2501: nzk += nlnk;
2503: /* update lnk by computing fill-in for each pivot row to be merged in */
2504: prow = jl[k]; /* 1st pivot row */
2506: while (prow < k) {
2507: nextprow = jl[prow];
2509: /* merge prow into k-th row */
2510: jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */
2511: jmax = ui[prow + 1];
2512: ncols = jmax - jmin;
2513: i = jmin - ui[prow];
2514: cols = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */
2515: uj = uj_lvl_ptr[prow] + i; /* levels of cols */
2516: j = *(uj - 1);
2517: PetscCall(PetscICCLLAddSorted(ncols, cols, levels, uj, am, &nlnk, lnk, lnk_lvl, lnkbt, j));
2518: nzk += nlnk;
2520: /* update il and jl for prow */
2521: if (jmin < jmax) {
2522: il[prow] = jmin;
2523: j = *cols;
2524: jl[prow] = jl[j];
2525: jl[j] = prow;
2526: }
2527: prow = nextprow;
2528: }
2530: /* if free space is not available, make more free space */
2531: if (current_space->local_remaining < nzk) {
2532: i = am - k + 1; /* num of unfactored rows */
2533: i = PetscIntMultTruncate(i, PetscMin(nzk, i - 1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
2534: PetscCall(PetscFreeSpaceGet(i, ¤t_space));
2535: PetscCall(PetscFreeSpaceGet(i, ¤t_space_lvl));
2536: reallocs++;
2537: }
2539: /* copy data into free_space and free_space_lvl, then initialize lnk */
2540: PetscCheck(nzk != 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Empty row %" PetscInt_FMT " in ICC matrix factor", k);
2541: PetscCall(PetscIncompleteLLClean(am, am, nzk, lnk, lnk_lvl, current_space->array, current_space_lvl->array, lnkbt));
2543: /* add the k-th row into il and jl */
2544: if (nzk > 1) {
2545: i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
2546: jl[k] = jl[i];
2547: jl[i] = k;
2548: il[k] = ui[k] + 1;
2549: }
2550: uj_ptr[k] = current_space->array;
2551: uj_lvl_ptr[k] = current_space_lvl->array;
2553: current_space->array += nzk;
2554: current_space->local_used += nzk;
2555: current_space->local_remaining -= nzk;
2557: current_space_lvl->array += nzk;
2558: current_space_lvl->local_used += nzk;
2559: current_space_lvl->local_remaining -= nzk;
2561: ui[k + 1] = ui[k] + nzk;
2562: }
2564: PetscCall(ISRestoreIndices(perm, &rip));
2565: PetscCall(ISRestoreIndices(iperm, &riip));
2566: PetscCall(PetscFree4(uj_ptr, uj_lvl_ptr, jl, il));
2567: PetscCall(PetscFree(ajtmp));
2569: /* copy free_space into uj and free free_space; set ui, uj, udiag in new datastructure; */
2570: PetscCall(PetscMalloc1(ui[am] + 1, &uj));
2571: PetscCall(PetscFreeSpaceContiguous_Cholesky(&free_space, uj, am, ui, udiag)); /* store matrix factor */
2572: PetscCall(PetscIncompleteLLDestroy(lnk, lnkbt));
2573: PetscCall(PetscFreeSpaceDestroy(free_space_lvl));
2575: } /* end of case: levels>0 || (levels=0 && !perm_identity) */
2577: /* put together the new matrix in MATSEQSBAIJ format */
2578: b = (Mat_SeqSBAIJ *)(fact)->data;
2579: b->singlemalloc = PETSC_FALSE;
2581: PetscCall(PetscMalloc1(ui[am] + 1, &b->a));
2583: b->j = uj;
2584: b->i = ui;
2585: b->diag = udiag;
2586: b->free_diag = PETSC_TRUE;
2587: b->ilen = NULL;
2588: b->imax = NULL;
2589: b->row = perm;
2590: b->col = perm;
2591: PetscCall(PetscObjectReference((PetscObject)perm));
2592: PetscCall(PetscObjectReference((PetscObject)perm));
2593: b->icol = iperm;
2594: b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
2596: PetscCall(PetscMalloc1(am + 1, &b->solve_work));
2598: b->maxnz = b->nz = ui[am];
2599: b->free_a = PETSC_TRUE;
2600: b->free_ij = PETSC_TRUE;
2602: fact->info.factor_mallocs = reallocs;
2603: fact->info.fill_ratio_given = fill;
2604: if (ai[am] != 0) {
2605: /* nonzeros in lower triangular part of A (including diagonals) = (ai[am]+am)/2 */
2606: fact->info.fill_ratio_needed = ((PetscReal)2 * ui[am]) / (ai[am] + am);
2607: } else {
2608: fact->info.fill_ratio_needed = 0.0;
2609: }
2610: #if defined(PETSC_USE_INFO)
2611: if (ai[am] != 0) {
2612: PetscReal af = fact->info.fill_ratio_needed;
2613: PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)fill, (double)af));
2614: PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af));
2615: PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g) for best performance.\n", (double)af));
2616: } else {
2617: PetscCall(PetscInfo(A, "Empty matrix\n"));
2618: }
2619: #endif
2620: fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ;
2621: PetscFunctionReturn(PETSC_SUCCESS);
2622: }
2624: #if 0
2625: // unused
2626: static PetscErrorCode MatICCFactorSymbolic_SeqAIJ_inplace(Mat fact, Mat A, IS perm, const MatFactorInfo *info)
2627: {
2628: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
2629: Mat_SeqSBAIJ *b;
2630: PetscBool perm_identity, missing;
2631: PetscInt reallocs = 0, i, *ai = a->i, *aj = a->j, am = A->rmap->n, *ui, *udiag;
2632: const PetscInt *rip, *riip;
2633: PetscInt jmin, jmax, nzk, k, j, *jl, prow, *il, nextprow;
2634: PetscInt nlnk, *lnk, *lnk_lvl = NULL, d;
2635: PetscInt ncols, ncols_upper, *cols, *ajtmp, *uj, **uj_ptr, **uj_lvl_ptr;
2636: PetscReal fill = info->fill, levels = info->levels;
2637: PetscFreeSpaceList free_space = NULL, current_space = NULL;
2638: PetscFreeSpaceList free_space_lvl = NULL, current_space_lvl = NULL;
2639: PetscBT lnkbt;
2640: IS iperm;
2642: PetscFunctionBegin;
2643: PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT, A->rmap->n, A->cmap->n);
2644: PetscCall(MatMissingDiagonal(A, &missing, &d));
2645: PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, d);
2646: PetscCall(ISIdentity(perm, &perm_identity));
2647: PetscCall(ISInvertPermutation(perm, PETSC_DECIDE, &iperm));
2649: PetscCall(PetscMalloc1(am + 1, &ui));
2650: PetscCall(PetscMalloc1(am + 1, &udiag));
2651: ui[0] = 0;
2653: /* ICC(0) without matrix ordering: simply copies fill pattern */
2654: if (!levels && perm_identity) {
2655: for (i = 0; i < am; i++) {
2656: ui[i + 1] = ui[i] + ai[i + 1] - a->diag[i];
2657: udiag[i] = ui[i];
2658: }
2659: PetscCall(PetscMalloc1(ui[am] + 1, &uj));
2660: cols = uj;
2661: for (i = 0; i < am; i++) {
2662: aj = a->j + a->diag[i];
2663: ncols = ui[i + 1] - ui[i];
2664: for (j = 0; j < ncols; j++) *cols++ = *aj++;
2665: }
2666: } else { /* case: levels>0 || (levels=0 && !perm_identity) */
2667: PetscCall(ISGetIndices(iperm, &riip));
2668: PetscCall(ISGetIndices(perm, &rip));
2670: /* initialization */
2671: PetscCall(PetscMalloc1(am + 1, &ajtmp));
2673: /* jl: linked list for storing indices of the pivot rows
2674: il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
2675: PetscCall(PetscMalloc4(am, &uj_ptr, am, &uj_lvl_ptr, am, &jl, am, &il));
2676: for (i = 0; i < am; i++) {
2677: jl[i] = am;
2678: il[i] = 0;
2679: }
2681: /* create and initialize a linked list for storing column indices of the active row k */
2682: nlnk = am + 1;
2683: PetscCall(PetscIncompleteLLCreate(am, am, nlnk, lnk, lnk_lvl, lnkbt));
2685: /* initial FreeSpace size is fill*(ai[am]+1) */
2686: PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, ai[am] + 1), &free_space));
2687: current_space = free_space;
2688: PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, ai[am] + 1), &free_space_lvl));
2689: current_space_lvl = free_space_lvl;
2691: for (k = 0; k < am; k++) { /* for each active row k */
2692: /* initialize lnk by the column indices of row rip[k] of A */
2693: nzk = 0;
2694: ncols = ai[rip[k] + 1] - ai[rip[k]];
2695: PetscCheck(ncols, PETSC_COMM_SELF, PETSC_ERR_MAT_CH_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, rip[k], k);
2696: ncols_upper = 0;
2697: for (j = 0; j < ncols; j++) {
2698: i = *(aj + ai[rip[k]] + j); /* unpermuted column index */
2699: if (riip[i] >= k) { /* only take upper triangular entry */
2700: ajtmp[ncols_upper] = i;
2701: ncols_upper++;
2702: }
2703: }
2704: PetscCall(PetscIncompleteLLInit(ncols_upper, ajtmp, am, riip, &nlnk, lnk, lnk_lvl, lnkbt));
2705: nzk += nlnk;
2707: /* update lnk by computing fill-in for each pivot row to be merged in */
2708: prow = jl[k]; /* 1st pivot row */
2710: while (prow < k) {
2711: nextprow = jl[prow];
2713: /* merge prow into k-th row */
2714: jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */
2715: jmax = ui[prow + 1];
2716: ncols = jmax - jmin;
2717: i = jmin - ui[prow];
2718: cols = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */
2719: uj = uj_lvl_ptr[prow] + i; /* levels of cols */
2720: j = *(uj - 1);
2721: PetscCall(PetscICCLLAddSorted(ncols, cols, levels, uj, am, &nlnk, lnk, lnk_lvl, lnkbt, j));
2722: nzk += nlnk;
2724: /* update il and jl for prow */
2725: if (jmin < jmax) {
2726: il[prow] = jmin;
2727: j = *cols;
2728: jl[prow] = jl[j];
2729: jl[j] = prow;
2730: }
2731: prow = nextprow;
2732: }
2734: /* if free space is not available, make more free space */
2735: if (current_space->local_remaining < nzk) {
2736: i = am - k + 1; /* num of unfactored rows */
2737: i = PetscIntMultTruncate(i, PetscMin(nzk, i - 1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
2738: PetscCall(PetscFreeSpaceGet(i, ¤t_space));
2739: PetscCall(PetscFreeSpaceGet(i, ¤t_space_lvl));
2740: reallocs++;
2741: }
2743: /* copy data into free_space and free_space_lvl, then initialize lnk */
2744: PetscCheck(nzk, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Empty row %" PetscInt_FMT " in ICC matrix factor", k);
2745: PetscCall(PetscIncompleteLLClean(am, am, nzk, lnk, lnk_lvl, current_space->array, current_space_lvl->array, lnkbt));
2747: /* add the k-th row into il and jl */
2748: if (nzk > 1) {
2749: i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
2750: jl[k] = jl[i];
2751: jl[i] = k;
2752: il[k] = ui[k] + 1;
2753: }
2754: uj_ptr[k] = current_space->array;
2755: uj_lvl_ptr[k] = current_space_lvl->array;
2757: current_space->array += nzk;
2758: current_space->local_used += nzk;
2759: current_space->local_remaining -= nzk;
2761: current_space_lvl->array += nzk;
2762: current_space_lvl->local_used += nzk;
2763: current_space_lvl->local_remaining -= nzk;
2765: ui[k + 1] = ui[k] + nzk;
2766: }
2768: #if defined(PETSC_USE_INFO)
2769: if (ai[am] != 0) {
2770: PetscReal af = (PetscReal)ui[am] / ((PetscReal)ai[am]);
2771: PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)fill, (double)af));
2772: PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af));
2773: PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g) for best performance.\n", (double)af));
2774: } else {
2775: PetscCall(PetscInfo(A, "Empty matrix\n"));
2776: }
2777: #endif
2779: PetscCall(ISRestoreIndices(perm, &rip));
2780: PetscCall(ISRestoreIndices(iperm, &riip));
2781: PetscCall(PetscFree4(uj_ptr, uj_lvl_ptr, jl, il));
2782: PetscCall(PetscFree(ajtmp));
2784: /* destroy list of free space and other temporary array(s) */
2785: PetscCall(PetscMalloc1(ui[am] + 1, &uj));
2786: PetscCall(PetscFreeSpaceContiguous(&free_space, uj));
2787: PetscCall(PetscIncompleteLLDestroy(lnk, lnkbt));
2788: PetscCall(PetscFreeSpaceDestroy(free_space_lvl));
2790: } /* end of case: levels>0 || (levels=0 && !perm_identity) */
2792: /* put together the new matrix in MATSEQSBAIJ format */
2794: b = (Mat_SeqSBAIJ *)fact->data;
2795: b->singlemalloc = PETSC_FALSE;
2797: PetscCall(PetscMalloc1(ui[am] + 1, &b->a));
2799: b->j = uj;
2800: b->i = ui;
2801: b->diag = udiag;
2802: b->free_diag = PETSC_TRUE;
2803: b->ilen = NULL;
2804: b->imax = NULL;
2805: b->row = perm;
2806: b->col = perm;
2808: PetscCall(PetscObjectReference((PetscObject)perm));
2809: PetscCall(PetscObjectReference((PetscObject)perm));
2811: b->icol = iperm;
2812: b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
2813: PetscCall(PetscMalloc1(am + 1, &b->solve_work));
2814: b->maxnz = b->nz = ui[am];
2815: b->free_a = PETSC_TRUE;
2816: b->free_ij = PETSC_TRUE;
2818: fact->info.factor_mallocs = reallocs;
2819: fact->info.fill_ratio_given = fill;
2820: if (ai[am] != 0) {
2821: fact->info.fill_ratio_needed = ((PetscReal)ui[am]) / ((PetscReal)ai[am]);
2822: } else {
2823: fact->info.fill_ratio_needed = 0.0;
2824: }
2825: fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ_inplace;
2826: PetscFunctionReturn(PETSC_SUCCESS);
2827: }
2828: #endif
2830: PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ(Mat fact, Mat A, IS perm, const MatFactorInfo *info)
2831: {
2832: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
2833: Mat_SeqSBAIJ *b;
2834: PetscBool perm_identity, missing;
2835: PetscReal fill = info->fill;
2836: const PetscInt *rip, *riip;
2837: PetscInt i, am = A->rmap->n, *ai = a->i, *aj = a->j, reallocs = 0, prow;
2838: PetscInt *jl, jmin, jmax, nzk, *ui, k, j, *il, nextprow;
2839: PetscInt nlnk, *lnk, ncols, ncols_upper, *cols, *uj, **ui_ptr, *uj_ptr, *udiag;
2840: PetscFreeSpaceList free_space = NULL, current_space = NULL;
2841: PetscBT lnkbt;
2842: IS iperm;
2844: PetscFunctionBegin;
2845: PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT, A->rmap->n, A->cmap->n);
2846: PetscCall(MatMissingDiagonal(A, &missing, &i));
2847: PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i);
2849: /* check whether perm is the identity mapping */
2850: PetscCall(ISIdentity(perm, &perm_identity));
2851: PetscCall(ISInvertPermutation(perm, PETSC_DECIDE, &iperm));
2852: PetscCall(ISGetIndices(iperm, &riip));
2853: PetscCall(ISGetIndices(perm, &rip));
2855: /* initialization */
2856: PetscCall(PetscMalloc1(am + 1, &ui));
2857: PetscCall(PetscMalloc1(am + 1, &udiag));
2858: ui[0] = 0;
2860: /* jl: linked list for storing indices of the pivot rows
2861: il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
2862: PetscCall(PetscMalloc4(am, &ui_ptr, am, &jl, am, &il, am, &cols));
2863: for (i = 0; i < am; i++) {
2864: jl[i] = am;
2865: il[i] = 0;
2866: }
2868: /* create and initialize a linked list for storing column indices of the active row k */
2869: nlnk = am + 1;
2870: PetscCall(PetscLLCreate(am, am, nlnk, lnk, lnkbt));
2872: /* initial FreeSpace size is fill*(ai[am]+am)/2 */
2873: PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, (ai[am] + am) / 2), &free_space));
2874: current_space = free_space;
2876: for (k = 0; k < am; k++) { /* for each active row k */
2877: /* initialize lnk by the column indices of row rip[k] of A */
2878: nzk = 0;
2879: ncols = ai[rip[k] + 1] - ai[rip[k]];
2880: PetscCheck(ncols, PETSC_COMM_SELF, PETSC_ERR_MAT_CH_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, rip[k], k);
2881: ncols_upper = 0;
2882: for (j = 0; j < ncols; j++) {
2883: i = riip[*(aj + ai[rip[k]] + j)];
2884: if (i >= k) { /* only take upper triangular entry */
2885: cols[ncols_upper] = i;
2886: ncols_upper++;
2887: }
2888: }
2889: PetscCall(PetscLLAdd(ncols_upper, cols, am, &nlnk, lnk, lnkbt));
2890: nzk += nlnk;
2892: /* update lnk by computing fill-in for each pivot row to be merged in */
2893: prow = jl[k]; /* 1st pivot row */
2895: while (prow < k) {
2896: nextprow = jl[prow];
2897: /* merge prow into k-th row */
2898: jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */
2899: jmax = ui[prow + 1];
2900: ncols = jmax - jmin;
2901: uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */
2902: PetscCall(PetscLLAddSorted(ncols, uj_ptr, am, &nlnk, lnk, lnkbt));
2903: nzk += nlnk;
2905: /* update il and jl for prow */
2906: if (jmin < jmax) {
2907: il[prow] = jmin;
2908: j = *uj_ptr;
2909: jl[prow] = jl[j];
2910: jl[j] = prow;
2911: }
2912: prow = nextprow;
2913: }
2915: /* if free space is not available, make more free space */
2916: if (current_space->local_remaining < nzk) {
2917: i = am - k + 1; /* num of unfactored rows */
2918: i = PetscIntMultTruncate(i, PetscMin(nzk, i - 1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
2919: PetscCall(PetscFreeSpaceGet(i, ¤t_space));
2920: reallocs++;
2921: }
2923: /* copy data into free space, then initialize lnk */
2924: PetscCall(PetscLLClean(am, am, nzk, lnk, current_space->array, lnkbt));
2926: /* add the k-th row into il and jl */
2927: if (nzk > 1) {
2928: i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
2929: jl[k] = jl[i];
2930: jl[i] = k;
2931: il[k] = ui[k] + 1;
2932: }
2933: ui_ptr[k] = current_space->array;
2935: current_space->array += nzk;
2936: current_space->local_used += nzk;
2937: current_space->local_remaining -= nzk;
2939: ui[k + 1] = ui[k] + nzk;
2940: }
2942: PetscCall(ISRestoreIndices(perm, &rip));
2943: PetscCall(ISRestoreIndices(iperm, &riip));
2944: PetscCall(PetscFree4(ui_ptr, jl, il, cols));
2946: /* copy free_space into uj and free free_space; set ui, uj, udiag in new datastructure; */
2947: PetscCall(PetscMalloc1(ui[am] + 1, &uj));
2948: PetscCall(PetscFreeSpaceContiguous_Cholesky(&free_space, uj, am, ui, udiag)); /* store matrix factor */
2949: PetscCall(PetscLLDestroy(lnk, lnkbt));
2951: /* put together the new matrix in MATSEQSBAIJ format */
2953: b = (Mat_SeqSBAIJ *)fact->data;
2954: b->singlemalloc = PETSC_FALSE;
2955: b->free_a = PETSC_TRUE;
2956: b->free_ij = PETSC_TRUE;
2958: PetscCall(PetscMalloc1(ui[am] + 1, &b->a));
2960: b->j = uj;
2961: b->i = ui;
2962: b->diag = udiag;
2963: b->free_diag = PETSC_TRUE;
2964: b->ilen = NULL;
2965: b->imax = NULL;
2966: b->row = perm;
2967: b->col = perm;
2969: PetscCall(PetscObjectReference((PetscObject)perm));
2970: PetscCall(PetscObjectReference((PetscObject)perm));
2972: b->icol = iperm;
2973: b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
2975: PetscCall(PetscMalloc1(am + 1, &b->solve_work));
2977: b->maxnz = b->nz = ui[am];
2979: fact->info.factor_mallocs = reallocs;
2980: fact->info.fill_ratio_given = fill;
2981: if (ai[am] != 0) {
2982: /* nonzeros in lower triangular part of A (including diagonals) = (ai[am]+am)/2 */
2983: fact->info.fill_ratio_needed = ((PetscReal)2 * ui[am]) / (ai[am] + am);
2984: } else {
2985: fact->info.fill_ratio_needed = 0.0;
2986: }
2987: #if defined(PETSC_USE_INFO)
2988: if (ai[am] != 0) {
2989: PetscReal af = fact->info.fill_ratio_needed;
2990: PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)fill, (double)af));
2991: PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af));
2992: PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g) for best performance.\n", (double)af));
2993: } else {
2994: PetscCall(PetscInfo(A, "Empty matrix\n"));
2995: }
2996: #endif
2997: fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ;
2998: PetscFunctionReturn(PETSC_SUCCESS);
2999: }
3001: #if 0
3002: // unused
3003: static PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ_inplace(Mat fact, Mat A, IS perm, const MatFactorInfo *info)
3004: {
3005: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
3006: Mat_SeqSBAIJ *b;
3007: PetscBool perm_identity, missing;
3008: PetscReal fill = info->fill;
3009: const PetscInt *rip, *riip;
3010: PetscInt i, am = A->rmap->n, *ai = a->i, *aj = a->j, reallocs = 0, prow;
3011: PetscInt *jl, jmin, jmax, nzk, *ui, k, j, *il, nextprow;
3012: PetscInt nlnk, *lnk, ncols, ncols_upper, *cols, *uj, **ui_ptr, *uj_ptr;
3013: PetscFreeSpaceList free_space = NULL, current_space = NULL;
3014: PetscBT lnkbt;
3015: IS iperm;
3017: PetscFunctionBegin;
3018: PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT, A->rmap->n, A->cmap->n);
3019: PetscCall(MatMissingDiagonal(A, &missing, &i));
3020: PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i);
3022: /* check whether perm is the identity mapping */
3023: PetscCall(ISIdentity(perm, &perm_identity));
3024: PetscCall(ISInvertPermutation(perm, PETSC_DECIDE, &iperm));
3025: PetscCall(ISGetIndices(iperm, &riip));
3026: PetscCall(ISGetIndices(perm, &rip));
3028: /* initialization */
3029: PetscCall(PetscMalloc1(am + 1, &ui));
3030: ui[0] = 0;
3032: /* jl: linked list for storing indices of the pivot rows
3033: il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
3034: PetscCall(PetscMalloc4(am, &ui_ptr, am, &jl, am, &il, am, &cols));
3035: for (i = 0; i < am; i++) {
3036: jl[i] = am;
3037: il[i] = 0;
3038: }
3040: /* create and initialize a linked list for storing column indices of the active row k */
3041: nlnk = am + 1;
3042: PetscCall(PetscLLCreate(am, am, nlnk, lnk, lnkbt));
3044: /* initial FreeSpace size is fill*(ai[am]+1) */
3045: PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, ai[am] + 1), &free_space));
3046: current_space = free_space;
3048: for (k = 0; k < am; k++) { /* for each active row k */
3049: /* initialize lnk by the column indices of row rip[k] of A */
3050: nzk = 0;
3051: ncols = ai[rip[k] + 1] - ai[rip[k]];
3052: PetscCheck(ncols, PETSC_COMM_SELF, PETSC_ERR_MAT_CH_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, rip[k], k);
3053: ncols_upper = 0;
3054: for (j = 0; j < ncols; j++) {
3055: i = riip[*(aj + ai[rip[k]] + j)];
3056: if (i >= k) { /* only take upper triangular entry */
3057: cols[ncols_upper] = i;
3058: ncols_upper++;
3059: }
3060: }
3061: PetscCall(PetscLLAdd(ncols_upper, cols, am, &nlnk, lnk, lnkbt));
3062: nzk += nlnk;
3064: /* update lnk by computing fill-in for each pivot row to be merged in */
3065: prow = jl[k]; /* 1st pivot row */
3067: while (prow < k) {
3068: nextprow = jl[prow];
3069: /* merge prow into k-th row */
3070: jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */
3071: jmax = ui[prow + 1];
3072: ncols = jmax - jmin;
3073: uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */
3074: PetscCall(PetscLLAddSorted(ncols, uj_ptr, am, &nlnk, lnk, lnkbt));
3075: nzk += nlnk;
3077: /* update il and jl for prow */
3078: if (jmin < jmax) {
3079: il[prow] = jmin;
3080: j = *uj_ptr;
3081: jl[prow] = jl[j];
3082: jl[j] = prow;
3083: }
3084: prow = nextprow;
3085: }
3087: /* if free space is not available, make more free space */
3088: if (current_space->local_remaining < nzk) {
3089: i = am - k + 1; /* num of unfactored rows */
3090: i = PetscMin(i * nzk, i * (i - 1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
3091: PetscCall(PetscFreeSpaceGet(i, ¤t_space));
3092: reallocs++;
3093: }
3095: /* copy data into free space, then initialize lnk */
3096: PetscCall(PetscLLClean(am, am, nzk, lnk, current_space->array, lnkbt));
3098: /* add the k-th row into il and jl */
3099: if (nzk - 1 > 0) {
3100: i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
3101: jl[k] = jl[i];
3102: jl[i] = k;
3103: il[k] = ui[k] + 1;
3104: }
3105: ui_ptr[k] = current_space->array;
3107: current_space->array += nzk;
3108: current_space->local_used += nzk;
3109: current_space->local_remaining -= nzk;
3111: ui[k + 1] = ui[k] + nzk;
3112: }
3114: #if defined(PETSC_USE_INFO)
3115: if (ai[am] != 0) {
3116: PetscReal af = (PetscReal)ui[am] / (PetscReal)ai[am];
3117: PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)fill, (double)af));
3118: PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af));
3119: PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g) for best performance.\n", (double)af));
3120: } else {
3121: PetscCall(PetscInfo(A, "Empty matrix\n"));
3122: }
3123: #endif
3125: PetscCall(ISRestoreIndices(perm, &rip));
3126: PetscCall(ISRestoreIndices(iperm, &riip));
3127: PetscCall(PetscFree4(ui_ptr, jl, il, cols));
3129: /* destroy list of free space and other temporary array(s) */
3130: PetscCall(PetscMalloc1(ui[am] + 1, &uj));
3131: PetscCall(PetscFreeSpaceContiguous(&free_space, uj));
3132: PetscCall(PetscLLDestroy(lnk, lnkbt));
3134: /* put together the new matrix in MATSEQSBAIJ format */
3136: b = (Mat_SeqSBAIJ *)fact->data;
3137: b->singlemalloc = PETSC_FALSE;
3138: b->free_a = PETSC_TRUE;
3139: b->free_ij = PETSC_TRUE;
3141: PetscCall(PetscMalloc1(ui[am] + 1, &b->a));
3143: b->j = uj;
3144: b->i = ui;
3145: b->diag = NULL;
3146: b->ilen = NULL;
3147: b->imax = NULL;
3148: b->row = perm;
3149: b->col = perm;
3151: PetscCall(PetscObjectReference((PetscObject)perm));
3152: PetscCall(PetscObjectReference((PetscObject)perm));
3154: b->icol = iperm;
3155: b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
3157: PetscCall(PetscMalloc1(am + 1, &b->solve_work));
3158: b->maxnz = b->nz = ui[am];
3160: fact->info.factor_mallocs = reallocs;
3161: fact->info.fill_ratio_given = fill;
3162: if (ai[am] != 0) {
3163: fact->info.fill_ratio_needed = (PetscReal)ui[am] / (PetscReal)ai[am];
3164: } else {
3165: fact->info.fill_ratio_needed = 0.0;
3166: }
3167: fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ_inplace;
3168: PetscFunctionReturn(PETSC_SUCCESS);
3169: }
3170: #endif
3172: PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering(Mat A, Vec bb, Vec xx)
3173: {
3174: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
3175: PetscInt n = A->rmap->n;
3176: const PetscInt *ai = a->i, *aj = a->j, *adiag = a->diag, *vi;
3177: PetscScalar *x, sum;
3178: const PetscScalar *b;
3179: const MatScalar *aa = a->a, *v;
3180: PetscInt i, nz;
3182: PetscFunctionBegin;
3183: if (!n) PetscFunctionReturn(PETSC_SUCCESS);
3185: PetscCall(VecGetArrayRead(bb, &b));
3186: PetscCall(VecGetArrayWrite(xx, &x));
3188: /* forward solve the lower triangular */
3189: x[0] = b[0];
3190: v = aa;
3191: vi = aj;
3192: for (i = 1; i < n; i++) {
3193: nz = ai[i + 1] - ai[i];
3194: sum = b[i];
3195: PetscSparseDenseMinusDot(sum, x, v, vi, nz);
3196: v += nz;
3197: vi += nz;
3198: x[i] = sum;
3199: }
3201: /* backward solve the upper triangular */
3202: for (i = n - 1; i >= 0; i--) {
3203: v = aa + adiag[i + 1] + 1;
3204: vi = aj + adiag[i + 1] + 1;
3205: nz = adiag[i] - adiag[i + 1] - 1;
3206: sum = x[i];
3207: PetscSparseDenseMinusDot(sum, x, v, vi, nz);
3208: x[i] = sum * v[nz]; /* x[i]=aa[adiag[i]]*sum; v++; */
3209: }
3211: PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
3212: PetscCall(VecRestoreArrayRead(bb, &b));
3213: PetscCall(VecRestoreArrayWrite(xx, &x));
3214: PetscFunctionReturn(PETSC_SUCCESS);
3215: }
3217: PetscErrorCode MatSolve_SeqAIJ(Mat A, Vec bb, Vec xx)
3218: {
3219: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
3220: IS iscol = a->col, isrow = a->row;
3221: PetscInt i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j, *adiag = a->diag, nz;
3222: const PetscInt *rout, *cout, *r, *c;
3223: PetscScalar *x, *tmp, sum;
3224: const PetscScalar *b;
3225: const MatScalar *aa = a->a, *v;
3227: PetscFunctionBegin;
3228: if (!n) PetscFunctionReturn(PETSC_SUCCESS);
3230: PetscCall(VecGetArrayRead(bb, &b));
3231: PetscCall(VecGetArrayWrite(xx, &x));
3232: tmp = a->solve_work;
3234: PetscCall(ISGetIndices(isrow, &rout));
3235: r = rout;
3236: PetscCall(ISGetIndices(iscol, &cout));
3237: c = cout;
3239: /* forward solve the lower triangular */
3240: tmp[0] = b[r[0]];
3241: v = aa;
3242: vi = aj;
3243: for (i = 1; i < n; i++) {
3244: nz = ai[i + 1] - ai[i];
3245: sum = b[r[i]];
3246: PetscSparseDenseMinusDot(sum, tmp, v, vi, nz);
3247: tmp[i] = sum;
3248: v += nz;
3249: vi += nz;
3250: }
3252: /* backward solve the upper triangular */
3253: for (i = n - 1; i >= 0; i--) {
3254: v = aa + adiag[i + 1] + 1;
3255: vi = aj + adiag[i + 1] + 1;
3256: nz = adiag[i] - adiag[i + 1] - 1;
3257: sum = tmp[i];
3258: PetscSparseDenseMinusDot(sum, tmp, v, vi, nz);
3259: x[c[i]] = tmp[i] = sum * v[nz]; /* v[nz] = aa[adiag[i]] */
3260: }
3262: PetscCall(ISRestoreIndices(isrow, &rout));
3263: PetscCall(ISRestoreIndices(iscol, &cout));
3264: PetscCall(VecRestoreArrayRead(bb, &b));
3265: PetscCall(VecRestoreArrayWrite(xx, &x));
3266: PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
3267: PetscFunctionReturn(PETSC_SUCCESS);
3268: }
3270: #if 0
3271: // unused
3272: /*
3273: This will get a new name and become a variant of MatILUFactor_SeqAIJ() there is no longer separate functions in the matrix function table for dt factors
3274: */
3275: static PetscErrorCode MatILUDTFactor_SeqAIJ(Mat A, IS isrow, IS iscol, const MatFactorInfo *info, Mat *fact)
3276: {
3277: Mat B = *fact;
3278: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data, *b;
3279: IS isicol;
3280: const PetscInt *r, *ic;
3281: PetscInt i, n = A->rmap->n, *ai = a->i, *aj = a->j, *ajtmp, *adiag;
3282: PetscInt *bi, *bj, *bdiag, *bdiag_rev;
3283: PetscInt row, nzi, nzi_bl, nzi_bu, *im, nzi_al, nzi_au;
3284: PetscInt nlnk, *lnk;
3285: PetscBT lnkbt;
3286: PetscBool row_identity, icol_identity;
3287: MatScalar *aatmp, *pv, *batmp, *ba, *rtmp, *pc, multiplier, *vtmp, diag_tmp;
3288: const PetscInt *ics;
3289: PetscInt j, nz, *pj, *bjtmp, k, ncut, *jtmp;
3290: PetscReal dt = info->dt, shift = info->shiftamount;
3291: PetscInt dtcount = (PetscInt)info->dtcount, nnz_max;
3292: PetscBool missing;
3294: PetscFunctionBegin;
3295: if (dt == (PetscReal)PETSC_DEFAULT) dt = 0.005;
3296: if (dtcount == PETSC_DEFAULT) dtcount = (PetscInt)(1.5 * a->rmax);
3298: /* symbolic factorization, can be reused */
3299: PetscCall(MatMissingDiagonal(A, &missing, &i));
3300: PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i);
3301: adiag = a->diag;
3303: PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol));
3305: /* bdiag is location of diagonal in factor */
3306: PetscCall(PetscMalloc1(n + 1, &bdiag)); /* becomes b->diag */
3307: PetscCall(PetscMalloc1(n + 1, &bdiag_rev)); /* temporary */
3309: /* allocate row pointers bi */
3310: PetscCall(PetscMalloc1(2 * n + 2, &bi));
3312: /* allocate bj and ba; max num of nonzero entries is (ai[n]+2*n*dtcount+2) */
3313: if (dtcount > n - 1) dtcount = n - 1; /* diagonal is excluded */
3314: nnz_max = ai[n] + 2 * n * dtcount + 2;
3316: PetscCall(PetscMalloc1(nnz_max + 1, &bj));
3317: PetscCall(PetscMalloc1(nnz_max + 1, &ba));
3319: /* put together the new matrix */
3320: PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(B, MAT_SKIP_ALLOCATION, NULL));
3321: b = (Mat_SeqAIJ *)B->data;
3323: b->free_a = PETSC_TRUE;
3324: b->free_ij = PETSC_TRUE;
3325: b->singlemalloc = PETSC_FALSE;
3327: b->a = ba;
3328: b->j = bj;
3329: b->i = bi;
3330: b->diag = bdiag;
3331: b->ilen = NULL;
3332: b->imax = NULL;
3333: b->row = isrow;
3334: b->col = iscol;
3335: PetscCall(PetscObjectReference((PetscObject)isrow));
3336: PetscCall(PetscObjectReference((PetscObject)iscol));
3337: b->icol = isicol;
3339: PetscCall(PetscMalloc1(n + 1, &b->solve_work));
3340: b->maxnz = nnz_max;
3342: B->factortype = MAT_FACTOR_ILUDT;
3343: B->info.factor_mallocs = 0;
3344: B->info.fill_ratio_given = ((PetscReal)nnz_max) / ((PetscReal)ai[n]);
3345: /* end of symbolic factorization */
3347: PetscCall(ISGetIndices(isrow, &r));
3348: PetscCall(ISGetIndices(isicol, &ic));
3349: ics = ic;
3351: /* linked list for storing column indices of the active row */
3352: nlnk = n + 1;
3353: PetscCall(PetscLLCreate(n, n, nlnk, lnk, lnkbt));
3355: /* im: used by PetscLLAddSortedLU(); jtmp: working array for column indices of active row */
3356: PetscCall(PetscMalloc2(n, &im, n, &jtmp));
3357: /* rtmp, vtmp: working arrays for sparse and contiguous row entries of active row */
3358: PetscCall(PetscMalloc2(n, &rtmp, n, &vtmp));
3359: PetscCall(PetscArrayzero(rtmp, n));
3361: bi[0] = 0;
3362: bdiag[0] = nnz_max - 1; /* location of diag[0] in factor B */
3363: bdiag_rev[n] = bdiag[0];
3364: bi[2 * n + 1] = bdiag[0] + 1; /* endof bj and ba array */
3365: for (i = 0; i < n; i++) {
3366: /* copy initial fill into linked list */
3367: nzi = ai[r[i] + 1] - ai[r[i]];
3368: PetscCheck(nzi, PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, r[i], i);
3369: nzi_al = adiag[r[i]] - ai[r[i]];
3370: nzi_au = ai[r[i] + 1] - adiag[r[i]] - 1;
3371: ajtmp = aj + ai[r[i]];
3372: PetscCall(PetscLLAddPerm(nzi, ajtmp, ic, n, &nlnk, lnk, lnkbt));
3374: /* load in initial (unfactored row) */
3375: aatmp = a->a + ai[r[i]];
3376: for (j = 0; j < nzi; j++) rtmp[ics[*ajtmp++]] = *aatmp++;
3378: /* add pivot rows into linked list */
3379: row = lnk[n];
3380: while (row < i) {
3381: nzi_bl = bi[row + 1] - bi[row] + 1;
3382: bjtmp = bj + bdiag[row + 1] + 1; /* points to 1st column next to the diagonal in U */
3383: PetscCall(PetscLLAddSortedLU(bjtmp, row, &nlnk, lnk, lnkbt, i, nzi_bl, im));
3384: nzi += nlnk;
3385: row = lnk[row];
3386: }
3388: /* copy data from lnk into jtmp, then initialize lnk */
3389: PetscCall(PetscLLClean(n, n, nzi, lnk, jtmp, lnkbt));
3391: /* numerical factorization */
3392: bjtmp = jtmp;
3393: row = *bjtmp++; /* 1st pivot row */
3394: while (row < i) {
3395: pc = rtmp + row;
3396: pv = ba + bdiag[row]; /* 1./(diag of the pivot row) */
3397: multiplier = (*pc) * (*pv);
3398: *pc = multiplier;
3399: if (PetscAbsScalar(*pc) > dt) { /* apply tolerance dropping rule */
3400: pj = bj + bdiag[row + 1] + 1; /* point to 1st entry of U(row,:) */
3401: pv = ba + bdiag[row + 1] + 1;
3402: nz = bdiag[row] - bdiag[row + 1] - 1; /* num of entries in U(row,:), excluding diagonal */
3403: for (j = 0; j < nz; j++) rtmp[*pj++] -= multiplier * (*pv++);
3404: PetscCall(PetscLogFlops(1 + 2.0 * nz));
3405: }
3406: row = *bjtmp++;
3407: }
3409: /* copy sparse rtmp into contiguous vtmp; separate L and U part */
3410: diag_tmp = rtmp[i]; /* save diagonal value - may not needed?? */
3411: nzi_bl = 0;
3412: j = 0;
3413: while (jtmp[j] < i) { /* Note: jtmp is sorted */
3414: vtmp[j] = rtmp[jtmp[j]];
3415: rtmp[jtmp[j]] = 0.0;
3416: nzi_bl++;
3417: j++;
3418: }
3419: nzi_bu = nzi - nzi_bl - 1;
3420: while (j < nzi) {
3421: vtmp[j] = rtmp[jtmp[j]];
3422: rtmp[jtmp[j]] = 0.0;
3423: j++;
3424: }
3426: bjtmp = bj + bi[i];
3427: batmp = ba + bi[i];
3428: /* apply level dropping rule to L part */
3429: ncut = nzi_al + dtcount;
3430: if (ncut < nzi_bl) {
3431: PetscCall(PetscSortSplit(ncut, nzi_bl, vtmp, jtmp));
3432: PetscCall(PetscSortIntWithScalarArray(ncut, jtmp, vtmp));
3433: } else {
3434: ncut = nzi_bl;
3435: }
3436: for (j = 0; j < ncut; j++) {
3437: bjtmp[j] = jtmp[j];
3438: batmp[j] = vtmp[j];
3439: }
3440: bi[i + 1] = bi[i] + ncut;
3441: nzi = ncut + 1;
3443: /* apply level dropping rule to U part */
3444: ncut = nzi_au + dtcount;
3445: if (ncut < nzi_bu) {
3446: PetscCall(PetscSortSplit(ncut, nzi_bu, vtmp + nzi_bl + 1, jtmp + nzi_bl + 1));
3447: PetscCall(PetscSortIntWithScalarArray(ncut, jtmp + nzi_bl + 1, vtmp + nzi_bl + 1));
3448: } else {
3449: ncut = nzi_bu;
3450: }
3451: nzi += ncut;
3453: /* mark bdiagonal */
3454: bdiag[i + 1] = bdiag[i] - (ncut + 1);
3455: bdiag_rev[n - i - 1] = bdiag[i + 1];
3456: bi[2 * n - i] = bi[2 * n - i + 1] - (ncut + 1);
3457: bjtmp = bj + bdiag[i];
3458: batmp = ba + bdiag[i];
3459: *bjtmp = i;
3460: *batmp = diag_tmp; /* rtmp[i]; */
3461: if (*batmp == 0.0) *batmp = dt + shift;
3462: *batmp = 1.0 / (*batmp); /* invert diagonal entries for simpler triangular solves */
3464: bjtmp = bj + bdiag[i + 1] + 1;
3465: batmp = ba + bdiag[i + 1] + 1;
3466: for (k = 0; k < ncut; k++) {
3467: bjtmp[k] = jtmp[nzi_bl + 1 + k];
3468: batmp[k] = vtmp[nzi_bl + 1 + k];
3469: }
3471: im[i] = nzi; /* used by PetscLLAddSortedLU() */
3472: } /* for (i=0; i<n; i++) */
3473: PetscCheck(bi[n] < bdiag[n], PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "end of L array %" PetscInt_FMT " cannot >= the beginning of U array %" PetscInt_FMT, bi[n], bdiag[n]);
3475: PetscCall(ISRestoreIndices(isrow, &r));
3476: PetscCall(ISRestoreIndices(isicol, &ic));
3478: PetscCall(PetscLLDestroy(lnk, lnkbt));
3479: PetscCall(PetscFree2(im, jtmp));
3480: PetscCall(PetscFree2(rtmp, vtmp));
3481: PetscCall(PetscFree(bdiag_rev));
3483: PetscCall(PetscLogFlops(B->cmap->n));
3484: b->maxnz = b->nz = bi[n] + bdiag[0] - bdiag[n];
3486: PetscCall(ISIdentity(isrow, &row_identity));
3487: PetscCall(ISIdentity(isicol, &icol_identity));
3488: if (row_identity && icol_identity) {
3489: B->ops->solve = MatSolve_SeqAIJ_NaturalOrdering;
3490: } else {
3491: B->ops->solve = MatSolve_SeqAIJ;
3492: }
3494: B->ops->solveadd = NULL;
3495: B->ops->solvetranspose = NULL;
3496: B->ops->solvetransposeadd = NULL;
3497: B->ops->matsolve = NULL;
3498: B->assembled = PETSC_TRUE;
3499: B->preallocated = PETSC_TRUE;
3500: PetscFunctionReturn(PETSC_SUCCESS);
3501: }
3503: /* a wrapper of MatILUDTFactor_SeqAIJ() */
3504: /*
3505: This will get a new name and become a variant of MatILUFactor_SeqAIJ() there is no longer separate functions in the matrix function table for dt factors
3506: */
3508: static PetscErrorCode MatILUDTFactorSymbolic_SeqAIJ(Mat fact, Mat A, IS row, IS col, const MatFactorInfo *info)
3509: {
3510: PetscFunctionBegin;
3511: PetscCall(MatILUDTFactor_SeqAIJ(A, row, col, info, &fact));
3512: PetscFunctionReturn(PETSC_SUCCESS);
3513: }
3515: /*
3516: same as MatLUFactorNumeric_SeqAIJ(), except using contiguous array matrix factors
3517: - intend to replace existing MatLUFactorNumeric_SeqAIJ()
3518: */
3519: /*
3520: This will get a new name and become a variant of MatILUFactor_SeqAIJ() there is no longer separate functions in the matrix function table for dt factors
3521: */
3523: static PetscErrorCode MatILUDTFactorNumeric_SeqAIJ(Mat fact, Mat A, const MatFactorInfo *info)
3524: {
3525: Mat C = fact;
3526: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data, *b = (Mat_SeqAIJ *)C->data;
3527: IS isrow = b->row, isicol = b->icol;
3528: const PetscInt *r, *ic, *ics;
3529: PetscInt i, j, k, n = A->rmap->n, *ai = a->i, *aj = a->j, *bi = b->i, *bj = b->j;
3530: PetscInt *ajtmp, *bjtmp, nz, nzl, nzu, row, *bdiag = b->diag, *pj;
3531: MatScalar *rtmp, *pc, multiplier, *v, *pv, *aa = a->a;
3532: PetscReal dt = info->dt, shift = info->shiftamount;
3533: PetscBool row_identity, col_identity;
3535: PetscFunctionBegin;
3536: PetscCall(ISGetIndices(isrow, &r));
3537: PetscCall(ISGetIndices(isicol, &ic));
3538: PetscCall(PetscMalloc1(n + 1, &rtmp));
3539: ics = ic;
3541: for (i = 0; i < n; i++) {
3542: /* initialize rtmp array */
3543: nzl = bi[i + 1] - bi[i]; /* num of nonzeros in L(i,:) */
3544: bjtmp = bj + bi[i];
3545: for (j = 0; j < nzl; j++) rtmp[*bjtmp++] = 0.0;
3546: rtmp[i] = 0.0;
3547: nzu = bdiag[i] - bdiag[i + 1]; /* num of nonzeros in U(i,:) */
3548: bjtmp = bj + bdiag[i + 1] + 1;
3549: for (j = 0; j < nzu; j++) rtmp[*bjtmp++] = 0.0;
3551: /* load in initial unfactored row of A */
3552: nz = ai[r[i] + 1] - ai[r[i]];
3553: ajtmp = aj + ai[r[i]];
3554: v = aa + ai[r[i]];
3555: for (j = 0; j < nz; j++) rtmp[ics[*ajtmp++]] = v[j];
3557: /* numerical factorization */
3558: bjtmp = bj + bi[i]; /* point to 1st entry of L(i,:) */
3559: nzl = bi[i + 1] - bi[i]; /* num of entries in L(i,:) */
3560: k = 0;
3561: while (k < nzl) {
3562: row = *bjtmp++;
3563: pc = rtmp + row;
3564: pv = b->a + bdiag[row]; /* 1./(diag of the pivot row) */
3565: multiplier = (*pc) * (*pv);
3566: *pc = multiplier;
3567: if (PetscAbsScalar(multiplier) > dt) {
3568: pj = bj + bdiag[row + 1] + 1; /* point to 1st entry of U(row,:) */
3569: pv = b->a + bdiag[row + 1] + 1;
3570: nz = bdiag[row] - bdiag[row + 1] - 1; /* num of entries in U(row,:), excluding diagonal */
3571: for (j = 0; j < nz; j++) rtmp[*pj++] -= multiplier * (*pv++);
3572: PetscCall(PetscLogFlops(1 + 2.0 * nz));
3573: }
3574: k++;
3575: }
3577: /* finished row so stick it into b->a */
3578: /* L-part */
3579: pv = b->a + bi[i];
3580: pj = bj + bi[i];
3581: nzl = bi[i + 1] - bi[i];
3582: for (j = 0; j < nzl; j++) pv[j] = rtmp[pj[j]];
3584: /* diagonal: invert diagonal entries for simpler triangular solves */
3585: if (rtmp[i] == 0.0) rtmp[i] = dt + shift;
3586: b->a[bdiag[i]] = 1.0 / rtmp[i];
3588: /* U-part */
3589: pv = b->a + bdiag[i + 1] + 1;
3590: pj = bj + bdiag[i + 1] + 1;
3591: nzu = bdiag[i] - bdiag[i + 1] - 1;
3592: for (j = 0; j < nzu; j++) pv[j] = rtmp[pj[j]];
3593: }
3595: PetscCall(PetscFree(rtmp));
3596: PetscCall(ISRestoreIndices(isicol, &ic));
3597: PetscCall(ISRestoreIndices(isrow, &r));
3599: PetscCall(ISIdentity(isrow, &row_identity));
3600: PetscCall(ISIdentity(isicol, &col_identity));
3601: if (row_identity && col_identity) {
3602: C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering;
3603: } else {
3604: C->ops->solve = MatSolve_SeqAIJ;
3605: }
3606: C->ops->solveadd = NULL;
3607: C->ops->solvetranspose = NULL;
3608: C->ops->solvetransposeadd = NULL;
3609: C->ops->matsolve = NULL;
3610: C->assembled = PETSC_TRUE;
3611: C->preallocated = PETSC_TRUE;
3613: PetscCall(PetscLogFlops(C->cmap->n));
3614: PetscFunctionReturn(PETSC_SUCCESS);
3615: }
3616: #endif