Actual source code: aijfact.c
1: #include <../src/mat/impls/aij/seq/aij.h>
2: #include <../src/mat/impls/sbaij/seq/sbaij.h>
3: #include <petscbt.h>
4: #include <../src/mat/utils/freespace.h>
6: /*
7: Computes an ordering to get most of the large numerical values in the lower triangular part of the matrix
9: This code does not work and is not called anywhere. It would be registered with MatOrderingRegisterAll()
10: */
11: #if 0
12: PetscErrorCode MatGetOrdering_Flow_SeqAIJ(Mat mat, MatOrderingType type, IS *irow, IS *icol)
13: {
14: Mat_SeqAIJ *a = (Mat_SeqAIJ *)mat->data;
15: PetscInt i, j, jj, k, kk, n = mat->rmap->n, current = 0, newcurrent = 0, *order;
16: const PetscInt *ai = a->i, *aj = a->j;
17: const PetscScalar *aa = a->a;
18: PetscBool *done;
19: PetscReal best, past = 0, future;
21: PetscFunctionBegin;
22: /* pick initial row */
23: best = -1;
24: for (i = 0; i < n; i++) {
25: future = 0.0;
26: for (j = ai[i]; j < ai[i + 1]; j++) {
27: if (aj[j] != i) future += PetscAbsScalar(aa[j]);
28: else past = PetscAbsScalar(aa[j]);
29: }
30: if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */
31: if (past / future > best) {
32: best = past / future;
33: current = i;
34: }
35: }
37: PetscCall(PetscMalloc1(n, &done));
38: PetscCall(PetscArrayzero(done, n));
39: PetscCall(PetscMalloc1(n, &order));
40: order[0] = current;
41: for (i = 0; i < n - 1; i++) {
42: done[current] = PETSC_TRUE;
43: best = -1;
44: /* loop over all neighbors of current pivot */
45: for (j = ai[current]; j < ai[current + 1]; j++) {
46: jj = aj[j];
47: if (done[jj]) continue;
48: /* loop over columns of potential next row computing weights for below and above diagonal */
49: past = future = 0.0;
50: for (k = ai[jj]; k < ai[jj + 1]; k++) {
51: kk = aj[k];
52: if (done[kk]) past += PetscAbsScalar(aa[k]);
53: else if (kk != jj) future += PetscAbsScalar(aa[k]);
54: }
55: if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */
56: if (past / future > best) {
57: best = past / future;
58: newcurrent = jj;
59: }
60: }
61: if (best == -1) { /* no neighbors to select from so select best of all that remain */
62: best = -1;
63: for (k = 0; k < n; k++) {
64: if (done[k]) continue;
65: future = 0.0;
66: past = 0.0;
67: for (j = ai[k]; j < ai[k + 1]; j++) {
68: kk = aj[j];
69: if (done[kk]) past += PetscAbsScalar(aa[j]);
70: else if (kk != k) future += PetscAbsScalar(aa[j]);
71: }
72: if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */
73: if (past / future > best) {
74: best = past / future;
75: newcurrent = k;
76: }
77: }
78: }
79: PetscCheck(current != newcurrent, PETSC_COMM_SELF, PETSC_ERR_PLIB, "newcurrent cannot be current");
80: current = newcurrent;
81: order[i + 1] = current;
82: }
83: PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n, order, PETSC_COPY_VALUES, irow));
84: *icol = *irow;
85: PetscCall(PetscObjectReference((PetscObject)*irow));
86: PetscCall(PetscFree(done));
87: PetscCall(PetscFree(order));
88: PetscFunctionReturn(PETSC_SUCCESS);
89: }
90: #endif
92: static PetscErrorCode MatFactorGetSolverType_petsc(Mat A, MatSolverType *type)
93: {
94: PetscFunctionBegin;
95: *type = MATSOLVERPETSC;
96: PetscFunctionReturn(PETSC_SUCCESS);
97: }
99: PETSC_INTERN PetscErrorCode MatGetFactor_seqaij_petsc(Mat A, MatFactorType ftype, Mat *B)
100: {
101: PetscInt n = A->rmap->n;
103: PetscFunctionBegin;
104: #if defined(PETSC_USE_COMPLEX)
105: if ((ftype == MAT_FACTOR_CHOLESKY || ftype == MAT_FACTOR_ICC) && A->hermitian == PETSC_BOOL3_TRUE && A->symmetric != PETSC_BOOL3_TRUE) {
106: PetscCall(PetscInfo(A, "Hermitian MAT_FACTOR_CHOLESKY or MAT_FACTOR_ICC are not supported. Use MAT_FACTOR_LU instead.\n"));
107: *B = NULL;
108: PetscFunctionReturn(PETSC_SUCCESS);
109: }
110: #endif
112: PetscCall(MatCreate(PetscObjectComm((PetscObject)A), B));
113: PetscCall(MatSetSizes(*B, n, n, n, n));
114: if (ftype == MAT_FACTOR_LU || ftype == MAT_FACTOR_ILU || ftype == MAT_FACTOR_ILUDT) {
115: PetscCall(MatSetType(*B, MATSEQAIJ));
117: (*B)->ops->ilufactorsymbolic = MatILUFactorSymbolic_SeqAIJ;
118: (*B)->ops->lufactorsymbolic = MatLUFactorSymbolic_SeqAIJ;
120: PetscCall(MatSetBlockSizesFromMats(*B, A, A));
121: PetscCall(PetscStrallocpy(MATORDERINGND, (char **)&(*B)->preferredordering[MAT_FACTOR_LU]));
122: PetscCall(PetscStrallocpy(MATORDERINGNATURAL, (char **)&(*B)->preferredordering[MAT_FACTOR_ILU]));
123: PetscCall(PetscStrallocpy(MATORDERINGNATURAL, (char **)&(*B)->preferredordering[MAT_FACTOR_ILUDT]));
124: } else if (ftype == MAT_FACTOR_CHOLESKY || ftype == MAT_FACTOR_ICC) {
125: PetscCall(MatSetType(*B, MATSEQSBAIJ));
126: PetscCall(MatSeqSBAIJSetPreallocation(*B, 1, MAT_SKIP_ALLOCATION, NULL));
128: (*B)->ops->iccfactorsymbolic = MatICCFactorSymbolic_SeqAIJ;
129: (*B)->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_SeqAIJ;
130: PetscCall(PetscStrallocpy(MATORDERINGND, (char **)&(*B)->preferredordering[MAT_FACTOR_CHOLESKY]));
131: PetscCall(PetscStrallocpy(MATORDERINGNATURAL, (char **)&(*B)->preferredordering[MAT_FACTOR_ICC]));
132: } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Factor type not supported");
133: (*B)->factortype = ftype;
135: PetscCall(PetscFree((*B)->solvertype));
136: PetscCall(PetscStrallocpy(MATSOLVERPETSC, &(*B)->solvertype));
137: (*B)->canuseordering = PETSC_TRUE;
138: PetscCall(PetscObjectComposeFunction((PetscObject)*B, "MatFactorGetSolverType_C", MatFactorGetSolverType_petsc));
139: PetscFunctionReturn(PETSC_SUCCESS);
140: }
142: #if 0
143: // currently unused
144: static PetscErrorCode MatLUFactorSymbolic_SeqAIJ_inplace(Mat B, Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
145: {
146: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data, *b;
147: IS isicol;
148: const PetscInt *r, *ic;
149: PetscInt i, n = A->rmap->n, *ai = a->i, *aj = a->j;
150: PetscInt *bi, *bj, *ajtmp;
151: PetscInt *bdiag, row, nnz, nzi, reallocs = 0, nzbd, *im;
152: PetscReal f;
153: PetscInt nlnk, *lnk, k, **bi_ptr;
154: PetscFreeSpaceList free_space = NULL, current_space = NULL;
155: PetscBT lnkbt;
156: PetscBool missing;
158: PetscFunctionBegin;
159: PetscCheck(A->rmap->N == A->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "matrix must be square");
160: PetscCall(MatMissingDiagonal(A, &missing, &i));
161: PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i);
163: PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol));
164: PetscCall(ISGetIndices(isrow, &r));
165: PetscCall(ISGetIndices(isicol, &ic));
167: /* get new row pointers */
168: PetscCall(PetscMalloc1(n + 1, &bi));
169: bi[0] = 0;
171: /* bdiag is location of diagonal in factor */
172: PetscCall(PetscMalloc1(n + 1, &bdiag));
173: bdiag[0] = 0;
175: /* linked list for storing column indices of the active row */
176: nlnk = n + 1;
177: PetscCall(PetscLLCreate(n, n, nlnk, lnk, lnkbt));
179: PetscCall(PetscMalloc2(n + 1, &bi_ptr, n + 1, &im));
181: /* initial FreeSpace size is f*(ai[n]+1) */
182: f = info->fill;
183: if (n == 1) f = 1; /* prevent failure in corner case of 1x1 matrix with fill < 0.5 */
184: PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space));
185: current_space = free_space;
187: for (i = 0; i < n; i++) {
188: /* copy previous fill into linked list */
189: nzi = 0;
190: nnz = ai[r[i] + 1] - ai[r[i]];
191: ajtmp = aj + ai[r[i]];
192: PetscCall(PetscLLAddPerm(nnz, ajtmp, ic, n, &nlnk, lnk, lnkbt));
193: nzi += nlnk;
195: /* add pivot rows into linked list */
196: row = lnk[n];
197: while (row < i) {
198: nzbd = bdiag[row] - bi[row] + 1; /* num of entries in the row with column index <= row */
199: ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */
200: PetscCall(PetscLLAddSortedLU(ajtmp, row, &nlnk, lnk, lnkbt, i, nzbd, im));
201: nzi += nlnk;
202: row = lnk[row];
203: }
204: bi[i + 1] = bi[i] + nzi;
205: im[i] = nzi;
207: /* mark bdiag */
208: nzbd = 0;
209: nnz = nzi;
210: k = lnk[n];
211: while (nnz-- && k < i) {
212: nzbd++;
213: k = lnk[k];
214: }
215: bdiag[i] = bi[i] + nzbd;
217: /* if free space is not available, make more free space */
218: if (current_space->local_remaining < nzi) {
219: nnz = PetscIntMultTruncate(n - i, nzi); /* estimated and max additional space needed */
220: PetscCall(PetscFreeSpaceGet(nnz, ¤t_space));
221: reallocs++;
222: }
224: /* copy data into free space, then initialize lnk */
225: PetscCall(PetscLLClean(n, n, nzi, lnk, current_space->array, lnkbt));
227: bi_ptr[i] = current_space->array;
228: current_space->array += nzi;
229: current_space->local_used += nzi;
230: current_space->local_remaining -= nzi;
231: }
232: #if defined(PETSC_USE_INFO)
233: if (ai[n] != 0) {
234: PetscReal af = ((PetscReal)bi[n]) / ((PetscReal)ai[n]);
235: PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)f, (double)af));
236: PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af));
237: PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g);\n", (double)af));
238: PetscCall(PetscInfo(A, "for best performance.\n"));
239: } else {
240: PetscCall(PetscInfo(A, "Empty matrix\n"));
241: }
242: #endif
244: PetscCall(ISRestoreIndices(isrow, &r));
245: PetscCall(ISRestoreIndices(isicol, &ic));
247: /* destroy list of free space and other temporary array(s) */
248: PetscCall(PetscMalloc1(bi[n] + 1, &bj));
249: PetscCall(PetscFreeSpaceContiguous(&free_space, bj));
250: PetscCall(PetscLLDestroy(lnk, lnkbt));
251: PetscCall(PetscFree2(bi_ptr, im));
253: /* put together the new matrix */
254: PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(B, MAT_SKIP_ALLOCATION, NULL));
255: b = (Mat_SeqAIJ *)(B)->data;
257: b->free_a = PETSC_TRUE;
258: b->free_ij = PETSC_TRUE;
259: b->singlemalloc = PETSC_FALSE;
261: PetscCall(PetscMalloc1(bi[n] + 1, &b->a));
262: b->j = bj;
263: b->i = bi;
264: b->diag = bdiag;
265: b->ilen = NULL;
266: b->imax = NULL;
267: b->row = isrow;
268: b->col = iscol;
269: PetscCall(PetscObjectReference((PetscObject)isrow));
270: PetscCall(PetscObjectReference((PetscObject)iscol));
271: b->icol = isicol;
272: PetscCall(PetscMalloc1(n + 1, &b->solve_work));
274: /* In b structure: Free imax, ilen, old a, old j. Allocate solve_work, new a, new j */
275: b->maxnz = b->nz = bi[n];
277: (B)->factortype = MAT_FACTOR_LU;
278: (B)->info.factor_mallocs = reallocs;
279: (B)->info.fill_ratio_given = f;
281: if (ai[n]) {
282: (B)->info.fill_ratio_needed = ((PetscReal)bi[n]) / ((PetscReal)ai[n]);
283: } else {
284: (B)->info.fill_ratio_needed = 0.0;
285: }
286: (B)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_inplace;
287: if (a->inode.size) (B)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode_inplace;
288: PetscFunctionReturn(PETSC_SUCCESS);
289: }
290: #endif
292: PetscErrorCode MatLUFactorSymbolic_SeqAIJ(Mat B, Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
293: {
294: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data, *b;
295: IS isicol;
296: const PetscInt *r, *ic, *ai = a->i, *aj = a->j, *ajtmp;
297: PetscInt i, n = A->rmap->n;
298: PetscInt *bi, *bj;
299: PetscInt *bdiag, row, nnz, nzi, reallocs = 0, nzbd, *im;
300: PetscReal f;
301: PetscInt nlnk, *lnk, k, **bi_ptr;
302: PetscFreeSpaceList free_space = NULL, current_space = NULL;
303: PetscBT lnkbt;
304: PetscBool missing;
306: PetscFunctionBegin;
307: PetscCheck(A->rmap->N == A->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "matrix must be square");
308: PetscCall(MatMissingDiagonal(A, &missing, &i));
309: PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i);
311: PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol));
312: PetscCall(ISGetIndices(isrow, &r));
313: PetscCall(ISGetIndices(isicol, &ic));
315: /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */
316: PetscCall(PetscMalloc1(n + 1, &bi));
317: PetscCall(PetscMalloc1(n + 1, &bdiag));
318: bi[0] = bdiag[0] = 0;
320: /* linked list for storing column indices of the active row */
321: nlnk = n + 1;
322: PetscCall(PetscLLCreate(n, n, nlnk, lnk, lnkbt));
324: PetscCall(PetscMalloc2(n + 1, &bi_ptr, n + 1, &im));
326: /* initial FreeSpace size is f*(ai[n]+1) */
327: f = info->fill;
328: if (n == 1) f = 1; /* prevent failure in corner case of 1x1 matrix with fill < 0.5 */
329: PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space));
330: current_space = free_space;
332: for (i = 0; i < n; i++) {
333: /* copy previous fill into linked list */
334: nzi = 0;
335: nnz = ai[r[i] + 1] - ai[r[i]];
336: ajtmp = aj + ai[r[i]];
337: PetscCall(PetscLLAddPerm(nnz, ajtmp, ic, n, &nlnk, lnk, lnkbt));
338: nzi += nlnk;
340: /* add pivot rows into linked list */
341: row = lnk[n];
342: while (row < i) {
343: nzbd = bdiag[row] + 1; /* num of entries in the row with column index <= row */
344: ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */
345: PetscCall(PetscLLAddSortedLU(ajtmp, row, &nlnk, lnk, lnkbt, i, nzbd, im));
346: nzi += nlnk;
347: row = lnk[row];
348: }
349: bi[i + 1] = bi[i] + nzi;
350: im[i] = nzi;
352: /* mark bdiag */
353: nzbd = 0;
354: nnz = nzi;
355: k = lnk[n];
356: while (nnz-- && k < i) {
357: nzbd++;
358: k = lnk[k];
359: }
360: bdiag[i] = nzbd; /* note: bdiag[i] = nnzL as input for PetscFreeSpaceContiguous_LU() */
362: /* if free space is not available, make more free space */
363: if (current_space->local_remaining < nzi) {
364: /* estimated additional space needed */
365: nnz = PetscIntMultTruncate(2, PetscIntMultTruncate(n - 1, nzi));
366: PetscCall(PetscFreeSpaceGet(nnz, ¤t_space));
367: reallocs++;
368: }
370: /* copy data into free space, then initialize lnk */
371: PetscCall(PetscLLClean(n, n, nzi, lnk, current_space->array, lnkbt));
373: bi_ptr[i] = current_space->array;
374: current_space->array += nzi;
375: current_space->local_used += nzi;
376: current_space->local_remaining -= nzi;
377: }
379: PetscCall(ISRestoreIndices(isrow, &r));
380: PetscCall(ISRestoreIndices(isicol, &ic));
382: /* copy free_space into bj and free free_space; set bi, bj, bdiag in new datastructure; */
383: PetscCall(PetscMalloc1(bi[n] + 1, &bj));
384: PetscCall(PetscFreeSpaceContiguous_LU(&free_space, bj, n, bi, bdiag));
385: PetscCall(PetscLLDestroy(lnk, lnkbt));
386: PetscCall(PetscFree2(bi_ptr, im));
388: /* put together the new matrix */
389: PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(B, MAT_SKIP_ALLOCATION, NULL));
390: b = (Mat_SeqAIJ *)(B)->data;
392: b->free_a = PETSC_TRUE;
393: b->free_ij = PETSC_TRUE;
394: b->singlemalloc = PETSC_FALSE;
396: PetscCall(PetscMalloc1(bdiag[0] + 1, &b->a));
398: b->j = bj;
399: b->i = bi;
400: b->diag = bdiag;
401: b->ilen = NULL;
402: b->imax = NULL;
403: b->row = isrow;
404: b->col = iscol;
405: PetscCall(PetscObjectReference((PetscObject)isrow));
406: PetscCall(PetscObjectReference((PetscObject)iscol));
407: b->icol = isicol;
408: PetscCall(PetscMalloc1(n + 1, &b->solve_work));
410: /* In b structure: Free imax, ilen, old a, old j. Allocate solve_work, new a, new j */
411: b->maxnz = b->nz = bdiag[0] + 1;
413: B->factortype = MAT_FACTOR_LU;
414: B->info.factor_mallocs = reallocs;
415: B->info.fill_ratio_given = f;
417: if (ai[n]) {
418: B->info.fill_ratio_needed = ((PetscReal)(bdiag[0] + 1)) / ((PetscReal)ai[n]);
419: } else {
420: B->info.fill_ratio_needed = 0.0;
421: }
422: #if defined(PETSC_USE_INFO)
423: if (ai[n] != 0) {
424: PetscReal af = B->info.fill_ratio_needed;
425: PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)f, (double)af));
426: PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af));
427: PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g);\n", (double)af));
428: PetscCall(PetscInfo(A, "for best performance.\n"));
429: } else {
430: PetscCall(PetscInfo(A, "Empty matrix\n"));
431: }
432: #endif
433: B->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ;
434: if (a->inode.size) B->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode;
435: PetscCall(MatSeqAIJCheckInode_FactorLU(B));
436: PetscFunctionReturn(PETSC_SUCCESS);
437: }
439: /*
440: Trouble in factorization, should we dump the original matrix?
441: */
442: PetscErrorCode MatFactorDumpMatrix(Mat A)
443: {
444: PetscBool flg = PETSC_FALSE;
446: PetscFunctionBegin;
447: PetscCall(PetscOptionsGetBool(((PetscObject)A)->options, NULL, "-mat_factor_dump_on_error", &flg, NULL));
448: if (flg) {
449: PetscViewer viewer;
450: char filename[PETSC_MAX_PATH_LEN];
452: PetscCall(PetscSNPrintf(filename, PETSC_MAX_PATH_LEN, "matrix_factor_error.%d", PetscGlobalRank));
453: PetscCall(PetscViewerBinaryOpen(PetscObjectComm((PetscObject)A), filename, FILE_MODE_WRITE, &viewer));
454: PetscCall(MatView(A, viewer));
455: PetscCall(PetscViewerDestroy(&viewer));
456: }
457: PetscFunctionReturn(PETSC_SUCCESS);
458: }
460: PetscErrorCode MatLUFactorNumeric_SeqAIJ(Mat B, Mat A, const MatFactorInfo *info)
461: {
462: Mat C = B;
463: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data, *b = (Mat_SeqAIJ *)C->data;
464: IS isrow = b->row, isicol = b->icol;
465: const PetscInt *r, *ic, *ics;
466: const PetscInt n = A->rmap->n, *ai = a->i, *aj = a->j, *bi = b->i, *bj = b->j, *bdiag = b->diag;
467: PetscInt i, j, k, nz, nzL, row, *pj;
468: const PetscInt *ajtmp, *bjtmp;
469: MatScalar *rtmp, *pc, multiplier, *pv;
470: const MatScalar *aa = a->a, *v;
471: PetscBool row_identity, col_identity;
472: FactorShiftCtx sctx;
473: const PetscInt *ddiag;
474: PetscReal rs;
475: MatScalar d;
477: PetscFunctionBegin;
478: /* MatPivotSetUp(): initialize shift context sctx */
479: PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx)));
481: if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
482: ddiag = a->diag;
483: sctx.shift_top = info->zeropivot;
484: for (i = 0; i < n; i++) {
485: /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
486: d = (aa)[ddiag[i]];
487: rs = -PetscAbsScalar(d) - PetscRealPart(d);
488: v = aa + ai[i];
489: nz = ai[i + 1] - ai[i];
490: for (j = 0; j < nz; j++) rs += PetscAbsScalar(v[j]);
491: if (rs > sctx.shift_top) sctx.shift_top = rs;
492: }
493: sctx.shift_top *= 1.1;
494: sctx.nshift_max = 5;
495: sctx.shift_lo = 0.;
496: sctx.shift_hi = 1.;
497: }
499: PetscCall(ISGetIndices(isrow, &r));
500: PetscCall(ISGetIndices(isicol, &ic));
501: PetscCall(PetscMalloc1(n + 1, &rtmp));
502: ics = ic;
504: do {
505: sctx.newshift = PETSC_FALSE;
506: for (i = 0; i < n; i++) {
507: /* zero rtmp */
508: /* L part */
509: nz = bi[i + 1] - bi[i];
510: bjtmp = bj + bi[i];
511: for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0;
513: /* U part */
514: nz = bdiag[i] - bdiag[i + 1];
515: bjtmp = bj + bdiag[i + 1] + 1;
516: for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0;
518: /* load in initial (unfactored row) */
519: nz = ai[r[i] + 1] - ai[r[i]];
520: ajtmp = aj + ai[r[i]];
521: v = aa + ai[r[i]];
522: for (j = 0; j < nz; j++) rtmp[ics[ajtmp[j]]] = v[j];
523: /* ZeropivotApply() */
524: rtmp[i] += sctx.shift_amount; /* shift the diagonal of the matrix */
526: /* elimination */
527: bjtmp = bj + bi[i];
528: row = *bjtmp++;
529: nzL = bi[i + 1] - bi[i];
530: for (k = 0; k < nzL; k++) {
531: pc = rtmp + row;
532: if (*pc != 0.0) {
533: pv = b->a + bdiag[row];
534: multiplier = *pc * (*pv);
535: *pc = multiplier;
537: pj = b->j + bdiag[row + 1] + 1; /* beginning of U(row,:) */
538: pv = b->a + bdiag[row + 1] + 1;
539: nz = bdiag[row] - bdiag[row + 1] - 1; /* num of entries in U(row,:) excluding diag */
541: for (j = 0; j < nz; j++) rtmp[pj[j]] -= multiplier * pv[j];
542: PetscCall(PetscLogFlops(1 + 2.0 * nz));
543: }
544: row = *bjtmp++;
545: }
547: /* finished row so stick it into b->a */
548: rs = 0.0;
549: /* L part */
550: pv = b->a + bi[i];
551: pj = b->j + bi[i];
552: nz = bi[i + 1] - bi[i];
553: for (j = 0; j < nz; j++) {
554: pv[j] = rtmp[pj[j]];
555: rs += PetscAbsScalar(pv[j]);
556: }
558: /* U part */
559: pv = b->a + bdiag[i + 1] + 1;
560: pj = b->j + bdiag[i + 1] + 1;
561: nz = bdiag[i] - bdiag[i + 1] - 1;
562: for (j = 0; j < nz; j++) {
563: pv[j] = rtmp[pj[j]];
564: rs += PetscAbsScalar(pv[j]);
565: }
567: sctx.rs = rs;
568: sctx.pv = rtmp[i];
569: PetscCall(MatPivotCheck(B, A, info, &sctx, i));
570: if (sctx.newshift) break; /* break for-loop */
571: rtmp[i] = sctx.pv; /* sctx.pv might be updated in the case of MAT_SHIFT_INBLOCKS */
573: /* Mark diagonal and invert diagonal for simpler triangular solves */
574: pv = b->a + bdiag[i];
575: *pv = 1.0 / rtmp[i];
577: } /* endof for (i=0; i<n; i++) { */
579: /* MatPivotRefine() */
580: if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction > 0 && sctx.nshift < sctx.nshift_max) {
581: /*
582: * if no shift in this attempt & shifting & started shifting & can refine,
583: * then try lower shift
584: */
585: sctx.shift_hi = sctx.shift_fraction;
586: sctx.shift_fraction = (sctx.shift_hi + sctx.shift_lo) / 2.;
587: sctx.shift_amount = sctx.shift_fraction * sctx.shift_top;
588: sctx.newshift = PETSC_TRUE;
589: sctx.nshift++;
590: }
591: } while (sctx.newshift);
593: PetscCall(PetscFree(rtmp));
594: PetscCall(ISRestoreIndices(isicol, &ic));
595: PetscCall(ISRestoreIndices(isrow, &r));
597: PetscCall(ISIdentity(isrow, &row_identity));
598: PetscCall(ISIdentity(isicol, &col_identity));
599: if (b->inode.size) {
600: C->ops->solve = MatSolve_SeqAIJ_Inode;
601: } else if (row_identity && col_identity) {
602: C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering;
603: } else {
604: C->ops->solve = MatSolve_SeqAIJ;
605: }
606: C->ops->solveadd = MatSolveAdd_SeqAIJ;
607: C->ops->solvetranspose = MatSolveTranspose_SeqAIJ;
608: C->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ;
609: C->ops->matsolve = MatMatSolve_SeqAIJ;
610: C->assembled = PETSC_TRUE;
611: C->preallocated = PETSC_TRUE;
613: PetscCall(PetscLogFlops(C->cmap->n));
615: /* MatShiftView(A,info,&sctx) */
616: if (sctx.nshift) {
617: if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
618: PetscCall(PetscInfo(A, "number of shift_pd tries %" PetscInt_FMT ", shift_amount %g, diagonal shifted up by %e fraction top_value %e\n", sctx.nshift, (double)sctx.shift_amount, (double)sctx.shift_fraction, (double)sctx.shift_top));
619: } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
620: PetscCall(PetscInfo(A, "number of shift_nz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount));
621: } else if (info->shifttype == (PetscReal)MAT_SHIFT_INBLOCKS) {
622: PetscCall(PetscInfo(A, "number of shift_inblocks applied %" PetscInt_FMT ", each shift_amount %g\n", sctx.nshift, (double)info->shiftamount));
623: }
624: }
625: PetscFunctionReturn(PETSC_SUCCESS);
626: }
628: static PetscErrorCode MatMatSolve_SeqAIJ_inplace(Mat, Mat, Mat);
629: static PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering_inplace(Mat, Vec, Vec);
630: static PetscErrorCode MatSolveAdd_SeqAIJ_inplace(Mat, Vec, Vec, Vec);
632: PetscErrorCode MatLUFactorNumeric_SeqAIJ_inplace(Mat B, Mat A, const MatFactorInfo *info)
633: {
634: Mat C = B;
635: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data, *b = (Mat_SeqAIJ *)C->data;
636: IS isrow = b->row, isicol = b->icol;
637: const PetscInt *r, *ic, *ics;
638: PetscInt nz, row, i, j, n = A->rmap->n, diag;
639: const PetscInt *ai = a->i, *aj = a->j, *bi = b->i, *bj = b->j;
640: const PetscInt *ajtmp, *bjtmp, *diag_offset = b->diag, *pj;
641: MatScalar *pv, *rtmp, *pc, multiplier, d;
642: const MatScalar *v, *aa = a->a;
643: PetscReal rs = 0.0;
644: FactorShiftCtx sctx;
645: const PetscInt *ddiag;
646: PetscBool row_identity, col_identity;
648: PetscFunctionBegin;
649: /* MatPivotSetUp(): initialize shift context sctx */
650: PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx)));
652: if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
653: ddiag = a->diag;
654: sctx.shift_top = info->zeropivot;
655: for (i = 0; i < n; i++) {
656: /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
657: d = (aa)[ddiag[i]];
658: rs = -PetscAbsScalar(d) - PetscRealPart(d);
659: v = aa + ai[i];
660: nz = ai[i + 1] - ai[i];
661: for (j = 0; j < nz; j++) rs += PetscAbsScalar(v[j]);
662: if (rs > sctx.shift_top) sctx.shift_top = rs;
663: }
664: sctx.shift_top *= 1.1;
665: sctx.nshift_max = 5;
666: sctx.shift_lo = 0.;
667: sctx.shift_hi = 1.;
668: }
670: PetscCall(ISGetIndices(isrow, &r));
671: PetscCall(ISGetIndices(isicol, &ic));
672: PetscCall(PetscMalloc1(n + 1, &rtmp));
673: ics = ic;
675: do {
676: sctx.newshift = PETSC_FALSE;
677: for (i = 0; i < n; i++) {
678: nz = bi[i + 1] - bi[i];
679: bjtmp = bj + bi[i];
680: for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0;
682: /* load in initial (unfactored row) */
683: nz = ai[r[i] + 1] - ai[r[i]];
684: ajtmp = aj + ai[r[i]];
685: v = aa + ai[r[i]];
686: for (j = 0; j < nz; j++) rtmp[ics[ajtmp[j]]] = v[j];
687: rtmp[ics[r[i]]] += sctx.shift_amount; /* shift the diagonal of the matrix */
689: row = *bjtmp++;
690: while (row < i) {
691: pc = rtmp + row;
692: if (*pc != 0.0) {
693: pv = b->a + diag_offset[row];
694: pj = b->j + diag_offset[row] + 1;
695: multiplier = *pc / *pv++;
696: *pc = multiplier;
697: nz = bi[row + 1] - diag_offset[row] - 1;
698: for (j = 0; j < nz; j++) rtmp[pj[j]] -= multiplier * pv[j];
699: PetscCall(PetscLogFlops(1 + 2.0 * nz));
700: }
701: row = *bjtmp++;
702: }
703: /* finished row so stick it into b->a */
704: pv = b->a + bi[i];
705: pj = b->j + bi[i];
706: nz = bi[i + 1] - bi[i];
707: diag = diag_offset[i] - bi[i];
708: rs = 0.0;
709: for (j = 0; j < nz; j++) {
710: pv[j] = rtmp[pj[j]];
711: rs += PetscAbsScalar(pv[j]);
712: }
713: rs -= PetscAbsScalar(pv[diag]);
715: sctx.rs = rs;
716: sctx.pv = pv[diag];
717: PetscCall(MatPivotCheck(B, A, info, &sctx, i));
718: if (sctx.newshift) break;
719: pv[diag] = sctx.pv;
720: }
722: if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction > 0 && sctx.nshift < sctx.nshift_max) {
723: /*
724: * if no shift in this attempt & shifting & started shifting & can refine,
725: * then try lower shift
726: */
727: sctx.shift_hi = sctx.shift_fraction;
728: sctx.shift_fraction = (sctx.shift_hi + sctx.shift_lo) / 2.;
729: sctx.shift_amount = sctx.shift_fraction * sctx.shift_top;
730: sctx.newshift = PETSC_TRUE;
731: sctx.nshift++;
732: }
733: } while (sctx.newshift);
735: /* invert diagonal entries for simpler triangular solves */
736: for (i = 0; i < n; i++) b->a[diag_offset[i]] = 1.0 / b->a[diag_offset[i]];
737: PetscCall(PetscFree(rtmp));
738: PetscCall(ISRestoreIndices(isicol, &ic));
739: PetscCall(ISRestoreIndices(isrow, &r));
741: PetscCall(ISIdentity(isrow, &row_identity));
742: PetscCall(ISIdentity(isicol, &col_identity));
743: if (row_identity && col_identity) {
744: C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering_inplace;
745: } else {
746: C->ops->solve = MatSolve_SeqAIJ_inplace;
747: }
748: C->ops->solveadd = MatSolveAdd_SeqAIJ_inplace;
749: C->ops->solvetranspose = MatSolveTranspose_SeqAIJ_inplace;
750: C->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ_inplace;
751: C->ops->matsolve = MatMatSolve_SeqAIJ_inplace;
753: C->assembled = PETSC_TRUE;
754: C->preallocated = PETSC_TRUE;
756: PetscCall(PetscLogFlops(C->cmap->n));
757: if (sctx.nshift) {
758: if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
759: PetscCall(PetscInfo(A, "number of shift_pd tries %" PetscInt_FMT ", shift_amount %g, diagonal shifted up by %e fraction top_value %e\n", sctx.nshift, (double)sctx.shift_amount, (double)sctx.shift_fraction, (double)sctx.shift_top));
760: } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
761: PetscCall(PetscInfo(A, "number of shift_nz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount));
762: }
763: }
764: (C)->ops->solve = MatSolve_SeqAIJ_inplace;
765: (C)->ops->solvetranspose = MatSolveTranspose_SeqAIJ_inplace;
767: PetscCall(MatSeqAIJCheckInode(C));
768: PetscFunctionReturn(PETSC_SUCCESS);
769: }
771: static PetscErrorCode MatSolve_SeqAIJ_InplaceWithPerm(Mat, Vec, Vec);
773: /*
774: This routine implements inplace ILU(0) with row or/and column permutations.
775: Input:
776: A - original matrix
777: Output;
778: A - a->i (rowptr) is same as original rowptr, but factored i-the row is stored in rowperm[i]
779: a->j (col index) is permuted by the inverse of colperm, then sorted
780: a->a reordered accordingly with a->j
781: a->diag (ptr to diagonal elements) is updated.
782: */
783: PetscErrorCode MatLUFactorNumeric_SeqAIJ_InplaceWithPerm(Mat B, Mat A, const MatFactorInfo *info)
784: {
785: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
786: IS isrow = a->row, isicol = a->icol;
787: const PetscInt *r, *ic, *ics;
788: PetscInt i, j, n = A->rmap->n, *ai = a->i, *aj = a->j;
789: PetscInt *ajtmp, nz, row;
790: PetscInt *diag = a->diag, nbdiag, *pj;
791: PetscScalar *rtmp, *pc, multiplier, d;
792: MatScalar *pv, *v;
793: PetscReal rs;
794: FactorShiftCtx sctx;
795: const MatScalar *aa = a->a, *vtmp;
797: PetscFunctionBegin;
798: PetscCheck(A == B, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "input and output matrix must have same address");
800: /* MatPivotSetUp(): initialize shift context sctx */
801: PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx)));
803: if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
804: const PetscInt *ddiag = a->diag;
805: sctx.shift_top = info->zeropivot;
806: for (i = 0; i < n; i++) {
807: /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
808: d = (aa)[ddiag[i]];
809: rs = -PetscAbsScalar(d) - PetscRealPart(d);
810: vtmp = aa + ai[i];
811: nz = ai[i + 1] - ai[i];
812: for (j = 0; j < nz; j++) rs += PetscAbsScalar(vtmp[j]);
813: if (rs > sctx.shift_top) sctx.shift_top = rs;
814: }
815: sctx.shift_top *= 1.1;
816: sctx.nshift_max = 5;
817: sctx.shift_lo = 0.;
818: sctx.shift_hi = 1.;
819: }
821: PetscCall(ISGetIndices(isrow, &r));
822: PetscCall(ISGetIndices(isicol, &ic));
823: PetscCall(PetscMalloc1(n + 1, &rtmp));
824: PetscCall(PetscArrayzero(rtmp, n + 1));
825: ics = ic;
827: #if defined(MV)
828: sctx.shift_top = 0.;
829: sctx.nshift_max = 0;
830: sctx.shift_lo = 0.;
831: sctx.shift_hi = 0.;
832: sctx.shift_fraction = 0.;
834: if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
835: sctx.shift_top = 0.;
836: for (i = 0; i < n; i++) {
837: /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
838: d = (a->a)[diag[i]];
839: rs = -PetscAbsScalar(d) - PetscRealPart(d);
840: v = a->a + ai[i];
841: nz = ai[i + 1] - ai[i];
842: for (j = 0; j < nz; j++) rs += PetscAbsScalar(v[j]);
843: if (rs > sctx.shift_top) sctx.shift_top = rs;
844: }
845: if (sctx.shift_top < info->zeropivot) sctx.shift_top = info->zeropivot;
846: sctx.shift_top *= 1.1;
847: sctx.nshift_max = 5;
848: sctx.shift_lo = 0.;
849: sctx.shift_hi = 1.;
850: }
852: sctx.shift_amount = 0.;
853: sctx.nshift = 0;
854: #endif
856: do {
857: sctx.newshift = PETSC_FALSE;
858: for (i = 0; i < n; i++) {
859: /* load in initial unfactored row */
860: nz = ai[r[i] + 1] - ai[r[i]];
861: ajtmp = aj + ai[r[i]];
862: v = a->a + ai[r[i]];
863: /* sort permuted ajtmp and values v accordingly */
864: for (j = 0; j < nz; j++) ajtmp[j] = ics[ajtmp[j]];
865: PetscCall(PetscSortIntWithScalarArray(nz, ajtmp, v));
867: diag[r[i]] = ai[r[i]];
868: for (j = 0; j < nz; j++) {
869: rtmp[ajtmp[j]] = v[j];
870: if (ajtmp[j] < i) diag[r[i]]++; /* update a->diag */
871: }
872: rtmp[r[i]] += sctx.shift_amount; /* shift the diagonal of the matrix */
874: row = *ajtmp++;
875: while (row < i) {
876: pc = rtmp + row;
877: if (*pc != 0.0) {
878: pv = a->a + diag[r[row]];
879: pj = aj + diag[r[row]] + 1;
881: multiplier = *pc / *pv++;
882: *pc = multiplier;
883: nz = ai[r[row] + 1] - diag[r[row]] - 1;
884: for (j = 0; j < nz; j++) rtmp[pj[j]] -= multiplier * pv[j];
885: PetscCall(PetscLogFlops(1 + 2.0 * nz));
886: }
887: row = *ajtmp++;
888: }
889: /* finished row so overwrite it onto a->a */
890: pv = a->a + ai[r[i]];
891: pj = aj + ai[r[i]];
892: nz = ai[r[i] + 1] - ai[r[i]];
893: nbdiag = diag[r[i]] - ai[r[i]]; /* num of entries before the diagonal */
895: rs = 0.0;
896: for (j = 0; j < nz; j++) {
897: pv[j] = rtmp[pj[j]];
898: if (j != nbdiag) rs += PetscAbsScalar(pv[j]);
899: }
901: sctx.rs = rs;
902: sctx.pv = pv[nbdiag];
903: PetscCall(MatPivotCheck(B, A, info, &sctx, i));
904: if (sctx.newshift) break;
905: pv[nbdiag] = sctx.pv;
906: }
908: if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction > 0 && sctx.nshift < sctx.nshift_max) {
909: /*
910: * if no shift in this attempt & shifting & started shifting & can refine,
911: * then try lower shift
912: */
913: sctx.shift_hi = sctx.shift_fraction;
914: sctx.shift_fraction = (sctx.shift_hi + sctx.shift_lo) / 2.;
915: sctx.shift_amount = sctx.shift_fraction * sctx.shift_top;
916: sctx.newshift = PETSC_TRUE;
917: sctx.nshift++;
918: }
919: } while (sctx.newshift);
921: /* invert diagonal entries for simpler triangular solves */
922: for (i = 0; i < n; i++) a->a[diag[r[i]]] = 1.0 / a->a[diag[r[i]]];
924: PetscCall(PetscFree(rtmp));
925: PetscCall(ISRestoreIndices(isicol, &ic));
926: PetscCall(ISRestoreIndices(isrow, &r));
928: A->ops->solve = MatSolve_SeqAIJ_InplaceWithPerm;
929: A->ops->solveadd = MatSolveAdd_SeqAIJ_inplace;
930: A->ops->solvetranspose = MatSolveTranspose_SeqAIJ_inplace;
931: A->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ_inplace;
933: A->assembled = PETSC_TRUE;
934: A->preallocated = PETSC_TRUE;
936: PetscCall(PetscLogFlops(A->cmap->n));
937: if (sctx.nshift) {
938: if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
939: PetscCall(PetscInfo(A, "number of shift_pd tries %" PetscInt_FMT ", shift_amount %g, diagonal shifted up by %e fraction top_value %e\n", sctx.nshift, (double)sctx.shift_amount, (double)sctx.shift_fraction, (double)sctx.shift_top));
940: } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
941: PetscCall(PetscInfo(A, "number of shift_nz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount));
942: }
943: }
944: PetscFunctionReturn(PETSC_SUCCESS);
945: }
947: PetscErrorCode MatLUFactor_SeqAIJ(Mat A, IS row, IS col, const MatFactorInfo *info)
948: {
949: Mat C;
951: PetscFunctionBegin;
952: PetscCall(MatGetFactor(A, MATSOLVERPETSC, MAT_FACTOR_LU, &C));
953: PetscCall(MatLUFactorSymbolic(C, A, row, col, info));
954: PetscCall(MatLUFactorNumeric(C, A, info));
956: A->ops->solve = C->ops->solve;
957: A->ops->solvetranspose = C->ops->solvetranspose;
959: PetscCall(MatHeaderMerge(A, &C));
960: PetscFunctionReturn(PETSC_SUCCESS);
961: }
963: PetscErrorCode MatSolve_SeqAIJ_inplace(Mat A, Vec bb, Vec xx)
964: {
965: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
966: IS iscol = a->col, isrow = a->row;
967: PetscInt i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j;
968: PetscInt nz;
969: const PetscInt *rout, *cout, *r, *c;
970: PetscScalar *x, *tmp, *tmps, sum;
971: const PetscScalar *b;
972: const MatScalar *aa = a->a, *v;
974: PetscFunctionBegin;
975: if (!n) PetscFunctionReturn(PETSC_SUCCESS);
977: PetscCall(VecGetArrayRead(bb, &b));
978: PetscCall(VecGetArrayWrite(xx, &x));
979: tmp = a->solve_work;
981: PetscCall(ISGetIndices(isrow, &rout));
982: r = rout;
983: PetscCall(ISGetIndices(iscol, &cout));
984: c = cout + (n - 1);
986: /* forward solve the lower triangular */
987: tmp[0] = b[*r++];
988: tmps = tmp;
989: for (i = 1; i < n; i++) {
990: v = aa + ai[i];
991: vi = aj + ai[i];
992: nz = a->diag[i] - ai[i];
993: sum = b[*r++];
994: PetscSparseDenseMinusDot(sum, tmps, v, vi, nz);
995: tmp[i] = sum;
996: }
998: /* backward solve the upper triangular */
999: for (i = n - 1; i >= 0; i--) {
1000: v = aa + a->diag[i] + 1;
1001: vi = aj + a->diag[i] + 1;
1002: nz = ai[i + 1] - a->diag[i] - 1;
1003: sum = tmp[i];
1004: PetscSparseDenseMinusDot(sum, tmps, v, vi, nz);
1005: x[*c--] = tmp[i] = sum * aa[a->diag[i]];
1006: }
1008: PetscCall(ISRestoreIndices(isrow, &rout));
1009: PetscCall(ISRestoreIndices(iscol, &cout));
1010: PetscCall(VecRestoreArrayRead(bb, &b));
1011: PetscCall(VecRestoreArrayWrite(xx, &x));
1012: PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
1013: PetscFunctionReturn(PETSC_SUCCESS);
1014: }
1016: static PetscErrorCode MatMatSolve_SeqAIJ_inplace(Mat A, Mat B, Mat X)
1017: {
1018: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
1019: IS iscol = a->col, isrow = a->row;
1020: PetscInt i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j;
1021: PetscInt nz, neq, ldb, ldx;
1022: const PetscInt *rout, *cout, *r, *c;
1023: PetscScalar *x, *tmp = a->solve_work, *tmps, sum;
1024: const PetscScalar *b, *aa = a->a, *v;
1025: PetscBool isdense;
1027: PetscFunctionBegin;
1028: if (!n) PetscFunctionReturn(PETSC_SUCCESS);
1029: PetscCall(PetscObjectTypeCompare((PetscObject)B, MATSEQDENSE, &isdense));
1030: PetscCheck(isdense, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "B matrix must be a SeqDense matrix");
1031: if (X != B) {
1032: PetscCall(PetscObjectTypeCompare((PetscObject)X, MATSEQDENSE, &isdense));
1033: PetscCheck(isdense, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "X matrix must be a SeqDense matrix");
1034: }
1035: PetscCall(MatDenseGetArrayRead(B, &b));
1036: PetscCall(MatDenseGetLDA(B, &ldb));
1037: PetscCall(MatDenseGetArray(X, &x));
1038: PetscCall(MatDenseGetLDA(X, &ldx));
1039: PetscCall(ISGetIndices(isrow, &rout));
1040: r = rout;
1041: PetscCall(ISGetIndices(iscol, &cout));
1042: c = cout;
1043: for (neq = 0; neq < B->cmap->n; neq++) {
1044: /* forward solve the lower triangular */
1045: tmp[0] = b[r[0]];
1046: tmps = tmp;
1047: for (i = 1; i < n; i++) {
1048: v = aa + ai[i];
1049: vi = aj + ai[i];
1050: nz = a->diag[i] - ai[i];
1051: sum = b[r[i]];
1052: PetscSparseDenseMinusDot(sum, tmps, v, vi, nz);
1053: tmp[i] = sum;
1054: }
1055: /* backward solve the upper triangular */
1056: for (i = n - 1; i >= 0; i--) {
1057: v = aa + a->diag[i] + 1;
1058: vi = aj + a->diag[i] + 1;
1059: nz = ai[i + 1] - a->diag[i] - 1;
1060: sum = tmp[i];
1061: PetscSparseDenseMinusDot(sum, tmps, v, vi, nz);
1062: x[c[i]] = tmp[i] = sum * aa[a->diag[i]];
1063: }
1064: b += ldb;
1065: x += ldx;
1066: }
1067: PetscCall(ISRestoreIndices(isrow, &rout));
1068: PetscCall(ISRestoreIndices(iscol, &cout));
1069: PetscCall(MatDenseRestoreArrayRead(B, &b));
1070: PetscCall(MatDenseRestoreArray(X, &x));
1071: PetscCall(PetscLogFlops(B->cmap->n * (2.0 * a->nz - n)));
1072: PetscFunctionReturn(PETSC_SUCCESS);
1073: }
1075: PetscErrorCode MatMatSolve_SeqAIJ(Mat A, Mat B, Mat X)
1076: {
1077: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
1078: IS iscol = a->col, isrow = a->row;
1079: PetscInt i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j, *adiag = a->diag;
1080: PetscInt nz, neq, ldb, ldx;
1081: const PetscInt *rout, *cout, *r, *c;
1082: PetscScalar *x, *tmp = a->solve_work, sum;
1083: const PetscScalar *b, *aa = a->a, *v;
1084: PetscBool isdense;
1086: PetscFunctionBegin;
1087: if (!n) PetscFunctionReturn(PETSC_SUCCESS);
1088: PetscCall(PetscObjectTypeCompare((PetscObject)B, MATSEQDENSE, &isdense));
1089: PetscCheck(isdense, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "B matrix must be a SeqDense matrix");
1090: if (X != B) {
1091: PetscCall(PetscObjectTypeCompare((PetscObject)X, MATSEQDENSE, &isdense));
1092: PetscCheck(isdense, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "X matrix must be a SeqDense matrix");
1093: }
1094: PetscCall(MatDenseGetArrayRead(B, &b));
1095: PetscCall(MatDenseGetLDA(B, &ldb));
1096: PetscCall(MatDenseGetArray(X, &x));
1097: PetscCall(MatDenseGetLDA(X, &ldx));
1098: PetscCall(ISGetIndices(isrow, &rout));
1099: r = rout;
1100: PetscCall(ISGetIndices(iscol, &cout));
1101: c = cout;
1102: for (neq = 0; neq < B->cmap->n; neq++) {
1103: /* forward solve the lower triangular */
1104: tmp[0] = b[r[0]];
1105: v = aa;
1106: vi = aj;
1107: for (i = 1; i < n; i++) {
1108: nz = ai[i + 1] - ai[i];
1109: sum = b[r[i]];
1110: PetscSparseDenseMinusDot(sum, tmp, v, vi, nz);
1111: tmp[i] = sum;
1112: v += nz;
1113: vi += nz;
1114: }
1115: /* backward solve the upper triangular */
1116: for (i = n - 1; i >= 0; i--) {
1117: v = aa + adiag[i + 1] + 1;
1118: vi = aj + adiag[i + 1] + 1;
1119: nz = adiag[i] - adiag[i + 1] - 1;
1120: sum = tmp[i];
1121: PetscSparseDenseMinusDot(sum, tmp, v, vi, nz);
1122: x[c[i]] = tmp[i] = sum * v[nz]; /* v[nz] = aa[adiag[i]] */
1123: }
1124: b += ldb;
1125: x += ldx;
1126: }
1127: PetscCall(ISRestoreIndices(isrow, &rout));
1128: PetscCall(ISRestoreIndices(iscol, &cout));
1129: PetscCall(MatDenseRestoreArrayRead(B, &b));
1130: PetscCall(MatDenseRestoreArray(X, &x));
1131: PetscCall(PetscLogFlops(B->cmap->n * (2.0 * a->nz - n)));
1132: PetscFunctionReturn(PETSC_SUCCESS);
1133: }
1135: static PetscErrorCode MatSolve_SeqAIJ_InplaceWithPerm(Mat A, Vec bb, Vec xx)
1136: {
1137: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
1138: IS iscol = a->col, isrow = a->row;
1139: const PetscInt *r, *c, *rout, *cout;
1140: PetscInt i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j;
1141: PetscInt nz, row;
1142: PetscScalar *x, *tmp, *tmps, sum;
1143: const PetscScalar *b;
1144: const MatScalar *aa = a->a, *v;
1146: PetscFunctionBegin;
1147: if (!n) PetscFunctionReturn(PETSC_SUCCESS);
1149: PetscCall(VecGetArrayRead(bb, &b));
1150: PetscCall(VecGetArrayWrite(xx, &x));
1151: tmp = a->solve_work;
1153: PetscCall(ISGetIndices(isrow, &rout));
1154: r = rout;
1155: PetscCall(ISGetIndices(iscol, &cout));
1156: c = cout + (n - 1);
1158: /* forward solve the lower triangular */
1159: tmp[0] = b[*r++];
1160: tmps = tmp;
1161: for (row = 1; row < n; row++) {
1162: i = rout[row]; /* permuted row */
1163: v = aa + ai[i];
1164: vi = aj + ai[i];
1165: nz = a->diag[i] - ai[i];
1166: sum = b[*r++];
1167: PetscSparseDenseMinusDot(sum, tmps, v, vi, nz);
1168: tmp[row] = sum;
1169: }
1171: /* backward solve the upper triangular */
1172: for (row = n - 1; row >= 0; row--) {
1173: i = rout[row]; /* permuted row */
1174: v = aa + a->diag[i] + 1;
1175: vi = aj + a->diag[i] + 1;
1176: nz = ai[i + 1] - a->diag[i] - 1;
1177: sum = tmp[row];
1178: PetscSparseDenseMinusDot(sum, tmps, v, vi, nz);
1179: x[*c--] = tmp[row] = sum * aa[a->diag[i]];
1180: }
1182: PetscCall(ISRestoreIndices(isrow, &rout));
1183: PetscCall(ISRestoreIndices(iscol, &cout));
1184: PetscCall(VecRestoreArrayRead(bb, &b));
1185: PetscCall(VecRestoreArrayWrite(xx, &x));
1186: PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
1187: PetscFunctionReturn(PETSC_SUCCESS);
1188: }
1190: #include <../src/mat/impls/aij/seq/ftn-kernels/fsolve.h>
1191: static PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering_inplace(Mat A, Vec bb, Vec xx)
1192: {
1193: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
1194: PetscInt n = A->rmap->n;
1195: const PetscInt *ai = a->i, *aj = a->j, *adiag = a->diag;
1196: PetscScalar *x;
1197: const PetscScalar *b;
1198: const MatScalar *aa = a->a;
1199: #if !defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
1200: PetscInt adiag_i, i, nz, ai_i;
1201: const PetscInt *vi;
1202: const MatScalar *v;
1203: PetscScalar sum;
1204: #endif
1206: PetscFunctionBegin;
1207: if (!n) PetscFunctionReturn(PETSC_SUCCESS);
1209: PetscCall(VecGetArrayRead(bb, &b));
1210: PetscCall(VecGetArrayWrite(xx, &x));
1212: #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
1213: fortransolveaij_(&n, x, ai, aj, adiag, aa, b);
1214: #else
1215: /* forward solve the lower triangular */
1216: x[0] = b[0];
1217: for (i = 1; i < n; i++) {
1218: ai_i = ai[i];
1219: v = aa + ai_i;
1220: vi = aj + ai_i;
1221: nz = adiag[i] - ai_i;
1222: sum = b[i];
1223: PetscSparseDenseMinusDot(sum, x, v, vi, nz);
1224: x[i] = sum;
1225: }
1227: /* backward solve the upper triangular */
1228: for (i = n - 1; i >= 0; i--) {
1229: adiag_i = adiag[i];
1230: v = aa + adiag_i + 1;
1231: vi = aj + adiag_i + 1;
1232: nz = ai[i + 1] - adiag_i - 1;
1233: sum = x[i];
1234: PetscSparseDenseMinusDot(sum, x, v, vi, nz);
1235: x[i] = sum * aa[adiag_i];
1236: }
1237: #endif
1238: PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
1239: PetscCall(VecRestoreArrayRead(bb, &b));
1240: PetscCall(VecRestoreArrayWrite(xx, &x));
1241: PetscFunctionReturn(PETSC_SUCCESS);
1242: }
1244: static PetscErrorCode MatSolveAdd_SeqAIJ_inplace(Mat A, Vec bb, Vec yy, Vec xx)
1245: {
1246: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
1247: IS iscol = a->col, isrow = a->row;
1248: PetscInt i, n = A->rmap->n, j;
1249: PetscInt nz;
1250: const PetscInt *rout, *cout, *r, *c, *vi, *ai = a->i, *aj = a->j;
1251: PetscScalar *x, *tmp, sum;
1252: const PetscScalar *b;
1253: const MatScalar *aa = a->a, *v;
1255: PetscFunctionBegin;
1256: if (yy != xx) PetscCall(VecCopy(yy, xx));
1258: PetscCall(VecGetArrayRead(bb, &b));
1259: PetscCall(VecGetArray(xx, &x));
1260: tmp = a->solve_work;
1262: PetscCall(ISGetIndices(isrow, &rout));
1263: r = rout;
1264: PetscCall(ISGetIndices(iscol, &cout));
1265: c = cout + (n - 1);
1267: /* forward solve the lower triangular */
1268: tmp[0] = b[*r++];
1269: for (i = 1; i < n; i++) {
1270: v = aa + ai[i];
1271: vi = aj + ai[i];
1272: nz = a->diag[i] - ai[i];
1273: sum = b[*r++];
1274: for (j = 0; j < nz; j++) sum -= v[j] * tmp[vi[j]];
1275: tmp[i] = sum;
1276: }
1278: /* backward solve the upper triangular */
1279: for (i = n - 1; i >= 0; i--) {
1280: v = aa + a->diag[i] + 1;
1281: vi = aj + a->diag[i] + 1;
1282: nz = ai[i + 1] - a->diag[i] - 1;
1283: sum = tmp[i];
1284: for (j = 0; j < nz; j++) sum -= v[j] * tmp[vi[j]];
1285: tmp[i] = sum * aa[a->diag[i]];
1286: x[*c--] += tmp[i];
1287: }
1289: PetscCall(ISRestoreIndices(isrow, &rout));
1290: PetscCall(ISRestoreIndices(iscol, &cout));
1291: PetscCall(VecRestoreArrayRead(bb, &b));
1292: PetscCall(VecRestoreArray(xx, &x));
1293: PetscCall(PetscLogFlops(2.0 * a->nz));
1294: PetscFunctionReturn(PETSC_SUCCESS);
1295: }
1297: PetscErrorCode MatSolveAdd_SeqAIJ(Mat A, Vec bb, Vec yy, Vec xx)
1298: {
1299: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
1300: IS iscol = a->col, isrow = a->row;
1301: PetscInt i, n = A->rmap->n, j;
1302: PetscInt nz;
1303: const PetscInt *rout, *cout, *r, *c, *vi, *ai = a->i, *aj = a->j, *adiag = a->diag;
1304: PetscScalar *x, *tmp, sum;
1305: const PetscScalar *b;
1306: const MatScalar *aa = a->a, *v;
1308: PetscFunctionBegin;
1309: if (yy != xx) PetscCall(VecCopy(yy, xx));
1311: PetscCall(VecGetArrayRead(bb, &b));
1312: PetscCall(VecGetArray(xx, &x));
1313: tmp = a->solve_work;
1315: PetscCall(ISGetIndices(isrow, &rout));
1316: r = rout;
1317: PetscCall(ISGetIndices(iscol, &cout));
1318: c = cout;
1320: /* forward solve the lower triangular */
1321: tmp[0] = b[r[0]];
1322: v = aa;
1323: vi = aj;
1324: for (i = 1; i < n; i++) {
1325: nz = ai[i + 1] - ai[i];
1326: sum = b[r[i]];
1327: for (j = 0; j < nz; j++) sum -= v[j] * tmp[vi[j]];
1328: tmp[i] = sum;
1329: v += nz;
1330: vi += nz;
1331: }
1333: /* backward solve the upper triangular */
1334: v = aa + adiag[n - 1];
1335: vi = aj + adiag[n - 1];
1336: for (i = n - 1; i >= 0; i--) {
1337: nz = adiag[i] - adiag[i + 1] - 1;
1338: sum = tmp[i];
1339: for (j = 0; j < nz; j++) sum -= v[j] * tmp[vi[j]];
1340: tmp[i] = sum * v[nz];
1341: x[c[i]] += tmp[i];
1342: v += nz + 1;
1343: vi += nz + 1;
1344: }
1346: PetscCall(ISRestoreIndices(isrow, &rout));
1347: PetscCall(ISRestoreIndices(iscol, &cout));
1348: PetscCall(VecRestoreArrayRead(bb, &b));
1349: PetscCall(VecRestoreArray(xx, &x));
1350: PetscCall(PetscLogFlops(2.0 * a->nz));
1351: PetscFunctionReturn(PETSC_SUCCESS);
1352: }
1354: PetscErrorCode MatSolveTranspose_SeqAIJ_inplace(Mat A, Vec bb, Vec xx)
1355: {
1356: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
1357: IS iscol = a->col, isrow = a->row;
1358: const PetscInt *rout, *cout, *r, *c, *diag = a->diag, *ai = a->i, *aj = a->j, *vi;
1359: PetscInt i, n = A->rmap->n, j;
1360: PetscInt nz;
1361: PetscScalar *x, *tmp, s1;
1362: const MatScalar *aa = a->a, *v;
1363: const PetscScalar *b;
1365: PetscFunctionBegin;
1366: PetscCall(VecGetArrayRead(bb, &b));
1367: PetscCall(VecGetArrayWrite(xx, &x));
1368: tmp = a->solve_work;
1370: PetscCall(ISGetIndices(isrow, &rout));
1371: r = rout;
1372: PetscCall(ISGetIndices(iscol, &cout));
1373: c = cout;
1375: /* copy the b into temp work space according to permutation */
1376: for (i = 0; i < n; i++) tmp[i] = b[c[i]];
1378: /* forward solve the U^T */
1379: for (i = 0; i < n; i++) {
1380: v = aa + diag[i];
1381: vi = aj + diag[i] + 1;
1382: nz = ai[i + 1] - diag[i] - 1;
1383: s1 = tmp[i];
1384: s1 *= (*v++); /* multiply by inverse of diagonal entry */
1385: for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j];
1386: tmp[i] = s1;
1387: }
1389: /* backward solve the L^T */
1390: for (i = n - 1; i >= 0; i--) {
1391: v = aa + diag[i] - 1;
1392: vi = aj + diag[i] - 1;
1393: nz = diag[i] - ai[i];
1394: s1 = tmp[i];
1395: for (j = 0; j > -nz; j--) tmp[vi[j]] -= s1 * v[j];
1396: }
1398: /* copy tmp into x according to permutation */
1399: for (i = 0; i < n; i++) x[r[i]] = tmp[i];
1401: PetscCall(ISRestoreIndices(isrow, &rout));
1402: PetscCall(ISRestoreIndices(iscol, &cout));
1403: PetscCall(VecRestoreArrayRead(bb, &b));
1404: PetscCall(VecRestoreArrayWrite(xx, &x));
1406: PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
1407: PetscFunctionReturn(PETSC_SUCCESS);
1408: }
1410: PetscErrorCode MatSolveTranspose_SeqAIJ(Mat A, Vec bb, Vec xx)
1411: {
1412: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
1413: IS iscol = a->col, isrow = a->row;
1414: const PetscInt *rout, *cout, *r, *c, *adiag = a->diag, *ai = a->i, *aj = a->j, *vi;
1415: PetscInt i, n = A->rmap->n, j;
1416: PetscInt nz;
1417: PetscScalar *x, *tmp, s1;
1418: const MatScalar *aa = a->a, *v;
1419: const PetscScalar *b;
1421: PetscFunctionBegin;
1422: PetscCall(VecGetArrayRead(bb, &b));
1423: PetscCall(VecGetArrayWrite(xx, &x));
1424: tmp = a->solve_work;
1426: PetscCall(ISGetIndices(isrow, &rout));
1427: r = rout;
1428: PetscCall(ISGetIndices(iscol, &cout));
1429: c = cout;
1431: /* copy the b into temp work space according to permutation */
1432: for (i = 0; i < n; i++) tmp[i] = b[c[i]];
1434: /* forward solve the U^T */
1435: for (i = 0; i < n; i++) {
1436: v = aa + adiag[i + 1] + 1;
1437: vi = aj + adiag[i + 1] + 1;
1438: nz = adiag[i] - adiag[i + 1] - 1;
1439: s1 = tmp[i];
1440: s1 *= v[nz]; /* multiply by inverse of diagonal entry */
1441: for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j];
1442: tmp[i] = s1;
1443: }
1445: /* backward solve the L^T */
1446: for (i = n - 1; i >= 0; i--) {
1447: v = aa + ai[i];
1448: vi = aj + ai[i];
1449: nz = ai[i + 1] - ai[i];
1450: s1 = tmp[i];
1451: for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j];
1452: }
1454: /* copy tmp into x according to permutation */
1455: for (i = 0; i < n; i++) x[r[i]] = tmp[i];
1457: PetscCall(ISRestoreIndices(isrow, &rout));
1458: PetscCall(ISRestoreIndices(iscol, &cout));
1459: PetscCall(VecRestoreArrayRead(bb, &b));
1460: PetscCall(VecRestoreArrayWrite(xx, &x));
1462: PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
1463: PetscFunctionReturn(PETSC_SUCCESS);
1464: }
1466: PetscErrorCode MatSolveTransposeAdd_SeqAIJ_inplace(Mat A, Vec bb, Vec zz, Vec xx)
1467: {
1468: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
1469: IS iscol = a->col, isrow = a->row;
1470: const PetscInt *rout, *cout, *r, *c, *diag = a->diag, *ai = a->i, *aj = a->j, *vi;
1471: PetscInt i, n = A->rmap->n, j;
1472: PetscInt nz;
1473: PetscScalar *x, *tmp, s1;
1474: const MatScalar *aa = a->a, *v;
1475: const PetscScalar *b;
1477: PetscFunctionBegin;
1478: if (zz != xx) PetscCall(VecCopy(zz, xx));
1479: PetscCall(VecGetArrayRead(bb, &b));
1480: PetscCall(VecGetArray(xx, &x));
1481: tmp = a->solve_work;
1483: PetscCall(ISGetIndices(isrow, &rout));
1484: r = rout;
1485: PetscCall(ISGetIndices(iscol, &cout));
1486: c = cout;
1488: /* copy the b into temp work space according to permutation */
1489: for (i = 0; i < n; i++) tmp[i] = b[c[i]];
1491: /* forward solve the U^T */
1492: for (i = 0; i < n; i++) {
1493: v = aa + diag[i];
1494: vi = aj + diag[i] + 1;
1495: nz = ai[i + 1] - diag[i] - 1;
1496: s1 = tmp[i];
1497: s1 *= (*v++); /* multiply by inverse of diagonal entry */
1498: for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j];
1499: tmp[i] = s1;
1500: }
1502: /* backward solve the L^T */
1503: for (i = n - 1; i >= 0; i--) {
1504: v = aa + diag[i] - 1;
1505: vi = aj + diag[i] - 1;
1506: nz = diag[i] - ai[i];
1507: s1 = tmp[i];
1508: for (j = 0; j > -nz; j--) tmp[vi[j]] -= s1 * v[j];
1509: }
1511: /* copy tmp into x according to permutation */
1512: for (i = 0; i < n; i++) x[r[i]] += tmp[i];
1514: PetscCall(ISRestoreIndices(isrow, &rout));
1515: PetscCall(ISRestoreIndices(iscol, &cout));
1516: PetscCall(VecRestoreArrayRead(bb, &b));
1517: PetscCall(VecRestoreArray(xx, &x));
1519: PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
1520: PetscFunctionReturn(PETSC_SUCCESS);
1521: }
1523: PetscErrorCode MatSolveTransposeAdd_SeqAIJ(Mat A, Vec bb, Vec zz, Vec xx)
1524: {
1525: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
1526: IS iscol = a->col, isrow = a->row;
1527: const PetscInt *rout, *cout, *r, *c, *adiag = a->diag, *ai = a->i, *aj = a->j, *vi;
1528: PetscInt i, n = A->rmap->n, j;
1529: PetscInt nz;
1530: PetscScalar *x, *tmp, s1;
1531: const MatScalar *aa = a->a, *v;
1532: const PetscScalar *b;
1534: PetscFunctionBegin;
1535: if (zz != xx) PetscCall(VecCopy(zz, xx));
1536: PetscCall(VecGetArrayRead(bb, &b));
1537: PetscCall(VecGetArray(xx, &x));
1538: tmp = a->solve_work;
1540: PetscCall(ISGetIndices(isrow, &rout));
1541: r = rout;
1542: PetscCall(ISGetIndices(iscol, &cout));
1543: c = cout;
1545: /* copy the b into temp work space according to permutation */
1546: for (i = 0; i < n; i++) tmp[i] = b[c[i]];
1548: /* forward solve the U^T */
1549: for (i = 0; i < n; i++) {
1550: v = aa + adiag[i + 1] + 1;
1551: vi = aj + adiag[i + 1] + 1;
1552: nz = adiag[i] - adiag[i + 1] - 1;
1553: s1 = tmp[i];
1554: s1 *= v[nz]; /* multiply by inverse of diagonal entry */
1555: for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j];
1556: tmp[i] = s1;
1557: }
1559: /* backward solve the L^T */
1560: for (i = n - 1; i >= 0; i--) {
1561: v = aa + ai[i];
1562: vi = aj + ai[i];
1563: nz = ai[i + 1] - ai[i];
1564: s1 = tmp[i];
1565: for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j];
1566: }
1568: /* copy tmp into x according to permutation */
1569: for (i = 0; i < n; i++) x[r[i]] += tmp[i];
1571: PetscCall(ISRestoreIndices(isrow, &rout));
1572: PetscCall(ISRestoreIndices(iscol, &cout));
1573: PetscCall(VecRestoreArrayRead(bb, &b));
1574: PetscCall(VecRestoreArray(xx, &x));
1576: PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
1577: PetscFunctionReturn(PETSC_SUCCESS);
1578: }
1580: /*
1581: ilu() under revised new data structure.
1582: Factored arrays bj and ba are stored as
1583: L(0,:), L(1,:), ...,L(n-1,:), U(n-1,:),...,U(i,:),U(i-1,:),...,U(0,:)
1585: bi=fact->i is an array of size n+1, in which
1586: bi[i]: points to 1st entry of L(i,:),i=0,...,n-1
1587: bi[n]: points to L(n-1,n-1)+1
1589: bdiag=fact->diag is an array of size n+1,in which
1590: bdiag[i]: points to diagonal of U(i,:), i=0,...,n-1
1591: bdiag[n]: points to entry of U(n-1,0)-1
1593: U(i,:) contains bdiag[i] as its last entry, i.e.,
1594: U(i,:) = (u[i,i+1],...,u[i,n-1],diag[i])
1595: */
1596: PetscErrorCode MatILUFactorSymbolic_SeqAIJ_ilu0(Mat fact, Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
1597: {
1598: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data, *b;
1599: const PetscInt n = A->rmap->n, *ai = a->i, *aj, *adiag = a->diag;
1600: PetscInt i, j, k = 0, nz, *bi, *bj, *bdiag;
1601: IS isicol;
1603: PetscFunctionBegin;
1604: PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol));
1605: PetscCall(MatDuplicateNoCreate_SeqAIJ(fact, A, MAT_DO_NOT_COPY_VALUES, PETSC_FALSE));
1606: b = (Mat_SeqAIJ *)(fact)->data;
1608: /* allocate matrix arrays for new data structure */
1609: PetscCall(PetscMalloc3(ai[n] + 1, &b->a, ai[n] + 1, &b->j, n + 1, &b->i));
1611: b->singlemalloc = PETSC_TRUE;
1612: if (!b->diag) PetscCall(PetscMalloc1(n + 1, &b->diag));
1613: bdiag = b->diag;
1615: if (n > 0) PetscCall(PetscArrayzero(b->a, ai[n]));
1617: /* set bi and bj with new data structure */
1618: bi = b->i;
1619: bj = b->j;
1621: /* L part */
1622: bi[0] = 0;
1623: for (i = 0; i < n; i++) {
1624: nz = adiag[i] - ai[i];
1625: bi[i + 1] = bi[i] + nz;
1626: aj = a->j + ai[i];
1627: for (j = 0; j < nz; j++) bj[k++] = aj[j];
1628: }
1630: /* U part */
1631: bdiag[n] = bi[n] - 1;
1632: for (i = n - 1; i >= 0; i--) {
1633: nz = ai[i + 1] - adiag[i] - 1;
1634: aj = a->j + adiag[i] + 1;
1635: for (j = 0; j < nz; j++) bj[k++] = aj[j];
1636: /* diag[i] */
1637: bj[k++] = i;
1638: bdiag[i] = bdiag[i + 1] + nz + 1;
1639: }
1641: fact->factortype = MAT_FACTOR_ILU;
1642: fact->info.factor_mallocs = 0;
1643: fact->info.fill_ratio_given = info->fill;
1644: fact->info.fill_ratio_needed = 1.0;
1645: fact->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ;
1646: PetscCall(MatSeqAIJCheckInode_FactorLU(fact));
1648: b = (Mat_SeqAIJ *)(fact)->data;
1649: b->row = isrow;
1650: b->col = iscol;
1651: b->icol = isicol;
1652: PetscCall(PetscMalloc1(fact->rmap->n + 1, &b->solve_work));
1653: PetscCall(PetscObjectReference((PetscObject)isrow));
1654: PetscCall(PetscObjectReference((PetscObject)iscol));
1655: PetscFunctionReturn(PETSC_SUCCESS);
1656: }
1658: PetscErrorCode MatILUFactorSymbolic_SeqAIJ(Mat fact, Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
1659: {
1660: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data, *b;
1661: IS isicol;
1662: const PetscInt *r, *ic;
1663: PetscInt n = A->rmap->n, *ai = a->i, *aj = a->j;
1664: PetscInt *bi, *cols, nnz, *cols_lvl;
1665: PetscInt *bdiag, prow, fm, nzbd, reallocs = 0, dcount = 0;
1666: PetscInt i, levels, diagonal_fill;
1667: PetscBool col_identity, row_identity, missing;
1668: PetscReal f;
1669: PetscInt nlnk, *lnk, *lnk_lvl = NULL;
1670: PetscBT lnkbt;
1671: PetscInt nzi, *bj, **bj_ptr, **bjlvl_ptr;
1672: PetscFreeSpaceList free_space = NULL, current_space = NULL;
1673: PetscFreeSpaceList free_space_lvl = NULL, current_space_lvl = NULL;
1675: PetscFunctionBegin;
1676: PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT, A->rmap->n, A->cmap->n);
1677: PetscCall(MatMissingDiagonal(A, &missing, &i));
1678: PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i);
1680: levels = (PetscInt)info->levels;
1681: PetscCall(ISIdentity(isrow, &row_identity));
1682: PetscCall(ISIdentity(iscol, &col_identity));
1683: if (!levels && row_identity && col_identity) {
1684: /* special case: ilu(0) with natural ordering */
1685: PetscCall(MatILUFactorSymbolic_SeqAIJ_ilu0(fact, A, isrow, iscol, info));
1686: if (a->inode.size) fact->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode;
1687: PetscFunctionReturn(PETSC_SUCCESS);
1688: }
1690: PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol));
1691: PetscCall(ISGetIndices(isrow, &r));
1692: PetscCall(ISGetIndices(isicol, &ic));
1694: /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */
1695: PetscCall(PetscMalloc1(n + 1, &bi));
1696: PetscCall(PetscMalloc1(n + 1, &bdiag));
1697: bi[0] = bdiag[0] = 0;
1698: PetscCall(PetscMalloc2(n, &bj_ptr, n, &bjlvl_ptr));
1700: /* create a linked list for storing column indices of the active row */
1701: nlnk = n + 1;
1702: PetscCall(PetscIncompleteLLCreate(n, n, nlnk, lnk, lnk_lvl, lnkbt));
1704: /* initial FreeSpace size is f*(ai[n]+1) */
1705: f = info->fill;
1706: diagonal_fill = (PetscInt)info->diagonal_fill;
1707: PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space));
1708: current_space = free_space;
1709: PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space_lvl));
1710: current_space_lvl = free_space_lvl;
1711: for (i = 0; i < n; i++) {
1712: nzi = 0;
1713: /* copy current row into linked list */
1714: nnz = ai[r[i] + 1] - ai[r[i]];
1715: PetscCheck(nnz, PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, r[i], i);
1716: cols = aj + ai[r[i]];
1717: lnk[i] = -1; /* marker to indicate if diagonal exists */
1718: PetscCall(PetscIncompleteLLInit(nnz, cols, n, ic, &nlnk, lnk, lnk_lvl, lnkbt));
1719: nzi += nlnk;
1721: /* make sure diagonal entry is included */
1722: if (diagonal_fill && lnk[i] == -1) {
1723: fm = n;
1724: while (lnk[fm] < i) fm = lnk[fm];
1725: lnk[i] = lnk[fm]; /* insert diagonal into linked list */
1726: lnk[fm] = i;
1727: lnk_lvl[i] = 0;
1728: nzi++;
1729: dcount++;
1730: }
1732: /* add pivot rows into the active row */
1733: nzbd = 0;
1734: prow = lnk[n];
1735: while (prow < i) {
1736: nnz = bdiag[prow];
1737: cols = bj_ptr[prow] + nnz + 1;
1738: cols_lvl = bjlvl_ptr[prow] + nnz + 1;
1739: nnz = bi[prow + 1] - bi[prow] - nnz - 1;
1740: PetscCall(PetscILULLAddSorted(nnz, cols, levels, cols_lvl, prow, &nlnk, lnk, lnk_lvl, lnkbt, prow));
1741: nzi += nlnk;
1742: prow = lnk[prow];
1743: nzbd++;
1744: }
1745: bdiag[i] = nzbd;
1746: bi[i + 1] = bi[i] + nzi;
1747: /* if free space is not available, make more free space */
1748: if (current_space->local_remaining < nzi) {
1749: nnz = PetscIntMultTruncate(2, PetscIntMultTruncate(nzi, n - i)); /* estimated and max additional space needed */
1750: PetscCall(PetscFreeSpaceGet(nnz, ¤t_space));
1751: PetscCall(PetscFreeSpaceGet(nnz, ¤t_space_lvl));
1752: reallocs++;
1753: }
1755: /* copy data into free_space and free_space_lvl, then initialize lnk */
1756: PetscCall(PetscIncompleteLLClean(n, n, nzi, lnk, lnk_lvl, current_space->array, current_space_lvl->array, lnkbt));
1757: bj_ptr[i] = current_space->array;
1758: bjlvl_ptr[i] = current_space_lvl->array;
1760: /* make sure the active row i has diagonal entry */
1761: PetscCheck(*(bj_ptr[i] + bdiag[i]) == i, PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Row %" PetscInt_FMT " has missing diagonal in factored matrix\ntry running with -pc_factor_nonzeros_along_diagonal or -pc_factor_diagonal_fill", i);
1763: current_space->array += nzi;
1764: current_space->local_used += nzi;
1765: current_space->local_remaining -= nzi;
1766: current_space_lvl->array += nzi;
1767: current_space_lvl->local_used += nzi;
1768: current_space_lvl->local_remaining -= nzi;
1769: }
1771: PetscCall(ISRestoreIndices(isrow, &r));
1772: PetscCall(ISRestoreIndices(isicol, &ic));
1773: /* copy free_space into bj and free free_space; set bi, bj, bdiag in new datastructure; */
1774: PetscCall(PetscMalloc1(bi[n] + 1, &bj));
1775: PetscCall(PetscFreeSpaceContiguous_LU(&free_space, bj, n, bi, bdiag));
1777: PetscCall(PetscIncompleteLLDestroy(lnk, lnkbt));
1778: PetscCall(PetscFreeSpaceDestroy(free_space_lvl));
1779: PetscCall(PetscFree2(bj_ptr, bjlvl_ptr));
1781: #if defined(PETSC_USE_INFO)
1782: {
1783: PetscReal af = ((PetscReal)(bdiag[0] + 1)) / ((PetscReal)ai[n]);
1784: PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)f, (double)af));
1785: PetscCall(PetscInfo(A, "Run with -[sub_]pc_factor_fill %g or use \n", (double)af));
1786: PetscCall(PetscInfo(A, "PCFactorSetFill([sub]pc,%g);\n", (double)af));
1787: PetscCall(PetscInfo(A, "for best performance.\n"));
1788: if (diagonal_fill) PetscCall(PetscInfo(A, "Detected and replaced %" PetscInt_FMT " missing diagonals\n", dcount));
1789: }
1790: #endif
1791: /* put together the new matrix */
1792: PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(fact, MAT_SKIP_ALLOCATION, NULL));
1793: b = (Mat_SeqAIJ *)(fact)->data;
1795: b->free_a = PETSC_TRUE;
1796: b->free_ij = PETSC_TRUE;
1797: b->singlemalloc = PETSC_FALSE;
1799: PetscCall(PetscMalloc1(bdiag[0] + 1, &b->a));
1801: b->j = bj;
1802: b->i = bi;
1803: b->diag = bdiag;
1804: b->ilen = NULL;
1805: b->imax = NULL;
1806: b->row = isrow;
1807: b->col = iscol;
1808: PetscCall(PetscObjectReference((PetscObject)isrow));
1809: PetscCall(PetscObjectReference((PetscObject)iscol));
1810: b->icol = isicol;
1812: PetscCall(PetscMalloc1(n + 1, &b->solve_work));
1813: /* In b structure: Free imax, ilen, old a, old j.
1814: Allocate bdiag, solve_work, new a, new j */
1815: b->maxnz = b->nz = bdiag[0] + 1;
1817: (fact)->info.factor_mallocs = reallocs;
1818: (fact)->info.fill_ratio_given = f;
1819: (fact)->info.fill_ratio_needed = ((PetscReal)(bdiag[0] + 1)) / ((PetscReal)ai[n]);
1820: (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ;
1821: if (a->inode.size) (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode;
1822: PetscCall(MatSeqAIJCheckInode_FactorLU(fact));
1823: PetscFunctionReturn(PETSC_SUCCESS);
1824: }
1826: #if 0
1827: // unused
1828: static PetscErrorCode MatILUFactorSymbolic_SeqAIJ_inplace(Mat fact, Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
1829: {
1830: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data, *b;
1831: IS isicol;
1832: const PetscInt *r, *ic;
1833: PetscInt n = A->rmap->n, *ai = a->i, *aj = a->j;
1834: PetscInt *bi, *cols, nnz, *cols_lvl;
1835: PetscInt *bdiag, prow, fm, nzbd, reallocs = 0, dcount = 0;
1836: PetscInt i, levels, diagonal_fill;
1837: PetscBool col_identity, row_identity;
1838: PetscReal f;
1839: PetscInt nlnk, *lnk, *lnk_lvl = NULL;
1840: PetscBT lnkbt;
1841: PetscInt nzi, *bj, **bj_ptr, **bjlvl_ptr;
1842: PetscFreeSpaceList free_space = NULL, current_space = NULL;
1843: PetscFreeSpaceList free_space_lvl = NULL, current_space_lvl = NULL;
1844: PetscBool missing;
1846: PetscFunctionBegin;
1847: PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT, A->rmap->n, A->cmap->n);
1848: PetscCall(MatMissingDiagonal(A, &missing, &i));
1849: PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i);
1851: f = info->fill;
1852: levels = (PetscInt)info->levels;
1853: diagonal_fill = (PetscInt)info->diagonal_fill;
1855: PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol));
1857: PetscCall(ISIdentity(isrow, &row_identity));
1858: PetscCall(ISIdentity(iscol, &col_identity));
1859: if (!levels && row_identity && col_identity) { /* special case: ilu(0) with natural ordering */
1860: PetscCall(MatDuplicateNoCreate_SeqAIJ(fact, A, MAT_DO_NOT_COPY_VALUES, PETSC_TRUE));
1862: (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_inplace;
1863: if (a->inode.size) (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode_inplace;
1864: fact->factortype = MAT_FACTOR_ILU;
1865: (fact)->info.factor_mallocs = 0;
1866: (fact)->info.fill_ratio_given = info->fill;
1867: (fact)->info.fill_ratio_needed = 1.0;
1869: b = (Mat_SeqAIJ *)(fact)->data;
1870: b->row = isrow;
1871: b->col = iscol;
1872: b->icol = isicol;
1873: PetscCall(PetscMalloc1((fact)->rmap->n + 1, &b->solve_work));
1874: PetscCall(PetscObjectReference((PetscObject)isrow));
1875: PetscCall(PetscObjectReference((PetscObject)iscol));
1876: PetscFunctionReturn(PETSC_SUCCESS);
1877: }
1879: PetscCall(ISGetIndices(isrow, &r));
1880: PetscCall(ISGetIndices(isicol, &ic));
1882: /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */
1883: PetscCall(PetscMalloc1(n + 1, &bi));
1884: PetscCall(PetscMalloc1(n + 1, &bdiag));
1885: bi[0] = bdiag[0] = 0;
1887: PetscCall(PetscMalloc2(n, &bj_ptr, n, &bjlvl_ptr));
1889: /* create a linked list for storing column indices of the active row */
1890: nlnk = n + 1;
1891: PetscCall(PetscIncompleteLLCreate(n, n, nlnk, lnk, lnk_lvl, lnkbt));
1893: /* initial FreeSpace size is f*(ai[n]+1) */
1894: PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space));
1895: current_space = free_space;
1896: PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space_lvl));
1897: current_space_lvl = free_space_lvl;
1899: for (i = 0; i < n; i++) {
1900: nzi = 0;
1901: /* copy current row into linked list */
1902: nnz = ai[r[i] + 1] - ai[r[i]];
1903: PetscCheck(nnz, PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, r[i], i);
1904: cols = aj + ai[r[i]];
1905: lnk[i] = -1; /* marker to indicate if diagonal exists */
1906: PetscCall(PetscIncompleteLLInit(nnz, cols, n, ic, &nlnk, lnk, lnk_lvl, lnkbt));
1907: nzi += nlnk;
1909: /* make sure diagonal entry is included */
1910: if (diagonal_fill && lnk[i] == -1) {
1911: fm = n;
1912: while (lnk[fm] < i) fm = lnk[fm];
1913: lnk[i] = lnk[fm]; /* insert diagonal into linked list */
1914: lnk[fm] = i;
1915: lnk_lvl[i] = 0;
1916: nzi++;
1917: dcount++;
1918: }
1920: /* add pivot rows into the active row */
1921: nzbd = 0;
1922: prow = lnk[n];
1923: while (prow < i) {
1924: nnz = bdiag[prow];
1925: cols = bj_ptr[prow] + nnz + 1;
1926: cols_lvl = bjlvl_ptr[prow] + nnz + 1;
1927: nnz = bi[prow + 1] - bi[prow] - nnz - 1;
1928: PetscCall(PetscILULLAddSorted(nnz, cols, levels, cols_lvl, prow, &nlnk, lnk, lnk_lvl, lnkbt, prow));
1929: nzi += nlnk;
1930: prow = lnk[prow];
1931: nzbd++;
1932: }
1933: bdiag[i] = nzbd;
1934: bi[i + 1] = bi[i] + nzi;
1936: /* if free space is not available, make more free space */
1937: if (current_space->local_remaining < nzi) {
1938: nnz = PetscIntMultTruncate(nzi, n - i); /* estimated and max additional space needed */
1939: PetscCall(PetscFreeSpaceGet(nnz, ¤t_space));
1940: PetscCall(PetscFreeSpaceGet(nnz, ¤t_space_lvl));
1941: reallocs++;
1942: }
1944: /* copy data into free_space and free_space_lvl, then initialize lnk */
1945: PetscCall(PetscIncompleteLLClean(n, n, nzi, lnk, lnk_lvl, current_space->array, current_space_lvl->array, lnkbt));
1946: bj_ptr[i] = current_space->array;
1947: bjlvl_ptr[i] = current_space_lvl->array;
1949: /* make sure the active row i has diagonal entry */
1950: PetscCheck(*(bj_ptr[i] + bdiag[i]) == i, PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Row %" PetscInt_FMT " has missing diagonal in factored matrix\ntry running with -pc_factor_nonzeros_along_diagonal or -pc_factor_diagonal_fill", i);
1952: current_space->array += nzi;
1953: current_space->local_used += nzi;
1954: current_space->local_remaining -= nzi;
1955: current_space_lvl->array += nzi;
1956: current_space_lvl->local_used += nzi;
1957: current_space_lvl->local_remaining -= nzi;
1958: }
1960: PetscCall(ISRestoreIndices(isrow, &r));
1961: PetscCall(ISRestoreIndices(isicol, &ic));
1963: /* destroy list of free space and other temporary arrays */
1964: PetscCall(PetscMalloc1(bi[n] + 1, &bj));
1965: PetscCall(PetscFreeSpaceContiguous(&free_space, bj)); /* copy free_space -> bj */
1966: PetscCall(PetscIncompleteLLDestroy(lnk, lnkbt));
1967: PetscCall(PetscFreeSpaceDestroy(free_space_lvl));
1968: PetscCall(PetscFree2(bj_ptr, bjlvl_ptr));
1970: #if defined(PETSC_USE_INFO)
1971: {
1972: PetscReal af = ((PetscReal)bi[n]) / ((PetscReal)ai[n]);
1973: PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)f, (double)af));
1974: PetscCall(PetscInfo(A, "Run with -[sub_]pc_factor_fill %g or use \n", (double)af));
1975: PetscCall(PetscInfo(A, "PCFactorSetFill([sub]pc,%g);\n", (double)af));
1976: PetscCall(PetscInfo(A, "for best performance.\n"));
1977: if (diagonal_fill) PetscCall(PetscInfo(A, "Detected and replaced %" PetscInt_FMT " missing diagonals\n", dcount));
1978: }
1979: #endif
1981: /* put together the new matrix */
1982: PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(fact, MAT_SKIP_ALLOCATION, NULL));
1983: b = (Mat_SeqAIJ *)(fact)->data;
1985: b->free_a = PETSC_TRUE;
1986: b->free_ij = PETSC_TRUE;
1987: b->singlemalloc = PETSC_FALSE;
1989: PetscCall(PetscMalloc1(bi[n], &b->a));
1990: b->j = bj;
1991: b->i = bi;
1992: for (i = 0; i < n; i++) bdiag[i] += bi[i];
1993: b->diag = bdiag;
1994: b->ilen = NULL;
1995: b->imax = NULL;
1996: b->row = isrow;
1997: b->col = iscol;
1998: PetscCall(PetscObjectReference((PetscObject)isrow));
1999: PetscCall(PetscObjectReference((PetscObject)iscol));
2000: b->icol = isicol;
2001: PetscCall(PetscMalloc1(n + 1, &b->solve_work));
2002: /* In b structure: Free imax, ilen, old a, old j.
2003: Allocate bdiag, solve_work, new a, new j */
2004: b->maxnz = b->nz = bi[n];
2006: (fact)->info.factor_mallocs = reallocs;
2007: (fact)->info.fill_ratio_given = f;
2008: (fact)->info.fill_ratio_needed = ((PetscReal)bi[n]) / ((PetscReal)ai[n]);
2009: (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_inplace;
2010: if (a->inode.size) (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode_inplace;
2011: PetscFunctionReturn(PETSC_SUCCESS);
2012: }
2013: #endif
2015: PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ(Mat B, Mat A, const MatFactorInfo *info)
2016: {
2017: Mat C = B;
2018: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
2019: Mat_SeqSBAIJ *b = (Mat_SeqSBAIJ *)C->data;
2020: IS ip = b->row, iip = b->icol;
2021: const PetscInt *rip, *riip;
2022: PetscInt i, j, mbs = A->rmap->n, *bi = b->i, *bj = b->j, *bdiag = b->diag, *bjtmp;
2023: PetscInt *ai = a->i, *aj = a->j;
2024: PetscInt k, jmin, jmax, *c2r, *il, col, nexti, ili, nz;
2025: MatScalar *rtmp, *ba = b->a, *bval, *aa = a->a, dk, uikdi;
2026: PetscBool perm_identity;
2027: FactorShiftCtx sctx;
2028: PetscReal rs;
2029: MatScalar d, *v;
2031: PetscFunctionBegin;
2032: /* MatPivotSetUp(): initialize shift context sctx */
2033: PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx)));
2035: if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
2036: sctx.shift_top = info->zeropivot;
2037: for (i = 0; i < mbs; i++) {
2038: /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
2039: d = (aa)[a->diag[i]];
2040: rs = -PetscAbsScalar(d) - PetscRealPart(d);
2041: v = aa + ai[i];
2042: nz = ai[i + 1] - ai[i];
2043: for (j = 0; j < nz; j++) rs += PetscAbsScalar(v[j]);
2044: if (rs > sctx.shift_top) sctx.shift_top = rs;
2045: }
2046: sctx.shift_top *= 1.1;
2047: sctx.nshift_max = 5;
2048: sctx.shift_lo = 0.;
2049: sctx.shift_hi = 1.;
2050: }
2052: PetscCall(ISGetIndices(ip, &rip));
2053: PetscCall(ISGetIndices(iip, &riip));
2055: /* allocate working arrays
2056: c2r: linked list, keep track of pivot rows for a given column. c2r[col]: head of the list for a given col
2057: il: for active k row, il[i] gives the index of the 1st nonzero entry in U[i,k:n-1] in bj and ba arrays
2058: */
2059: PetscCall(PetscMalloc3(mbs, &rtmp, mbs, &il, mbs, &c2r));
2061: do {
2062: sctx.newshift = PETSC_FALSE;
2064: for (i = 0; i < mbs; i++) c2r[i] = mbs;
2065: if (mbs) il[0] = 0;
2067: for (k = 0; k < mbs; k++) {
2068: /* zero rtmp */
2069: nz = bi[k + 1] - bi[k];
2070: bjtmp = bj + bi[k];
2071: for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0;
2073: /* load in initial unfactored row */
2074: bval = ba + bi[k];
2075: jmin = ai[rip[k]];
2076: jmax = ai[rip[k] + 1];
2077: for (j = jmin; j < jmax; j++) {
2078: col = riip[aj[j]];
2079: if (col >= k) { /* only take upper triangular entry */
2080: rtmp[col] = aa[j];
2081: *bval++ = 0.0; /* for in-place factorization */
2082: }
2083: }
2084: /* shift the diagonal of the matrix: ZeropivotApply() */
2085: rtmp[k] += sctx.shift_amount; /* shift the diagonal of the matrix */
2087: /* modify k-th row by adding in those rows i with U(i,k)!=0 */
2088: dk = rtmp[k];
2089: i = c2r[k]; /* first row to be added to k_th row */
2091: while (i < k) {
2092: nexti = c2r[i]; /* next row to be added to k_th row */
2094: /* compute multiplier, update diag(k) and U(i,k) */
2095: ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */
2096: uikdi = -ba[ili] * ba[bdiag[i]]; /* diagonal(k) */
2097: dk += uikdi * ba[ili]; /* update diag[k] */
2098: ba[ili] = uikdi; /* -U(i,k) */
2100: /* add multiple of row i to k-th row */
2101: jmin = ili + 1;
2102: jmax = bi[i + 1];
2103: if (jmin < jmax) {
2104: for (j = jmin; j < jmax; j++) rtmp[bj[j]] += uikdi * ba[j];
2105: /* update il and c2r for row i */
2106: il[i] = jmin;
2107: j = bj[jmin];
2108: c2r[i] = c2r[j];
2109: c2r[j] = i;
2110: }
2111: i = nexti;
2112: }
2114: /* copy data into U(k,:) */
2115: rs = 0.0;
2116: jmin = bi[k];
2117: jmax = bi[k + 1] - 1;
2118: if (jmin < jmax) {
2119: for (j = jmin; j < jmax; j++) {
2120: col = bj[j];
2121: ba[j] = rtmp[col];
2122: rs += PetscAbsScalar(ba[j]);
2123: }
2124: /* add the k-th row into il and c2r */
2125: il[k] = jmin;
2126: i = bj[jmin];
2127: c2r[k] = c2r[i];
2128: c2r[i] = k;
2129: }
2131: /* MatPivotCheck() */
2132: sctx.rs = rs;
2133: sctx.pv = dk;
2134: PetscCall(MatPivotCheck(B, A, info, &sctx, i));
2135: if (sctx.newshift) break;
2136: dk = sctx.pv;
2138: ba[bdiag[k]] = 1.0 / dk; /* U(k,k) */
2139: }
2140: } while (sctx.newshift);
2142: PetscCall(PetscFree3(rtmp, il, c2r));
2143: PetscCall(ISRestoreIndices(ip, &rip));
2144: PetscCall(ISRestoreIndices(iip, &riip));
2146: PetscCall(ISIdentity(ip, &perm_identity));
2147: if (perm_identity) {
2148: B->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering;
2149: B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering;
2150: B->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering;
2151: B->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering;
2152: } else {
2153: B->ops->solve = MatSolve_SeqSBAIJ_1;
2154: B->ops->solvetranspose = MatSolve_SeqSBAIJ_1;
2155: B->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1;
2156: B->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1;
2157: }
2159: C->assembled = PETSC_TRUE;
2160: C->preallocated = PETSC_TRUE;
2162: PetscCall(PetscLogFlops(C->rmap->n));
2164: /* MatPivotView() */
2165: if (sctx.nshift) {
2166: if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
2167: PetscCall(PetscInfo(A, "number of shift_pd tries %" PetscInt_FMT ", shift_amount %g, diagonal shifted up by %e fraction top_value %e\n", sctx.nshift, (double)sctx.shift_amount, (double)sctx.shift_fraction, (double)sctx.shift_top));
2168: } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
2169: PetscCall(PetscInfo(A, "number of shift_nz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount));
2170: } else if (info->shifttype == (PetscReal)MAT_SHIFT_INBLOCKS) {
2171: PetscCall(PetscInfo(A, "number of shift_inblocks applied %" PetscInt_FMT ", each shift_amount %g\n", sctx.nshift, (double)info->shiftamount));
2172: }
2173: }
2174: PetscFunctionReturn(PETSC_SUCCESS);
2175: }
2177: PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ_inplace(Mat B, Mat A, const MatFactorInfo *info)
2178: {
2179: Mat C = B;
2180: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
2181: Mat_SeqSBAIJ *b = (Mat_SeqSBAIJ *)C->data;
2182: IS ip = b->row, iip = b->icol;
2183: const PetscInt *rip, *riip;
2184: PetscInt i, j, mbs = A->rmap->n, *bi = b->i, *bj = b->j, *bcol, *bjtmp;
2185: PetscInt *ai = a->i, *aj = a->j;
2186: PetscInt k, jmin, jmax, *jl, *il, col, nexti, ili, nz;
2187: MatScalar *rtmp, *ba = b->a, *bval, *aa = a->a, dk, uikdi;
2188: PetscBool perm_identity;
2189: FactorShiftCtx sctx;
2190: PetscReal rs;
2191: MatScalar d, *v;
2193: PetscFunctionBegin;
2194: /* MatPivotSetUp(): initialize shift context sctx */
2195: PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx)));
2197: if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
2198: sctx.shift_top = info->zeropivot;
2199: for (i = 0; i < mbs; i++) {
2200: /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
2201: d = (aa)[a->diag[i]];
2202: rs = -PetscAbsScalar(d) - PetscRealPart(d);
2203: v = aa + ai[i];
2204: nz = ai[i + 1] - ai[i];
2205: for (j = 0; j < nz; j++) rs += PetscAbsScalar(v[j]);
2206: if (rs > sctx.shift_top) sctx.shift_top = rs;
2207: }
2208: sctx.shift_top *= 1.1;
2209: sctx.nshift_max = 5;
2210: sctx.shift_lo = 0.;
2211: sctx.shift_hi = 1.;
2212: }
2214: PetscCall(ISGetIndices(ip, &rip));
2215: PetscCall(ISGetIndices(iip, &riip));
2217: /* initialization */
2218: PetscCall(PetscMalloc3(mbs, &rtmp, mbs, &il, mbs, &jl));
2220: do {
2221: sctx.newshift = PETSC_FALSE;
2223: for (i = 0; i < mbs; i++) jl[i] = mbs;
2224: il[0] = 0;
2226: for (k = 0; k < mbs; k++) {
2227: /* zero rtmp */
2228: nz = bi[k + 1] - bi[k];
2229: bjtmp = bj + bi[k];
2230: for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0;
2232: bval = ba + bi[k];
2233: /* initialize k-th row by the perm[k]-th row of A */
2234: jmin = ai[rip[k]];
2235: jmax = ai[rip[k] + 1];
2236: for (j = jmin; j < jmax; j++) {
2237: col = riip[aj[j]];
2238: if (col >= k) { /* only take upper triangular entry */
2239: rtmp[col] = aa[j];
2240: *bval++ = 0.0; /* for in-place factorization */
2241: }
2242: }
2243: /* shift the diagonal of the matrix */
2244: if (sctx.nshift) rtmp[k] += sctx.shift_amount;
2246: /* modify k-th row by adding in those rows i with U(i,k)!=0 */
2247: dk = rtmp[k];
2248: i = jl[k]; /* first row to be added to k_th row */
2250: while (i < k) {
2251: nexti = jl[i]; /* next row to be added to k_th row */
2253: /* compute multiplier, update diag(k) and U(i,k) */
2254: ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */
2255: uikdi = -ba[ili] * ba[bi[i]]; /* diagonal(k) */
2256: dk += uikdi * ba[ili];
2257: ba[ili] = uikdi; /* -U(i,k) */
2259: /* add multiple of row i to k-th row */
2260: jmin = ili + 1;
2261: jmax = bi[i + 1];
2262: if (jmin < jmax) {
2263: for (j = jmin; j < jmax; j++) rtmp[bj[j]] += uikdi * ba[j];
2264: /* update il and jl for row i */
2265: il[i] = jmin;
2266: j = bj[jmin];
2267: jl[i] = jl[j];
2268: jl[j] = i;
2269: }
2270: i = nexti;
2271: }
2273: /* shift the diagonals when zero pivot is detected */
2274: /* compute rs=sum of abs(off-diagonal) */
2275: rs = 0.0;
2276: jmin = bi[k] + 1;
2277: nz = bi[k + 1] - jmin;
2278: bcol = bj + jmin;
2279: for (j = 0; j < nz; j++) rs += PetscAbsScalar(rtmp[bcol[j]]);
2281: sctx.rs = rs;
2282: sctx.pv = dk;
2283: PetscCall(MatPivotCheck(B, A, info, &sctx, k));
2284: if (sctx.newshift) break;
2285: dk = sctx.pv;
2287: /* copy data into U(k,:) */
2288: ba[bi[k]] = 1.0 / dk; /* U(k,k) */
2289: jmin = bi[k] + 1;
2290: jmax = bi[k + 1];
2291: if (jmin < jmax) {
2292: for (j = jmin; j < jmax; j++) {
2293: col = bj[j];
2294: ba[j] = rtmp[col];
2295: }
2296: /* add the k-th row into il and jl */
2297: il[k] = jmin;
2298: i = bj[jmin];
2299: jl[k] = jl[i];
2300: jl[i] = k;
2301: }
2302: }
2303: } while (sctx.newshift);
2305: PetscCall(PetscFree3(rtmp, il, jl));
2306: PetscCall(ISRestoreIndices(ip, &rip));
2307: PetscCall(ISRestoreIndices(iip, &riip));
2309: PetscCall(ISIdentity(ip, &perm_identity));
2310: if (perm_identity) {
2311: B->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
2312: B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
2313: B->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
2314: B->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
2315: } else {
2316: B->ops->solve = MatSolve_SeqSBAIJ_1_inplace;
2317: B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_inplace;
2318: B->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1_inplace;
2319: B->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1_inplace;
2320: }
2322: C->assembled = PETSC_TRUE;
2323: C->preallocated = PETSC_TRUE;
2325: PetscCall(PetscLogFlops(C->rmap->n));
2326: if (sctx.nshift) {
2327: if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
2328: PetscCall(PetscInfo(A, "number of shiftnz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount));
2329: } else if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
2330: PetscCall(PetscInfo(A, "number of shiftpd tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount));
2331: }
2332: }
2333: PetscFunctionReturn(PETSC_SUCCESS);
2334: }
2336: /*
2337: icc() under revised new data structure.
2338: Factored arrays bj and ba are stored as
2339: U(0,:),...,U(i,:),U(n-1,:)
2341: ui=fact->i is an array of size n+1, in which
2342: ui+
2343: ui[i]: points to 1st entry of U(i,:),i=0,...,n-1
2344: ui[n]: points to U(n-1,n-1)+1
2346: udiag=fact->diag is an array of size n,in which
2347: udiag[i]: points to diagonal of U(i,:), i=0,...,n-1
2349: U(i,:) contains udiag[i] as its last entry, i.e.,
2350: U(i,:) = (u[i,i+1],...,u[i,n-1],diag[i])
2351: */
2353: PetscErrorCode MatICCFactorSymbolic_SeqAIJ(Mat fact, Mat A, IS perm, const MatFactorInfo *info)
2354: {
2355: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
2356: Mat_SeqSBAIJ *b;
2357: PetscBool perm_identity, missing;
2358: PetscInt reallocs = 0, i, *ai = a->i, *aj = a->j, am = A->rmap->n, *ui, *udiag;
2359: const PetscInt *rip, *riip;
2360: PetscInt jmin, jmax, nzk, k, j, *jl, prow, *il, nextprow;
2361: PetscInt nlnk, *lnk, *lnk_lvl = NULL, d;
2362: PetscInt ncols, ncols_upper, *cols, *ajtmp, *uj, **uj_ptr, **uj_lvl_ptr;
2363: PetscReal fill = info->fill, levels = info->levels;
2364: PetscFreeSpaceList free_space = NULL, current_space = NULL;
2365: PetscFreeSpaceList free_space_lvl = NULL, current_space_lvl = NULL;
2366: PetscBT lnkbt;
2367: IS iperm;
2369: PetscFunctionBegin;
2370: PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT, A->rmap->n, A->cmap->n);
2371: PetscCall(MatMissingDiagonal(A, &missing, &d));
2372: PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, d);
2373: PetscCall(ISIdentity(perm, &perm_identity));
2374: PetscCall(ISInvertPermutation(perm, PETSC_DECIDE, &iperm));
2376: PetscCall(PetscMalloc1(am + 1, &ui));
2377: PetscCall(PetscMalloc1(am + 1, &udiag));
2378: ui[0] = 0;
2380: /* ICC(0) without matrix ordering: simply rearrange column indices */
2381: if (!levels && perm_identity) {
2382: for (i = 0; i < am; i++) {
2383: ncols = ai[i + 1] - a->diag[i];
2384: ui[i + 1] = ui[i] + ncols;
2385: udiag[i] = ui[i + 1] - 1; /* points to the last entry of U(i,:) */
2386: }
2387: PetscCall(PetscMalloc1(ui[am] + 1, &uj));
2388: cols = uj;
2389: for (i = 0; i < am; i++) {
2390: aj = a->j + a->diag[i] + 1; /* 1st entry of U(i,:) without diagonal */
2391: ncols = ai[i + 1] - a->diag[i] - 1;
2392: for (j = 0; j < ncols; j++) *cols++ = aj[j];
2393: *cols++ = i; /* diagonal is located as the last entry of U(i,:) */
2394: }
2395: } else { /* case: levels>0 || (levels=0 && !perm_identity) */
2396: PetscCall(ISGetIndices(iperm, &riip));
2397: PetscCall(ISGetIndices(perm, &rip));
2399: /* initialization */
2400: PetscCall(PetscMalloc1(am + 1, &ajtmp));
2402: /* jl: linked list for storing indices of the pivot rows
2403: il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
2404: PetscCall(PetscMalloc4(am, &uj_ptr, am, &uj_lvl_ptr, am, &jl, am, &il));
2405: for (i = 0; i < am; i++) {
2406: jl[i] = am;
2407: il[i] = 0;
2408: }
2410: /* create and initialize a linked list for storing column indices of the active row k */
2411: nlnk = am + 1;
2412: PetscCall(PetscIncompleteLLCreate(am, am, nlnk, lnk, lnk_lvl, lnkbt));
2414: /* initial FreeSpace size is fill*(ai[am]+am)/2 */
2415: PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, (ai[am] + am) / 2), &free_space));
2416: current_space = free_space;
2417: PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, (ai[am] + am) / 2), &free_space_lvl));
2418: current_space_lvl = free_space_lvl;
2420: for (k = 0; k < am; k++) { /* for each active row k */
2421: /* initialize lnk by the column indices of row rip[k] of A */
2422: nzk = 0;
2423: ncols = ai[rip[k] + 1] - ai[rip[k]];
2424: PetscCheck(ncols, PETSC_COMM_SELF, PETSC_ERR_MAT_CH_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, rip[k], k);
2425: ncols_upper = 0;
2426: for (j = 0; j < ncols; j++) {
2427: i = *(aj + ai[rip[k]] + j); /* unpermuted column index */
2428: if (riip[i] >= k) { /* only take upper triangular entry */
2429: ajtmp[ncols_upper] = i;
2430: ncols_upper++;
2431: }
2432: }
2433: PetscCall(PetscIncompleteLLInit(ncols_upper, ajtmp, am, riip, &nlnk, lnk, lnk_lvl, lnkbt));
2434: nzk += nlnk;
2436: /* update lnk by computing fill-in for each pivot row to be merged in */
2437: prow = jl[k]; /* 1st pivot row */
2439: while (prow < k) {
2440: nextprow = jl[prow];
2442: /* merge prow into k-th row */
2443: jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */
2444: jmax = ui[prow + 1];
2445: ncols = jmax - jmin;
2446: i = jmin - ui[prow];
2447: cols = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */
2448: uj = uj_lvl_ptr[prow] + i; /* levels of cols */
2449: j = *(uj - 1);
2450: PetscCall(PetscICCLLAddSorted(ncols, cols, levels, uj, am, &nlnk, lnk, lnk_lvl, lnkbt, j));
2451: nzk += nlnk;
2453: /* update il and jl for prow */
2454: if (jmin < jmax) {
2455: il[prow] = jmin;
2456: j = *cols;
2457: jl[prow] = jl[j];
2458: jl[j] = prow;
2459: }
2460: prow = nextprow;
2461: }
2463: /* if free space is not available, make more free space */
2464: if (current_space->local_remaining < nzk) {
2465: i = am - k + 1; /* num of unfactored rows */
2466: i = PetscIntMultTruncate(i, PetscMin(nzk, i - 1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
2467: PetscCall(PetscFreeSpaceGet(i, ¤t_space));
2468: PetscCall(PetscFreeSpaceGet(i, ¤t_space_lvl));
2469: reallocs++;
2470: }
2472: /* copy data into free_space and free_space_lvl, then initialize lnk */
2473: PetscCheck(nzk != 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Empty row %" PetscInt_FMT " in ICC matrix factor", k);
2474: PetscCall(PetscIncompleteLLClean(am, am, nzk, lnk, lnk_lvl, current_space->array, current_space_lvl->array, lnkbt));
2476: /* add the k-th row into il and jl */
2477: if (nzk > 1) {
2478: i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
2479: jl[k] = jl[i];
2480: jl[i] = k;
2481: il[k] = ui[k] + 1;
2482: }
2483: uj_ptr[k] = current_space->array;
2484: uj_lvl_ptr[k] = current_space_lvl->array;
2486: current_space->array += nzk;
2487: current_space->local_used += nzk;
2488: current_space->local_remaining -= nzk;
2490: current_space_lvl->array += nzk;
2491: current_space_lvl->local_used += nzk;
2492: current_space_lvl->local_remaining -= nzk;
2494: ui[k + 1] = ui[k] + nzk;
2495: }
2497: PetscCall(ISRestoreIndices(perm, &rip));
2498: PetscCall(ISRestoreIndices(iperm, &riip));
2499: PetscCall(PetscFree4(uj_ptr, uj_lvl_ptr, jl, il));
2500: PetscCall(PetscFree(ajtmp));
2502: /* copy free_space into uj and free free_space; set ui, uj, udiag in new datastructure; */
2503: PetscCall(PetscMalloc1(ui[am] + 1, &uj));
2504: PetscCall(PetscFreeSpaceContiguous_Cholesky(&free_space, uj, am, ui, udiag)); /* store matrix factor */
2505: PetscCall(PetscIncompleteLLDestroy(lnk, lnkbt));
2506: PetscCall(PetscFreeSpaceDestroy(free_space_lvl));
2508: } /* end of case: levels>0 || (levels=0 && !perm_identity) */
2510: /* put together the new matrix in MATSEQSBAIJ format */
2511: b = (Mat_SeqSBAIJ *)(fact)->data;
2512: b->singlemalloc = PETSC_FALSE;
2514: PetscCall(PetscMalloc1(ui[am] + 1, &b->a));
2516: b->j = uj;
2517: b->i = ui;
2518: b->diag = udiag;
2519: b->free_diag = PETSC_TRUE;
2520: b->ilen = NULL;
2521: b->imax = NULL;
2522: b->row = perm;
2523: b->col = perm;
2524: PetscCall(PetscObjectReference((PetscObject)perm));
2525: PetscCall(PetscObjectReference((PetscObject)perm));
2526: b->icol = iperm;
2527: b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
2529: PetscCall(PetscMalloc1(am + 1, &b->solve_work));
2531: b->maxnz = b->nz = ui[am];
2532: b->free_a = PETSC_TRUE;
2533: b->free_ij = PETSC_TRUE;
2535: fact->info.factor_mallocs = reallocs;
2536: fact->info.fill_ratio_given = fill;
2537: if (ai[am] != 0) {
2538: /* nonzeros in lower triangular part of A (including diagonals) = (ai[am]+am)/2 */
2539: fact->info.fill_ratio_needed = ((PetscReal)2 * ui[am]) / (ai[am] + am);
2540: } else {
2541: fact->info.fill_ratio_needed = 0.0;
2542: }
2543: #if defined(PETSC_USE_INFO)
2544: if (ai[am] != 0) {
2545: PetscReal af = fact->info.fill_ratio_needed;
2546: PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)fill, (double)af));
2547: PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af));
2548: PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g) for best performance.\n", (double)af));
2549: } else {
2550: PetscCall(PetscInfo(A, "Empty matrix\n"));
2551: }
2552: #endif
2553: fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ;
2554: PetscFunctionReturn(PETSC_SUCCESS);
2555: }
2557: #if 0
2558: // unused
2559: static PetscErrorCode MatICCFactorSymbolic_SeqAIJ_inplace(Mat fact, Mat A, IS perm, const MatFactorInfo *info)
2560: {
2561: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
2562: Mat_SeqSBAIJ *b;
2563: PetscBool perm_identity, missing;
2564: PetscInt reallocs = 0, i, *ai = a->i, *aj = a->j, am = A->rmap->n, *ui, *udiag;
2565: const PetscInt *rip, *riip;
2566: PetscInt jmin, jmax, nzk, k, j, *jl, prow, *il, nextprow;
2567: PetscInt nlnk, *lnk, *lnk_lvl = NULL, d;
2568: PetscInt ncols, ncols_upper, *cols, *ajtmp, *uj, **uj_ptr, **uj_lvl_ptr;
2569: PetscReal fill = info->fill, levels = info->levels;
2570: PetscFreeSpaceList free_space = NULL, current_space = NULL;
2571: PetscFreeSpaceList free_space_lvl = NULL, current_space_lvl = NULL;
2572: PetscBT lnkbt;
2573: IS iperm;
2575: PetscFunctionBegin;
2576: PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT, A->rmap->n, A->cmap->n);
2577: PetscCall(MatMissingDiagonal(A, &missing, &d));
2578: PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, d);
2579: PetscCall(ISIdentity(perm, &perm_identity));
2580: PetscCall(ISInvertPermutation(perm, PETSC_DECIDE, &iperm));
2582: PetscCall(PetscMalloc1(am + 1, &ui));
2583: PetscCall(PetscMalloc1(am + 1, &udiag));
2584: ui[0] = 0;
2586: /* ICC(0) without matrix ordering: simply copies fill pattern */
2587: if (!levels && perm_identity) {
2588: for (i = 0; i < am; i++) {
2589: ui[i + 1] = ui[i] + ai[i + 1] - a->diag[i];
2590: udiag[i] = ui[i];
2591: }
2592: PetscCall(PetscMalloc1(ui[am] + 1, &uj));
2593: cols = uj;
2594: for (i = 0; i < am; i++) {
2595: aj = a->j + a->diag[i];
2596: ncols = ui[i + 1] - ui[i];
2597: for (j = 0; j < ncols; j++) *cols++ = *aj++;
2598: }
2599: } else { /* case: levels>0 || (levels=0 && !perm_identity) */
2600: PetscCall(ISGetIndices(iperm, &riip));
2601: PetscCall(ISGetIndices(perm, &rip));
2603: /* initialization */
2604: PetscCall(PetscMalloc1(am + 1, &ajtmp));
2606: /* jl: linked list for storing indices of the pivot rows
2607: il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
2608: PetscCall(PetscMalloc4(am, &uj_ptr, am, &uj_lvl_ptr, am, &jl, am, &il));
2609: for (i = 0; i < am; i++) {
2610: jl[i] = am;
2611: il[i] = 0;
2612: }
2614: /* create and initialize a linked list for storing column indices of the active row k */
2615: nlnk = am + 1;
2616: PetscCall(PetscIncompleteLLCreate(am, am, nlnk, lnk, lnk_lvl, lnkbt));
2618: /* initial FreeSpace size is fill*(ai[am]+1) */
2619: PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, ai[am] + 1), &free_space));
2620: current_space = free_space;
2621: PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, ai[am] + 1), &free_space_lvl));
2622: current_space_lvl = free_space_lvl;
2624: for (k = 0; k < am; k++) { /* for each active row k */
2625: /* initialize lnk by the column indices of row rip[k] of A */
2626: nzk = 0;
2627: ncols = ai[rip[k] + 1] - ai[rip[k]];
2628: PetscCheck(ncols, PETSC_COMM_SELF, PETSC_ERR_MAT_CH_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, rip[k], k);
2629: ncols_upper = 0;
2630: for (j = 0; j < ncols; j++) {
2631: i = *(aj + ai[rip[k]] + j); /* unpermuted column index */
2632: if (riip[i] >= k) { /* only take upper triangular entry */
2633: ajtmp[ncols_upper] = i;
2634: ncols_upper++;
2635: }
2636: }
2637: PetscCall(PetscIncompleteLLInit(ncols_upper, ajtmp, am, riip, &nlnk, lnk, lnk_lvl, lnkbt));
2638: nzk += nlnk;
2640: /* update lnk by computing fill-in for each pivot row to be merged in */
2641: prow = jl[k]; /* 1st pivot row */
2643: while (prow < k) {
2644: nextprow = jl[prow];
2646: /* merge prow into k-th row */
2647: jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */
2648: jmax = ui[prow + 1];
2649: ncols = jmax - jmin;
2650: i = jmin - ui[prow];
2651: cols = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */
2652: uj = uj_lvl_ptr[prow] + i; /* levels of cols */
2653: j = *(uj - 1);
2654: PetscCall(PetscICCLLAddSorted(ncols, cols, levels, uj, am, &nlnk, lnk, lnk_lvl, lnkbt, j));
2655: nzk += nlnk;
2657: /* update il and jl for prow */
2658: if (jmin < jmax) {
2659: il[prow] = jmin;
2660: j = *cols;
2661: jl[prow] = jl[j];
2662: jl[j] = prow;
2663: }
2664: prow = nextprow;
2665: }
2667: /* if free space is not available, make more free space */
2668: if (current_space->local_remaining < nzk) {
2669: i = am - k + 1; /* num of unfactored rows */
2670: i = PetscIntMultTruncate(i, PetscMin(nzk, i - 1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
2671: PetscCall(PetscFreeSpaceGet(i, ¤t_space));
2672: PetscCall(PetscFreeSpaceGet(i, ¤t_space_lvl));
2673: reallocs++;
2674: }
2676: /* copy data into free_space and free_space_lvl, then initialize lnk */
2677: PetscCheck(nzk, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Empty row %" PetscInt_FMT " in ICC matrix factor", k);
2678: PetscCall(PetscIncompleteLLClean(am, am, nzk, lnk, lnk_lvl, current_space->array, current_space_lvl->array, lnkbt));
2680: /* add the k-th row into il and jl */
2681: if (nzk > 1) {
2682: i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
2683: jl[k] = jl[i];
2684: jl[i] = k;
2685: il[k] = ui[k] + 1;
2686: }
2687: uj_ptr[k] = current_space->array;
2688: uj_lvl_ptr[k] = current_space_lvl->array;
2690: current_space->array += nzk;
2691: current_space->local_used += nzk;
2692: current_space->local_remaining -= nzk;
2694: current_space_lvl->array += nzk;
2695: current_space_lvl->local_used += nzk;
2696: current_space_lvl->local_remaining -= nzk;
2698: ui[k + 1] = ui[k] + nzk;
2699: }
2701: #if defined(PETSC_USE_INFO)
2702: if (ai[am] != 0) {
2703: PetscReal af = (PetscReal)ui[am] / ((PetscReal)ai[am]);
2704: PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)fill, (double)af));
2705: PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af));
2706: PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g) for best performance.\n", (double)af));
2707: } else {
2708: PetscCall(PetscInfo(A, "Empty matrix\n"));
2709: }
2710: #endif
2712: PetscCall(ISRestoreIndices(perm, &rip));
2713: PetscCall(ISRestoreIndices(iperm, &riip));
2714: PetscCall(PetscFree4(uj_ptr, uj_lvl_ptr, jl, il));
2715: PetscCall(PetscFree(ajtmp));
2717: /* destroy list of free space and other temporary array(s) */
2718: PetscCall(PetscMalloc1(ui[am] + 1, &uj));
2719: PetscCall(PetscFreeSpaceContiguous(&free_space, uj));
2720: PetscCall(PetscIncompleteLLDestroy(lnk, lnkbt));
2721: PetscCall(PetscFreeSpaceDestroy(free_space_lvl));
2723: } /* end of case: levels>0 || (levels=0 && !perm_identity) */
2725: /* put together the new matrix in MATSEQSBAIJ format */
2727: b = (Mat_SeqSBAIJ *)fact->data;
2728: b->singlemalloc = PETSC_FALSE;
2730: PetscCall(PetscMalloc1(ui[am] + 1, &b->a));
2732: b->j = uj;
2733: b->i = ui;
2734: b->diag = udiag;
2735: b->free_diag = PETSC_TRUE;
2736: b->ilen = NULL;
2737: b->imax = NULL;
2738: b->row = perm;
2739: b->col = perm;
2741: PetscCall(PetscObjectReference((PetscObject)perm));
2742: PetscCall(PetscObjectReference((PetscObject)perm));
2744: b->icol = iperm;
2745: b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
2746: PetscCall(PetscMalloc1(am + 1, &b->solve_work));
2747: b->maxnz = b->nz = ui[am];
2748: b->free_a = PETSC_TRUE;
2749: b->free_ij = PETSC_TRUE;
2751: fact->info.factor_mallocs = reallocs;
2752: fact->info.fill_ratio_given = fill;
2753: if (ai[am] != 0) {
2754: fact->info.fill_ratio_needed = ((PetscReal)ui[am]) / ((PetscReal)ai[am]);
2755: } else {
2756: fact->info.fill_ratio_needed = 0.0;
2757: }
2758: fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ_inplace;
2759: PetscFunctionReturn(PETSC_SUCCESS);
2760: }
2761: #endif
2763: PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ(Mat fact, Mat A, IS perm, const MatFactorInfo *info)
2764: {
2765: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
2766: Mat_SeqSBAIJ *b;
2767: PetscBool perm_identity, missing;
2768: PetscReal fill = info->fill;
2769: const PetscInt *rip, *riip;
2770: PetscInt i, am = A->rmap->n, *ai = a->i, *aj = a->j, reallocs = 0, prow;
2771: PetscInt *jl, jmin, jmax, nzk, *ui, k, j, *il, nextprow;
2772: PetscInt nlnk, *lnk, ncols, ncols_upper, *cols, *uj, **ui_ptr, *uj_ptr, *udiag;
2773: PetscFreeSpaceList free_space = NULL, current_space = NULL;
2774: PetscBT lnkbt;
2775: IS iperm;
2777: PetscFunctionBegin;
2778: PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT, A->rmap->n, A->cmap->n);
2779: PetscCall(MatMissingDiagonal(A, &missing, &i));
2780: PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i);
2782: /* check whether perm is the identity mapping */
2783: PetscCall(ISIdentity(perm, &perm_identity));
2784: PetscCall(ISInvertPermutation(perm, PETSC_DECIDE, &iperm));
2785: PetscCall(ISGetIndices(iperm, &riip));
2786: PetscCall(ISGetIndices(perm, &rip));
2788: /* initialization */
2789: PetscCall(PetscMalloc1(am + 1, &ui));
2790: PetscCall(PetscMalloc1(am + 1, &udiag));
2791: ui[0] = 0;
2793: /* jl: linked list for storing indices of the pivot rows
2794: il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
2795: PetscCall(PetscMalloc4(am, &ui_ptr, am, &jl, am, &il, am, &cols));
2796: for (i = 0; i < am; i++) {
2797: jl[i] = am;
2798: il[i] = 0;
2799: }
2801: /* create and initialize a linked list for storing column indices of the active row k */
2802: nlnk = am + 1;
2803: PetscCall(PetscLLCreate(am, am, nlnk, lnk, lnkbt));
2805: /* initial FreeSpace size is fill*(ai[am]+am)/2 */
2806: PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, (ai[am] + am) / 2), &free_space));
2807: current_space = free_space;
2809: for (k = 0; k < am; k++) { /* for each active row k */
2810: /* initialize lnk by the column indices of row rip[k] of A */
2811: nzk = 0;
2812: ncols = ai[rip[k] + 1] - ai[rip[k]];
2813: PetscCheck(ncols, PETSC_COMM_SELF, PETSC_ERR_MAT_CH_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, rip[k], k);
2814: ncols_upper = 0;
2815: for (j = 0; j < ncols; j++) {
2816: i = riip[*(aj + ai[rip[k]] + j)];
2817: if (i >= k) { /* only take upper triangular entry */
2818: cols[ncols_upper] = i;
2819: ncols_upper++;
2820: }
2821: }
2822: PetscCall(PetscLLAdd(ncols_upper, cols, am, &nlnk, lnk, lnkbt));
2823: nzk += nlnk;
2825: /* update lnk by computing fill-in for each pivot row to be merged in */
2826: prow = jl[k]; /* 1st pivot row */
2828: while (prow < k) {
2829: nextprow = jl[prow];
2830: /* merge prow into k-th row */
2831: jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */
2832: jmax = ui[prow + 1];
2833: ncols = jmax - jmin;
2834: uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */
2835: PetscCall(PetscLLAddSorted(ncols, uj_ptr, am, &nlnk, lnk, lnkbt));
2836: nzk += nlnk;
2838: /* update il and jl for prow */
2839: if (jmin < jmax) {
2840: il[prow] = jmin;
2841: j = *uj_ptr;
2842: jl[prow] = jl[j];
2843: jl[j] = prow;
2844: }
2845: prow = nextprow;
2846: }
2848: /* if free space is not available, make more free space */
2849: if (current_space->local_remaining < nzk) {
2850: i = am - k + 1; /* num of unfactored rows */
2851: i = PetscIntMultTruncate(i, PetscMin(nzk, i - 1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
2852: PetscCall(PetscFreeSpaceGet(i, ¤t_space));
2853: reallocs++;
2854: }
2856: /* copy data into free space, then initialize lnk */
2857: PetscCall(PetscLLClean(am, am, nzk, lnk, current_space->array, lnkbt));
2859: /* add the k-th row into il and jl */
2860: if (nzk > 1) {
2861: i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
2862: jl[k] = jl[i];
2863: jl[i] = k;
2864: il[k] = ui[k] + 1;
2865: }
2866: ui_ptr[k] = current_space->array;
2868: current_space->array += nzk;
2869: current_space->local_used += nzk;
2870: current_space->local_remaining -= nzk;
2872: ui[k + 1] = ui[k] + nzk;
2873: }
2875: PetscCall(ISRestoreIndices(perm, &rip));
2876: PetscCall(ISRestoreIndices(iperm, &riip));
2877: PetscCall(PetscFree4(ui_ptr, jl, il, cols));
2879: /* copy free_space into uj and free free_space; set ui, uj, udiag in new datastructure; */
2880: PetscCall(PetscMalloc1(ui[am] + 1, &uj));
2881: PetscCall(PetscFreeSpaceContiguous_Cholesky(&free_space, uj, am, ui, udiag)); /* store matrix factor */
2882: PetscCall(PetscLLDestroy(lnk, lnkbt));
2884: /* put together the new matrix in MATSEQSBAIJ format */
2886: b = (Mat_SeqSBAIJ *)fact->data;
2887: b->singlemalloc = PETSC_FALSE;
2888: b->free_a = PETSC_TRUE;
2889: b->free_ij = PETSC_TRUE;
2891: PetscCall(PetscMalloc1(ui[am] + 1, &b->a));
2893: b->j = uj;
2894: b->i = ui;
2895: b->diag = udiag;
2896: b->free_diag = PETSC_TRUE;
2897: b->ilen = NULL;
2898: b->imax = NULL;
2899: b->row = perm;
2900: b->col = perm;
2902: PetscCall(PetscObjectReference((PetscObject)perm));
2903: PetscCall(PetscObjectReference((PetscObject)perm));
2905: b->icol = iperm;
2906: b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
2908: PetscCall(PetscMalloc1(am + 1, &b->solve_work));
2910: b->maxnz = b->nz = ui[am];
2912: fact->info.factor_mallocs = reallocs;
2913: fact->info.fill_ratio_given = fill;
2914: if (ai[am] != 0) {
2915: /* nonzeros in lower triangular part of A (including diagonals) = (ai[am]+am)/2 */
2916: fact->info.fill_ratio_needed = ((PetscReal)2 * ui[am]) / (ai[am] + am);
2917: } else {
2918: fact->info.fill_ratio_needed = 0.0;
2919: }
2920: #if defined(PETSC_USE_INFO)
2921: if (ai[am] != 0) {
2922: PetscReal af = fact->info.fill_ratio_needed;
2923: PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)fill, (double)af));
2924: PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af));
2925: PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g) for best performance.\n", (double)af));
2926: } else {
2927: PetscCall(PetscInfo(A, "Empty matrix\n"));
2928: }
2929: #endif
2930: fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ;
2931: PetscFunctionReturn(PETSC_SUCCESS);
2932: }
2934: #if 0
2935: // unused
2936: static PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ_inplace(Mat fact, Mat A, IS perm, const MatFactorInfo *info)
2937: {
2938: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
2939: Mat_SeqSBAIJ *b;
2940: PetscBool perm_identity, missing;
2941: PetscReal fill = info->fill;
2942: const PetscInt *rip, *riip;
2943: PetscInt i, am = A->rmap->n, *ai = a->i, *aj = a->j, reallocs = 0, prow;
2944: PetscInt *jl, jmin, jmax, nzk, *ui, k, j, *il, nextprow;
2945: PetscInt nlnk, *lnk, ncols, ncols_upper, *cols, *uj, **ui_ptr, *uj_ptr;
2946: PetscFreeSpaceList free_space = NULL, current_space = NULL;
2947: PetscBT lnkbt;
2948: IS iperm;
2950: PetscFunctionBegin;
2951: PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT, A->rmap->n, A->cmap->n);
2952: PetscCall(MatMissingDiagonal(A, &missing, &i));
2953: PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i);
2955: /* check whether perm is the identity mapping */
2956: PetscCall(ISIdentity(perm, &perm_identity));
2957: PetscCall(ISInvertPermutation(perm, PETSC_DECIDE, &iperm));
2958: PetscCall(ISGetIndices(iperm, &riip));
2959: PetscCall(ISGetIndices(perm, &rip));
2961: /* initialization */
2962: PetscCall(PetscMalloc1(am + 1, &ui));
2963: ui[0] = 0;
2965: /* jl: linked list for storing indices of the pivot rows
2966: il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
2967: PetscCall(PetscMalloc4(am, &ui_ptr, am, &jl, am, &il, am, &cols));
2968: for (i = 0; i < am; i++) {
2969: jl[i] = am;
2970: il[i] = 0;
2971: }
2973: /* create and initialize a linked list for storing column indices of the active row k */
2974: nlnk = am + 1;
2975: PetscCall(PetscLLCreate(am, am, nlnk, lnk, lnkbt));
2977: /* initial FreeSpace size is fill*(ai[am]+1) */
2978: PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, ai[am] + 1), &free_space));
2979: current_space = free_space;
2981: for (k = 0; k < am; k++) { /* for each active row k */
2982: /* initialize lnk by the column indices of row rip[k] of A */
2983: nzk = 0;
2984: ncols = ai[rip[k] + 1] - ai[rip[k]];
2985: PetscCheck(ncols, PETSC_COMM_SELF, PETSC_ERR_MAT_CH_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, rip[k], k);
2986: ncols_upper = 0;
2987: for (j = 0; j < ncols; j++) {
2988: i = riip[*(aj + ai[rip[k]] + j)];
2989: if (i >= k) { /* only take upper triangular entry */
2990: cols[ncols_upper] = i;
2991: ncols_upper++;
2992: }
2993: }
2994: PetscCall(PetscLLAdd(ncols_upper, cols, am, &nlnk, lnk, lnkbt));
2995: nzk += nlnk;
2997: /* update lnk by computing fill-in for each pivot row to be merged in */
2998: prow = jl[k]; /* 1st pivot row */
3000: while (prow < k) {
3001: nextprow = jl[prow];
3002: /* merge prow into k-th row */
3003: jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */
3004: jmax = ui[prow + 1];
3005: ncols = jmax - jmin;
3006: uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */
3007: PetscCall(PetscLLAddSorted(ncols, uj_ptr, am, &nlnk, lnk, lnkbt));
3008: nzk += nlnk;
3010: /* update il and jl for prow */
3011: if (jmin < jmax) {
3012: il[prow] = jmin;
3013: j = *uj_ptr;
3014: jl[prow] = jl[j];
3015: jl[j] = prow;
3016: }
3017: prow = nextprow;
3018: }
3020: /* if free space is not available, make more free space */
3021: if (current_space->local_remaining < nzk) {
3022: i = am - k + 1; /* num of unfactored rows */
3023: i = PetscMin(i * nzk, i * (i - 1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
3024: PetscCall(PetscFreeSpaceGet(i, ¤t_space));
3025: reallocs++;
3026: }
3028: /* copy data into free space, then initialize lnk */
3029: PetscCall(PetscLLClean(am, am, nzk, lnk, current_space->array, lnkbt));
3031: /* add the k-th row into il and jl */
3032: if (nzk - 1 > 0) {
3033: i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
3034: jl[k] = jl[i];
3035: jl[i] = k;
3036: il[k] = ui[k] + 1;
3037: }
3038: ui_ptr[k] = current_space->array;
3040: current_space->array += nzk;
3041: current_space->local_used += nzk;
3042: current_space->local_remaining -= nzk;
3044: ui[k + 1] = ui[k] + nzk;
3045: }
3047: #if defined(PETSC_USE_INFO)
3048: if (ai[am] != 0) {
3049: PetscReal af = (PetscReal)(ui[am]) / ((PetscReal)ai[am]);
3050: PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)fill, (double)af));
3051: PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af));
3052: PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g) for best performance.\n", (double)af));
3053: } else {
3054: PetscCall(PetscInfo(A, "Empty matrix\n"));
3055: }
3056: #endif
3058: PetscCall(ISRestoreIndices(perm, &rip));
3059: PetscCall(ISRestoreIndices(iperm, &riip));
3060: PetscCall(PetscFree4(ui_ptr, jl, il, cols));
3062: /* destroy list of free space and other temporary array(s) */
3063: PetscCall(PetscMalloc1(ui[am] + 1, &uj));
3064: PetscCall(PetscFreeSpaceContiguous(&free_space, uj));
3065: PetscCall(PetscLLDestroy(lnk, lnkbt));
3067: /* put together the new matrix in MATSEQSBAIJ format */
3069: b = (Mat_SeqSBAIJ *)fact->data;
3070: b->singlemalloc = PETSC_FALSE;
3071: b->free_a = PETSC_TRUE;
3072: b->free_ij = PETSC_TRUE;
3074: PetscCall(PetscMalloc1(ui[am] + 1, &b->a));
3076: b->j = uj;
3077: b->i = ui;
3078: b->diag = NULL;
3079: b->ilen = NULL;
3080: b->imax = NULL;
3081: b->row = perm;
3082: b->col = perm;
3084: PetscCall(PetscObjectReference((PetscObject)perm));
3085: PetscCall(PetscObjectReference((PetscObject)perm));
3087: b->icol = iperm;
3088: b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
3090: PetscCall(PetscMalloc1(am + 1, &b->solve_work));
3091: b->maxnz = b->nz = ui[am];
3093: fact->info.factor_mallocs = reallocs;
3094: fact->info.fill_ratio_given = fill;
3095: if (ai[am] != 0) {
3096: fact->info.fill_ratio_needed = ((PetscReal)ui[am]) / ((PetscReal)ai[am]);
3097: } else {
3098: fact->info.fill_ratio_needed = 0.0;
3099: }
3100: fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ_inplace;
3101: PetscFunctionReturn(PETSC_SUCCESS);
3102: }
3103: #endif
3105: PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering(Mat A, Vec bb, Vec xx)
3106: {
3107: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
3108: PetscInt n = A->rmap->n;
3109: const PetscInt *ai = a->i, *aj = a->j, *adiag = a->diag, *vi;
3110: PetscScalar *x, sum;
3111: const PetscScalar *b;
3112: const MatScalar *aa = a->a, *v;
3113: PetscInt i, nz;
3115: PetscFunctionBegin;
3116: if (!n) PetscFunctionReturn(PETSC_SUCCESS);
3118: PetscCall(VecGetArrayRead(bb, &b));
3119: PetscCall(VecGetArrayWrite(xx, &x));
3121: /* forward solve the lower triangular */
3122: x[0] = b[0];
3123: v = aa;
3124: vi = aj;
3125: for (i = 1; i < n; i++) {
3126: nz = ai[i + 1] - ai[i];
3127: sum = b[i];
3128: PetscSparseDenseMinusDot(sum, x, v, vi, nz);
3129: v += nz;
3130: vi += nz;
3131: x[i] = sum;
3132: }
3134: /* backward solve the upper triangular */
3135: for (i = n - 1; i >= 0; i--) {
3136: v = aa + adiag[i + 1] + 1;
3137: vi = aj + adiag[i + 1] + 1;
3138: nz = adiag[i] - adiag[i + 1] - 1;
3139: sum = x[i];
3140: PetscSparseDenseMinusDot(sum, x, v, vi, nz);
3141: x[i] = sum * v[nz]; /* x[i]=aa[adiag[i]]*sum; v++; */
3142: }
3144: PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
3145: PetscCall(VecRestoreArrayRead(bb, &b));
3146: PetscCall(VecRestoreArrayWrite(xx, &x));
3147: PetscFunctionReturn(PETSC_SUCCESS);
3148: }
3150: PetscErrorCode MatSolve_SeqAIJ(Mat A, Vec bb, Vec xx)
3151: {
3152: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
3153: IS iscol = a->col, isrow = a->row;
3154: PetscInt i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j, *adiag = a->diag, nz;
3155: const PetscInt *rout, *cout, *r, *c;
3156: PetscScalar *x, *tmp, sum;
3157: const PetscScalar *b;
3158: const MatScalar *aa = a->a, *v;
3160: PetscFunctionBegin;
3161: if (!n) PetscFunctionReturn(PETSC_SUCCESS);
3163: PetscCall(VecGetArrayRead(bb, &b));
3164: PetscCall(VecGetArrayWrite(xx, &x));
3165: tmp = a->solve_work;
3167: PetscCall(ISGetIndices(isrow, &rout));
3168: r = rout;
3169: PetscCall(ISGetIndices(iscol, &cout));
3170: c = cout;
3172: /* forward solve the lower triangular */
3173: tmp[0] = b[r[0]];
3174: v = aa;
3175: vi = aj;
3176: for (i = 1; i < n; i++) {
3177: nz = ai[i + 1] - ai[i];
3178: sum = b[r[i]];
3179: PetscSparseDenseMinusDot(sum, tmp, v, vi, nz);
3180: tmp[i] = sum;
3181: v += nz;
3182: vi += nz;
3183: }
3185: /* backward solve the upper triangular */
3186: for (i = n - 1; i >= 0; i--) {
3187: v = aa + adiag[i + 1] + 1;
3188: vi = aj + adiag[i + 1] + 1;
3189: nz = adiag[i] - adiag[i + 1] - 1;
3190: sum = tmp[i];
3191: PetscSparseDenseMinusDot(sum, tmp, v, vi, nz);
3192: x[c[i]] = tmp[i] = sum * v[nz]; /* v[nz] = aa[adiag[i]] */
3193: }
3195: PetscCall(ISRestoreIndices(isrow, &rout));
3196: PetscCall(ISRestoreIndices(iscol, &cout));
3197: PetscCall(VecRestoreArrayRead(bb, &b));
3198: PetscCall(VecRestoreArrayWrite(xx, &x));
3199: PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
3200: PetscFunctionReturn(PETSC_SUCCESS);
3201: }
3203: #if 0
3204: // unused
3205: /*
3206: This will get a new name and become a variant of MatILUFactor_SeqAIJ() there is no longer separate functions in the matrix function table for dt factors
3207: */
3208: static PetscErrorCode MatILUDTFactor_SeqAIJ(Mat A, IS isrow, IS iscol, const MatFactorInfo *info, Mat *fact)
3209: {
3210: Mat B = *fact;
3211: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data, *b;
3212: IS isicol;
3213: const PetscInt *r, *ic;
3214: PetscInt i, n = A->rmap->n, *ai = a->i, *aj = a->j, *ajtmp, *adiag;
3215: PetscInt *bi, *bj, *bdiag, *bdiag_rev;
3216: PetscInt row, nzi, nzi_bl, nzi_bu, *im, nzi_al, nzi_au;
3217: PetscInt nlnk, *lnk;
3218: PetscBT lnkbt;
3219: PetscBool row_identity, icol_identity;
3220: MatScalar *aatmp, *pv, *batmp, *ba, *rtmp, *pc, multiplier, *vtmp, diag_tmp;
3221: const PetscInt *ics;
3222: PetscInt j, nz, *pj, *bjtmp, k, ncut, *jtmp;
3223: PetscReal dt = info->dt, shift = info->shiftamount;
3224: PetscInt dtcount = (PetscInt)info->dtcount, nnz_max;
3225: PetscBool missing;
3227: PetscFunctionBegin;
3228: if (dt == (PetscReal)PETSC_DEFAULT) dt = 0.005;
3229: if (dtcount == PETSC_DEFAULT) dtcount = (PetscInt)(1.5 * a->rmax);
3231: /* symbolic factorization, can be reused */
3232: PetscCall(MatMissingDiagonal(A, &missing, &i));
3233: PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i);
3234: adiag = a->diag;
3236: PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol));
3238: /* bdiag is location of diagonal in factor */
3239: PetscCall(PetscMalloc1(n + 1, &bdiag)); /* becomes b->diag */
3240: PetscCall(PetscMalloc1(n + 1, &bdiag_rev)); /* temporary */
3242: /* allocate row pointers bi */
3243: PetscCall(PetscMalloc1(2 * n + 2, &bi));
3245: /* allocate bj and ba; max num of nonzero entries is (ai[n]+2*n*dtcount+2) */
3246: if (dtcount > n - 1) dtcount = n - 1; /* diagonal is excluded */
3247: nnz_max = ai[n] + 2 * n * dtcount + 2;
3249: PetscCall(PetscMalloc1(nnz_max + 1, &bj));
3250: PetscCall(PetscMalloc1(nnz_max + 1, &ba));
3252: /* put together the new matrix */
3253: PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(B, MAT_SKIP_ALLOCATION, NULL));
3254: b = (Mat_SeqAIJ *)B->data;
3256: b->free_a = PETSC_TRUE;
3257: b->free_ij = PETSC_TRUE;
3258: b->singlemalloc = PETSC_FALSE;
3260: b->a = ba;
3261: b->j = bj;
3262: b->i = bi;
3263: b->diag = bdiag;
3264: b->ilen = NULL;
3265: b->imax = NULL;
3266: b->row = isrow;
3267: b->col = iscol;
3268: PetscCall(PetscObjectReference((PetscObject)isrow));
3269: PetscCall(PetscObjectReference((PetscObject)iscol));
3270: b->icol = isicol;
3272: PetscCall(PetscMalloc1(n + 1, &b->solve_work));
3273: b->maxnz = nnz_max;
3275: B->factortype = MAT_FACTOR_ILUDT;
3276: B->info.factor_mallocs = 0;
3277: B->info.fill_ratio_given = ((PetscReal)nnz_max) / ((PetscReal)ai[n]);
3278: /* end of symbolic factorization */
3280: PetscCall(ISGetIndices(isrow, &r));
3281: PetscCall(ISGetIndices(isicol, &ic));
3282: ics = ic;
3284: /* linked list for storing column indices of the active row */
3285: nlnk = n + 1;
3286: PetscCall(PetscLLCreate(n, n, nlnk, lnk, lnkbt));
3288: /* im: used by PetscLLAddSortedLU(); jtmp: working array for column indices of active row */
3289: PetscCall(PetscMalloc2(n, &im, n, &jtmp));
3290: /* rtmp, vtmp: working arrays for sparse and contiguous row entries of active row */
3291: PetscCall(PetscMalloc2(n, &rtmp, n, &vtmp));
3292: PetscCall(PetscArrayzero(rtmp, n));
3294: bi[0] = 0;
3295: bdiag[0] = nnz_max - 1; /* location of diag[0] in factor B */
3296: bdiag_rev[n] = bdiag[0];
3297: bi[2 * n + 1] = bdiag[0] + 1; /* endof bj and ba array */
3298: for (i = 0; i < n; i++) {
3299: /* copy initial fill into linked list */
3300: nzi = ai[r[i] + 1] - ai[r[i]];
3301: PetscCheck(nzi, PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, r[i], i);
3302: nzi_al = adiag[r[i]] - ai[r[i]];
3303: nzi_au = ai[r[i] + 1] - adiag[r[i]] - 1;
3304: ajtmp = aj + ai[r[i]];
3305: PetscCall(PetscLLAddPerm(nzi, ajtmp, ic, n, &nlnk, lnk, lnkbt));
3307: /* load in initial (unfactored row) */
3308: aatmp = a->a + ai[r[i]];
3309: for (j = 0; j < nzi; j++) rtmp[ics[*ajtmp++]] = *aatmp++;
3311: /* add pivot rows into linked list */
3312: row = lnk[n];
3313: while (row < i) {
3314: nzi_bl = bi[row + 1] - bi[row] + 1;
3315: bjtmp = bj + bdiag[row + 1] + 1; /* points to 1st column next to the diagonal in U */
3316: PetscCall(PetscLLAddSortedLU(bjtmp, row, &nlnk, lnk, lnkbt, i, nzi_bl, im));
3317: nzi += nlnk;
3318: row = lnk[row];
3319: }
3321: /* copy data from lnk into jtmp, then initialize lnk */
3322: PetscCall(PetscLLClean(n, n, nzi, lnk, jtmp, lnkbt));
3324: /* numerical factorization */
3325: bjtmp = jtmp;
3326: row = *bjtmp++; /* 1st pivot row */
3327: while (row < i) {
3328: pc = rtmp + row;
3329: pv = ba + bdiag[row]; /* 1./(diag of the pivot row) */
3330: multiplier = (*pc) * (*pv);
3331: *pc = multiplier;
3332: if (PetscAbsScalar(*pc) > dt) { /* apply tolerance dropping rule */
3333: pj = bj + bdiag[row + 1] + 1; /* point to 1st entry of U(row,:) */
3334: pv = ba + bdiag[row + 1] + 1;
3335: nz = bdiag[row] - bdiag[row + 1] - 1; /* num of entries in U(row,:), excluding diagonal */
3336: for (j = 0; j < nz; j++) rtmp[*pj++] -= multiplier * (*pv++);
3337: PetscCall(PetscLogFlops(1 + 2.0 * nz));
3338: }
3339: row = *bjtmp++;
3340: }
3342: /* copy sparse rtmp into contiguous vtmp; separate L and U part */
3343: diag_tmp = rtmp[i]; /* save diagonal value - may not needed?? */
3344: nzi_bl = 0;
3345: j = 0;
3346: while (jtmp[j] < i) { /* Note: jtmp is sorted */
3347: vtmp[j] = rtmp[jtmp[j]];
3348: rtmp[jtmp[j]] = 0.0;
3349: nzi_bl++;
3350: j++;
3351: }
3352: nzi_bu = nzi - nzi_bl - 1;
3353: while (j < nzi) {
3354: vtmp[j] = rtmp[jtmp[j]];
3355: rtmp[jtmp[j]] = 0.0;
3356: j++;
3357: }
3359: bjtmp = bj + bi[i];
3360: batmp = ba + bi[i];
3361: /* apply level dropping rule to L part */
3362: ncut = nzi_al + dtcount;
3363: if (ncut < nzi_bl) {
3364: PetscCall(PetscSortSplit(ncut, nzi_bl, vtmp, jtmp));
3365: PetscCall(PetscSortIntWithScalarArray(ncut, jtmp, vtmp));
3366: } else {
3367: ncut = nzi_bl;
3368: }
3369: for (j = 0; j < ncut; j++) {
3370: bjtmp[j] = jtmp[j];
3371: batmp[j] = vtmp[j];
3372: }
3373: bi[i + 1] = bi[i] + ncut;
3374: nzi = ncut + 1;
3376: /* apply level dropping rule to U part */
3377: ncut = nzi_au + dtcount;
3378: if (ncut < nzi_bu) {
3379: PetscCall(PetscSortSplit(ncut, nzi_bu, vtmp + nzi_bl + 1, jtmp + nzi_bl + 1));
3380: PetscCall(PetscSortIntWithScalarArray(ncut, jtmp + nzi_bl + 1, vtmp + nzi_bl + 1));
3381: } else {
3382: ncut = nzi_bu;
3383: }
3384: nzi += ncut;
3386: /* mark bdiagonal */
3387: bdiag[i + 1] = bdiag[i] - (ncut + 1);
3388: bdiag_rev[n - i - 1] = bdiag[i + 1];
3389: bi[2 * n - i] = bi[2 * n - i + 1] - (ncut + 1);
3390: bjtmp = bj + bdiag[i];
3391: batmp = ba + bdiag[i];
3392: *bjtmp = i;
3393: *batmp = diag_tmp; /* rtmp[i]; */
3394: if (*batmp == 0.0) *batmp = dt + shift;
3395: *batmp = 1.0 / (*batmp); /* invert diagonal entries for simpler triangular solves */
3397: bjtmp = bj + bdiag[i + 1] + 1;
3398: batmp = ba + bdiag[i + 1] + 1;
3399: for (k = 0; k < ncut; k++) {
3400: bjtmp[k] = jtmp[nzi_bl + 1 + k];
3401: batmp[k] = vtmp[nzi_bl + 1 + k];
3402: }
3404: im[i] = nzi; /* used by PetscLLAddSortedLU() */
3405: } /* for (i=0; i<n; i++) */
3406: PetscCheck(bi[n] < bdiag[n], PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "end of L array %" PetscInt_FMT " cannot >= the beginning of U array %" PetscInt_FMT, bi[n], bdiag[n]);
3408: PetscCall(ISRestoreIndices(isrow, &r));
3409: PetscCall(ISRestoreIndices(isicol, &ic));
3411: PetscCall(PetscLLDestroy(lnk, lnkbt));
3412: PetscCall(PetscFree2(im, jtmp));
3413: PetscCall(PetscFree2(rtmp, vtmp));
3414: PetscCall(PetscFree(bdiag_rev));
3416: PetscCall(PetscLogFlops(B->cmap->n));
3417: b->maxnz = b->nz = bi[n] + bdiag[0] - bdiag[n];
3419: PetscCall(ISIdentity(isrow, &row_identity));
3420: PetscCall(ISIdentity(isicol, &icol_identity));
3421: if (row_identity && icol_identity) {
3422: B->ops->solve = MatSolve_SeqAIJ_NaturalOrdering;
3423: } else {
3424: B->ops->solve = MatSolve_SeqAIJ;
3425: }
3427: B->ops->solveadd = NULL;
3428: B->ops->solvetranspose = NULL;
3429: B->ops->solvetransposeadd = NULL;
3430: B->ops->matsolve = NULL;
3431: B->assembled = PETSC_TRUE;
3432: B->preallocated = PETSC_TRUE;
3433: PetscFunctionReturn(PETSC_SUCCESS);
3434: }
3436: /* a wrapper of MatILUDTFactor_SeqAIJ() */
3437: /*
3438: This will get a new name and become a variant of MatILUFactor_SeqAIJ() there is no longer separate functions in the matrix function table for dt factors
3439: */
3441: static PetscErrorCode MatILUDTFactorSymbolic_SeqAIJ(Mat fact, Mat A, IS row, IS col, const MatFactorInfo *info)
3442: {
3443: PetscFunctionBegin;
3444: PetscCall(MatILUDTFactor_SeqAIJ(A, row, col, info, &fact));
3445: PetscFunctionReturn(PETSC_SUCCESS);
3446: }
3448: /*
3449: same as MatLUFactorNumeric_SeqAIJ(), except using contiguous array matrix factors
3450: - intend to replace existing MatLUFactorNumeric_SeqAIJ()
3451: */
3452: /*
3453: This will get a new name and become a variant of MatILUFactor_SeqAIJ() there is no longer separate functions in the matrix function table for dt factors
3454: */
3456: static PetscErrorCode MatILUDTFactorNumeric_SeqAIJ(Mat fact, Mat A, const MatFactorInfo *info)
3457: {
3458: Mat C = fact;
3459: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data, *b = (Mat_SeqAIJ *)C->data;
3460: IS isrow = b->row, isicol = b->icol;
3461: const PetscInt *r, *ic, *ics;
3462: PetscInt i, j, k, n = A->rmap->n, *ai = a->i, *aj = a->j, *bi = b->i, *bj = b->j;
3463: PetscInt *ajtmp, *bjtmp, nz, nzl, nzu, row, *bdiag = b->diag, *pj;
3464: MatScalar *rtmp, *pc, multiplier, *v, *pv, *aa = a->a;
3465: PetscReal dt = info->dt, shift = info->shiftamount;
3466: PetscBool row_identity, col_identity;
3468: PetscFunctionBegin;
3469: PetscCall(ISGetIndices(isrow, &r));
3470: PetscCall(ISGetIndices(isicol, &ic));
3471: PetscCall(PetscMalloc1(n + 1, &rtmp));
3472: ics = ic;
3474: for (i = 0; i < n; i++) {
3475: /* initialize rtmp array */
3476: nzl = bi[i + 1] - bi[i]; /* num of nonzeros in L(i,:) */
3477: bjtmp = bj + bi[i];
3478: for (j = 0; j < nzl; j++) rtmp[*bjtmp++] = 0.0;
3479: rtmp[i] = 0.0;
3480: nzu = bdiag[i] - bdiag[i + 1]; /* num of nonzeros in U(i,:) */
3481: bjtmp = bj + bdiag[i + 1] + 1;
3482: for (j = 0; j < nzu; j++) rtmp[*bjtmp++] = 0.0;
3484: /* load in initial unfactored row of A */
3485: nz = ai[r[i] + 1] - ai[r[i]];
3486: ajtmp = aj + ai[r[i]];
3487: v = aa + ai[r[i]];
3488: for (j = 0; j < nz; j++) rtmp[ics[*ajtmp++]] = v[j];
3490: /* numerical factorization */
3491: bjtmp = bj + bi[i]; /* point to 1st entry of L(i,:) */
3492: nzl = bi[i + 1] - bi[i]; /* num of entries in L(i,:) */
3493: k = 0;
3494: while (k < nzl) {
3495: row = *bjtmp++;
3496: pc = rtmp + row;
3497: pv = b->a + bdiag[row]; /* 1./(diag of the pivot row) */
3498: multiplier = (*pc) * (*pv);
3499: *pc = multiplier;
3500: if (PetscAbsScalar(multiplier) > dt) {
3501: pj = bj + bdiag[row + 1] + 1; /* point to 1st entry of U(row,:) */
3502: pv = b->a + bdiag[row + 1] + 1;
3503: nz = bdiag[row] - bdiag[row + 1] - 1; /* num of entries in U(row,:), excluding diagonal */
3504: for (j = 0; j < nz; j++) rtmp[*pj++] -= multiplier * (*pv++);
3505: PetscCall(PetscLogFlops(1 + 2.0 * nz));
3506: }
3507: k++;
3508: }
3510: /* finished row so stick it into b->a */
3511: /* L-part */
3512: pv = b->a + bi[i];
3513: pj = bj + bi[i];
3514: nzl = bi[i + 1] - bi[i];
3515: for (j = 0; j < nzl; j++) pv[j] = rtmp[pj[j]];
3517: /* diagonal: invert diagonal entries for simpler triangular solves */
3518: if (rtmp[i] == 0.0) rtmp[i] = dt + shift;
3519: b->a[bdiag[i]] = 1.0 / rtmp[i];
3521: /* U-part */
3522: pv = b->a + bdiag[i + 1] + 1;
3523: pj = bj + bdiag[i + 1] + 1;
3524: nzu = bdiag[i] - bdiag[i + 1] - 1;
3525: for (j = 0; j < nzu; j++) pv[j] = rtmp[pj[j]];
3526: }
3528: PetscCall(PetscFree(rtmp));
3529: PetscCall(ISRestoreIndices(isicol, &ic));
3530: PetscCall(ISRestoreIndices(isrow, &r));
3532: PetscCall(ISIdentity(isrow, &row_identity));
3533: PetscCall(ISIdentity(isicol, &col_identity));
3534: if (row_identity && col_identity) {
3535: C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering;
3536: } else {
3537: C->ops->solve = MatSolve_SeqAIJ;
3538: }
3539: C->ops->solveadd = NULL;
3540: C->ops->solvetranspose = NULL;
3541: C->ops->solvetransposeadd = NULL;
3542: C->ops->matsolve = NULL;
3543: C->assembled = PETSC_TRUE;
3544: C->preallocated = PETSC_TRUE;
3546: PetscCall(PetscLogFlops(C->cmap->n));
3547: PetscFunctionReturn(PETSC_SUCCESS);
3548: }
3549: #endif