Actual source code: aijfact.c

  1: #include <../src/mat/impls/aij/seq/aij.h>
  2: #include <../src/mat/impls/sbaij/seq/sbaij.h>
  3: #include <petscbt.h>
  4: #include <../src/mat/utils/freespace.h>

  6: /*
  7:       Computes an ordering to get most of the large numerical values in the lower triangular part of the matrix

  9:       This code does not work and is not called anywhere. It would be registered with MatOrderingRegisterAll()
 10: */
 11: #if 0
 12: PetscErrorCode MatGetOrdering_Flow_SeqAIJ(Mat mat, MatOrderingType type, IS *irow, IS *icol)
 13: {
 14:   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)mat->data;
 15:   PetscInt           i, j, jj, k, kk, n = mat->rmap->n, current = 0, newcurrent = 0, *order;
 16:   const PetscInt    *ai = a->i, *aj = a->j;
 17:   const PetscScalar *aa = a->a;
 18:   PetscBool         *done;
 19:   PetscReal          best, past = 0, future;

 21:   PetscFunctionBegin;
 22:   /* pick initial row */
 23:   best = -1;
 24:   for (i = 0; i < n; i++) {
 25:     future = 0.0;
 26:     for (j = ai[i]; j < ai[i + 1]; j++) {
 27:       if (aj[j] != i) future += PetscAbsScalar(aa[j]);
 28:       else past = PetscAbsScalar(aa[j]);
 29:     }
 30:     if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */
 31:     if (past / future > best) {
 32:       best    = past / future;
 33:       current = i;
 34:     }
 35:   }

 37:   PetscCall(PetscMalloc1(n, &done));
 38:   PetscCall(PetscArrayzero(done, n));
 39:   PetscCall(PetscMalloc1(n, &order));
 40:   order[0] = current;
 41:   for (i = 0; i < n - 1; i++) {
 42:     done[current] = PETSC_TRUE;
 43:     best          = -1;
 44:     /* loop over all neighbors of current pivot */
 45:     for (j = ai[current]; j < ai[current + 1]; j++) {
 46:       jj = aj[j];
 47:       if (done[jj]) continue;
 48:       /* loop over columns of potential next row computing weights for below and above diagonal */
 49:       past = future = 0.0;
 50:       for (k = ai[jj]; k < ai[jj + 1]; k++) {
 51:         kk = aj[k];
 52:         if (done[kk]) past += PetscAbsScalar(aa[k]);
 53:         else if (kk != jj) future += PetscAbsScalar(aa[k]);
 54:       }
 55:       if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */
 56:       if (past / future > best) {
 57:         best       = past / future;
 58:         newcurrent = jj;
 59:       }
 60:     }
 61:     if (best == -1) { /* no neighbors to select from so select best of all that remain */
 62:       best = -1;
 63:       for (k = 0; k < n; k++) {
 64:         if (done[k]) continue;
 65:         future = 0.0;
 66:         past   = 0.0;
 67:         for (j = ai[k]; j < ai[k + 1]; j++) {
 68:           kk = aj[j];
 69:           if (done[kk]) past += PetscAbsScalar(aa[j]);
 70:           else if (kk != k) future += PetscAbsScalar(aa[j]);
 71:         }
 72:         if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */
 73:         if (past / future > best) {
 74:           best       = past / future;
 75:           newcurrent = k;
 76:         }
 77:       }
 78:     }
 79:     PetscCheck(current != newcurrent, PETSC_COMM_SELF, PETSC_ERR_PLIB, "newcurrent cannot be current");
 80:     current      = newcurrent;
 81:     order[i + 1] = current;
 82:   }
 83:   PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n, order, PETSC_COPY_VALUES, irow));
 84:   *icol = *irow;
 85:   PetscCall(PetscObjectReference((PetscObject)*irow));
 86:   PetscCall(PetscFree(done));
 87:   PetscCall(PetscFree(order));
 88:   PetscFunctionReturn(PETSC_SUCCESS);
 89: }
 90: #endif

 92: static PetscErrorCode MatFactorGetSolverType_petsc(Mat A, MatSolverType *type)
 93: {
 94:   PetscFunctionBegin;
 95:   *type = MATSOLVERPETSC;
 96:   PetscFunctionReturn(PETSC_SUCCESS);
 97: }

 99: PETSC_INTERN PetscErrorCode MatGetFactor_seqaij_petsc(Mat A, MatFactorType ftype, Mat *B)
100: {
101:   PetscInt n = A->rmap->n;

103:   PetscFunctionBegin;
104: #if defined(PETSC_USE_COMPLEX)
105:   if ((ftype == MAT_FACTOR_CHOLESKY || ftype == MAT_FACTOR_ICC) && A->hermitian == PETSC_BOOL3_TRUE && A->symmetric != PETSC_BOOL3_TRUE) {
106:     PetscCall(PetscInfo(A, "Hermitian MAT_FACTOR_CHOLESKY or MAT_FACTOR_ICC are not supported. Use MAT_FACTOR_LU instead.\n"));
107:     *B = NULL;
108:     PetscFunctionReturn(PETSC_SUCCESS);
109:   }
110: #endif

112:   PetscCall(MatCreate(PetscObjectComm((PetscObject)A), B));
113:   PetscCall(MatSetSizes(*B, n, n, n, n));
114:   if (ftype == MAT_FACTOR_LU || ftype == MAT_FACTOR_ILU || ftype == MAT_FACTOR_ILUDT) {
115:     PetscCall(MatSetType(*B, MATSEQAIJ));

117:     (*B)->ops->ilufactorsymbolic = MatILUFactorSymbolic_SeqAIJ;
118:     (*B)->ops->lufactorsymbolic  = MatLUFactorSymbolic_SeqAIJ;

120:     PetscCall(MatSetBlockSizesFromMats(*B, A, A));
121:     PetscCall(PetscStrallocpy(MATORDERINGND, (char **)&(*B)->preferredordering[MAT_FACTOR_LU]));
122:     PetscCall(PetscStrallocpy(MATORDERINGNATURAL, (char **)&(*B)->preferredordering[MAT_FACTOR_ILU]));
123:     PetscCall(PetscStrallocpy(MATORDERINGNATURAL, (char **)&(*B)->preferredordering[MAT_FACTOR_ILUDT]));
124:   } else if (ftype == MAT_FACTOR_CHOLESKY || ftype == MAT_FACTOR_ICC) {
125:     PetscCall(MatSetType(*B, MATSEQSBAIJ));
126:     PetscCall(MatSeqSBAIJSetPreallocation(*B, 1, MAT_SKIP_ALLOCATION, NULL));

128:     (*B)->ops->iccfactorsymbolic      = MatICCFactorSymbolic_SeqAIJ;
129:     (*B)->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_SeqAIJ;
130:     PetscCall(PetscStrallocpy(MATORDERINGND, (char **)&(*B)->preferredordering[MAT_FACTOR_CHOLESKY]));
131:     PetscCall(PetscStrallocpy(MATORDERINGNATURAL, (char **)&(*B)->preferredordering[MAT_FACTOR_ICC]));
132:   } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Factor type not supported");
133:   (*B)->factortype = ftype;

135:   PetscCall(PetscFree((*B)->solvertype));
136:   PetscCall(PetscStrallocpy(MATSOLVERPETSC, &(*B)->solvertype));
137:   (*B)->canuseordering = PETSC_TRUE;
138:   PetscCall(PetscObjectComposeFunction((PetscObject)*B, "MatFactorGetSolverType_C", MatFactorGetSolverType_petsc));
139:   PetscFunctionReturn(PETSC_SUCCESS);
140: }

142: #if 0
143: // currently unused
144: static PetscErrorCode MatLUFactorSymbolic_SeqAIJ_inplace(Mat B, Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
145: {
146:   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)A->data, *b;
147:   IS                 isicol;
148:   const PetscInt    *r, *ic;
149:   PetscInt           i, n = A->rmap->n, *ai = a->i, *aj = a->j;
150:   PetscInt          *bi, *bj, *ajtmp;
151:   PetscInt          *bdiag, row, nnz, nzi, reallocs = 0, nzbd, *im;
152:   PetscReal          f;
153:   PetscInt           nlnk, *lnk, k, **bi_ptr;
154:   PetscFreeSpaceList free_space = NULL, current_space = NULL;
155:   PetscBT            lnkbt;
156:   PetscBool          missing;

158:   PetscFunctionBegin;
159:   PetscCheck(A->rmap->N == A->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "matrix must be square");
160:   PetscCall(MatMissingDiagonal(A, &missing, &i));
161:   PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i);

163:   PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol));
164:   PetscCall(ISGetIndices(isrow, &r));
165:   PetscCall(ISGetIndices(isicol, &ic));

167:   /* get new row pointers */
168:   PetscCall(PetscMalloc1(n + 1, &bi));
169:   bi[0] = 0;

171:   /* bdiag is location of diagonal in factor */
172:   PetscCall(PetscMalloc1(n + 1, &bdiag));
173:   bdiag[0] = 0;

175:   /* linked list for storing column indices of the active row */
176:   nlnk = n + 1;
177:   PetscCall(PetscLLCreate(n, n, nlnk, lnk, lnkbt));

179:   PetscCall(PetscMalloc2(n + 1, &bi_ptr, n + 1, &im));

181:   /* initial FreeSpace size is f*(ai[n]+1) */
182:   f = info->fill;
183:   if (n == 1) f = 1; /* prevent failure in corner case of 1x1 matrix with fill < 0.5 */
184:   PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space));
185:   current_space = free_space;

187:   for (i = 0; i < n; i++) {
188:     /* copy previous fill into linked list */
189:     nzi   = 0;
190:     nnz   = ai[r[i] + 1] - ai[r[i]];
191:     ajtmp = aj + ai[r[i]];
192:     PetscCall(PetscLLAddPerm(nnz, ajtmp, ic, n, &nlnk, lnk, lnkbt));
193:     nzi += nlnk;

195:     /* add pivot rows into linked list */
196:     row = lnk[n];
197:     while (row < i) {
198:       nzbd  = bdiag[row] - bi[row] + 1; /* num of entries in the row with column index <= row */
199:       ajtmp = bi_ptr[row] + nzbd;       /* points to the entry next to the diagonal */
200:       PetscCall(PetscLLAddSortedLU(ajtmp, row, &nlnk, lnk, lnkbt, i, nzbd, im));
201:       nzi += nlnk;
202:       row = lnk[row];
203:     }
204:     bi[i + 1] = bi[i] + nzi;
205:     im[i]     = nzi;

207:     /* mark bdiag */
208:     nzbd = 0;
209:     nnz  = nzi;
210:     k    = lnk[n];
211:     while (nnz-- && k < i) {
212:       nzbd++;
213:       k = lnk[k];
214:     }
215:     bdiag[i] = bi[i] + nzbd;

217:     /* if free space is not available, make more free space */
218:     if (current_space->local_remaining < nzi) {
219:       nnz = PetscIntMultTruncate(n - i, nzi); /* estimated and max additional space needed */
220:       PetscCall(PetscFreeSpaceGet(nnz, &current_space));
221:       reallocs++;
222:     }

224:     /* copy data into free space, then initialize lnk */
225:     PetscCall(PetscLLClean(n, n, nzi, lnk, current_space->array, lnkbt));

227:     bi_ptr[i] = current_space->array;
228:     current_space->array += nzi;
229:     current_space->local_used += nzi;
230:     current_space->local_remaining -= nzi;
231:   }
232:   #if defined(PETSC_USE_INFO)
233:   if (ai[n] != 0) {
234:     PetscReal af = ((PetscReal)bi[n]) / ((PetscReal)ai[n]);
235:     PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)f, (double)af));
236:     PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af));
237:     PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g);\n", (double)af));
238:     PetscCall(PetscInfo(A, "for best performance.\n"));
239:   } else {
240:     PetscCall(PetscInfo(A, "Empty matrix\n"));
241:   }
242:   #endif

244:   PetscCall(ISRestoreIndices(isrow, &r));
245:   PetscCall(ISRestoreIndices(isicol, &ic));

247:   /* destroy list of free space and other temporary array(s) */
248:   PetscCall(PetscMalloc1(bi[n] + 1, &bj));
249:   PetscCall(PetscFreeSpaceContiguous(&free_space, bj));
250:   PetscCall(PetscLLDestroy(lnk, lnkbt));
251:   PetscCall(PetscFree2(bi_ptr, im));

253:   /* put together the new matrix */
254:   PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(B, MAT_SKIP_ALLOCATION, NULL));
255:   b = (Mat_SeqAIJ *)(B)->data;

257:   b->free_a       = PETSC_TRUE;
258:   b->free_ij      = PETSC_TRUE;
259:   b->singlemalloc = PETSC_FALSE;

261:   PetscCall(PetscMalloc1(bi[n] + 1, &b->a));
262:   b->j    = bj;
263:   b->i    = bi;
264:   b->diag = bdiag;
265:   b->ilen = NULL;
266:   b->imax = NULL;
267:   b->row  = isrow;
268:   b->col  = iscol;
269:   PetscCall(PetscObjectReference((PetscObject)isrow));
270:   PetscCall(PetscObjectReference((PetscObject)iscol));
271:   b->icol = isicol;
272:   PetscCall(PetscMalloc1(n + 1, &b->solve_work));

274:   /* In b structure:  Free imax, ilen, old a, old j.  Allocate solve_work, new a, new j */
275:   b->maxnz = b->nz = bi[n];

277:   (B)->factortype            = MAT_FACTOR_LU;
278:   (B)->info.factor_mallocs   = reallocs;
279:   (B)->info.fill_ratio_given = f;

281:   if (ai[n]) {
282:     (B)->info.fill_ratio_needed = ((PetscReal)bi[n]) / ((PetscReal)ai[n]);
283:   } else {
284:     (B)->info.fill_ratio_needed = 0.0;
285:   }
286:   (B)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_inplace;
287:   if (a->inode.size) (B)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode_inplace;
288:   PetscFunctionReturn(PETSC_SUCCESS);
289: }
290: #endif

292: PetscErrorCode MatLUFactorSymbolic_SeqAIJ(Mat B, Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
293: {
294:   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)A->data, *b;
295:   IS                 isicol;
296:   const PetscInt    *r, *ic, *ai = a->i, *aj = a->j, *ajtmp;
297:   PetscInt           i, n = A->rmap->n;
298:   PetscInt          *bi, *bj;
299:   PetscInt          *bdiag, row, nnz, nzi, reallocs = 0, nzbd, *im;
300:   PetscReal          f;
301:   PetscInt           nlnk, *lnk, k, **bi_ptr;
302:   PetscFreeSpaceList free_space = NULL, current_space = NULL;
303:   PetscBT            lnkbt;
304:   PetscBool          missing;

306:   PetscFunctionBegin;
307:   PetscCheck(A->rmap->N == A->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "matrix must be square");
308:   PetscCall(MatMissingDiagonal(A, &missing, &i));
309:   PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i);

311:   PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol));
312:   PetscCall(ISGetIndices(isrow, &r));
313:   PetscCall(ISGetIndices(isicol, &ic));

315:   /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */
316:   PetscCall(PetscMalloc1(n + 1, &bi));
317:   PetscCall(PetscMalloc1(n + 1, &bdiag));
318:   bi[0] = bdiag[0] = 0;

320:   /* linked list for storing column indices of the active row */
321:   nlnk = n + 1;
322:   PetscCall(PetscLLCreate(n, n, nlnk, lnk, lnkbt));

324:   PetscCall(PetscMalloc2(n + 1, &bi_ptr, n + 1, &im));

326:   /* initial FreeSpace size is f*(ai[n]+1) */
327:   f = info->fill;
328:   if (n == 1) f = 1; /* prevent failure in corner case of 1x1 matrix with fill < 0.5 */
329:   PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space));
330:   current_space = free_space;

332:   for (i = 0; i < n; i++) {
333:     /* copy previous fill into linked list */
334:     nzi   = 0;
335:     nnz   = ai[r[i] + 1] - ai[r[i]];
336:     ajtmp = aj + ai[r[i]];
337:     PetscCall(PetscLLAddPerm(nnz, ajtmp, ic, n, &nlnk, lnk, lnkbt));
338:     nzi += nlnk;

340:     /* add pivot rows into linked list */
341:     row = lnk[n];
342:     while (row < i) {
343:       nzbd  = bdiag[row] + 1;     /* num of entries in the row with column index <= row */
344:       ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */
345:       PetscCall(PetscLLAddSortedLU(ajtmp, row, &nlnk, lnk, lnkbt, i, nzbd, im));
346:       nzi += nlnk;
347:       row = lnk[row];
348:     }
349:     bi[i + 1] = bi[i] + nzi;
350:     im[i]     = nzi;

352:     /* mark bdiag */
353:     nzbd = 0;
354:     nnz  = nzi;
355:     k    = lnk[n];
356:     while (nnz-- && k < i) {
357:       nzbd++;
358:       k = lnk[k];
359:     }
360:     bdiag[i] = nzbd; /* note: bdiag[i] = nnzL as input for PetscFreeSpaceContiguous_LU() */

362:     /* if free space is not available, make more free space */
363:     if (current_space->local_remaining < nzi) {
364:       /* estimated additional space needed */
365:       nnz = PetscIntMultTruncate(2, PetscIntMultTruncate(n - 1, nzi));
366:       PetscCall(PetscFreeSpaceGet(nnz, &current_space));
367:       reallocs++;
368:     }

370:     /* copy data into free space, then initialize lnk */
371:     PetscCall(PetscLLClean(n, n, nzi, lnk, current_space->array, lnkbt));

373:     bi_ptr[i] = current_space->array;
374:     current_space->array += nzi;
375:     current_space->local_used += nzi;
376:     current_space->local_remaining -= nzi;
377:   }

379:   PetscCall(ISRestoreIndices(isrow, &r));
380:   PetscCall(ISRestoreIndices(isicol, &ic));

382:   /*   copy free_space into bj and free free_space; set bi, bj, bdiag in new datastructure; */
383:   PetscCall(PetscMalloc1(bi[n] + 1, &bj));
384:   PetscCall(PetscFreeSpaceContiguous_LU(&free_space, bj, n, bi, bdiag));
385:   PetscCall(PetscLLDestroy(lnk, lnkbt));
386:   PetscCall(PetscFree2(bi_ptr, im));

388:   /* put together the new matrix */
389:   PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(B, MAT_SKIP_ALLOCATION, NULL));
390:   b = (Mat_SeqAIJ *)(B)->data;

392:   b->free_a       = PETSC_TRUE;
393:   b->free_ij      = PETSC_TRUE;
394:   b->singlemalloc = PETSC_FALSE;

396:   PetscCall(PetscMalloc1(bdiag[0] + 1, &b->a));

398:   b->j    = bj;
399:   b->i    = bi;
400:   b->diag = bdiag;
401:   b->ilen = NULL;
402:   b->imax = NULL;
403:   b->row  = isrow;
404:   b->col  = iscol;
405:   PetscCall(PetscObjectReference((PetscObject)isrow));
406:   PetscCall(PetscObjectReference((PetscObject)iscol));
407:   b->icol = isicol;
408:   PetscCall(PetscMalloc1(n + 1, &b->solve_work));

410:   /* In b structure:  Free imax, ilen, old a, old j.  Allocate solve_work, new a, new j */
411:   b->maxnz = b->nz = bdiag[0] + 1;

413:   B->factortype            = MAT_FACTOR_LU;
414:   B->info.factor_mallocs   = reallocs;
415:   B->info.fill_ratio_given = f;

417:   if (ai[n]) {
418:     B->info.fill_ratio_needed = ((PetscReal)(bdiag[0] + 1)) / ((PetscReal)ai[n]);
419:   } else {
420:     B->info.fill_ratio_needed = 0.0;
421:   }
422: #if defined(PETSC_USE_INFO)
423:   if (ai[n] != 0) {
424:     PetscReal af = B->info.fill_ratio_needed;
425:     PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)f, (double)af));
426:     PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af));
427:     PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g);\n", (double)af));
428:     PetscCall(PetscInfo(A, "for best performance.\n"));
429:   } else {
430:     PetscCall(PetscInfo(A, "Empty matrix\n"));
431:   }
432: #endif
433:   B->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ;
434:   if (a->inode.size) B->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode;
435:   PetscCall(MatSeqAIJCheckInode_FactorLU(B));
436:   PetscFunctionReturn(PETSC_SUCCESS);
437: }

439: /*
440:     Trouble in factorization, should we dump the original matrix?
441: */
442: PetscErrorCode MatFactorDumpMatrix(Mat A)
443: {
444:   PetscBool flg = PETSC_FALSE;

446:   PetscFunctionBegin;
447:   PetscCall(PetscOptionsGetBool(((PetscObject)A)->options, NULL, "-mat_factor_dump_on_error", &flg, NULL));
448:   if (flg) {
449:     PetscViewer viewer;
450:     char        filename[PETSC_MAX_PATH_LEN];

452:     PetscCall(PetscSNPrintf(filename, PETSC_MAX_PATH_LEN, "matrix_factor_error.%d", PetscGlobalRank));
453:     PetscCall(PetscViewerBinaryOpen(PetscObjectComm((PetscObject)A), filename, FILE_MODE_WRITE, &viewer));
454:     PetscCall(MatView(A, viewer));
455:     PetscCall(PetscViewerDestroy(&viewer));
456:   }
457:   PetscFunctionReturn(PETSC_SUCCESS);
458: }

460: PetscErrorCode MatLUFactorNumeric_SeqAIJ(Mat B, Mat A, const MatFactorInfo *info)
461: {
462:   Mat              C = B;
463:   Mat_SeqAIJ      *a = (Mat_SeqAIJ *)A->data, *b = (Mat_SeqAIJ *)C->data;
464:   IS               isrow = b->row, isicol = b->icol;
465:   const PetscInt  *r, *ic, *ics;
466:   const PetscInt   n = A->rmap->n, *ai = a->i, *aj = a->j, *bi = b->i, *bj = b->j, *bdiag = b->diag;
467:   PetscInt         i, j, k, nz, nzL, row, *pj;
468:   const PetscInt  *ajtmp, *bjtmp;
469:   MatScalar       *rtmp, *pc, multiplier, *pv;
470:   const MatScalar *aa = a->a, *v;
471:   PetscBool        row_identity, col_identity;
472:   FactorShiftCtx   sctx;
473:   const PetscInt  *ddiag;
474:   PetscReal        rs;
475:   MatScalar        d;

477:   PetscFunctionBegin;
478:   /* MatPivotSetUp(): initialize shift context sctx */
479:   PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx)));

481:   if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
482:     ddiag          = a->diag;
483:     sctx.shift_top = info->zeropivot;
484:     for (i = 0; i < n; i++) {
485:       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
486:       d  = (aa)[ddiag[i]];
487:       rs = -PetscAbsScalar(d) - PetscRealPart(d);
488:       v  = aa + ai[i];
489:       nz = ai[i + 1] - ai[i];
490:       for (j = 0; j < nz; j++) rs += PetscAbsScalar(v[j]);
491:       if (rs > sctx.shift_top) sctx.shift_top = rs;
492:     }
493:     sctx.shift_top *= 1.1;
494:     sctx.nshift_max = 5;
495:     sctx.shift_lo   = 0.;
496:     sctx.shift_hi   = 1.;
497:   }

499:   PetscCall(ISGetIndices(isrow, &r));
500:   PetscCall(ISGetIndices(isicol, &ic));
501:   PetscCall(PetscMalloc1(n + 1, &rtmp));
502:   ics = ic;

504:   do {
505:     sctx.newshift = PETSC_FALSE;
506:     for (i = 0; i < n; i++) {
507:       /* zero rtmp */
508:       /* L part */
509:       nz    = bi[i + 1] - bi[i];
510:       bjtmp = bj + bi[i];
511:       for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0;

513:       /* U part */
514:       nz    = bdiag[i] - bdiag[i + 1];
515:       bjtmp = bj + bdiag[i + 1] + 1;
516:       for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0;

518:       /* load in initial (unfactored row) */
519:       nz    = ai[r[i] + 1] - ai[r[i]];
520:       ajtmp = aj + ai[r[i]];
521:       v     = aa + ai[r[i]];
522:       for (j = 0; j < nz; j++) rtmp[ics[ajtmp[j]]] = v[j];
523:       /* ZeropivotApply() */
524:       rtmp[i] += sctx.shift_amount; /* shift the diagonal of the matrix */

526:       /* elimination */
527:       bjtmp = bj + bi[i];
528:       row   = *bjtmp++;
529:       nzL   = bi[i + 1] - bi[i];
530:       for (k = 0; k < nzL; k++) {
531:         pc = rtmp + row;
532:         if (*pc != 0.0) {
533:           pv         = b->a + bdiag[row];
534:           multiplier = *pc * (*pv);
535:           *pc        = multiplier;

537:           pj = b->j + bdiag[row + 1] + 1; /* beginning of U(row,:) */
538:           pv = b->a + bdiag[row + 1] + 1;
539:           nz = bdiag[row] - bdiag[row + 1] - 1; /* num of entries in U(row,:) excluding diag */

541:           for (j = 0; j < nz; j++) rtmp[pj[j]] -= multiplier * pv[j];
542:           PetscCall(PetscLogFlops(1 + 2.0 * nz));
543:         }
544:         row = *bjtmp++;
545:       }

547:       /* finished row so stick it into b->a */
548:       rs = 0.0;
549:       /* L part */
550:       pv = b->a + bi[i];
551:       pj = b->j + bi[i];
552:       nz = bi[i + 1] - bi[i];
553:       for (j = 0; j < nz; j++) {
554:         pv[j] = rtmp[pj[j]];
555:         rs += PetscAbsScalar(pv[j]);
556:       }

558:       /* U part */
559:       pv = b->a + bdiag[i + 1] + 1;
560:       pj = b->j + bdiag[i + 1] + 1;
561:       nz = bdiag[i] - bdiag[i + 1] - 1;
562:       for (j = 0; j < nz; j++) {
563:         pv[j] = rtmp[pj[j]];
564:         rs += PetscAbsScalar(pv[j]);
565:       }

567:       sctx.rs = rs;
568:       sctx.pv = rtmp[i];
569:       PetscCall(MatPivotCheck(B, A, info, &sctx, i));
570:       if (sctx.newshift) break; /* break for-loop */
571:       rtmp[i] = sctx.pv;        /* sctx.pv might be updated in the case of MAT_SHIFT_INBLOCKS */

573:       /* Mark diagonal and invert diagonal for simpler triangular solves */
574:       pv  = b->a + bdiag[i];
575:       *pv = 1.0 / rtmp[i];

577:     } /* endof for (i=0; i<n; i++) { */

579:     /* MatPivotRefine() */
580:     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction > 0 && sctx.nshift < sctx.nshift_max) {
581:       /*
582:        * if no shift in this attempt & shifting & started shifting & can refine,
583:        * then try lower shift
584:        */
585:       sctx.shift_hi       = sctx.shift_fraction;
586:       sctx.shift_fraction = (sctx.shift_hi + sctx.shift_lo) / 2.;
587:       sctx.shift_amount   = sctx.shift_fraction * sctx.shift_top;
588:       sctx.newshift       = PETSC_TRUE;
589:       sctx.nshift++;
590:     }
591:   } while (sctx.newshift);

593:   PetscCall(PetscFree(rtmp));
594:   PetscCall(ISRestoreIndices(isicol, &ic));
595:   PetscCall(ISRestoreIndices(isrow, &r));

597:   PetscCall(ISIdentity(isrow, &row_identity));
598:   PetscCall(ISIdentity(isicol, &col_identity));
599:   if (b->inode.size) {
600:     C->ops->solve = MatSolve_SeqAIJ_Inode;
601:   } else if (row_identity && col_identity) {
602:     C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering;
603:   } else {
604:     C->ops->solve = MatSolve_SeqAIJ;
605:   }
606:   C->ops->solveadd          = MatSolveAdd_SeqAIJ;
607:   C->ops->solvetranspose    = MatSolveTranspose_SeqAIJ;
608:   C->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ;
609:   C->ops->matsolve          = MatMatSolve_SeqAIJ;
610:   C->assembled              = PETSC_TRUE;
611:   C->preallocated           = PETSC_TRUE;

613:   PetscCall(PetscLogFlops(C->cmap->n));

615:   /* MatShiftView(A,info,&sctx) */
616:   if (sctx.nshift) {
617:     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
618:       PetscCall(PetscInfo(A, "number of shift_pd tries %" PetscInt_FMT ", shift_amount %g, diagonal shifted up by %e fraction top_value %e\n", sctx.nshift, (double)sctx.shift_amount, (double)sctx.shift_fraction, (double)sctx.shift_top));
619:     } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
620:       PetscCall(PetscInfo(A, "number of shift_nz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount));
621:     } else if (info->shifttype == (PetscReal)MAT_SHIFT_INBLOCKS) {
622:       PetscCall(PetscInfo(A, "number of shift_inblocks applied %" PetscInt_FMT ", each shift_amount %g\n", sctx.nshift, (double)info->shiftamount));
623:     }
624:   }
625:   PetscFunctionReturn(PETSC_SUCCESS);
626: }

628: static PetscErrorCode MatMatSolve_SeqAIJ_inplace(Mat, Mat, Mat);
629: static PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering_inplace(Mat, Vec, Vec);
630: static PetscErrorCode MatSolveAdd_SeqAIJ_inplace(Mat, Vec, Vec, Vec);

632: PetscErrorCode MatLUFactorNumeric_SeqAIJ_inplace(Mat B, Mat A, const MatFactorInfo *info)
633: {
634:   Mat              C = B;
635:   Mat_SeqAIJ      *a = (Mat_SeqAIJ *)A->data, *b = (Mat_SeqAIJ *)C->data;
636:   IS               isrow = b->row, isicol = b->icol;
637:   const PetscInt  *r, *ic, *ics;
638:   PetscInt         nz, row, i, j, n = A->rmap->n, diag;
639:   const PetscInt  *ai = a->i, *aj = a->j, *bi = b->i, *bj = b->j;
640:   const PetscInt  *ajtmp, *bjtmp, *diag_offset = b->diag, *pj;
641:   MatScalar       *pv, *rtmp, *pc, multiplier, d;
642:   const MatScalar *v, *aa = a->a;
643:   PetscReal        rs = 0.0;
644:   FactorShiftCtx   sctx;
645:   const PetscInt  *ddiag;
646:   PetscBool        row_identity, col_identity;

648:   PetscFunctionBegin;
649:   /* MatPivotSetUp(): initialize shift context sctx */
650:   PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx)));

652:   if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
653:     ddiag          = a->diag;
654:     sctx.shift_top = info->zeropivot;
655:     for (i = 0; i < n; i++) {
656:       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
657:       d  = (aa)[ddiag[i]];
658:       rs = -PetscAbsScalar(d) - PetscRealPart(d);
659:       v  = aa + ai[i];
660:       nz = ai[i + 1] - ai[i];
661:       for (j = 0; j < nz; j++) rs += PetscAbsScalar(v[j]);
662:       if (rs > sctx.shift_top) sctx.shift_top = rs;
663:     }
664:     sctx.shift_top *= 1.1;
665:     sctx.nshift_max = 5;
666:     sctx.shift_lo   = 0.;
667:     sctx.shift_hi   = 1.;
668:   }

670:   PetscCall(ISGetIndices(isrow, &r));
671:   PetscCall(ISGetIndices(isicol, &ic));
672:   PetscCall(PetscMalloc1(n + 1, &rtmp));
673:   ics = ic;

675:   do {
676:     sctx.newshift = PETSC_FALSE;
677:     for (i = 0; i < n; i++) {
678:       nz    = bi[i + 1] - bi[i];
679:       bjtmp = bj + bi[i];
680:       for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0;

682:       /* load in initial (unfactored row) */
683:       nz    = ai[r[i] + 1] - ai[r[i]];
684:       ajtmp = aj + ai[r[i]];
685:       v     = aa + ai[r[i]];
686:       for (j = 0; j < nz; j++) rtmp[ics[ajtmp[j]]] = v[j];
687:       rtmp[ics[r[i]]] += sctx.shift_amount; /* shift the diagonal of the matrix */

689:       row = *bjtmp++;
690:       while (row < i) {
691:         pc = rtmp + row;
692:         if (*pc != 0.0) {
693:           pv         = b->a + diag_offset[row];
694:           pj         = b->j + diag_offset[row] + 1;
695:           multiplier = *pc / *pv++;
696:           *pc        = multiplier;
697:           nz         = bi[row + 1] - diag_offset[row] - 1;
698:           for (j = 0; j < nz; j++) rtmp[pj[j]] -= multiplier * pv[j];
699:           PetscCall(PetscLogFlops(1 + 2.0 * nz));
700:         }
701:         row = *bjtmp++;
702:       }
703:       /* finished row so stick it into b->a */
704:       pv   = b->a + bi[i];
705:       pj   = b->j + bi[i];
706:       nz   = bi[i + 1] - bi[i];
707:       diag = diag_offset[i] - bi[i];
708:       rs   = 0.0;
709:       for (j = 0; j < nz; j++) {
710:         pv[j] = rtmp[pj[j]];
711:         rs += PetscAbsScalar(pv[j]);
712:       }
713:       rs -= PetscAbsScalar(pv[diag]);

715:       sctx.rs = rs;
716:       sctx.pv = pv[diag];
717:       PetscCall(MatPivotCheck(B, A, info, &sctx, i));
718:       if (sctx.newshift) break;
719:       pv[diag] = sctx.pv;
720:     }

722:     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction > 0 && sctx.nshift < sctx.nshift_max) {
723:       /*
724:        * if no shift in this attempt & shifting & started shifting & can refine,
725:        * then try lower shift
726:        */
727:       sctx.shift_hi       = sctx.shift_fraction;
728:       sctx.shift_fraction = (sctx.shift_hi + sctx.shift_lo) / 2.;
729:       sctx.shift_amount   = sctx.shift_fraction * sctx.shift_top;
730:       sctx.newshift       = PETSC_TRUE;
731:       sctx.nshift++;
732:     }
733:   } while (sctx.newshift);

735:   /* invert diagonal entries for simpler triangular solves */
736:   for (i = 0; i < n; i++) b->a[diag_offset[i]] = 1.0 / b->a[diag_offset[i]];
737:   PetscCall(PetscFree(rtmp));
738:   PetscCall(ISRestoreIndices(isicol, &ic));
739:   PetscCall(ISRestoreIndices(isrow, &r));

741:   PetscCall(ISIdentity(isrow, &row_identity));
742:   PetscCall(ISIdentity(isicol, &col_identity));
743:   if (row_identity && col_identity) {
744:     C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering_inplace;
745:   } else {
746:     C->ops->solve = MatSolve_SeqAIJ_inplace;
747:   }
748:   C->ops->solveadd          = MatSolveAdd_SeqAIJ_inplace;
749:   C->ops->solvetranspose    = MatSolveTranspose_SeqAIJ_inplace;
750:   C->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ_inplace;
751:   C->ops->matsolve          = MatMatSolve_SeqAIJ_inplace;

753:   C->assembled    = PETSC_TRUE;
754:   C->preallocated = PETSC_TRUE;

756:   PetscCall(PetscLogFlops(C->cmap->n));
757:   if (sctx.nshift) {
758:     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
759:       PetscCall(PetscInfo(A, "number of shift_pd tries %" PetscInt_FMT ", shift_amount %g, diagonal shifted up by %e fraction top_value %e\n", sctx.nshift, (double)sctx.shift_amount, (double)sctx.shift_fraction, (double)sctx.shift_top));
760:     } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
761:       PetscCall(PetscInfo(A, "number of shift_nz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount));
762:     }
763:   }
764:   (C)->ops->solve          = MatSolve_SeqAIJ_inplace;
765:   (C)->ops->solvetranspose = MatSolveTranspose_SeqAIJ_inplace;

767:   PetscCall(MatSeqAIJCheckInode(C));
768:   PetscFunctionReturn(PETSC_SUCCESS);
769: }

771: static PetscErrorCode MatSolve_SeqAIJ_InplaceWithPerm(Mat, Vec, Vec);

773: /*
774:    This routine implements inplace ILU(0) with row or/and column permutations.
775:    Input:
776:      A - original matrix
777:    Output;
778:      A - a->i (rowptr) is same as original rowptr, but factored i-the row is stored in rowperm[i]
779:          a->j (col index) is permuted by the inverse of colperm, then sorted
780:          a->a reordered accordingly with a->j
781:          a->diag (ptr to diagonal elements) is updated.
782: */
783: PetscErrorCode MatLUFactorNumeric_SeqAIJ_InplaceWithPerm(Mat B, Mat A, const MatFactorInfo *info)
784: {
785:   Mat_SeqAIJ      *a     = (Mat_SeqAIJ *)A->data;
786:   IS               isrow = a->row, isicol = a->icol;
787:   const PetscInt  *r, *ic, *ics;
788:   PetscInt         i, j, n = A->rmap->n, *ai = a->i, *aj = a->j;
789:   PetscInt        *ajtmp, nz, row;
790:   PetscInt        *diag = a->diag, nbdiag, *pj;
791:   PetscScalar     *rtmp, *pc, multiplier, d;
792:   MatScalar       *pv, *v;
793:   PetscReal        rs;
794:   FactorShiftCtx   sctx;
795:   const MatScalar *aa = a->a, *vtmp;

797:   PetscFunctionBegin;
798:   PetscCheck(A == B, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "input and output matrix must have same address");

800:   /* MatPivotSetUp(): initialize shift context sctx */
801:   PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx)));

803:   if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
804:     const PetscInt *ddiag = a->diag;
805:     sctx.shift_top        = info->zeropivot;
806:     for (i = 0; i < n; i++) {
807:       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
808:       d    = (aa)[ddiag[i]];
809:       rs   = -PetscAbsScalar(d) - PetscRealPart(d);
810:       vtmp = aa + ai[i];
811:       nz   = ai[i + 1] - ai[i];
812:       for (j = 0; j < nz; j++) rs += PetscAbsScalar(vtmp[j]);
813:       if (rs > sctx.shift_top) sctx.shift_top = rs;
814:     }
815:     sctx.shift_top *= 1.1;
816:     sctx.nshift_max = 5;
817:     sctx.shift_lo   = 0.;
818:     sctx.shift_hi   = 1.;
819:   }

821:   PetscCall(ISGetIndices(isrow, &r));
822:   PetscCall(ISGetIndices(isicol, &ic));
823:   PetscCall(PetscMalloc1(n + 1, &rtmp));
824:   PetscCall(PetscArrayzero(rtmp, n + 1));
825:   ics = ic;

827: #if defined(MV)
828:   sctx.shift_top      = 0.;
829:   sctx.nshift_max     = 0;
830:   sctx.shift_lo       = 0.;
831:   sctx.shift_hi       = 0.;
832:   sctx.shift_fraction = 0.;

834:   if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
835:     sctx.shift_top = 0.;
836:     for (i = 0; i < n; i++) {
837:       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
838:       d  = (a->a)[diag[i]];
839:       rs = -PetscAbsScalar(d) - PetscRealPart(d);
840:       v  = a->a + ai[i];
841:       nz = ai[i + 1] - ai[i];
842:       for (j = 0; j < nz; j++) rs += PetscAbsScalar(v[j]);
843:       if (rs > sctx.shift_top) sctx.shift_top = rs;
844:     }
845:     if (sctx.shift_top < info->zeropivot) sctx.shift_top = info->zeropivot;
846:     sctx.shift_top *= 1.1;
847:     sctx.nshift_max = 5;
848:     sctx.shift_lo   = 0.;
849:     sctx.shift_hi   = 1.;
850:   }

852:   sctx.shift_amount = 0.;
853:   sctx.nshift       = 0;
854: #endif

856:   do {
857:     sctx.newshift = PETSC_FALSE;
858:     for (i = 0; i < n; i++) {
859:       /* load in initial unfactored row */
860:       nz    = ai[r[i] + 1] - ai[r[i]];
861:       ajtmp = aj + ai[r[i]];
862:       v     = a->a + ai[r[i]];
863:       /* sort permuted ajtmp and values v accordingly */
864:       for (j = 0; j < nz; j++) ajtmp[j] = ics[ajtmp[j]];
865:       PetscCall(PetscSortIntWithScalarArray(nz, ajtmp, v));

867:       diag[r[i]] = ai[r[i]];
868:       for (j = 0; j < nz; j++) {
869:         rtmp[ajtmp[j]] = v[j];
870:         if (ajtmp[j] < i) diag[r[i]]++; /* update a->diag */
871:       }
872:       rtmp[r[i]] += sctx.shift_amount; /* shift the diagonal of the matrix */

874:       row = *ajtmp++;
875:       while (row < i) {
876:         pc = rtmp + row;
877:         if (*pc != 0.0) {
878:           pv = a->a + diag[r[row]];
879:           pj = aj + diag[r[row]] + 1;

881:           multiplier = *pc / *pv++;
882:           *pc        = multiplier;
883:           nz         = ai[r[row] + 1] - diag[r[row]] - 1;
884:           for (j = 0; j < nz; j++) rtmp[pj[j]] -= multiplier * pv[j];
885:           PetscCall(PetscLogFlops(1 + 2.0 * nz));
886:         }
887:         row = *ajtmp++;
888:       }
889:       /* finished row so overwrite it onto a->a */
890:       pv     = a->a + ai[r[i]];
891:       pj     = aj + ai[r[i]];
892:       nz     = ai[r[i] + 1] - ai[r[i]];
893:       nbdiag = diag[r[i]] - ai[r[i]]; /* num of entries before the diagonal */

895:       rs = 0.0;
896:       for (j = 0; j < nz; j++) {
897:         pv[j] = rtmp[pj[j]];
898:         if (j != nbdiag) rs += PetscAbsScalar(pv[j]);
899:       }

901:       sctx.rs = rs;
902:       sctx.pv = pv[nbdiag];
903:       PetscCall(MatPivotCheck(B, A, info, &sctx, i));
904:       if (sctx.newshift) break;
905:       pv[nbdiag] = sctx.pv;
906:     }

908:     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction > 0 && sctx.nshift < sctx.nshift_max) {
909:       /*
910:        * if no shift in this attempt & shifting & started shifting & can refine,
911:        * then try lower shift
912:        */
913:       sctx.shift_hi       = sctx.shift_fraction;
914:       sctx.shift_fraction = (sctx.shift_hi + sctx.shift_lo) / 2.;
915:       sctx.shift_amount   = sctx.shift_fraction * sctx.shift_top;
916:       sctx.newshift       = PETSC_TRUE;
917:       sctx.nshift++;
918:     }
919:   } while (sctx.newshift);

921:   /* invert diagonal entries for simpler triangular solves */
922:   for (i = 0; i < n; i++) a->a[diag[r[i]]] = 1.0 / a->a[diag[r[i]]];

924:   PetscCall(PetscFree(rtmp));
925:   PetscCall(ISRestoreIndices(isicol, &ic));
926:   PetscCall(ISRestoreIndices(isrow, &r));

928:   A->ops->solve             = MatSolve_SeqAIJ_InplaceWithPerm;
929:   A->ops->solveadd          = MatSolveAdd_SeqAIJ_inplace;
930:   A->ops->solvetranspose    = MatSolveTranspose_SeqAIJ_inplace;
931:   A->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ_inplace;

933:   A->assembled    = PETSC_TRUE;
934:   A->preallocated = PETSC_TRUE;

936:   PetscCall(PetscLogFlops(A->cmap->n));
937:   if (sctx.nshift) {
938:     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
939:       PetscCall(PetscInfo(A, "number of shift_pd tries %" PetscInt_FMT ", shift_amount %g, diagonal shifted up by %e fraction top_value %e\n", sctx.nshift, (double)sctx.shift_amount, (double)sctx.shift_fraction, (double)sctx.shift_top));
940:     } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
941:       PetscCall(PetscInfo(A, "number of shift_nz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount));
942:     }
943:   }
944:   PetscFunctionReturn(PETSC_SUCCESS);
945: }

947: PetscErrorCode MatLUFactor_SeqAIJ(Mat A, IS row, IS col, const MatFactorInfo *info)
948: {
949:   Mat C;

951:   PetscFunctionBegin;
952:   PetscCall(MatGetFactor(A, MATSOLVERPETSC, MAT_FACTOR_LU, &C));
953:   PetscCall(MatLUFactorSymbolic(C, A, row, col, info));
954:   PetscCall(MatLUFactorNumeric(C, A, info));

956:   A->ops->solve          = C->ops->solve;
957:   A->ops->solvetranspose = C->ops->solvetranspose;

959:   PetscCall(MatHeaderMerge(A, &C));
960:   PetscFunctionReturn(PETSC_SUCCESS);
961: }

963: PetscErrorCode MatSolve_SeqAIJ_inplace(Mat A, Vec bb, Vec xx)
964: {
965:   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
966:   IS                 iscol = a->col, isrow = a->row;
967:   PetscInt           i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j;
968:   PetscInt           nz;
969:   const PetscInt    *rout, *cout, *r, *c;
970:   PetscScalar       *x, *tmp, *tmps, sum;
971:   const PetscScalar *b;
972:   const MatScalar   *aa = a->a, *v;

974:   PetscFunctionBegin;
975:   if (!n) PetscFunctionReturn(PETSC_SUCCESS);

977:   PetscCall(VecGetArrayRead(bb, &b));
978:   PetscCall(VecGetArrayWrite(xx, &x));
979:   tmp = a->solve_work;

981:   PetscCall(ISGetIndices(isrow, &rout));
982:   r = rout;
983:   PetscCall(ISGetIndices(iscol, &cout));
984:   c = cout + (n - 1);

986:   /* forward solve the lower triangular */
987:   tmp[0] = b[*r++];
988:   tmps   = tmp;
989:   for (i = 1; i < n; i++) {
990:     v   = aa + ai[i];
991:     vi  = aj + ai[i];
992:     nz  = a->diag[i] - ai[i];
993:     sum = b[*r++];
994:     PetscSparseDenseMinusDot(sum, tmps, v, vi, nz);
995:     tmp[i] = sum;
996:   }

998:   /* backward solve the upper triangular */
999:   for (i = n - 1; i >= 0; i--) {
1000:     v   = aa + a->diag[i] + 1;
1001:     vi  = aj + a->diag[i] + 1;
1002:     nz  = ai[i + 1] - a->diag[i] - 1;
1003:     sum = tmp[i];
1004:     PetscSparseDenseMinusDot(sum, tmps, v, vi, nz);
1005:     x[*c--] = tmp[i] = sum * aa[a->diag[i]];
1006:   }

1008:   PetscCall(ISRestoreIndices(isrow, &rout));
1009:   PetscCall(ISRestoreIndices(iscol, &cout));
1010:   PetscCall(VecRestoreArrayRead(bb, &b));
1011:   PetscCall(VecRestoreArrayWrite(xx, &x));
1012:   PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
1013:   PetscFunctionReturn(PETSC_SUCCESS);
1014: }

1016: static PetscErrorCode MatMatSolve_SeqAIJ_inplace(Mat A, Mat B, Mat X)
1017: {
1018:   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
1019:   IS                 iscol = a->col, isrow = a->row;
1020:   PetscInt           i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j;
1021:   PetscInt           nz, neq, ldb, ldx;
1022:   const PetscInt    *rout, *cout, *r, *c;
1023:   PetscScalar       *x, *tmp = a->solve_work, *tmps, sum;
1024:   const PetscScalar *b, *aa  = a->a, *v;
1025:   PetscBool          isdense;

1027:   PetscFunctionBegin;
1028:   if (!n) PetscFunctionReturn(PETSC_SUCCESS);
1029:   PetscCall(PetscObjectTypeCompare((PetscObject)B, MATSEQDENSE, &isdense));
1030:   PetscCheck(isdense, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "B matrix must be a SeqDense matrix");
1031:   if (X != B) {
1032:     PetscCall(PetscObjectTypeCompare((PetscObject)X, MATSEQDENSE, &isdense));
1033:     PetscCheck(isdense, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "X matrix must be a SeqDense matrix");
1034:   }
1035:   PetscCall(MatDenseGetArrayRead(B, &b));
1036:   PetscCall(MatDenseGetLDA(B, &ldb));
1037:   PetscCall(MatDenseGetArray(X, &x));
1038:   PetscCall(MatDenseGetLDA(X, &ldx));
1039:   PetscCall(ISGetIndices(isrow, &rout));
1040:   r = rout;
1041:   PetscCall(ISGetIndices(iscol, &cout));
1042:   c = cout;
1043:   for (neq = 0; neq < B->cmap->n; neq++) {
1044:     /* forward solve the lower triangular */
1045:     tmp[0] = b[r[0]];
1046:     tmps   = tmp;
1047:     for (i = 1; i < n; i++) {
1048:       v   = aa + ai[i];
1049:       vi  = aj + ai[i];
1050:       nz  = a->diag[i] - ai[i];
1051:       sum = b[r[i]];
1052:       PetscSparseDenseMinusDot(sum, tmps, v, vi, nz);
1053:       tmp[i] = sum;
1054:     }
1055:     /* backward solve the upper triangular */
1056:     for (i = n - 1; i >= 0; i--) {
1057:       v   = aa + a->diag[i] + 1;
1058:       vi  = aj + a->diag[i] + 1;
1059:       nz  = ai[i + 1] - a->diag[i] - 1;
1060:       sum = tmp[i];
1061:       PetscSparseDenseMinusDot(sum, tmps, v, vi, nz);
1062:       x[c[i]] = tmp[i] = sum * aa[a->diag[i]];
1063:     }
1064:     b += ldb;
1065:     x += ldx;
1066:   }
1067:   PetscCall(ISRestoreIndices(isrow, &rout));
1068:   PetscCall(ISRestoreIndices(iscol, &cout));
1069:   PetscCall(MatDenseRestoreArrayRead(B, &b));
1070:   PetscCall(MatDenseRestoreArray(X, &x));
1071:   PetscCall(PetscLogFlops(B->cmap->n * (2.0 * a->nz - n)));
1072:   PetscFunctionReturn(PETSC_SUCCESS);
1073: }

1075: PetscErrorCode MatMatSolve_SeqAIJ(Mat A, Mat B, Mat X)
1076: {
1077:   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
1078:   IS                 iscol = a->col, isrow = a->row;
1079:   PetscInt           i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j, *adiag = a->diag;
1080:   PetscInt           nz, neq, ldb, ldx;
1081:   const PetscInt    *rout, *cout, *r, *c;
1082:   PetscScalar       *x, *tmp = a->solve_work, sum;
1083:   const PetscScalar *b, *aa  = a->a, *v;
1084:   PetscBool          isdense;

1086:   PetscFunctionBegin;
1087:   if (!n) PetscFunctionReturn(PETSC_SUCCESS);
1088:   PetscCall(PetscObjectTypeCompare((PetscObject)B, MATSEQDENSE, &isdense));
1089:   PetscCheck(isdense, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "B matrix must be a SeqDense matrix");
1090:   if (X != B) {
1091:     PetscCall(PetscObjectTypeCompare((PetscObject)X, MATSEQDENSE, &isdense));
1092:     PetscCheck(isdense, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "X matrix must be a SeqDense matrix");
1093:   }
1094:   PetscCall(MatDenseGetArrayRead(B, &b));
1095:   PetscCall(MatDenseGetLDA(B, &ldb));
1096:   PetscCall(MatDenseGetArray(X, &x));
1097:   PetscCall(MatDenseGetLDA(X, &ldx));
1098:   PetscCall(ISGetIndices(isrow, &rout));
1099:   r = rout;
1100:   PetscCall(ISGetIndices(iscol, &cout));
1101:   c = cout;
1102:   for (neq = 0; neq < B->cmap->n; neq++) {
1103:     /* forward solve the lower triangular */
1104:     tmp[0] = b[r[0]];
1105:     v      = aa;
1106:     vi     = aj;
1107:     for (i = 1; i < n; i++) {
1108:       nz  = ai[i + 1] - ai[i];
1109:       sum = b[r[i]];
1110:       PetscSparseDenseMinusDot(sum, tmp, v, vi, nz);
1111:       tmp[i] = sum;
1112:       v += nz;
1113:       vi += nz;
1114:     }
1115:     /* backward solve the upper triangular */
1116:     for (i = n - 1; i >= 0; i--) {
1117:       v   = aa + adiag[i + 1] + 1;
1118:       vi  = aj + adiag[i + 1] + 1;
1119:       nz  = adiag[i] - adiag[i + 1] - 1;
1120:       sum = tmp[i];
1121:       PetscSparseDenseMinusDot(sum, tmp, v, vi, nz);
1122:       x[c[i]] = tmp[i] = sum * v[nz]; /* v[nz] = aa[adiag[i]] */
1123:     }
1124:     b += ldb;
1125:     x += ldx;
1126:   }
1127:   PetscCall(ISRestoreIndices(isrow, &rout));
1128:   PetscCall(ISRestoreIndices(iscol, &cout));
1129:   PetscCall(MatDenseRestoreArrayRead(B, &b));
1130:   PetscCall(MatDenseRestoreArray(X, &x));
1131:   PetscCall(PetscLogFlops(B->cmap->n * (2.0 * a->nz - n)));
1132:   PetscFunctionReturn(PETSC_SUCCESS);
1133: }

1135: static PetscErrorCode MatSolve_SeqAIJ_InplaceWithPerm(Mat A, Vec bb, Vec xx)
1136: {
1137:   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
1138:   IS                 iscol = a->col, isrow = a->row;
1139:   const PetscInt    *r, *c, *rout, *cout;
1140:   PetscInt           i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j;
1141:   PetscInt           nz, row;
1142:   PetscScalar       *x, *tmp, *tmps, sum;
1143:   const PetscScalar *b;
1144:   const MatScalar   *aa = a->a, *v;

1146:   PetscFunctionBegin;
1147:   if (!n) PetscFunctionReturn(PETSC_SUCCESS);

1149:   PetscCall(VecGetArrayRead(bb, &b));
1150:   PetscCall(VecGetArrayWrite(xx, &x));
1151:   tmp = a->solve_work;

1153:   PetscCall(ISGetIndices(isrow, &rout));
1154:   r = rout;
1155:   PetscCall(ISGetIndices(iscol, &cout));
1156:   c = cout + (n - 1);

1158:   /* forward solve the lower triangular */
1159:   tmp[0] = b[*r++];
1160:   tmps   = tmp;
1161:   for (row = 1; row < n; row++) {
1162:     i   = rout[row]; /* permuted row */
1163:     v   = aa + ai[i];
1164:     vi  = aj + ai[i];
1165:     nz  = a->diag[i] - ai[i];
1166:     sum = b[*r++];
1167:     PetscSparseDenseMinusDot(sum, tmps, v, vi, nz);
1168:     tmp[row] = sum;
1169:   }

1171:   /* backward solve the upper triangular */
1172:   for (row = n - 1; row >= 0; row--) {
1173:     i   = rout[row]; /* permuted row */
1174:     v   = aa + a->diag[i] + 1;
1175:     vi  = aj + a->diag[i] + 1;
1176:     nz  = ai[i + 1] - a->diag[i] - 1;
1177:     sum = tmp[row];
1178:     PetscSparseDenseMinusDot(sum, tmps, v, vi, nz);
1179:     x[*c--] = tmp[row] = sum * aa[a->diag[i]];
1180:   }

1182:   PetscCall(ISRestoreIndices(isrow, &rout));
1183:   PetscCall(ISRestoreIndices(iscol, &cout));
1184:   PetscCall(VecRestoreArrayRead(bb, &b));
1185:   PetscCall(VecRestoreArrayWrite(xx, &x));
1186:   PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
1187:   PetscFunctionReturn(PETSC_SUCCESS);
1188: }

1190: #include <../src/mat/impls/aij/seq/ftn-kernels/fsolve.h>
1191: static PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering_inplace(Mat A, Vec bb, Vec xx)
1192: {
1193:   Mat_SeqAIJ        *a  = (Mat_SeqAIJ *)A->data;
1194:   PetscInt           n  = A->rmap->n;
1195:   const PetscInt    *ai = a->i, *aj = a->j, *adiag = a->diag;
1196:   PetscScalar       *x;
1197:   const PetscScalar *b;
1198:   const MatScalar   *aa = a->a;
1199: #if !defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
1200:   PetscInt         adiag_i, i, nz, ai_i;
1201:   const PetscInt  *vi;
1202:   const MatScalar *v;
1203:   PetscScalar      sum;
1204: #endif

1206:   PetscFunctionBegin;
1207:   if (!n) PetscFunctionReturn(PETSC_SUCCESS);

1209:   PetscCall(VecGetArrayRead(bb, &b));
1210:   PetscCall(VecGetArrayWrite(xx, &x));

1212: #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
1213:   fortransolveaij_(&n, x, ai, aj, adiag, aa, b);
1214: #else
1215:   /* forward solve the lower triangular */
1216:   x[0] = b[0];
1217:   for (i = 1; i < n; i++) {
1218:     ai_i = ai[i];
1219:     v    = aa + ai_i;
1220:     vi   = aj + ai_i;
1221:     nz   = adiag[i] - ai_i;
1222:     sum  = b[i];
1223:     PetscSparseDenseMinusDot(sum, x, v, vi, nz);
1224:     x[i] = sum;
1225:   }

1227:   /* backward solve the upper triangular */
1228:   for (i = n - 1; i >= 0; i--) {
1229:     adiag_i = adiag[i];
1230:     v       = aa + adiag_i + 1;
1231:     vi      = aj + adiag_i + 1;
1232:     nz      = ai[i + 1] - adiag_i - 1;
1233:     sum     = x[i];
1234:     PetscSparseDenseMinusDot(sum, x, v, vi, nz);
1235:     x[i] = sum * aa[adiag_i];
1236:   }
1237: #endif
1238:   PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
1239:   PetscCall(VecRestoreArrayRead(bb, &b));
1240:   PetscCall(VecRestoreArrayWrite(xx, &x));
1241:   PetscFunctionReturn(PETSC_SUCCESS);
1242: }

1244: static PetscErrorCode MatSolveAdd_SeqAIJ_inplace(Mat A, Vec bb, Vec yy, Vec xx)
1245: {
1246:   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
1247:   IS                 iscol = a->col, isrow = a->row;
1248:   PetscInt           i, n                  = A->rmap->n, j;
1249:   PetscInt           nz;
1250:   const PetscInt    *rout, *cout, *r, *c, *vi, *ai = a->i, *aj = a->j;
1251:   PetscScalar       *x, *tmp, sum;
1252:   const PetscScalar *b;
1253:   const MatScalar   *aa = a->a, *v;

1255:   PetscFunctionBegin;
1256:   if (yy != xx) PetscCall(VecCopy(yy, xx));

1258:   PetscCall(VecGetArrayRead(bb, &b));
1259:   PetscCall(VecGetArray(xx, &x));
1260:   tmp = a->solve_work;

1262:   PetscCall(ISGetIndices(isrow, &rout));
1263:   r = rout;
1264:   PetscCall(ISGetIndices(iscol, &cout));
1265:   c = cout + (n - 1);

1267:   /* forward solve the lower triangular */
1268:   tmp[0] = b[*r++];
1269:   for (i = 1; i < n; i++) {
1270:     v   = aa + ai[i];
1271:     vi  = aj + ai[i];
1272:     nz  = a->diag[i] - ai[i];
1273:     sum = b[*r++];
1274:     for (j = 0; j < nz; j++) sum -= v[j] * tmp[vi[j]];
1275:     tmp[i] = sum;
1276:   }

1278:   /* backward solve the upper triangular */
1279:   for (i = n - 1; i >= 0; i--) {
1280:     v   = aa + a->diag[i] + 1;
1281:     vi  = aj + a->diag[i] + 1;
1282:     nz  = ai[i + 1] - a->diag[i] - 1;
1283:     sum = tmp[i];
1284:     for (j = 0; j < nz; j++) sum -= v[j] * tmp[vi[j]];
1285:     tmp[i] = sum * aa[a->diag[i]];
1286:     x[*c--] += tmp[i];
1287:   }

1289:   PetscCall(ISRestoreIndices(isrow, &rout));
1290:   PetscCall(ISRestoreIndices(iscol, &cout));
1291:   PetscCall(VecRestoreArrayRead(bb, &b));
1292:   PetscCall(VecRestoreArray(xx, &x));
1293:   PetscCall(PetscLogFlops(2.0 * a->nz));
1294:   PetscFunctionReturn(PETSC_SUCCESS);
1295: }

1297: PetscErrorCode MatSolveAdd_SeqAIJ(Mat A, Vec bb, Vec yy, Vec xx)
1298: {
1299:   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
1300:   IS                 iscol = a->col, isrow = a->row;
1301:   PetscInt           i, n                  = A->rmap->n, j;
1302:   PetscInt           nz;
1303:   const PetscInt    *rout, *cout, *r, *c, *vi, *ai = a->i, *aj = a->j, *adiag = a->diag;
1304:   PetscScalar       *x, *tmp, sum;
1305:   const PetscScalar *b;
1306:   const MatScalar   *aa = a->a, *v;

1308:   PetscFunctionBegin;
1309:   if (yy != xx) PetscCall(VecCopy(yy, xx));

1311:   PetscCall(VecGetArrayRead(bb, &b));
1312:   PetscCall(VecGetArray(xx, &x));
1313:   tmp = a->solve_work;

1315:   PetscCall(ISGetIndices(isrow, &rout));
1316:   r = rout;
1317:   PetscCall(ISGetIndices(iscol, &cout));
1318:   c = cout;

1320:   /* forward solve the lower triangular */
1321:   tmp[0] = b[r[0]];
1322:   v      = aa;
1323:   vi     = aj;
1324:   for (i = 1; i < n; i++) {
1325:     nz  = ai[i + 1] - ai[i];
1326:     sum = b[r[i]];
1327:     for (j = 0; j < nz; j++) sum -= v[j] * tmp[vi[j]];
1328:     tmp[i] = sum;
1329:     v += nz;
1330:     vi += nz;
1331:   }

1333:   /* backward solve the upper triangular */
1334:   v  = aa + adiag[n - 1];
1335:   vi = aj + adiag[n - 1];
1336:   for (i = n - 1; i >= 0; i--) {
1337:     nz  = adiag[i] - adiag[i + 1] - 1;
1338:     sum = tmp[i];
1339:     for (j = 0; j < nz; j++) sum -= v[j] * tmp[vi[j]];
1340:     tmp[i] = sum * v[nz];
1341:     x[c[i]] += tmp[i];
1342:     v += nz + 1;
1343:     vi += nz + 1;
1344:   }

1346:   PetscCall(ISRestoreIndices(isrow, &rout));
1347:   PetscCall(ISRestoreIndices(iscol, &cout));
1348:   PetscCall(VecRestoreArrayRead(bb, &b));
1349:   PetscCall(VecRestoreArray(xx, &x));
1350:   PetscCall(PetscLogFlops(2.0 * a->nz));
1351:   PetscFunctionReturn(PETSC_SUCCESS);
1352: }

1354: PetscErrorCode MatSolveTranspose_SeqAIJ_inplace(Mat A, Vec bb, Vec xx)
1355: {
1356:   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
1357:   IS                 iscol = a->col, isrow = a->row;
1358:   const PetscInt    *rout, *cout, *r, *c, *diag = a->diag, *ai = a->i, *aj = a->j, *vi;
1359:   PetscInt           i, n = A->rmap->n, j;
1360:   PetscInt           nz;
1361:   PetscScalar       *x, *tmp, s1;
1362:   const MatScalar   *aa = a->a, *v;
1363:   const PetscScalar *b;

1365:   PetscFunctionBegin;
1366:   PetscCall(VecGetArrayRead(bb, &b));
1367:   PetscCall(VecGetArrayWrite(xx, &x));
1368:   tmp = a->solve_work;

1370:   PetscCall(ISGetIndices(isrow, &rout));
1371:   r = rout;
1372:   PetscCall(ISGetIndices(iscol, &cout));
1373:   c = cout;

1375:   /* copy the b into temp work space according to permutation */
1376:   for (i = 0; i < n; i++) tmp[i] = b[c[i]];

1378:   /* forward solve the U^T */
1379:   for (i = 0; i < n; i++) {
1380:     v  = aa + diag[i];
1381:     vi = aj + diag[i] + 1;
1382:     nz = ai[i + 1] - diag[i] - 1;
1383:     s1 = tmp[i];
1384:     s1 *= (*v++); /* multiply by inverse of diagonal entry */
1385:     for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j];
1386:     tmp[i] = s1;
1387:   }

1389:   /* backward solve the L^T */
1390:   for (i = n - 1; i >= 0; i--) {
1391:     v  = aa + diag[i] - 1;
1392:     vi = aj + diag[i] - 1;
1393:     nz = diag[i] - ai[i];
1394:     s1 = tmp[i];
1395:     for (j = 0; j > -nz; j--) tmp[vi[j]] -= s1 * v[j];
1396:   }

1398:   /* copy tmp into x according to permutation */
1399:   for (i = 0; i < n; i++) x[r[i]] = tmp[i];

1401:   PetscCall(ISRestoreIndices(isrow, &rout));
1402:   PetscCall(ISRestoreIndices(iscol, &cout));
1403:   PetscCall(VecRestoreArrayRead(bb, &b));
1404:   PetscCall(VecRestoreArrayWrite(xx, &x));

1406:   PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
1407:   PetscFunctionReturn(PETSC_SUCCESS);
1408: }

1410: PetscErrorCode MatSolveTranspose_SeqAIJ(Mat A, Vec bb, Vec xx)
1411: {
1412:   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
1413:   IS                 iscol = a->col, isrow = a->row;
1414:   const PetscInt    *rout, *cout, *r, *c, *adiag = a->diag, *ai = a->i, *aj = a->j, *vi;
1415:   PetscInt           i, n = A->rmap->n, j;
1416:   PetscInt           nz;
1417:   PetscScalar       *x, *tmp, s1;
1418:   const MatScalar   *aa = a->a, *v;
1419:   const PetscScalar *b;

1421:   PetscFunctionBegin;
1422:   PetscCall(VecGetArrayRead(bb, &b));
1423:   PetscCall(VecGetArrayWrite(xx, &x));
1424:   tmp = a->solve_work;

1426:   PetscCall(ISGetIndices(isrow, &rout));
1427:   r = rout;
1428:   PetscCall(ISGetIndices(iscol, &cout));
1429:   c = cout;

1431:   /* copy the b into temp work space according to permutation */
1432:   for (i = 0; i < n; i++) tmp[i] = b[c[i]];

1434:   /* forward solve the U^T */
1435:   for (i = 0; i < n; i++) {
1436:     v  = aa + adiag[i + 1] + 1;
1437:     vi = aj + adiag[i + 1] + 1;
1438:     nz = adiag[i] - adiag[i + 1] - 1;
1439:     s1 = tmp[i];
1440:     s1 *= v[nz]; /* multiply by inverse of diagonal entry */
1441:     for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j];
1442:     tmp[i] = s1;
1443:   }

1445:   /* backward solve the L^T */
1446:   for (i = n - 1; i >= 0; i--) {
1447:     v  = aa + ai[i];
1448:     vi = aj + ai[i];
1449:     nz = ai[i + 1] - ai[i];
1450:     s1 = tmp[i];
1451:     for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j];
1452:   }

1454:   /* copy tmp into x according to permutation */
1455:   for (i = 0; i < n; i++) x[r[i]] = tmp[i];

1457:   PetscCall(ISRestoreIndices(isrow, &rout));
1458:   PetscCall(ISRestoreIndices(iscol, &cout));
1459:   PetscCall(VecRestoreArrayRead(bb, &b));
1460:   PetscCall(VecRestoreArrayWrite(xx, &x));

1462:   PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
1463:   PetscFunctionReturn(PETSC_SUCCESS);
1464: }

1466: PetscErrorCode MatSolveTransposeAdd_SeqAIJ_inplace(Mat A, Vec bb, Vec zz, Vec xx)
1467: {
1468:   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
1469:   IS                 iscol = a->col, isrow = a->row;
1470:   const PetscInt    *rout, *cout, *r, *c, *diag = a->diag, *ai = a->i, *aj = a->j, *vi;
1471:   PetscInt           i, n = A->rmap->n, j;
1472:   PetscInt           nz;
1473:   PetscScalar       *x, *tmp, s1;
1474:   const MatScalar   *aa = a->a, *v;
1475:   const PetscScalar *b;

1477:   PetscFunctionBegin;
1478:   if (zz != xx) PetscCall(VecCopy(zz, xx));
1479:   PetscCall(VecGetArrayRead(bb, &b));
1480:   PetscCall(VecGetArray(xx, &x));
1481:   tmp = a->solve_work;

1483:   PetscCall(ISGetIndices(isrow, &rout));
1484:   r = rout;
1485:   PetscCall(ISGetIndices(iscol, &cout));
1486:   c = cout;

1488:   /* copy the b into temp work space according to permutation */
1489:   for (i = 0; i < n; i++) tmp[i] = b[c[i]];

1491:   /* forward solve the U^T */
1492:   for (i = 0; i < n; i++) {
1493:     v  = aa + diag[i];
1494:     vi = aj + diag[i] + 1;
1495:     nz = ai[i + 1] - diag[i] - 1;
1496:     s1 = tmp[i];
1497:     s1 *= (*v++); /* multiply by inverse of diagonal entry */
1498:     for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j];
1499:     tmp[i] = s1;
1500:   }

1502:   /* backward solve the L^T */
1503:   for (i = n - 1; i >= 0; i--) {
1504:     v  = aa + diag[i] - 1;
1505:     vi = aj + diag[i] - 1;
1506:     nz = diag[i] - ai[i];
1507:     s1 = tmp[i];
1508:     for (j = 0; j > -nz; j--) tmp[vi[j]] -= s1 * v[j];
1509:   }

1511:   /* copy tmp into x according to permutation */
1512:   for (i = 0; i < n; i++) x[r[i]] += tmp[i];

1514:   PetscCall(ISRestoreIndices(isrow, &rout));
1515:   PetscCall(ISRestoreIndices(iscol, &cout));
1516:   PetscCall(VecRestoreArrayRead(bb, &b));
1517:   PetscCall(VecRestoreArray(xx, &x));

1519:   PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
1520:   PetscFunctionReturn(PETSC_SUCCESS);
1521: }

1523: PetscErrorCode MatSolveTransposeAdd_SeqAIJ(Mat A, Vec bb, Vec zz, Vec xx)
1524: {
1525:   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
1526:   IS                 iscol = a->col, isrow = a->row;
1527:   const PetscInt    *rout, *cout, *r, *c, *adiag = a->diag, *ai = a->i, *aj = a->j, *vi;
1528:   PetscInt           i, n = A->rmap->n, j;
1529:   PetscInt           nz;
1530:   PetscScalar       *x, *tmp, s1;
1531:   const MatScalar   *aa = a->a, *v;
1532:   const PetscScalar *b;

1534:   PetscFunctionBegin;
1535:   if (zz != xx) PetscCall(VecCopy(zz, xx));
1536:   PetscCall(VecGetArrayRead(bb, &b));
1537:   PetscCall(VecGetArray(xx, &x));
1538:   tmp = a->solve_work;

1540:   PetscCall(ISGetIndices(isrow, &rout));
1541:   r = rout;
1542:   PetscCall(ISGetIndices(iscol, &cout));
1543:   c = cout;

1545:   /* copy the b into temp work space according to permutation */
1546:   for (i = 0; i < n; i++) tmp[i] = b[c[i]];

1548:   /* forward solve the U^T */
1549:   for (i = 0; i < n; i++) {
1550:     v  = aa + adiag[i + 1] + 1;
1551:     vi = aj + adiag[i + 1] + 1;
1552:     nz = adiag[i] - adiag[i + 1] - 1;
1553:     s1 = tmp[i];
1554:     s1 *= v[nz]; /* multiply by inverse of diagonal entry */
1555:     for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j];
1556:     tmp[i] = s1;
1557:   }

1559:   /* backward solve the L^T */
1560:   for (i = n - 1; i >= 0; i--) {
1561:     v  = aa + ai[i];
1562:     vi = aj + ai[i];
1563:     nz = ai[i + 1] - ai[i];
1564:     s1 = tmp[i];
1565:     for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j];
1566:   }

1568:   /* copy tmp into x according to permutation */
1569:   for (i = 0; i < n; i++) x[r[i]] += tmp[i];

1571:   PetscCall(ISRestoreIndices(isrow, &rout));
1572:   PetscCall(ISRestoreIndices(iscol, &cout));
1573:   PetscCall(VecRestoreArrayRead(bb, &b));
1574:   PetscCall(VecRestoreArray(xx, &x));

1576:   PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
1577:   PetscFunctionReturn(PETSC_SUCCESS);
1578: }

1580: /*
1581:    ilu() under revised new data structure.
1582:    Factored arrays bj and ba are stored as
1583:      L(0,:), L(1,:), ...,L(n-1,:),  U(n-1,:),...,U(i,:),U(i-1,:),...,U(0,:)

1585:    bi=fact->i is an array of size n+1, in which
1586:      bi[i]:  points to 1st entry of L(i,:),i=0,...,n-1
1587:      bi[n]:  points to L(n-1,n-1)+1

1589:   bdiag=fact->diag is an array of size n+1,in which
1590:      bdiag[i]: points to diagonal of U(i,:), i=0,...,n-1
1591:      bdiag[n]: points to entry of U(n-1,0)-1

1593:    U(i,:) contains bdiag[i] as its last entry, i.e.,
1594:     U(i,:) = (u[i,i+1],...,u[i,n-1],diag[i])
1595: */
1596: PetscErrorCode MatILUFactorSymbolic_SeqAIJ_ilu0(Mat fact, Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
1597: {
1598:   Mat_SeqAIJ    *a = (Mat_SeqAIJ *)A->data, *b;
1599:   const PetscInt n = A->rmap->n, *ai = a->i, *aj, *adiag = a->diag;
1600:   PetscInt       i, j, k = 0, nz, *bi, *bj, *bdiag;
1601:   IS             isicol;

1603:   PetscFunctionBegin;
1604:   PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol));
1605:   PetscCall(MatDuplicateNoCreate_SeqAIJ(fact, A, MAT_DO_NOT_COPY_VALUES, PETSC_FALSE));
1606:   b = (Mat_SeqAIJ *)(fact)->data;

1608:   /* allocate matrix arrays for new data structure */
1609:   PetscCall(PetscMalloc3(ai[n] + 1, &b->a, ai[n] + 1, &b->j, n + 1, &b->i));

1611:   b->singlemalloc = PETSC_TRUE;
1612:   if (!b->diag) PetscCall(PetscMalloc1(n + 1, &b->diag));
1613:   bdiag = b->diag;

1615:   if (n > 0) PetscCall(PetscArrayzero(b->a, ai[n]));

1617:   /* set bi and bj with new data structure */
1618:   bi = b->i;
1619:   bj = b->j;

1621:   /* L part */
1622:   bi[0] = 0;
1623:   for (i = 0; i < n; i++) {
1624:     nz        = adiag[i] - ai[i];
1625:     bi[i + 1] = bi[i] + nz;
1626:     aj        = a->j + ai[i];
1627:     for (j = 0; j < nz; j++) bj[k++] = aj[j];
1628:   }

1630:   /* U part */
1631:   bdiag[n] = bi[n] - 1;
1632:   for (i = n - 1; i >= 0; i--) {
1633:     nz = ai[i + 1] - adiag[i] - 1;
1634:     aj = a->j + adiag[i] + 1;
1635:     for (j = 0; j < nz; j++) bj[k++] = aj[j];
1636:     /* diag[i] */
1637:     bj[k++]  = i;
1638:     bdiag[i] = bdiag[i + 1] + nz + 1;
1639:   }

1641:   fact->factortype             = MAT_FACTOR_ILU;
1642:   fact->info.factor_mallocs    = 0;
1643:   fact->info.fill_ratio_given  = info->fill;
1644:   fact->info.fill_ratio_needed = 1.0;
1645:   fact->ops->lufactornumeric   = MatLUFactorNumeric_SeqAIJ;
1646:   PetscCall(MatSeqAIJCheckInode_FactorLU(fact));

1648:   b       = (Mat_SeqAIJ *)(fact)->data;
1649:   b->row  = isrow;
1650:   b->col  = iscol;
1651:   b->icol = isicol;
1652:   PetscCall(PetscMalloc1(fact->rmap->n + 1, &b->solve_work));
1653:   PetscCall(PetscObjectReference((PetscObject)isrow));
1654:   PetscCall(PetscObjectReference((PetscObject)iscol));
1655:   PetscFunctionReturn(PETSC_SUCCESS);
1656: }

1658: PetscErrorCode MatILUFactorSymbolic_SeqAIJ(Mat fact, Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
1659: {
1660:   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)A->data, *b;
1661:   IS                 isicol;
1662:   const PetscInt    *r, *ic;
1663:   PetscInt           n = A->rmap->n, *ai = a->i, *aj = a->j;
1664:   PetscInt          *bi, *cols, nnz, *cols_lvl;
1665:   PetscInt          *bdiag, prow, fm, nzbd, reallocs = 0, dcount = 0;
1666:   PetscInt           i, levels, diagonal_fill;
1667:   PetscBool          col_identity, row_identity, missing;
1668:   PetscReal          f;
1669:   PetscInt           nlnk, *lnk, *lnk_lvl = NULL;
1670:   PetscBT            lnkbt;
1671:   PetscInt           nzi, *bj, **bj_ptr, **bjlvl_ptr;
1672:   PetscFreeSpaceList free_space = NULL, current_space = NULL;
1673:   PetscFreeSpaceList free_space_lvl = NULL, current_space_lvl = NULL;

1675:   PetscFunctionBegin;
1676:   PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT, A->rmap->n, A->cmap->n);
1677:   PetscCall(MatMissingDiagonal(A, &missing, &i));
1678:   PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i);

1680:   levels = (PetscInt)info->levels;
1681:   PetscCall(ISIdentity(isrow, &row_identity));
1682:   PetscCall(ISIdentity(iscol, &col_identity));
1683:   if (!levels && row_identity && col_identity) {
1684:     /* special case: ilu(0) with natural ordering */
1685:     PetscCall(MatILUFactorSymbolic_SeqAIJ_ilu0(fact, A, isrow, iscol, info));
1686:     if (a->inode.size) fact->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode;
1687:     PetscFunctionReturn(PETSC_SUCCESS);
1688:   }

1690:   PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol));
1691:   PetscCall(ISGetIndices(isrow, &r));
1692:   PetscCall(ISGetIndices(isicol, &ic));

1694:   /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */
1695:   PetscCall(PetscMalloc1(n + 1, &bi));
1696:   PetscCall(PetscMalloc1(n + 1, &bdiag));
1697:   bi[0] = bdiag[0] = 0;
1698:   PetscCall(PetscMalloc2(n, &bj_ptr, n, &bjlvl_ptr));

1700:   /* create a linked list for storing column indices of the active row */
1701:   nlnk = n + 1;
1702:   PetscCall(PetscIncompleteLLCreate(n, n, nlnk, lnk, lnk_lvl, lnkbt));

1704:   /* initial FreeSpace size is f*(ai[n]+1) */
1705:   f             = info->fill;
1706:   diagonal_fill = (PetscInt)info->diagonal_fill;
1707:   PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space));
1708:   current_space = free_space;
1709:   PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space_lvl));
1710:   current_space_lvl = free_space_lvl;
1711:   for (i = 0; i < n; i++) {
1712:     nzi = 0;
1713:     /* copy current row into linked list */
1714:     nnz = ai[r[i] + 1] - ai[r[i]];
1715:     PetscCheck(nnz, PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, r[i], i);
1716:     cols   = aj + ai[r[i]];
1717:     lnk[i] = -1; /* marker to indicate if diagonal exists */
1718:     PetscCall(PetscIncompleteLLInit(nnz, cols, n, ic, &nlnk, lnk, lnk_lvl, lnkbt));
1719:     nzi += nlnk;

1721:     /* make sure diagonal entry is included */
1722:     if (diagonal_fill && lnk[i] == -1) {
1723:       fm = n;
1724:       while (lnk[fm] < i) fm = lnk[fm];
1725:       lnk[i]     = lnk[fm]; /* insert diagonal into linked list */
1726:       lnk[fm]    = i;
1727:       lnk_lvl[i] = 0;
1728:       nzi++;
1729:       dcount++;
1730:     }

1732:     /* add pivot rows into the active row */
1733:     nzbd = 0;
1734:     prow = lnk[n];
1735:     while (prow < i) {
1736:       nnz      = bdiag[prow];
1737:       cols     = bj_ptr[prow] + nnz + 1;
1738:       cols_lvl = bjlvl_ptr[prow] + nnz + 1;
1739:       nnz      = bi[prow + 1] - bi[prow] - nnz - 1;
1740:       PetscCall(PetscILULLAddSorted(nnz, cols, levels, cols_lvl, prow, &nlnk, lnk, lnk_lvl, lnkbt, prow));
1741:       nzi += nlnk;
1742:       prow = lnk[prow];
1743:       nzbd++;
1744:     }
1745:     bdiag[i]  = nzbd;
1746:     bi[i + 1] = bi[i] + nzi;
1747:     /* if free space is not available, make more free space */
1748:     if (current_space->local_remaining < nzi) {
1749:       nnz = PetscIntMultTruncate(2, PetscIntMultTruncate(nzi, n - i)); /* estimated and max additional space needed */
1750:       PetscCall(PetscFreeSpaceGet(nnz, &current_space));
1751:       PetscCall(PetscFreeSpaceGet(nnz, &current_space_lvl));
1752:       reallocs++;
1753:     }

1755:     /* copy data into free_space and free_space_lvl, then initialize lnk */
1756:     PetscCall(PetscIncompleteLLClean(n, n, nzi, lnk, lnk_lvl, current_space->array, current_space_lvl->array, lnkbt));
1757:     bj_ptr[i]    = current_space->array;
1758:     bjlvl_ptr[i] = current_space_lvl->array;

1760:     /* make sure the active row i has diagonal entry */
1761:     PetscCheck(*(bj_ptr[i] + bdiag[i]) == i, PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Row %" PetscInt_FMT " has missing diagonal in factored matrix\ntry running with -pc_factor_nonzeros_along_diagonal or -pc_factor_diagonal_fill", i);

1763:     current_space->array += nzi;
1764:     current_space->local_used += nzi;
1765:     current_space->local_remaining -= nzi;
1766:     current_space_lvl->array += nzi;
1767:     current_space_lvl->local_used += nzi;
1768:     current_space_lvl->local_remaining -= nzi;
1769:   }

1771:   PetscCall(ISRestoreIndices(isrow, &r));
1772:   PetscCall(ISRestoreIndices(isicol, &ic));
1773:   /* copy free_space into bj and free free_space; set bi, bj, bdiag in new datastructure; */
1774:   PetscCall(PetscMalloc1(bi[n] + 1, &bj));
1775:   PetscCall(PetscFreeSpaceContiguous_LU(&free_space, bj, n, bi, bdiag));

1777:   PetscCall(PetscIncompleteLLDestroy(lnk, lnkbt));
1778:   PetscCall(PetscFreeSpaceDestroy(free_space_lvl));
1779:   PetscCall(PetscFree2(bj_ptr, bjlvl_ptr));

1781: #if defined(PETSC_USE_INFO)
1782:   {
1783:     PetscReal af = ((PetscReal)(bdiag[0] + 1)) / ((PetscReal)ai[n]);
1784:     PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)f, (double)af));
1785:     PetscCall(PetscInfo(A, "Run with -[sub_]pc_factor_fill %g or use \n", (double)af));
1786:     PetscCall(PetscInfo(A, "PCFactorSetFill([sub]pc,%g);\n", (double)af));
1787:     PetscCall(PetscInfo(A, "for best performance.\n"));
1788:     if (diagonal_fill) PetscCall(PetscInfo(A, "Detected and replaced %" PetscInt_FMT " missing diagonals\n", dcount));
1789:   }
1790: #endif
1791:   /* put together the new matrix */
1792:   PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(fact, MAT_SKIP_ALLOCATION, NULL));
1793:   b = (Mat_SeqAIJ *)(fact)->data;

1795:   b->free_a       = PETSC_TRUE;
1796:   b->free_ij      = PETSC_TRUE;
1797:   b->singlemalloc = PETSC_FALSE;

1799:   PetscCall(PetscMalloc1(bdiag[0] + 1, &b->a));

1801:   b->j    = bj;
1802:   b->i    = bi;
1803:   b->diag = bdiag;
1804:   b->ilen = NULL;
1805:   b->imax = NULL;
1806:   b->row  = isrow;
1807:   b->col  = iscol;
1808:   PetscCall(PetscObjectReference((PetscObject)isrow));
1809:   PetscCall(PetscObjectReference((PetscObject)iscol));
1810:   b->icol = isicol;

1812:   PetscCall(PetscMalloc1(n + 1, &b->solve_work));
1813:   /* In b structure:  Free imax, ilen, old a, old j.
1814:      Allocate bdiag, solve_work, new a, new j */
1815:   b->maxnz = b->nz = bdiag[0] + 1;

1817:   (fact)->info.factor_mallocs    = reallocs;
1818:   (fact)->info.fill_ratio_given  = f;
1819:   (fact)->info.fill_ratio_needed = ((PetscReal)(bdiag[0] + 1)) / ((PetscReal)ai[n]);
1820:   (fact)->ops->lufactornumeric   = MatLUFactorNumeric_SeqAIJ;
1821:   if (a->inode.size) (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode;
1822:   PetscCall(MatSeqAIJCheckInode_FactorLU(fact));
1823:   PetscFunctionReturn(PETSC_SUCCESS);
1824: }

1826: #if 0
1827: // unused
1828: static PetscErrorCode MatILUFactorSymbolic_SeqAIJ_inplace(Mat fact, Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
1829: {
1830:   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)A->data, *b;
1831:   IS                 isicol;
1832:   const PetscInt    *r, *ic;
1833:   PetscInt           n = A->rmap->n, *ai = a->i, *aj = a->j;
1834:   PetscInt          *bi, *cols, nnz, *cols_lvl;
1835:   PetscInt          *bdiag, prow, fm, nzbd, reallocs = 0, dcount = 0;
1836:   PetscInt           i, levels, diagonal_fill;
1837:   PetscBool          col_identity, row_identity;
1838:   PetscReal          f;
1839:   PetscInt           nlnk, *lnk, *lnk_lvl = NULL;
1840:   PetscBT            lnkbt;
1841:   PetscInt           nzi, *bj, **bj_ptr, **bjlvl_ptr;
1842:   PetscFreeSpaceList free_space = NULL, current_space = NULL;
1843:   PetscFreeSpaceList free_space_lvl = NULL, current_space_lvl = NULL;
1844:   PetscBool          missing;

1846:   PetscFunctionBegin;
1847:   PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT, A->rmap->n, A->cmap->n);
1848:   PetscCall(MatMissingDiagonal(A, &missing, &i));
1849:   PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i);

1851:   f             = info->fill;
1852:   levels        = (PetscInt)info->levels;
1853:   diagonal_fill = (PetscInt)info->diagonal_fill;

1855:   PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol));

1857:   PetscCall(ISIdentity(isrow, &row_identity));
1858:   PetscCall(ISIdentity(iscol, &col_identity));
1859:   if (!levels && row_identity && col_identity) { /* special case: ilu(0) with natural ordering */
1860:     PetscCall(MatDuplicateNoCreate_SeqAIJ(fact, A, MAT_DO_NOT_COPY_VALUES, PETSC_TRUE));

1862:     (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_inplace;
1863:     if (a->inode.size) (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode_inplace;
1864:     fact->factortype               = MAT_FACTOR_ILU;
1865:     (fact)->info.factor_mallocs    = 0;
1866:     (fact)->info.fill_ratio_given  = info->fill;
1867:     (fact)->info.fill_ratio_needed = 1.0;

1869:     b       = (Mat_SeqAIJ *)(fact)->data;
1870:     b->row  = isrow;
1871:     b->col  = iscol;
1872:     b->icol = isicol;
1873:     PetscCall(PetscMalloc1((fact)->rmap->n + 1, &b->solve_work));
1874:     PetscCall(PetscObjectReference((PetscObject)isrow));
1875:     PetscCall(PetscObjectReference((PetscObject)iscol));
1876:     PetscFunctionReturn(PETSC_SUCCESS);
1877:   }

1879:   PetscCall(ISGetIndices(isrow, &r));
1880:   PetscCall(ISGetIndices(isicol, &ic));

1882:   /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */
1883:   PetscCall(PetscMalloc1(n + 1, &bi));
1884:   PetscCall(PetscMalloc1(n + 1, &bdiag));
1885:   bi[0] = bdiag[0] = 0;

1887:   PetscCall(PetscMalloc2(n, &bj_ptr, n, &bjlvl_ptr));

1889:   /* create a linked list for storing column indices of the active row */
1890:   nlnk = n + 1;
1891:   PetscCall(PetscIncompleteLLCreate(n, n, nlnk, lnk, lnk_lvl, lnkbt));

1893:   /* initial FreeSpace size is f*(ai[n]+1) */
1894:   PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space));
1895:   current_space = free_space;
1896:   PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space_lvl));
1897:   current_space_lvl = free_space_lvl;

1899:   for (i = 0; i < n; i++) {
1900:     nzi = 0;
1901:     /* copy current row into linked list */
1902:     nnz = ai[r[i] + 1] - ai[r[i]];
1903:     PetscCheck(nnz, PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, r[i], i);
1904:     cols   = aj + ai[r[i]];
1905:     lnk[i] = -1; /* marker to indicate if diagonal exists */
1906:     PetscCall(PetscIncompleteLLInit(nnz, cols, n, ic, &nlnk, lnk, lnk_lvl, lnkbt));
1907:     nzi += nlnk;

1909:     /* make sure diagonal entry is included */
1910:     if (diagonal_fill && lnk[i] == -1) {
1911:       fm = n;
1912:       while (lnk[fm] < i) fm = lnk[fm];
1913:       lnk[i]     = lnk[fm]; /* insert diagonal into linked list */
1914:       lnk[fm]    = i;
1915:       lnk_lvl[i] = 0;
1916:       nzi++;
1917:       dcount++;
1918:     }

1920:     /* add pivot rows into the active row */
1921:     nzbd = 0;
1922:     prow = lnk[n];
1923:     while (prow < i) {
1924:       nnz      = bdiag[prow];
1925:       cols     = bj_ptr[prow] + nnz + 1;
1926:       cols_lvl = bjlvl_ptr[prow] + nnz + 1;
1927:       nnz      = bi[prow + 1] - bi[prow] - nnz - 1;
1928:       PetscCall(PetscILULLAddSorted(nnz, cols, levels, cols_lvl, prow, &nlnk, lnk, lnk_lvl, lnkbt, prow));
1929:       nzi += nlnk;
1930:       prow = lnk[prow];
1931:       nzbd++;
1932:     }
1933:     bdiag[i]  = nzbd;
1934:     bi[i + 1] = bi[i] + nzi;

1936:     /* if free space is not available, make more free space */
1937:     if (current_space->local_remaining < nzi) {
1938:       nnz = PetscIntMultTruncate(nzi, n - i); /* estimated and max additional space needed */
1939:       PetscCall(PetscFreeSpaceGet(nnz, &current_space));
1940:       PetscCall(PetscFreeSpaceGet(nnz, &current_space_lvl));
1941:       reallocs++;
1942:     }

1944:     /* copy data into free_space and free_space_lvl, then initialize lnk */
1945:     PetscCall(PetscIncompleteLLClean(n, n, nzi, lnk, lnk_lvl, current_space->array, current_space_lvl->array, lnkbt));
1946:     bj_ptr[i]    = current_space->array;
1947:     bjlvl_ptr[i] = current_space_lvl->array;

1949:     /* make sure the active row i has diagonal entry */
1950:     PetscCheck(*(bj_ptr[i] + bdiag[i]) == i, PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Row %" PetscInt_FMT " has missing diagonal in factored matrix\ntry running with -pc_factor_nonzeros_along_diagonal or -pc_factor_diagonal_fill", i);

1952:     current_space->array += nzi;
1953:     current_space->local_used += nzi;
1954:     current_space->local_remaining -= nzi;
1955:     current_space_lvl->array += nzi;
1956:     current_space_lvl->local_used += nzi;
1957:     current_space_lvl->local_remaining -= nzi;
1958:   }

1960:   PetscCall(ISRestoreIndices(isrow, &r));
1961:   PetscCall(ISRestoreIndices(isicol, &ic));

1963:   /* destroy list of free space and other temporary arrays */
1964:   PetscCall(PetscMalloc1(bi[n] + 1, &bj));
1965:   PetscCall(PetscFreeSpaceContiguous(&free_space, bj)); /* copy free_space -> bj */
1966:   PetscCall(PetscIncompleteLLDestroy(lnk, lnkbt));
1967:   PetscCall(PetscFreeSpaceDestroy(free_space_lvl));
1968:   PetscCall(PetscFree2(bj_ptr, bjlvl_ptr));

1970:   #if defined(PETSC_USE_INFO)
1971:   {
1972:     PetscReal af = ((PetscReal)bi[n]) / ((PetscReal)ai[n]);
1973:     PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)f, (double)af));
1974:     PetscCall(PetscInfo(A, "Run with -[sub_]pc_factor_fill %g or use \n", (double)af));
1975:     PetscCall(PetscInfo(A, "PCFactorSetFill([sub]pc,%g);\n", (double)af));
1976:     PetscCall(PetscInfo(A, "for best performance.\n"));
1977:     if (diagonal_fill) PetscCall(PetscInfo(A, "Detected and replaced %" PetscInt_FMT " missing diagonals\n", dcount));
1978:   }
1979:   #endif

1981:   /* put together the new matrix */
1982:   PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(fact, MAT_SKIP_ALLOCATION, NULL));
1983:   b = (Mat_SeqAIJ *)(fact)->data;

1985:   b->free_a       = PETSC_TRUE;
1986:   b->free_ij      = PETSC_TRUE;
1987:   b->singlemalloc = PETSC_FALSE;

1989:   PetscCall(PetscMalloc1(bi[n], &b->a));
1990:   b->j = bj;
1991:   b->i = bi;
1992:   for (i = 0; i < n; i++) bdiag[i] += bi[i];
1993:   b->diag = bdiag;
1994:   b->ilen = NULL;
1995:   b->imax = NULL;
1996:   b->row  = isrow;
1997:   b->col  = iscol;
1998:   PetscCall(PetscObjectReference((PetscObject)isrow));
1999:   PetscCall(PetscObjectReference((PetscObject)iscol));
2000:   b->icol = isicol;
2001:   PetscCall(PetscMalloc1(n + 1, &b->solve_work));
2002:   /* In b structure:  Free imax, ilen, old a, old j.
2003:      Allocate bdiag, solve_work, new a, new j */
2004:   b->maxnz = b->nz = bi[n];

2006:   (fact)->info.factor_mallocs    = reallocs;
2007:   (fact)->info.fill_ratio_given  = f;
2008:   (fact)->info.fill_ratio_needed = ((PetscReal)bi[n]) / ((PetscReal)ai[n]);
2009:   (fact)->ops->lufactornumeric   = MatLUFactorNumeric_SeqAIJ_inplace;
2010:   if (a->inode.size) (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode_inplace;
2011:   PetscFunctionReturn(PETSC_SUCCESS);
2012: }
2013: #endif

2015: PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ(Mat B, Mat A, const MatFactorInfo *info)
2016: {
2017:   Mat             C  = B;
2018:   Mat_SeqAIJ     *a  = (Mat_SeqAIJ *)A->data;
2019:   Mat_SeqSBAIJ   *b  = (Mat_SeqSBAIJ *)C->data;
2020:   IS              ip = b->row, iip = b->icol;
2021:   const PetscInt *rip, *riip;
2022:   PetscInt        i, j, mbs = A->rmap->n, *bi = b->i, *bj = b->j, *bdiag = b->diag, *bjtmp;
2023:   PetscInt       *ai = a->i, *aj = a->j;
2024:   PetscInt        k, jmin, jmax, *c2r, *il, col, nexti, ili, nz;
2025:   MatScalar      *rtmp, *ba = b->a, *bval, *aa = a->a, dk, uikdi;
2026:   PetscBool       perm_identity;
2027:   FactorShiftCtx  sctx;
2028:   PetscReal       rs;
2029:   MatScalar       d, *v;

2031:   PetscFunctionBegin;
2032:   /* MatPivotSetUp(): initialize shift context sctx */
2033:   PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx)));

2035:   if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
2036:     sctx.shift_top = info->zeropivot;
2037:     for (i = 0; i < mbs; i++) {
2038:       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
2039:       d  = (aa)[a->diag[i]];
2040:       rs = -PetscAbsScalar(d) - PetscRealPart(d);
2041:       v  = aa + ai[i];
2042:       nz = ai[i + 1] - ai[i];
2043:       for (j = 0; j < nz; j++) rs += PetscAbsScalar(v[j]);
2044:       if (rs > sctx.shift_top) sctx.shift_top = rs;
2045:     }
2046:     sctx.shift_top *= 1.1;
2047:     sctx.nshift_max = 5;
2048:     sctx.shift_lo   = 0.;
2049:     sctx.shift_hi   = 1.;
2050:   }

2052:   PetscCall(ISGetIndices(ip, &rip));
2053:   PetscCall(ISGetIndices(iip, &riip));

2055:   /* allocate working arrays
2056:      c2r: linked list, keep track of pivot rows for a given column. c2r[col]: head of the list for a given col
2057:      il:  for active k row, il[i] gives the index of the 1st nonzero entry in U[i,k:n-1] in bj and ba arrays
2058:   */
2059:   PetscCall(PetscMalloc3(mbs, &rtmp, mbs, &il, mbs, &c2r));

2061:   do {
2062:     sctx.newshift = PETSC_FALSE;

2064:     for (i = 0; i < mbs; i++) c2r[i] = mbs;
2065:     if (mbs) il[0] = 0;

2067:     for (k = 0; k < mbs; k++) {
2068:       /* zero rtmp */
2069:       nz    = bi[k + 1] - bi[k];
2070:       bjtmp = bj + bi[k];
2071:       for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0;

2073:       /* load in initial unfactored row */
2074:       bval = ba + bi[k];
2075:       jmin = ai[rip[k]];
2076:       jmax = ai[rip[k] + 1];
2077:       for (j = jmin; j < jmax; j++) {
2078:         col = riip[aj[j]];
2079:         if (col >= k) { /* only take upper triangular entry */
2080:           rtmp[col] = aa[j];
2081:           *bval++   = 0.0; /* for in-place factorization */
2082:         }
2083:       }
2084:       /* shift the diagonal of the matrix: ZeropivotApply() */
2085:       rtmp[k] += sctx.shift_amount; /* shift the diagonal of the matrix */

2087:       /* modify k-th row by adding in those rows i with U(i,k)!=0 */
2088:       dk = rtmp[k];
2089:       i  = c2r[k]; /* first row to be added to k_th row  */

2091:       while (i < k) {
2092:         nexti = c2r[i]; /* next row to be added to k_th row */

2094:         /* compute multiplier, update diag(k) and U(i,k) */
2095:         ili   = il[i];                   /* index of first nonzero element in U(i,k:bms-1) */
2096:         uikdi = -ba[ili] * ba[bdiag[i]]; /* diagonal(k) */
2097:         dk += uikdi * ba[ili];           /* update diag[k] */
2098:         ba[ili] = uikdi;                 /* -U(i,k) */

2100:         /* add multiple of row i to k-th row */
2101:         jmin = ili + 1;
2102:         jmax = bi[i + 1];
2103:         if (jmin < jmax) {
2104:           for (j = jmin; j < jmax; j++) rtmp[bj[j]] += uikdi * ba[j];
2105:           /* update il and c2r for row i */
2106:           il[i]  = jmin;
2107:           j      = bj[jmin];
2108:           c2r[i] = c2r[j];
2109:           c2r[j] = i;
2110:         }
2111:         i = nexti;
2112:       }

2114:       /* copy data into U(k,:) */
2115:       rs   = 0.0;
2116:       jmin = bi[k];
2117:       jmax = bi[k + 1] - 1;
2118:       if (jmin < jmax) {
2119:         for (j = jmin; j < jmax; j++) {
2120:           col   = bj[j];
2121:           ba[j] = rtmp[col];
2122:           rs += PetscAbsScalar(ba[j]);
2123:         }
2124:         /* add the k-th row into il and c2r */
2125:         il[k]  = jmin;
2126:         i      = bj[jmin];
2127:         c2r[k] = c2r[i];
2128:         c2r[i] = k;
2129:       }

2131:       /* MatPivotCheck() */
2132:       sctx.rs = rs;
2133:       sctx.pv = dk;
2134:       PetscCall(MatPivotCheck(B, A, info, &sctx, i));
2135:       if (sctx.newshift) break;
2136:       dk = sctx.pv;

2138:       ba[bdiag[k]] = 1.0 / dk; /* U(k,k) */
2139:     }
2140:   } while (sctx.newshift);

2142:   PetscCall(PetscFree3(rtmp, il, c2r));
2143:   PetscCall(ISRestoreIndices(ip, &rip));
2144:   PetscCall(ISRestoreIndices(iip, &riip));

2146:   PetscCall(ISIdentity(ip, &perm_identity));
2147:   if (perm_identity) {
2148:     B->ops->solve          = MatSolve_SeqSBAIJ_1_NaturalOrdering;
2149:     B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering;
2150:     B->ops->forwardsolve   = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering;
2151:     B->ops->backwardsolve  = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering;
2152:   } else {
2153:     B->ops->solve          = MatSolve_SeqSBAIJ_1;
2154:     B->ops->solvetranspose = MatSolve_SeqSBAIJ_1;
2155:     B->ops->forwardsolve   = MatForwardSolve_SeqSBAIJ_1;
2156:     B->ops->backwardsolve  = MatBackwardSolve_SeqSBAIJ_1;
2157:   }

2159:   C->assembled    = PETSC_TRUE;
2160:   C->preallocated = PETSC_TRUE;

2162:   PetscCall(PetscLogFlops(C->rmap->n));

2164:   /* MatPivotView() */
2165:   if (sctx.nshift) {
2166:     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
2167:       PetscCall(PetscInfo(A, "number of shift_pd tries %" PetscInt_FMT ", shift_amount %g, diagonal shifted up by %e fraction top_value %e\n", sctx.nshift, (double)sctx.shift_amount, (double)sctx.shift_fraction, (double)sctx.shift_top));
2168:     } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
2169:       PetscCall(PetscInfo(A, "number of shift_nz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount));
2170:     } else if (info->shifttype == (PetscReal)MAT_SHIFT_INBLOCKS) {
2171:       PetscCall(PetscInfo(A, "number of shift_inblocks applied %" PetscInt_FMT ", each shift_amount %g\n", sctx.nshift, (double)info->shiftamount));
2172:     }
2173:   }
2174:   PetscFunctionReturn(PETSC_SUCCESS);
2175: }

2177: PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ_inplace(Mat B, Mat A, const MatFactorInfo *info)
2178: {
2179:   Mat             C  = B;
2180:   Mat_SeqAIJ     *a  = (Mat_SeqAIJ *)A->data;
2181:   Mat_SeqSBAIJ   *b  = (Mat_SeqSBAIJ *)C->data;
2182:   IS              ip = b->row, iip = b->icol;
2183:   const PetscInt *rip, *riip;
2184:   PetscInt        i, j, mbs = A->rmap->n, *bi = b->i, *bj = b->j, *bcol, *bjtmp;
2185:   PetscInt       *ai = a->i, *aj = a->j;
2186:   PetscInt        k, jmin, jmax, *jl, *il, col, nexti, ili, nz;
2187:   MatScalar      *rtmp, *ba = b->a, *bval, *aa = a->a, dk, uikdi;
2188:   PetscBool       perm_identity;
2189:   FactorShiftCtx  sctx;
2190:   PetscReal       rs;
2191:   MatScalar       d, *v;

2193:   PetscFunctionBegin;
2194:   /* MatPivotSetUp(): initialize shift context sctx */
2195:   PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx)));

2197:   if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
2198:     sctx.shift_top = info->zeropivot;
2199:     for (i = 0; i < mbs; i++) {
2200:       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
2201:       d  = (aa)[a->diag[i]];
2202:       rs = -PetscAbsScalar(d) - PetscRealPart(d);
2203:       v  = aa + ai[i];
2204:       nz = ai[i + 1] - ai[i];
2205:       for (j = 0; j < nz; j++) rs += PetscAbsScalar(v[j]);
2206:       if (rs > sctx.shift_top) sctx.shift_top = rs;
2207:     }
2208:     sctx.shift_top *= 1.1;
2209:     sctx.nshift_max = 5;
2210:     sctx.shift_lo   = 0.;
2211:     sctx.shift_hi   = 1.;
2212:   }

2214:   PetscCall(ISGetIndices(ip, &rip));
2215:   PetscCall(ISGetIndices(iip, &riip));

2217:   /* initialization */
2218:   PetscCall(PetscMalloc3(mbs, &rtmp, mbs, &il, mbs, &jl));

2220:   do {
2221:     sctx.newshift = PETSC_FALSE;

2223:     for (i = 0; i < mbs; i++) jl[i] = mbs;
2224:     il[0] = 0;

2226:     for (k = 0; k < mbs; k++) {
2227:       /* zero rtmp */
2228:       nz    = bi[k + 1] - bi[k];
2229:       bjtmp = bj + bi[k];
2230:       for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0;

2232:       bval = ba + bi[k];
2233:       /* initialize k-th row by the perm[k]-th row of A */
2234:       jmin = ai[rip[k]];
2235:       jmax = ai[rip[k] + 1];
2236:       for (j = jmin; j < jmax; j++) {
2237:         col = riip[aj[j]];
2238:         if (col >= k) { /* only take upper triangular entry */
2239:           rtmp[col] = aa[j];
2240:           *bval++   = 0.0; /* for in-place factorization */
2241:         }
2242:       }
2243:       /* shift the diagonal of the matrix */
2244:       if (sctx.nshift) rtmp[k] += sctx.shift_amount;

2246:       /* modify k-th row by adding in those rows i with U(i,k)!=0 */
2247:       dk = rtmp[k];
2248:       i  = jl[k]; /* first row to be added to k_th row  */

2250:       while (i < k) {
2251:         nexti = jl[i]; /* next row to be added to k_th row */

2253:         /* compute multiplier, update diag(k) and U(i,k) */
2254:         ili   = il[i];                /* index of first nonzero element in U(i,k:bms-1) */
2255:         uikdi = -ba[ili] * ba[bi[i]]; /* diagonal(k) */
2256:         dk += uikdi * ba[ili];
2257:         ba[ili] = uikdi; /* -U(i,k) */

2259:         /* add multiple of row i to k-th row */
2260:         jmin = ili + 1;
2261:         jmax = bi[i + 1];
2262:         if (jmin < jmax) {
2263:           for (j = jmin; j < jmax; j++) rtmp[bj[j]] += uikdi * ba[j];
2264:           /* update il and jl for row i */
2265:           il[i] = jmin;
2266:           j     = bj[jmin];
2267:           jl[i] = jl[j];
2268:           jl[j] = i;
2269:         }
2270:         i = nexti;
2271:       }

2273:       /* shift the diagonals when zero pivot is detected */
2274:       /* compute rs=sum of abs(off-diagonal) */
2275:       rs   = 0.0;
2276:       jmin = bi[k] + 1;
2277:       nz   = bi[k + 1] - jmin;
2278:       bcol = bj + jmin;
2279:       for (j = 0; j < nz; j++) rs += PetscAbsScalar(rtmp[bcol[j]]);

2281:       sctx.rs = rs;
2282:       sctx.pv = dk;
2283:       PetscCall(MatPivotCheck(B, A, info, &sctx, k));
2284:       if (sctx.newshift) break;
2285:       dk = sctx.pv;

2287:       /* copy data into U(k,:) */
2288:       ba[bi[k]] = 1.0 / dk; /* U(k,k) */
2289:       jmin      = bi[k] + 1;
2290:       jmax      = bi[k + 1];
2291:       if (jmin < jmax) {
2292:         for (j = jmin; j < jmax; j++) {
2293:           col   = bj[j];
2294:           ba[j] = rtmp[col];
2295:         }
2296:         /* add the k-th row into il and jl */
2297:         il[k] = jmin;
2298:         i     = bj[jmin];
2299:         jl[k] = jl[i];
2300:         jl[i] = k;
2301:       }
2302:     }
2303:   } while (sctx.newshift);

2305:   PetscCall(PetscFree3(rtmp, il, jl));
2306:   PetscCall(ISRestoreIndices(ip, &rip));
2307:   PetscCall(ISRestoreIndices(iip, &riip));

2309:   PetscCall(ISIdentity(ip, &perm_identity));
2310:   if (perm_identity) {
2311:     B->ops->solve          = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
2312:     B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
2313:     B->ops->forwardsolve   = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
2314:     B->ops->backwardsolve  = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
2315:   } else {
2316:     B->ops->solve          = MatSolve_SeqSBAIJ_1_inplace;
2317:     B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_inplace;
2318:     B->ops->forwardsolve   = MatForwardSolve_SeqSBAIJ_1_inplace;
2319:     B->ops->backwardsolve  = MatBackwardSolve_SeqSBAIJ_1_inplace;
2320:   }

2322:   C->assembled    = PETSC_TRUE;
2323:   C->preallocated = PETSC_TRUE;

2325:   PetscCall(PetscLogFlops(C->rmap->n));
2326:   if (sctx.nshift) {
2327:     if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
2328:       PetscCall(PetscInfo(A, "number of shiftnz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount));
2329:     } else if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
2330:       PetscCall(PetscInfo(A, "number of shiftpd tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount));
2331:     }
2332:   }
2333:   PetscFunctionReturn(PETSC_SUCCESS);
2334: }

2336: /*
2337:    icc() under revised new data structure.
2338:    Factored arrays bj and ba are stored as
2339:      U(0,:),...,U(i,:),U(n-1,:)

2341:    ui=fact->i is an array of size n+1, in which
2342:    ui+
2343:      ui[i]:  points to 1st entry of U(i,:),i=0,...,n-1
2344:      ui[n]:  points to U(n-1,n-1)+1

2346:   udiag=fact->diag is an array of size n,in which
2347:      udiag[i]: points to diagonal of U(i,:), i=0,...,n-1

2349:    U(i,:) contains udiag[i] as its last entry, i.e.,
2350:     U(i,:) = (u[i,i+1],...,u[i,n-1],diag[i])
2351: */

2353: PetscErrorCode MatICCFactorSymbolic_SeqAIJ(Mat fact, Mat A, IS perm, const MatFactorInfo *info)
2354: {
2355:   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)A->data;
2356:   Mat_SeqSBAIJ      *b;
2357:   PetscBool          perm_identity, missing;
2358:   PetscInt           reallocs = 0, i, *ai = a->i, *aj = a->j, am = A->rmap->n, *ui, *udiag;
2359:   const PetscInt    *rip, *riip;
2360:   PetscInt           jmin, jmax, nzk, k, j, *jl, prow, *il, nextprow;
2361:   PetscInt           nlnk, *lnk, *lnk_lvl = NULL, d;
2362:   PetscInt           ncols, ncols_upper, *cols, *ajtmp, *uj, **uj_ptr, **uj_lvl_ptr;
2363:   PetscReal          fill = info->fill, levels = info->levels;
2364:   PetscFreeSpaceList free_space = NULL, current_space = NULL;
2365:   PetscFreeSpaceList free_space_lvl = NULL, current_space_lvl = NULL;
2366:   PetscBT            lnkbt;
2367:   IS                 iperm;

2369:   PetscFunctionBegin;
2370:   PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT, A->rmap->n, A->cmap->n);
2371:   PetscCall(MatMissingDiagonal(A, &missing, &d));
2372:   PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, d);
2373:   PetscCall(ISIdentity(perm, &perm_identity));
2374:   PetscCall(ISInvertPermutation(perm, PETSC_DECIDE, &iperm));

2376:   PetscCall(PetscMalloc1(am + 1, &ui));
2377:   PetscCall(PetscMalloc1(am + 1, &udiag));
2378:   ui[0] = 0;

2380:   /* ICC(0) without matrix ordering: simply rearrange column indices */
2381:   if (!levels && perm_identity) {
2382:     for (i = 0; i < am; i++) {
2383:       ncols     = ai[i + 1] - a->diag[i];
2384:       ui[i + 1] = ui[i] + ncols;
2385:       udiag[i]  = ui[i + 1] - 1; /* points to the last entry of U(i,:) */
2386:     }
2387:     PetscCall(PetscMalloc1(ui[am] + 1, &uj));
2388:     cols = uj;
2389:     for (i = 0; i < am; i++) {
2390:       aj    = a->j + a->diag[i] + 1; /* 1st entry of U(i,:) without diagonal */
2391:       ncols = ai[i + 1] - a->diag[i] - 1;
2392:       for (j = 0; j < ncols; j++) *cols++ = aj[j];
2393:       *cols++ = i; /* diagonal is located as the last entry of U(i,:) */
2394:     }
2395:   } else { /* case: levels>0 || (levels=0 && !perm_identity) */
2396:     PetscCall(ISGetIndices(iperm, &riip));
2397:     PetscCall(ISGetIndices(perm, &rip));

2399:     /* initialization */
2400:     PetscCall(PetscMalloc1(am + 1, &ajtmp));

2402:     /* jl: linked list for storing indices of the pivot rows
2403:        il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
2404:     PetscCall(PetscMalloc4(am, &uj_ptr, am, &uj_lvl_ptr, am, &jl, am, &il));
2405:     for (i = 0; i < am; i++) {
2406:       jl[i] = am;
2407:       il[i] = 0;
2408:     }

2410:     /* create and initialize a linked list for storing column indices of the active row k */
2411:     nlnk = am + 1;
2412:     PetscCall(PetscIncompleteLLCreate(am, am, nlnk, lnk, lnk_lvl, lnkbt));

2414:     /* initial FreeSpace size is fill*(ai[am]+am)/2 */
2415:     PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, (ai[am] + am) / 2), &free_space));
2416:     current_space = free_space;
2417:     PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, (ai[am] + am) / 2), &free_space_lvl));
2418:     current_space_lvl = free_space_lvl;

2420:     for (k = 0; k < am; k++) { /* for each active row k */
2421:       /* initialize lnk by the column indices of row rip[k] of A */
2422:       nzk   = 0;
2423:       ncols = ai[rip[k] + 1] - ai[rip[k]];
2424:       PetscCheck(ncols, PETSC_COMM_SELF, PETSC_ERR_MAT_CH_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, rip[k], k);
2425:       ncols_upper = 0;
2426:       for (j = 0; j < ncols; j++) {
2427:         i = *(aj + ai[rip[k]] + j); /* unpermuted column index */
2428:         if (riip[i] >= k) {         /* only take upper triangular entry */
2429:           ajtmp[ncols_upper] = i;
2430:           ncols_upper++;
2431:         }
2432:       }
2433:       PetscCall(PetscIncompleteLLInit(ncols_upper, ajtmp, am, riip, &nlnk, lnk, lnk_lvl, lnkbt));
2434:       nzk += nlnk;

2436:       /* update lnk by computing fill-in for each pivot row to be merged in */
2437:       prow = jl[k]; /* 1st pivot row */

2439:       while (prow < k) {
2440:         nextprow = jl[prow];

2442:         /* merge prow into k-th row */
2443:         jmin  = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */
2444:         jmax  = ui[prow + 1];
2445:         ncols = jmax - jmin;
2446:         i     = jmin - ui[prow];
2447:         cols  = uj_ptr[prow] + i;     /* points to the 2nd nzero entry in U(prow,k:am-1) */
2448:         uj    = uj_lvl_ptr[prow] + i; /* levels of cols */
2449:         j     = *(uj - 1);
2450:         PetscCall(PetscICCLLAddSorted(ncols, cols, levels, uj, am, &nlnk, lnk, lnk_lvl, lnkbt, j));
2451:         nzk += nlnk;

2453:         /* update il and jl for prow */
2454:         if (jmin < jmax) {
2455:           il[prow] = jmin;
2456:           j        = *cols;
2457:           jl[prow] = jl[j];
2458:           jl[j]    = prow;
2459:         }
2460:         prow = nextprow;
2461:       }

2463:       /* if free space is not available, make more free space */
2464:       if (current_space->local_remaining < nzk) {
2465:         i = am - k + 1;                                    /* num of unfactored rows */
2466:         i = PetscIntMultTruncate(i, PetscMin(nzk, i - 1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
2467:         PetscCall(PetscFreeSpaceGet(i, &current_space));
2468:         PetscCall(PetscFreeSpaceGet(i, &current_space_lvl));
2469:         reallocs++;
2470:       }

2472:       /* copy data into free_space and free_space_lvl, then initialize lnk */
2473:       PetscCheck(nzk != 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Empty row %" PetscInt_FMT " in ICC matrix factor", k);
2474:       PetscCall(PetscIncompleteLLClean(am, am, nzk, lnk, lnk_lvl, current_space->array, current_space_lvl->array, lnkbt));

2476:       /* add the k-th row into il and jl */
2477:       if (nzk > 1) {
2478:         i     = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
2479:         jl[k] = jl[i];
2480:         jl[i] = k;
2481:         il[k] = ui[k] + 1;
2482:       }
2483:       uj_ptr[k]     = current_space->array;
2484:       uj_lvl_ptr[k] = current_space_lvl->array;

2486:       current_space->array += nzk;
2487:       current_space->local_used += nzk;
2488:       current_space->local_remaining -= nzk;

2490:       current_space_lvl->array += nzk;
2491:       current_space_lvl->local_used += nzk;
2492:       current_space_lvl->local_remaining -= nzk;

2494:       ui[k + 1] = ui[k] + nzk;
2495:     }

2497:     PetscCall(ISRestoreIndices(perm, &rip));
2498:     PetscCall(ISRestoreIndices(iperm, &riip));
2499:     PetscCall(PetscFree4(uj_ptr, uj_lvl_ptr, jl, il));
2500:     PetscCall(PetscFree(ajtmp));

2502:     /* copy free_space into uj and free free_space; set ui, uj, udiag in new datastructure; */
2503:     PetscCall(PetscMalloc1(ui[am] + 1, &uj));
2504:     PetscCall(PetscFreeSpaceContiguous_Cholesky(&free_space, uj, am, ui, udiag)); /* store matrix factor  */
2505:     PetscCall(PetscIncompleteLLDestroy(lnk, lnkbt));
2506:     PetscCall(PetscFreeSpaceDestroy(free_space_lvl));

2508:   } /* end of case: levels>0 || (levels=0 && !perm_identity) */

2510:   /* put together the new matrix in MATSEQSBAIJ format */
2511:   b               = (Mat_SeqSBAIJ *)(fact)->data;
2512:   b->singlemalloc = PETSC_FALSE;

2514:   PetscCall(PetscMalloc1(ui[am] + 1, &b->a));

2516:   b->j         = uj;
2517:   b->i         = ui;
2518:   b->diag      = udiag;
2519:   b->free_diag = PETSC_TRUE;
2520:   b->ilen      = NULL;
2521:   b->imax      = NULL;
2522:   b->row       = perm;
2523:   b->col       = perm;
2524:   PetscCall(PetscObjectReference((PetscObject)perm));
2525:   PetscCall(PetscObjectReference((PetscObject)perm));
2526:   b->icol          = iperm;
2527:   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */

2529:   PetscCall(PetscMalloc1(am + 1, &b->solve_work));

2531:   b->maxnz = b->nz = ui[am];
2532:   b->free_a        = PETSC_TRUE;
2533:   b->free_ij       = PETSC_TRUE;

2535:   fact->info.factor_mallocs   = reallocs;
2536:   fact->info.fill_ratio_given = fill;
2537:   if (ai[am] != 0) {
2538:     /* nonzeros in lower triangular part of A (including diagonals) = (ai[am]+am)/2 */
2539:     fact->info.fill_ratio_needed = ((PetscReal)2 * ui[am]) / (ai[am] + am);
2540:   } else {
2541:     fact->info.fill_ratio_needed = 0.0;
2542:   }
2543: #if defined(PETSC_USE_INFO)
2544:   if (ai[am] != 0) {
2545:     PetscReal af = fact->info.fill_ratio_needed;
2546:     PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)fill, (double)af));
2547:     PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af));
2548:     PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g) for best performance.\n", (double)af));
2549:   } else {
2550:     PetscCall(PetscInfo(A, "Empty matrix\n"));
2551:   }
2552: #endif
2553:   fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ;
2554:   PetscFunctionReturn(PETSC_SUCCESS);
2555: }

2557: #if 0
2558: // unused
2559: static PetscErrorCode MatICCFactorSymbolic_SeqAIJ_inplace(Mat fact, Mat A, IS perm, const MatFactorInfo *info)
2560: {
2561:   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)A->data;
2562:   Mat_SeqSBAIJ      *b;
2563:   PetscBool          perm_identity, missing;
2564:   PetscInt           reallocs = 0, i, *ai = a->i, *aj = a->j, am = A->rmap->n, *ui, *udiag;
2565:   const PetscInt    *rip, *riip;
2566:   PetscInt           jmin, jmax, nzk, k, j, *jl, prow, *il, nextprow;
2567:   PetscInt           nlnk, *lnk, *lnk_lvl = NULL, d;
2568:   PetscInt           ncols, ncols_upper, *cols, *ajtmp, *uj, **uj_ptr, **uj_lvl_ptr;
2569:   PetscReal          fill = info->fill, levels = info->levels;
2570:   PetscFreeSpaceList free_space = NULL, current_space = NULL;
2571:   PetscFreeSpaceList free_space_lvl = NULL, current_space_lvl = NULL;
2572:   PetscBT            lnkbt;
2573:   IS                 iperm;

2575:   PetscFunctionBegin;
2576:   PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT, A->rmap->n, A->cmap->n);
2577:   PetscCall(MatMissingDiagonal(A, &missing, &d));
2578:   PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, d);
2579:   PetscCall(ISIdentity(perm, &perm_identity));
2580:   PetscCall(ISInvertPermutation(perm, PETSC_DECIDE, &iperm));

2582:   PetscCall(PetscMalloc1(am + 1, &ui));
2583:   PetscCall(PetscMalloc1(am + 1, &udiag));
2584:   ui[0] = 0;

2586:   /* ICC(0) without matrix ordering: simply copies fill pattern */
2587:   if (!levels && perm_identity) {
2588:     for (i = 0; i < am; i++) {
2589:       ui[i + 1] = ui[i] + ai[i + 1] - a->diag[i];
2590:       udiag[i]  = ui[i];
2591:     }
2592:     PetscCall(PetscMalloc1(ui[am] + 1, &uj));
2593:     cols = uj;
2594:     for (i = 0; i < am; i++) {
2595:       aj    = a->j + a->diag[i];
2596:       ncols = ui[i + 1] - ui[i];
2597:       for (j = 0; j < ncols; j++) *cols++ = *aj++;
2598:     }
2599:   } else { /* case: levels>0 || (levels=0 && !perm_identity) */
2600:     PetscCall(ISGetIndices(iperm, &riip));
2601:     PetscCall(ISGetIndices(perm, &rip));

2603:     /* initialization */
2604:     PetscCall(PetscMalloc1(am + 1, &ajtmp));

2606:     /* jl: linked list for storing indices of the pivot rows
2607:        il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
2608:     PetscCall(PetscMalloc4(am, &uj_ptr, am, &uj_lvl_ptr, am, &jl, am, &il));
2609:     for (i = 0; i < am; i++) {
2610:       jl[i] = am;
2611:       il[i] = 0;
2612:     }

2614:     /* create and initialize a linked list for storing column indices of the active row k */
2615:     nlnk = am + 1;
2616:     PetscCall(PetscIncompleteLLCreate(am, am, nlnk, lnk, lnk_lvl, lnkbt));

2618:     /* initial FreeSpace size is fill*(ai[am]+1) */
2619:     PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, ai[am] + 1), &free_space));
2620:     current_space = free_space;
2621:     PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, ai[am] + 1), &free_space_lvl));
2622:     current_space_lvl = free_space_lvl;

2624:     for (k = 0; k < am; k++) { /* for each active row k */
2625:       /* initialize lnk by the column indices of row rip[k] of A */
2626:       nzk   = 0;
2627:       ncols = ai[rip[k] + 1] - ai[rip[k]];
2628:       PetscCheck(ncols, PETSC_COMM_SELF, PETSC_ERR_MAT_CH_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, rip[k], k);
2629:       ncols_upper = 0;
2630:       for (j = 0; j < ncols; j++) {
2631:         i = *(aj + ai[rip[k]] + j); /* unpermuted column index */
2632:         if (riip[i] >= k) {         /* only take upper triangular entry */
2633:           ajtmp[ncols_upper] = i;
2634:           ncols_upper++;
2635:         }
2636:       }
2637:       PetscCall(PetscIncompleteLLInit(ncols_upper, ajtmp, am, riip, &nlnk, lnk, lnk_lvl, lnkbt));
2638:       nzk += nlnk;

2640:       /* update lnk by computing fill-in for each pivot row to be merged in */
2641:       prow = jl[k]; /* 1st pivot row */

2643:       while (prow < k) {
2644:         nextprow = jl[prow];

2646:         /* merge prow into k-th row */
2647:         jmin  = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */
2648:         jmax  = ui[prow + 1];
2649:         ncols = jmax - jmin;
2650:         i     = jmin - ui[prow];
2651:         cols  = uj_ptr[prow] + i;     /* points to the 2nd nzero entry in U(prow,k:am-1) */
2652:         uj    = uj_lvl_ptr[prow] + i; /* levels of cols */
2653:         j     = *(uj - 1);
2654:         PetscCall(PetscICCLLAddSorted(ncols, cols, levels, uj, am, &nlnk, lnk, lnk_lvl, lnkbt, j));
2655:         nzk += nlnk;

2657:         /* update il and jl for prow */
2658:         if (jmin < jmax) {
2659:           il[prow] = jmin;
2660:           j        = *cols;
2661:           jl[prow] = jl[j];
2662:           jl[j]    = prow;
2663:         }
2664:         prow = nextprow;
2665:       }

2667:       /* if free space is not available, make more free space */
2668:       if (current_space->local_remaining < nzk) {
2669:         i = am - k + 1;                                    /* num of unfactored rows */
2670:         i = PetscIntMultTruncate(i, PetscMin(nzk, i - 1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
2671:         PetscCall(PetscFreeSpaceGet(i, &current_space));
2672:         PetscCall(PetscFreeSpaceGet(i, &current_space_lvl));
2673:         reallocs++;
2674:       }

2676:       /* copy data into free_space and free_space_lvl, then initialize lnk */
2677:       PetscCheck(nzk, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Empty row %" PetscInt_FMT " in ICC matrix factor", k);
2678:       PetscCall(PetscIncompleteLLClean(am, am, nzk, lnk, lnk_lvl, current_space->array, current_space_lvl->array, lnkbt));

2680:       /* add the k-th row into il and jl */
2681:       if (nzk > 1) {
2682:         i     = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
2683:         jl[k] = jl[i];
2684:         jl[i] = k;
2685:         il[k] = ui[k] + 1;
2686:       }
2687:       uj_ptr[k]     = current_space->array;
2688:       uj_lvl_ptr[k] = current_space_lvl->array;

2690:       current_space->array += nzk;
2691:       current_space->local_used += nzk;
2692:       current_space->local_remaining -= nzk;

2694:       current_space_lvl->array += nzk;
2695:       current_space_lvl->local_used += nzk;
2696:       current_space_lvl->local_remaining -= nzk;

2698:       ui[k + 1] = ui[k] + nzk;
2699:     }

2701:   #if defined(PETSC_USE_INFO)
2702:     if (ai[am] != 0) {
2703:       PetscReal af = (PetscReal)ui[am] / ((PetscReal)ai[am]);
2704:       PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)fill, (double)af));
2705:       PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af));
2706:       PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g) for best performance.\n", (double)af));
2707:     } else {
2708:       PetscCall(PetscInfo(A, "Empty matrix\n"));
2709:     }
2710:   #endif

2712:     PetscCall(ISRestoreIndices(perm, &rip));
2713:     PetscCall(ISRestoreIndices(iperm, &riip));
2714:     PetscCall(PetscFree4(uj_ptr, uj_lvl_ptr, jl, il));
2715:     PetscCall(PetscFree(ajtmp));

2717:     /* destroy list of free space and other temporary array(s) */
2718:     PetscCall(PetscMalloc1(ui[am] + 1, &uj));
2719:     PetscCall(PetscFreeSpaceContiguous(&free_space, uj));
2720:     PetscCall(PetscIncompleteLLDestroy(lnk, lnkbt));
2721:     PetscCall(PetscFreeSpaceDestroy(free_space_lvl));

2723:   } /* end of case: levels>0 || (levels=0 && !perm_identity) */

2725:   /* put together the new matrix in MATSEQSBAIJ format */

2727:   b               = (Mat_SeqSBAIJ *)fact->data;
2728:   b->singlemalloc = PETSC_FALSE;

2730:   PetscCall(PetscMalloc1(ui[am] + 1, &b->a));

2732:   b->j         = uj;
2733:   b->i         = ui;
2734:   b->diag      = udiag;
2735:   b->free_diag = PETSC_TRUE;
2736:   b->ilen      = NULL;
2737:   b->imax      = NULL;
2738:   b->row       = perm;
2739:   b->col       = perm;

2741:   PetscCall(PetscObjectReference((PetscObject)perm));
2742:   PetscCall(PetscObjectReference((PetscObject)perm));

2744:   b->icol          = iperm;
2745:   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
2746:   PetscCall(PetscMalloc1(am + 1, &b->solve_work));
2747:   b->maxnz = b->nz = ui[am];
2748:   b->free_a        = PETSC_TRUE;
2749:   b->free_ij       = PETSC_TRUE;

2751:   fact->info.factor_mallocs   = reallocs;
2752:   fact->info.fill_ratio_given = fill;
2753:   if (ai[am] != 0) {
2754:     fact->info.fill_ratio_needed = ((PetscReal)ui[am]) / ((PetscReal)ai[am]);
2755:   } else {
2756:     fact->info.fill_ratio_needed = 0.0;
2757:   }
2758:   fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ_inplace;
2759:   PetscFunctionReturn(PETSC_SUCCESS);
2760: }
2761: #endif

2763: PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ(Mat fact, Mat A, IS perm, const MatFactorInfo *info)
2764: {
2765:   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)A->data;
2766:   Mat_SeqSBAIJ      *b;
2767:   PetscBool          perm_identity, missing;
2768:   PetscReal          fill = info->fill;
2769:   const PetscInt    *rip, *riip;
2770:   PetscInt           i, am = A->rmap->n, *ai = a->i, *aj = a->j, reallocs = 0, prow;
2771:   PetscInt          *jl, jmin, jmax, nzk, *ui, k, j, *il, nextprow;
2772:   PetscInt           nlnk, *lnk, ncols, ncols_upper, *cols, *uj, **ui_ptr, *uj_ptr, *udiag;
2773:   PetscFreeSpaceList free_space = NULL, current_space = NULL;
2774:   PetscBT            lnkbt;
2775:   IS                 iperm;

2777:   PetscFunctionBegin;
2778:   PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT, A->rmap->n, A->cmap->n);
2779:   PetscCall(MatMissingDiagonal(A, &missing, &i));
2780:   PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i);

2782:   /* check whether perm is the identity mapping */
2783:   PetscCall(ISIdentity(perm, &perm_identity));
2784:   PetscCall(ISInvertPermutation(perm, PETSC_DECIDE, &iperm));
2785:   PetscCall(ISGetIndices(iperm, &riip));
2786:   PetscCall(ISGetIndices(perm, &rip));

2788:   /* initialization */
2789:   PetscCall(PetscMalloc1(am + 1, &ui));
2790:   PetscCall(PetscMalloc1(am + 1, &udiag));
2791:   ui[0] = 0;

2793:   /* jl: linked list for storing indices of the pivot rows
2794:      il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
2795:   PetscCall(PetscMalloc4(am, &ui_ptr, am, &jl, am, &il, am, &cols));
2796:   for (i = 0; i < am; i++) {
2797:     jl[i] = am;
2798:     il[i] = 0;
2799:   }

2801:   /* create and initialize a linked list for storing column indices of the active row k */
2802:   nlnk = am + 1;
2803:   PetscCall(PetscLLCreate(am, am, nlnk, lnk, lnkbt));

2805:   /* initial FreeSpace size is fill*(ai[am]+am)/2 */
2806:   PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, (ai[am] + am) / 2), &free_space));
2807:   current_space = free_space;

2809:   for (k = 0; k < am; k++) { /* for each active row k */
2810:     /* initialize lnk by the column indices of row rip[k] of A */
2811:     nzk   = 0;
2812:     ncols = ai[rip[k] + 1] - ai[rip[k]];
2813:     PetscCheck(ncols, PETSC_COMM_SELF, PETSC_ERR_MAT_CH_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, rip[k], k);
2814:     ncols_upper = 0;
2815:     for (j = 0; j < ncols; j++) {
2816:       i = riip[*(aj + ai[rip[k]] + j)];
2817:       if (i >= k) { /* only take upper triangular entry */
2818:         cols[ncols_upper] = i;
2819:         ncols_upper++;
2820:       }
2821:     }
2822:     PetscCall(PetscLLAdd(ncols_upper, cols, am, &nlnk, lnk, lnkbt));
2823:     nzk += nlnk;

2825:     /* update lnk by computing fill-in for each pivot row to be merged in */
2826:     prow = jl[k]; /* 1st pivot row */

2828:     while (prow < k) {
2829:       nextprow = jl[prow];
2830:       /* merge prow into k-th row */
2831:       jmin   = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */
2832:       jmax   = ui[prow + 1];
2833:       ncols  = jmax - jmin;
2834:       uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */
2835:       PetscCall(PetscLLAddSorted(ncols, uj_ptr, am, &nlnk, lnk, lnkbt));
2836:       nzk += nlnk;

2838:       /* update il and jl for prow */
2839:       if (jmin < jmax) {
2840:         il[prow] = jmin;
2841:         j        = *uj_ptr;
2842:         jl[prow] = jl[j];
2843:         jl[j]    = prow;
2844:       }
2845:       prow = nextprow;
2846:     }

2848:     /* if free space is not available, make more free space */
2849:     if (current_space->local_remaining < nzk) {
2850:       i = am - k + 1;                                    /* num of unfactored rows */
2851:       i = PetscIntMultTruncate(i, PetscMin(nzk, i - 1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
2852:       PetscCall(PetscFreeSpaceGet(i, &current_space));
2853:       reallocs++;
2854:     }

2856:     /* copy data into free space, then initialize lnk */
2857:     PetscCall(PetscLLClean(am, am, nzk, lnk, current_space->array, lnkbt));

2859:     /* add the k-th row into il and jl */
2860:     if (nzk > 1) {
2861:       i     = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
2862:       jl[k] = jl[i];
2863:       jl[i] = k;
2864:       il[k] = ui[k] + 1;
2865:     }
2866:     ui_ptr[k] = current_space->array;

2868:     current_space->array += nzk;
2869:     current_space->local_used += nzk;
2870:     current_space->local_remaining -= nzk;

2872:     ui[k + 1] = ui[k] + nzk;
2873:   }

2875:   PetscCall(ISRestoreIndices(perm, &rip));
2876:   PetscCall(ISRestoreIndices(iperm, &riip));
2877:   PetscCall(PetscFree4(ui_ptr, jl, il, cols));

2879:   /* copy free_space into uj and free free_space; set ui, uj, udiag in new datastructure; */
2880:   PetscCall(PetscMalloc1(ui[am] + 1, &uj));
2881:   PetscCall(PetscFreeSpaceContiguous_Cholesky(&free_space, uj, am, ui, udiag)); /* store matrix factor */
2882:   PetscCall(PetscLLDestroy(lnk, lnkbt));

2884:   /* put together the new matrix in MATSEQSBAIJ format */

2886:   b               = (Mat_SeqSBAIJ *)fact->data;
2887:   b->singlemalloc = PETSC_FALSE;
2888:   b->free_a       = PETSC_TRUE;
2889:   b->free_ij      = PETSC_TRUE;

2891:   PetscCall(PetscMalloc1(ui[am] + 1, &b->a));

2893:   b->j         = uj;
2894:   b->i         = ui;
2895:   b->diag      = udiag;
2896:   b->free_diag = PETSC_TRUE;
2897:   b->ilen      = NULL;
2898:   b->imax      = NULL;
2899:   b->row       = perm;
2900:   b->col       = perm;

2902:   PetscCall(PetscObjectReference((PetscObject)perm));
2903:   PetscCall(PetscObjectReference((PetscObject)perm));

2905:   b->icol          = iperm;
2906:   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */

2908:   PetscCall(PetscMalloc1(am + 1, &b->solve_work));

2910:   b->maxnz = b->nz = ui[am];

2912:   fact->info.factor_mallocs   = reallocs;
2913:   fact->info.fill_ratio_given = fill;
2914:   if (ai[am] != 0) {
2915:     /* nonzeros in lower triangular part of A (including diagonals) = (ai[am]+am)/2 */
2916:     fact->info.fill_ratio_needed = ((PetscReal)2 * ui[am]) / (ai[am] + am);
2917:   } else {
2918:     fact->info.fill_ratio_needed = 0.0;
2919:   }
2920: #if defined(PETSC_USE_INFO)
2921:   if (ai[am] != 0) {
2922:     PetscReal af = fact->info.fill_ratio_needed;
2923:     PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)fill, (double)af));
2924:     PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af));
2925:     PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g) for best performance.\n", (double)af));
2926:   } else {
2927:     PetscCall(PetscInfo(A, "Empty matrix\n"));
2928:   }
2929: #endif
2930:   fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ;
2931:   PetscFunctionReturn(PETSC_SUCCESS);
2932: }

2934: #if 0
2935: // unused
2936: static PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ_inplace(Mat fact, Mat A, IS perm, const MatFactorInfo *info)
2937: {
2938:   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)A->data;
2939:   Mat_SeqSBAIJ      *b;
2940:   PetscBool          perm_identity, missing;
2941:   PetscReal          fill = info->fill;
2942:   const PetscInt    *rip, *riip;
2943:   PetscInt           i, am = A->rmap->n, *ai = a->i, *aj = a->j, reallocs = 0, prow;
2944:   PetscInt          *jl, jmin, jmax, nzk, *ui, k, j, *il, nextprow;
2945:   PetscInt           nlnk, *lnk, ncols, ncols_upper, *cols, *uj, **ui_ptr, *uj_ptr;
2946:   PetscFreeSpaceList free_space = NULL, current_space = NULL;
2947:   PetscBT            lnkbt;
2948:   IS                 iperm;

2950:   PetscFunctionBegin;
2951:   PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT, A->rmap->n, A->cmap->n);
2952:   PetscCall(MatMissingDiagonal(A, &missing, &i));
2953:   PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i);

2955:   /* check whether perm is the identity mapping */
2956:   PetscCall(ISIdentity(perm, &perm_identity));
2957:   PetscCall(ISInvertPermutation(perm, PETSC_DECIDE, &iperm));
2958:   PetscCall(ISGetIndices(iperm, &riip));
2959:   PetscCall(ISGetIndices(perm, &rip));

2961:   /* initialization */
2962:   PetscCall(PetscMalloc1(am + 1, &ui));
2963:   ui[0] = 0;

2965:   /* jl: linked list for storing indices of the pivot rows
2966:      il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
2967:   PetscCall(PetscMalloc4(am, &ui_ptr, am, &jl, am, &il, am, &cols));
2968:   for (i = 0; i < am; i++) {
2969:     jl[i] = am;
2970:     il[i] = 0;
2971:   }

2973:   /* create and initialize a linked list for storing column indices of the active row k */
2974:   nlnk = am + 1;
2975:   PetscCall(PetscLLCreate(am, am, nlnk, lnk, lnkbt));

2977:   /* initial FreeSpace size is fill*(ai[am]+1) */
2978:   PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, ai[am] + 1), &free_space));
2979:   current_space = free_space;

2981:   for (k = 0; k < am; k++) { /* for each active row k */
2982:     /* initialize lnk by the column indices of row rip[k] of A */
2983:     nzk   = 0;
2984:     ncols = ai[rip[k] + 1] - ai[rip[k]];
2985:     PetscCheck(ncols, PETSC_COMM_SELF, PETSC_ERR_MAT_CH_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, rip[k], k);
2986:     ncols_upper = 0;
2987:     for (j = 0; j < ncols; j++) {
2988:       i = riip[*(aj + ai[rip[k]] + j)];
2989:       if (i >= k) { /* only take upper triangular entry */
2990:         cols[ncols_upper] = i;
2991:         ncols_upper++;
2992:       }
2993:     }
2994:     PetscCall(PetscLLAdd(ncols_upper, cols, am, &nlnk, lnk, lnkbt));
2995:     nzk += nlnk;

2997:     /* update lnk by computing fill-in for each pivot row to be merged in */
2998:     prow = jl[k]; /* 1st pivot row */

3000:     while (prow < k) {
3001:       nextprow = jl[prow];
3002:       /* merge prow into k-th row */
3003:       jmin   = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */
3004:       jmax   = ui[prow + 1];
3005:       ncols  = jmax - jmin;
3006:       uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */
3007:       PetscCall(PetscLLAddSorted(ncols, uj_ptr, am, &nlnk, lnk, lnkbt));
3008:       nzk += nlnk;

3010:       /* update il and jl for prow */
3011:       if (jmin < jmax) {
3012:         il[prow] = jmin;
3013:         j        = *uj_ptr;
3014:         jl[prow] = jl[j];
3015:         jl[j]    = prow;
3016:       }
3017:       prow = nextprow;
3018:     }

3020:     /* if free space is not available, make more free space */
3021:     if (current_space->local_remaining < nzk) {
3022:       i = am - k + 1;                     /* num of unfactored rows */
3023:       i = PetscMin(i * nzk, i * (i - 1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
3024:       PetscCall(PetscFreeSpaceGet(i, &current_space));
3025:       reallocs++;
3026:     }

3028:     /* copy data into free space, then initialize lnk */
3029:     PetscCall(PetscLLClean(am, am, nzk, lnk, current_space->array, lnkbt));

3031:     /* add the k-th row into il and jl */
3032:     if (nzk - 1 > 0) {
3033:       i     = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
3034:       jl[k] = jl[i];
3035:       jl[i] = k;
3036:       il[k] = ui[k] + 1;
3037:     }
3038:     ui_ptr[k] = current_space->array;

3040:     current_space->array += nzk;
3041:     current_space->local_used += nzk;
3042:     current_space->local_remaining -= nzk;

3044:     ui[k + 1] = ui[k] + nzk;
3045:   }

3047:   #if defined(PETSC_USE_INFO)
3048:   if (ai[am] != 0) {
3049:     PetscReal af = (PetscReal)(ui[am]) / ((PetscReal)ai[am]);
3050:     PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)fill, (double)af));
3051:     PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af));
3052:     PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g) for best performance.\n", (double)af));
3053:   } else {
3054:     PetscCall(PetscInfo(A, "Empty matrix\n"));
3055:   }
3056:   #endif

3058:   PetscCall(ISRestoreIndices(perm, &rip));
3059:   PetscCall(ISRestoreIndices(iperm, &riip));
3060:   PetscCall(PetscFree4(ui_ptr, jl, il, cols));

3062:   /* destroy list of free space and other temporary array(s) */
3063:   PetscCall(PetscMalloc1(ui[am] + 1, &uj));
3064:   PetscCall(PetscFreeSpaceContiguous(&free_space, uj));
3065:   PetscCall(PetscLLDestroy(lnk, lnkbt));

3067:   /* put together the new matrix in MATSEQSBAIJ format */

3069:   b               = (Mat_SeqSBAIJ *)fact->data;
3070:   b->singlemalloc = PETSC_FALSE;
3071:   b->free_a       = PETSC_TRUE;
3072:   b->free_ij      = PETSC_TRUE;

3074:   PetscCall(PetscMalloc1(ui[am] + 1, &b->a));

3076:   b->j    = uj;
3077:   b->i    = ui;
3078:   b->diag = NULL;
3079:   b->ilen = NULL;
3080:   b->imax = NULL;
3081:   b->row  = perm;
3082:   b->col  = perm;

3084:   PetscCall(PetscObjectReference((PetscObject)perm));
3085:   PetscCall(PetscObjectReference((PetscObject)perm));

3087:   b->icol          = iperm;
3088:   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */

3090:   PetscCall(PetscMalloc1(am + 1, &b->solve_work));
3091:   b->maxnz = b->nz = ui[am];

3093:   fact->info.factor_mallocs   = reallocs;
3094:   fact->info.fill_ratio_given = fill;
3095:   if (ai[am] != 0) {
3096:     fact->info.fill_ratio_needed = ((PetscReal)ui[am]) / ((PetscReal)ai[am]);
3097:   } else {
3098:     fact->info.fill_ratio_needed = 0.0;
3099:   }
3100:   fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ_inplace;
3101:   PetscFunctionReturn(PETSC_SUCCESS);
3102: }
3103: #endif

3105: PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering(Mat A, Vec bb, Vec xx)
3106: {
3107:   Mat_SeqAIJ        *a  = (Mat_SeqAIJ *)A->data;
3108:   PetscInt           n  = A->rmap->n;
3109:   const PetscInt    *ai = a->i, *aj = a->j, *adiag = a->diag, *vi;
3110:   PetscScalar       *x, sum;
3111:   const PetscScalar *b;
3112:   const MatScalar   *aa = a->a, *v;
3113:   PetscInt           i, nz;

3115:   PetscFunctionBegin;
3116:   if (!n) PetscFunctionReturn(PETSC_SUCCESS);

3118:   PetscCall(VecGetArrayRead(bb, &b));
3119:   PetscCall(VecGetArrayWrite(xx, &x));

3121:   /* forward solve the lower triangular */
3122:   x[0] = b[0];
3123:   v    = aa;
3124:   vi   = aj;
3125:   for (i = 1; i < n; i++) {
3126:     nz  = ai[i + 1] - ai[i];
3127:     sum = b[i];
3128:     PetscSparseDenseMinusDot(sum, x, v, vi, nz);
3129:     v += nz;
3130:     vi += nz;
3131:     x[i] = sum;
3132:   }

3134:   /* backward solve the upper triangular */
3135:   for (i = n - 1; i >= 0; i--) {
3136:     v   = aa + adiag[i + 1] + 1;
3137:     vi  = aj + adiag[i + 1] + 1;
3138:     nz  = adiag[i] - adiag[i + 1] - 1;
3139:     sum = x[i];
3140:     PetscSparseDenseMinusDot(sum, x, v, vi, nz);
3141:     x[i] = sum * v[nz]; /* x[i]=aa[adiag[i]]*sum; v++; */
3142:   }

3144:   PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
3145:   PetscCall(VecRestoreArrayRead(bb, &b));
3146:   PetscCall(VecRestoreArrayWrite(xx, &x));
3147:   PetscFunctionReturn(PETSC_SUCCESS);
3148: }

3150: PetscErrorCode MatSolve_SeqAIJ(Mat A, Vec bb, Vec xx)
3151: {
3152:   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
3153:   IS                 iscol = a->col, isrow = a->row;
3154:   PetscInt           i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j, *adiag = a->diag, nz;
3155:   const PetscInt    *rout, *cout, *r, *c;
3156:   PetscScalar       *x, *tmp, sum;
3157:   const PetscScalar *b;
3158:   const MatScalar   *aa = a->a, *v;

3160:   PetscFunctionBegin;
3161:   if (!n) PetscFunctionReturn(PETSC_SUCCESS);

3163:   PetscCall(VecGetArrayRead(bb, &b));
3164:   PetscCall(VecGetArrayWrite(xx, &x));
3165:   tmp = a->solve_work;

3167:   PetscCall(ISGetIndices(isrow, &rout));
3168:   r = rout;
3169:   PetscCall(ISGetIndices(iscol, &cout));
3170:   c = cout;

3172:   /* forward solve the lower triangular */
3173:   tmp[0] = b[r[0]];
3174:   v      = aa;
3175:   vi     = aj;
3176:   for (i = 1; i < n; i++) {
3177:     nz  = ai[i + 1] - ai[i];
3178:     sum = b[r[i]];
3179:     PetscSparseDenseMinusDot(sum, tmp, v, vi, nz);
3180:     tmp[i] = sum;
3181:     v += nz;
3182:     vi += nz;
3183:   }

3185:   /* backward solve the upper triangular */
3186:   for (i = n - 1; i >= 0; i--) {
3187:     v   = aa + adiag[i + 1] + 1;
3188:     vi  = aj + adiag[i + 1] + 1;
3189:     nz  = adiag[i] - adiag[i + 1] - 1;
3190:     sum = tmp[i];
3191:     PetscSparseDenseMinusDot(sum, tmp, v, vi, nz);
3192:     x[c[i]] = tmp[i] = sum * v[nz]; /* v[nz] = aa[adiag[i]] */
3193:   }

3195:   PetscCall(ISRestoreIndices(isrow, &rout));
3196:   PetscCall(ISRestoreIndices(iscol, &cout));
3197:   PetscCall(VecRestoreArrayRead(bb, &b));
3198:   PetscCall(VecRestoreArrayWrite(xx, &x));
3199:   PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
3200:   PetscFunctionReturn(PETSC_SUCCESS);
3201: }

3203: #if 0
3204: // unused
3205: /*
3206:     This will get a new name and become a variant of MatILUFactor_SeqAIJ() there is no longer separate functions in the matrix function table for dt factors
3207: */
3208: static PetscErrorCode MatILUDTFactor_SeqAIJ(Mat A, IS isrow, IS iscol, const MatFactorInfo *info, Mat *fact)
3209: {
3210:   Mat             B = *fact;
3211:   Mat_SeqAIJ     *a = (Mat_SeqAIJ *)A->data, *b;
3212:   IS              isicol;
3213:   const PetscInt *r, *ic;
3214:   PetscInt        i, n = A->rmap->n, *ai = a->i, *aj = a->j, *ajtmp, *adiag;
3215:   PetscInt       *bi, *bj, *bdiag, *bdiag_rev;
3216:   PetscInt        row, nzi, nzi_bl, nzi_bu, *im, nzi_al, nzi_au;
3217:   PetscInt        nlnk, *lnk;
3218:   PetscBT         lnkbt;
3219:   PetscBool       row_identity, icol_identity;
3220:   MatScalar      *aatmp, *pv, *batmp, *ba, *rtmp, *pc, multiplier, *vtmp, diag_tmp;
3221:   const PetscInt *ics;
3222:   PetscInt        j, nz, *pj, *bjtmp, k, ncut, *jtmp;
3223:   PetscReal       dt = info->dt, shift = info->shiftamount;
3224:   PetscInt        dtcount = (PetscInt)info->dtcount, nnz_max;
3225:   PetscBool       missing;

3227:   PetscFunctionBegin;
3228:   if (dt == (PetscReal)PETSC_DEFAULT) dt = 0.005;
3229:   if (dtcount == PETSC_DEFAULT) dtcount = (PetscInt)(1.5 * a->rmax);

3231:   /* symbolic factorization, can be reused */
3232:   PetscCall(MatMissingDiagonal(A, &missing, &i));
3233:   PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i);
3234:   adiag = a->diag;

3236:   PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol));

3238:   /* bdiag is location of diagonal in factor */
3239:   PetscCall(PetscMalloc1(n + 1, &bdiag));     /* becomes b->diag */
3240:   PetscCall(PetscMalloc1(n + 1, &bdiag_rev)); /* temporary */

3242:   /* allocate row pointers bi */
3243:   PetscCall(PetscMalloc1(2 * n + 2, &bi));

3245:   /* allocate bj and ba; max num of nonzero entries is (ai[n]+2*n*dtcount+2) */
3246:   if (dtcount > n - 1) dtcount = n - 1; /* diagonal is excluded */
3247:   nnz_max = ai[n] + 2 * n * dtcount + 2;

3249:   PetscCall(PetscMalloc1(nnz_max + 1, &bj));
3250:   PetscCall(PetscMalloc1(nnz_max + 1, &ba));

3252:   /* put together the new matrix */
3253:   PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(B, MAT_SKIP_ALLOCATION, NULL));
3254:   b = (Mat_SeqAIJ *)B->data;

3256:   b->free_a       = PETSC_TRUE;
3257:   b->free_ij      = PETSC_TRUE;
3258:   b->singlemalloc = PETSC_FALSE;

3260:   b->a    = ba;
3261:   b->j    = bj;
3262:   b->i    = bi;
3263:   b->diag = bdiag;
3264:   b->ilen = NULL;
3265:   b->imax = NULL;
3266:   b->row  = isrow;
3267:   b->col  = iscol;
3268:   PetscCall(PetscObjectReference((PetscObject)isrow));
3269:   PetscCall(PetscObjectReference((PetscObject)iscol));
3270:   b->icol = isicol;

3272:   PetscCall(PetscMalloc1(n + 1, &b->solve_work));
3273:   b->maxnz = nnz_max;

3275:   B->factortype            = MAT_FACTOR_ILUDT;
3276:   B->info.factor_mallocs   = 0;
3277:   B->info.fill_ratio_given = ((PetscReal)nnz_max) / ((PetscReal)ai[n]);
3278:   /*  end of symbolic factorization */

3280:   PetscCall(ISGetIndices(isrow, &r));
3281:   PetscCall(ISGetIndices(isicol, &ic));
3282:   ics = ic;

3284:   /* linked list for storing column indices of the active row */
3285:   nlnk = n + 1;
3286:   PetscCall(PetscLLCreate(n, n, nlnk, lnk, lnkbt));

3288:   /* im: used by PetscLLAddSortedLU(); jtmp: working array for column indices of active row */
3289:   PetscCall(PetscMalloc2(n, &im, n, &jtmp));
3290:   /* rtmp, vtmp: working arrays for sparse and contiguous row entries of active row */
3291:   PetscCall(PetscMalloc2(n, &rtmp, n, &vtmp));
3292:   PetscCall(PetscArrayzero(rtmp, n));

3294:   bi[0]         = 0;
3295:   bdiag[0]      = nnz_max - 1; /* location of diag[0] in factor B */
3296:   bdiag_rev[n]  = bdiag[0];
3297:   bi[2 * n + 1] = bdiag[0] + 1; /* endof bj and ba array */
3298:   for (i = 0; i < n; i++) {
3299:     /* copy initial fill into linked list */
3300:     nzi = ai[r[i] + 1] - ai[r[i]];
3301:     PetscCheck(nzi, PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, r[i], i);
3302:     nzi_al = adiag[r[i]] - ai[r[i]];
3303:     nzi_au = ai[r[i] + 1] - adiag[r[i]] - 1;
3304:     ajtmp  = aj + ai[r[i]];
3305:     PetscCall(PetscLLAddPerm(nzi, ajtmp, ic, n, &nlnk, lnk, lnkbt));

3307:     /* load in initial (unfactored row) */
3308:     aatmp = a->a + ai[r[i]];
3309:     for (j = 0; j < nzi; j++) rtmp[ics[*ajtmp++]] = *aatmp++;

3311:     /* add pivot rows into linked list */
3312:     row = lnk[n];
3313:     while (row < i) {
3314:       nzi_bl = bi[row + 1] - bi[row] + 1;
3315:       bjtmp  = bj + bdiag[row + 1] + 1; /* points to 1st column next to the diagonal in U */
3316:       PetscCall(PetscLLAddSortedLU(bjtmp, row, &nlnk, lnk, lnkbt, i, nzi_bl, im));
3317:       nzi += nlnk;
3318:       row = lnk[row];
3319:     }

3321:     /* copy data from lnk into jtmp, then initialize lnk */
3322:     PetscCall(PetscLLClean(n, n, nzi, lnk, jtmp, lnkbt));

3324:     /* numerical factorization */
3325:     bjtmp = jtmp;
3326:     row   = *bjtmp++; /* 1st pivot row */
3327:     while (row < i) {
3328:       pc         = rtmp + row;
3329:       pv         = ba + bdiag[row]; /* 1./(diag of the pivot row) */
3330:       multiplier = (*pc) * (*pv);
3331:       *pc        = multiplier;
3332:       if (PetscAbsScalar(*pc) > dt) { /* apply tolerance dropping rule */
3333:         pj = bj + bdiag[row + 1] + 1; /* point to 1st entry of U(row,:) */
3334:         pv = ba + bdiag[row + 1] + 1;
3335:         nz = bdiag[row] - bdiag[row + 1] - 1; /* num of entries in U(row,:), excluding diagonal */
3336:         for (j = 0; j < nz; j++) rtmp[*pj++] -= multiplier * (*pv++);
3337:         PetscCall(PetscLogFlops(1 + 2.0 * nz));
3338:       }
3339:       row = *bjtmp++;
3340:     }

3342:     /* copy sparse rtmp into contiguous vtmp; separate L and U part */
3343:     diag_tmp = rtmp[i]; /* save diagonal value - may not needed?? */
3344:     nzi_bl   = 0;
3345:     j        = 0;
3346:     while (jtmp[j] < i) { /* Note: jtmp is sorted */
3347:       vtmp[j]       = rtmp[jtmp[j]];
3348:       rtmp[jtmp[j]] = 0.0;
3349:       nzi_bl++;
3350:       j++;
3351:     }
3352:     nzi_bu = nzi - nzi_bl - 1;
3353:     while (j < nzi) {
3354:       vtmp[j]       = rtmp[jtmp[j]];
3355:       rtmp[jtmp[j]] = 0.0;
3356:       j++;
3357:     }

3359:     bjtmp = bj + bi[i];
3360:     batmp = ba + bi[i];
3361:     /* apply level dropping rule to L part */
3362:     ncut = nzi_al + dtcount;
3363:     if (ncut < nzi_bl) {
3364:       PetscCall(PetscSortSplit(ncut, nzi_bl, vtmp, jtmp));
3365:       PetscCall(PetscSortIntWithScalarArray(ncut, jtmp, vtmp));
3366:     } else {
3367:       ncut = nzi_bl;
3368:     }
3369:     for (j = 0; j < ncut; j++) {
3370:       bjtmp[j] = jtmp[j];
3371:       batmp[j] = vtmp[j];
3372:     }
3373:     bi[i + 1] = bi[i] + ncut;
3374:     nzi       = ncut + 1;

3376:     /* apply level dropping rule to U part */
3377:     ncut = nzi_au + dtcount;
3378:     if (ncut < nzi_bu) {
3379:       PetscCall(PetscSortSplit(ncut, nzi_bu, vtmp + nzi_bl + 1, jtmp + nzi_bl + 1));
3380:       PetscCall(PetscSortIntWithScalarArray(ncut, jtmp + nzi_bl + 1, vtmp + nzi_bl + 1));
3381:     } else {
3382:       ncut = nzi_bu;
3383:     }
3384:     nzi += ncut;

3386:     /* mark bdiagonal */
3387:     bdiag[i + 1]         = bdiag[i] - (ncut + 1);
3388:     bdiag_rev[n - i - 1] = bdiag[i + 1];
3389:     bi[2 * n - i]        = bi[2 * n - i + 1] - (ncut + 1);
3390:     bjtmp                = bj + bdiag[i];
3391:     batmp                = ba + bdiag[i];
3392:     *bjtmp               = i;
3393:     *batmp               = diag_tmp; /* rtmp[i]; */
3394:     if (*batmp == 0.0) *batmp = dt + shift;
3395:     *batmp = 1.0 / (*batmp); /* invert diagonal entries for simpler triangular solves */

3397:     bjtmp = bj + bdiag[i + 1] + 1;
3398:     batmp = ba + bdiag[i + 1] + 1;
3399:     for (k = 0; k < ncut; k++) {
3400:       bjtmp[k] = jtmp[nzi_bl + 1 + k];
3401:       batmp[k] = vtmp[nzi_bl + 1 + k];
3402:     }

3404:     im[i] = nzi; /* used by PetscLLAddSortedLU() */
3405:   }              /* for (i=0; i<n; i++) */
3406:   PetscCheck(bi[n] < bdiag[n], PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "end of L array %" PetscInt_FMT " cannot >= the beginning of U array %" PetscInt_FMT, bi[n], bdiag[n]);

3408:   PetscCall(ISRestoreIndices(isrow, &r));
3409:   PetscCall(ISRestoreIndices(isicol, &ic));

3411:   PetscCall(PetscLLDestroy(lnk, lnkbt));
3412:   PetscCall(PetscFree2(im, jtmp));
3413:   PetscCall(PetscFree2(rtmp, vtmp));
3414:   PetscCall(PetscFree(bdiag_rev));

3416:   PetscCall(PetscLogFlops(B->cmap->n));
3417:   b->maxnz = b->nz = bi[n] + bdiag[0] - bdiag[n];

3419:   PetscCall(ISIdentity(isrow, &row_identity));
3420:   PetscCall(ISIdentity(isicol, &icol_identity));
3421:   if (row_identity && icol_identity) {
3422:     B->ops->solve = MatSolve_SeqAIJ_NaturalOrdering;
3423:   } else {
3424:     B->ops->solve = MatSolve_SeqAIJ;
3425:   }

3427:   B->ops->solveadd          = NULL;
3428:   B->ops->solvetranspose    = NULL;
3429:   B->ops->solvetransposeadd = NULL;
3430:   B->ops->matsolve          = NULL;
3431:   B->assembled              = PETSC_TRUE;
3432:   B->preallocated           = PETSC_TRUE;
3433:   PetscFunctionReturn(PETSC_SUCCESS);
3434: }

3436: /* a wrapper of MatILUDTFactor_SeqAIJ() */
3437: /*
3438:     This will get a new name and become a variant of MatILUFactor_SeqAIJ() there is no longer separate functions in the matrix function table for dt factors
3439: */

3441: static PetscErrorCode MatILUDTFactorSymbolic_SeqAIJ(Mat fact, Mat A, IS row, IS col, const MatFactorInfo *info)
3442: {
3443:   PetscFunctionBegin;
3444:   PetscCall(MatILUDTFactor_SeqAIJ(A, row, col, info, &fact));
3445:   PetscFunctionReturn(PETSC_SUCCESS);
3446: }

3448: /*
3449:    same as MatLUFactorNumeric_SeqAIJ(), except using contiguous array matrix factors
3450:    - intend to replace existing MatLUFactorNumeric_SeqAIJ()
3451: */
3452: /*
3453:     This will get a new name and become a variant of MatILUFactor_SeqAIJ() there is no longer separate functions in the matrix function table for dt factors
3454: */

3456: static PetscErrorCode MatILUDTFactorNumeric_SeqAIJ(Mat fact, Mat A, const MatFactorInfo *info)
3457: {
3458:   Mat             C = fact;
3459:   Mat_SeqAIJ     *a = (Mat_SeqAIJ *)A->data, *b = (Mat_SeqAIJ *)C->data;
3460:   IS              isrow = b->row, isicol = b->icol;
3461:   const PetscInt *r, *ic, *ics;
3462:   PetscInt        i, j, k, n = A->rmap->n, *ai = a->i, *aj = a->j, *bi = b->i, *bj = b->j;
3463:   PetscInt       *ajtmp, *bjtmp, nz, nzl, nzu, row, *bdiag = b->diag, *pj;
3464:   MatScalar      *rtmp, *pc, multiplier, *v, *pv, *aa = a->a;
3465:   PetscReal       dt = info->dt, shift = info->shiftamount;
3466:   PetscBool       row_identity, col_identity;

3468:   PetscFunctionBegin;
3469:   PetscCall(ISGetIndices(isrow, &r));
3470:   PetscCall(ISGetIndices(isicol, &ic));
3471:   PetscCall(PetscMalloc1(n + 1, &rtmp));
3472:   ics = ic;

3474:   for (i = 0; i < n; i++) {
3475:     /* initialize rtmp array */
3476:     nzl   = bi[i + 1] - bi[i]; /* num of nonzeros in L(i,:) */
3477:     bjtmp = bj + bi[i];
3478:     for (j = 0; j < nzl; j++) rtmp[*bjtmp++] = 0.0;
3479:     rtmp[i] = 0.0;
3480:     nzu     = bdiag[i] - bdiag[i + 1]; /* num of nonzeros in U(i,:) */
3481:     bjtmp   = bj + bdiag[i + 1] + 1;
3482:     for (j = 0; j < nzu; j++) rtmp[*bjtmp++] = 0.0;

3484:     /* load in initial unfactored row of A */
3485:     nz    = ai[r[i] + 1] - ai[r[i]];
3486:     ajtmp = aj + ai[r[i]];
3487:     v     = aa + ai[r[i]];
3488:     for (j = 0; j < nz; j++) rtmp[ics[*ajtmp++]] = v[j];

3490:     /* numerical factorization */
3491:     bjtmp = bj + bi[i];        /* point to 1st entry of L(i,:) */
3492:     nzl   = bi[i + 1] - bi[i]; /* num of entries in L(i,:) */
3493:     k     = 0;
3494:     while (k < nzl) {
3495:       row        = *bjtmp++;
3496:       pc         = rtmp + row;
3497:       pv         = b->a + bdiag[row]; /* 1./(diag of the pivot row) */
3498:       multiplier = (*pc) * (*pv);
3499:       *pc        = multiplier;
3500:       if (PetscAbsScalar(multiplier) > dt) {
3501:         pj = bj + bdiag[row + 1] + 1; /* point to 1st entry of U(row,:) */
3502:         pv = b->a + bdiag[row + 1] + 1;
3503:         nz = bdiag[row] - bdiag[row + 1] - 1; /* num of entries in U(row,:), excluding diagonal */
3504:         for (j = 0; j < nz; j++) rtmp[*pj++] -= multiplier * (*pv++);
3505:         PetscCall(PetscLogFlops(1 + 2.0 * nz));
3506:       }
3507:       k++;
3508:     }

3510:     /* finished row so stick it into b->a */
3511:     /* L-part */
3512:     pv  = b->a + bi[i];
3513:     pj  = bj + bi[i];
3514:     nzl = bi[i + 1] - bi[i];
3515:     for (j = 0; j < nzl; j++) pv[j] = rtmp[pj[j]];

3517:     /* diagonal: invert diagonal entries for simpler triangular solves */
3518:     if (rtmp[i] == 0.0) rtmp[i] = dt + shift;
3519:     b->a[bdiag[i]] = 1.0 / rtmp[i];

3521:     /* U-part */
3522:     pv  = b->a + bdiag[i + 1] + 1;
3523:     pj  = bj + bdiag[i + 1] + 1;
3524:     nzu = bdiag[i] - bdiag[i + 1] - 1;
3525:     for (j = 0; j < nzu; j++) pv[j] = rtmp[pj[j]];
3526:   }

3528:   PetscCall(PetscFree(rtmp));
3529:   PetscCall(ISRestoreIndices(isicol, &ic));
3530:   PetscCall(ISRestoreIndices(isrow, &r));

3532:   PetscCall(ISIdentity(isrow, &row_identity));
3533:   PetscCall(ISIdentity(isicol, &col_identity));
3534:   if (row_identity && col_identity) {
3535:     C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering;
3536:   } else {
3537:     C->ops->solve = MatSolve_SeqAIJ;
3538:   }
3539:   C->ops->solveadd          = NULL;
3540:   C->ops->solvetranspose    = NULL;
3541:   C->ops->solvetransposeadd = NULL;
3542:   C->ops->matsolve          = NULL;
3543:   C->assembled              = PETSC_TRUE;
3544:   C->preallocated           = PETSC_TRUE;

3546:   PetscCall(PetscLogFlops(C->cmap->n));
3547:   PetscFunctionReturn(PETSC_SUCCESS);
3548: }
3549: #endif