Actual source code: baijfact3.c
1: /*
2: Factorization code for BAIJ format.
3: */
4: #include <../src/mat/impls/baij/seq/baij.h>
5: #include <petsc/private/kernels/blockinvert.h>
7: /*
8: This is used to set the numeric factorization for both LU and ILU symbolic factorization
9: */
10: PetscErrorCode MatSeqBAIJSetNumericFactorization(Mat fact, PetscBool natural)
11: {
12: PetscFunctionBegin;
13: if (natural) {
14: switch (fact->rmap->bs) {
15: case 1:
16: fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1;
17: break;
18: case 2:
19: fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2_NaturalOrdering;
20: break;
21: case 3:
22: fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3_NaturalOrdering;
23: break;
24: case 4:
25: fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering;
26: break;
27: case 5:
28: fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5_NaturalOrdering;
29: break;
30: case 6:
31: fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6_NaturalOrdering;
32: break;
33: case 7:
34: fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7_NaturalOrdering;
35: break;
36: case 9:
37: #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX2__) && defined(__FMA__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES)
38: fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_9_NaturalOrdering;
39: #else
40: fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N;
41: #endif
42: break;
43: case 15:
44: fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_15_NaturalOrdering;
45: break;
46: default:
47: fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N;
48: break;
49: }
50: } else {
51: switch (fact->rmap->bs) {
52: case 1:
53: fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1;
54: break;
55: case 2:
56: fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2;
57: break;
58: case 3:
59: fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3;
60: break;
61: case 4:
62: fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4;
63: break;
64: case 5:
65: fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5;
66: break;
67: case 6:
68: fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6;
69: break;
70: case 7:
71: fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7;
72: break;
73: default:
74: fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N;
75: break;
76: }
77: }
78: PetscFunctionReturn(PETSC_SUCCESS);
79: }
81: PetscErrorCode MatSeqBAIJSetNumericFactorization_inplace(Mat inA, PetscBool natural)
82: {
83: PetscFunctionBegin;
84: if (natural) {
85: switch (inA->rmap->bs) {
86: case 1:
87: inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1_inplace;
88: break;
89: case 2:
90: inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2_NaturalOrdering_inplace;
91: break;
92: case 3:
93: inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3_NaturalOrdering_inplace;
94: break;
95: case 4:
96: #if defined(PETSC_USE_REAL_MAT_SINGLE)
97: {
98: PetscBool sse_enabled_local;
99: PetscCall(PetscSSEIsEnabled(inA->comm, &sse_enabled_local, NULL));
100: if (sse_enabled_local) {
101: #if defined(PETSC_HAVE_SSE)
102: int i, *AJ = a->j, nz = a->nz, n = a->mbs;
103: if (n == (unsigned short)n) {
104: unsigned short *aj = (unsigned short *)AJ;
105: for (i = 0; i < nz; i++) aj[i] = (unsigned short)AJ[i];
107: inA->ops->setunfactored = MatSetUnfactored_SeqBAIJ_4_NaturalOrdering_SSE_usj;
108: inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering_SSE_usj;
110: PetscCall(PetscInfo(inA, "Using special SSE, in-place natural ordering, ushort j index factor BS=4\n"));
111: } else {
112: /* Scale the column indices for easier indexing in MatSolve. */
113: /* for (i=0;i<nz;i++) { */
114: /* AJ[i] = AJ[i]*4; */
115: /* } */
116: inA->ops->setunfactored = MatSetUnfactored_SeqBAIJ_4_NaturalOrdering_SSE;
117: inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering_SSE;
119: PetscCall(PetscInfo(inA, "Using special SSE, in-place natural ordering, int j index factor BS=4\n"));
120: }
121: #else
122: /* This should never be reached. If so, problem in PetscSSEIsEnabled. */
123: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "SSE Hardware unavailable");
124: #endif
125: } else {
126: inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering_inplace;
127: }
128: }
129: #else
130: inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering_inplace;
131: #endif
132: break;
133: case 5:
134: inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5_NaturalOrdering_inplace;
135: break;
136: case 6:
137: inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6_NaturalOrdering_inplace;
138: break;
139: case 7:
140: inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7_NaturalOrdering_inplace;
141: break;
142: default:
143: inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N_inplace;
144: break;
145: }
146: } else {
147: switch (inA->rmap->bs) {
148: case 1:
149: inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1_inplace;
150: break;
151: case 2:
152: inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2_inplace;
153: break;
154: case 3:
155: inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3_inplace;
156: break;
157: case 4:
158: inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_inplace;
159: break;
160: case 5:
161: inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5_inplace;
162: break;
163: case 6:
164: inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6_inplace;
165: break;
166: case 7:
167: inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7_inplace;
168: break;
169: default:
170: inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N_inplace;
171: break;
172: }
173: }
174: PetscFunctionReturn(PETSC_SUCCESS);
175: }
177: /*
178: The symbolic factorization code is identical to that for AIJ format,
179: except for very small changes since this is now a SeqBAIJ datastructure.
180: NOT good code reuse.
181: */
182: #include <petscbt.h>
183: #include <../src/mat/utils/freespace.h>
185: PetscErrorCode MatLUFactorSymbolic_SeqBAIJ(Mat B, Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
186: {
187: Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *b;
188: PetscInt n = a->mbs, bs = A->rmap->bs, bs2 = a->bs2;
189: PetscBool row_identity, col_identity, both_identity;
190: IS isicol;
191: const PetscInt *r, *ic;
192: PetscInt i, *ai = a->i, *aj = a->j;
193: PetscInt *bi, *bj, *ajtmp;
194: PetscInt *bdiag, row, nnz, nzi, reallocs = 0, nzbd, *im;
195: PetscReal f;
196: PetscInt nlnk, *lnk, k, **bi_ptr;
197: PetscFreeSpaceList free_space = NULL, current_space = NULL;
198: PetscBT lnkbt;
199: PetscBool missing;
201: PetscFunctionBegin;
202: PetscCheck(A->rmap->N == A->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "matrix must be square");
203: PetscCall(MatMissingDiagonal(A, &missing, &i));
204: PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i);
206: if (bs > 1) { /* check shifttype */
207: PetscCheck(info->shifttype != (PetscReal)MAT_SHIFT_NONZERO && info->shifttype != (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only MAT_SHIFT_NONE and MAT_SHIFT_INBLOCKS are supported for BAIJ matrix");
208: }
210: PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol));
211: PetscCall(ISGetIndices(isrow, &r));
212: PetscCall(ISGetIndices(isicol, &ic));
214: /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */
215: PetscCall(PetscMalloc1(n + 1, &bi));
216: PetscCall(PetscMalloc1(n + 1, &bdiag));
217: bi[0] = bdiag[0] = 0;
219: /* linked list for storing column indices of the active row */
220: nlnk = n + 1;
221: PetscCall(PetscLLCreate(n, n, nlnk, lnk, lnkbt));
223: PetscCall(PetscMalloc2(n + 1, &bi_ptr, n + 1, &im));
225: /* initial FreeSpace size is f*(ai[n]+1) */
226: f = info->fill;
227: PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space));
229: current_space = free_space;
231: for (i = 0; i < n; i++) {
232: /* copy previous fill into linked list */
233: nzi = 0;
234: nnz = ai[r[i] + 1] - ai[r[i]];
235: ajtmp = aj + ai[r[i]];
236: PetscCall(PetscLLAddPerm(nnz, ajtmp, ic, n, &nlnk, lnk, lnkbt));
237: nzi += nlnk;
239: /* add pivot rows into linked list */
240: row = lnk[n];
241: while (row < i) {
242: nzbd = bdiag[row] + 1; /* num of entries in the row with column index <= row */
243: ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */
244: PetscCall(PetscLLAddSortedLU(ajtmp, row, &nlnk, lnk, lnkbt, i, nzbd, im));
245: nzi += nlnk;
246: row = lnk[row];
247: }
248: bi[i + 1] = bi[i] + nzi;
249: im[i] = nzi;
251: /* mark bdiag */
252: nzbd = 0;
253: nnz = nzi;
254: k = lnk[n];
255: while (nnz-- && k < i) {
256: nzbd++;
257: k = lnk[k];
258: }
259: bdiag[i] = nzbd; /* note : bdaig[i] = nnzL as input for PetscFreeSpaceContiguous_LU() */
261: /* if free space is not available, make more free space */
262: if (current_space->local_remaining < nzi) {
263: nnz = PetscIntMultTruncate(2, PetscIntMultTruncate(n - i, nzi)); /* estimated and max additional space needed */
264: PetscCall(PetscFreeSpaceGet(nnz, ¤t_space));
265: reallocs++;
266: }
268: /* copy data into free space, then initialize lnk */
269: PetscCall(PetscLLClean(n, n, nzi, lnk, current_space->array, lnkbt));
271: bi_ptr[i] = current_space->array;
272: current_space->array += nzi;
273: current_space->local_used += nzi;
274: current_space->local_remaining -= nzi;
275: }
277: PetscCall(ISRestoreIndices(isrow, &r));
278: PetscCall(ISRestoreIndices(isicol, &ic));
280: /* copy free_space into bj and free free_space; set bi, bj, bdiag in new datastructure; */
281: PetscCall(PetscMalloc1(bi[n] + 1, &bj));
282: PetscCall(PetscFreeSpaceContiguous_LU(&free_space, bj, n, bi, bdiag));
283: PetscCall(PetscLLDestroy(lnk, lnkbt));
284: PetscCall(PetscFree2(bi_ptr, im));
286: /* put together the new matrix */
287: PetscCall(MatSeqBAIJSetPreallocation(B, bs, MAT_SKIP_ALLOCATION, NULL));
288: b = (Mat_SeqBAIJ *)(B)->data;
290: b->free_a = PETSC_TRUE;
291: b->free_ij = PETSC_TRUE;
292: b->singlemalloc = PETSC_FALSE;
294: PetscCall(PetscMalloc1((bdiag[0] + 1) * bs2, &b->a));
295: b->j = bj;
296: b->i = bi;
297: b->diag = bdiag;
298: b->free_diag = PETSC_TRUE;
299: b->ilen = NULL;
300: b->imax = NULL;
301: b->row = isrow;
302: b->col = iscol;
303: b->pivotinblocks = (info->pivotinblocks) ? PETSC_TRUE : PETSC_FALSE;
305: PetscCall(PetscObjectReference((PetscObject)isrow));
306: PetscCall(PetscObjectReference((PetscObject)iscol));
307: b->icol = isicol;
308: PetscCall(PetscMalloc1(bs * n + bs, &b->solve_work));
310: b->maxnz = b->nz = bdiag[0] + 1;
312: B->factortype = MAT_FACTOR_LU;
313: B->info.factor_mallocs = reallocs;
314: B->info.fill_ratio_given = f;
316: if (ai[n] != 0) {
317: B->info.fill_ratio_needed = ((PetscReal)(bdiag[0] + 1)) / ((PetscReal)ai[n]);
318: } else {
319: B->info.fill_ratio_needed = 0.0;
320: }
321: #if defined(PETSC_USE_INFO)
322: if (ai[n] != 0) {
323: PetscReal af = B->info.fill_ratio_needed;
324: PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)f, (double)af));
325: PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af));
326: PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g);\n", (double)af));
327: PetscCall(PetscInfo(A, "for best performance.\n"));
328: } else {
329: PetscCall(PetscInfo(A, "Empty matrix\n"));
330: }
331: #endif
333: PetscCall(ISIdentity(isrow, &row_identity));
334: PetscCall(ISIdentity(iscol, &col_identity));
336: both_identity = (PetscBool)(row_identity && col_identity);
338: PetscCall(MatSeqBAIJSetNumericFactorization(B, both_identity));
339: PetscFunctionReturn(PETSC_SUCCESS);
340: }
342: #if 0
343: // unused
344: static PetscErrorCode MatLUFactorSymbolic_SeqBAIJ_inplace(Mat B, Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
345: {
346: Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *b;
347: PetscInt n = a->mbs, bs = A->rmap->bs, bs2 = a->bs2;
348: PetscBool row_identity, col_identity, both_identity;
349: IS isicol;
350: const PetscInt *r, *ic;
351: PetscInt i, *ai = a->i, *aj = a->j;
352: PetscInt *bi, *bj, *ajtmp;
353: PetscInt *bdiag, row, nnz, nzi, reallocs = 0, nzbd, *im;
354: PetscReal f;
355: PetscInt nlnk, *lnk, k, **bi_ptr;
356: PetscFreeSpaceList free_space = NULL, current_space = NULL;
357: PetscBT lnkbt;
358: PetscBool missing;
360: PetscFunctionBegin;
361: PetscCheck(A->rmap->N == A->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "matrix must be square");
362: PetscCall(MatMissingDiagonal(A, &missing, &i));
363: PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i);
365: PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol));
366: PetscCall(ISGetIndices(isrow, &r));
367: PetscCall(ISGetIndices(isicol, &ic));
369: /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */
370: PetscCall(PetscMalloc1(n + 1, &bi));
371: PetscCall(PetscMalloc1(n + 1, &bdiag));
373: bi[0] = bdiag[0] = 0;
375: /* linked list for storing column indices of the active row */
376: nlnk = n + 1;
377: PetscCall(PetscLLCreate(n, n, nlnk, lnk, lnkbt));
379: PetscCall(PetscMalloc2(n + 1, &bi_ptr, n + 1, &im));
381: /* initial FreeSpace size is f*(ai[n]+1) */
382: f = info->fill;
383: PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space));
384: current_space = free_space;
386: for (i = 0; i < n; i++) {
387: /* copy previous fill into linked list */
388: nzi = 0;
389: nnz = ai[r[i] + 1] - ai[r[i]];
390: ajtmp = aj + ai[r[i]];
391: PetscCall(PetscLLAddPerm(nnz, ajtmp, ic, n, &nlnk, lnk, lnkbt));
392: nzi += nlnk;
394: /* add pivot rows into linked list */
395: row = lnk[n];
396: while (row < i) {
397: nzbd = bdiag[row] - bi[row] + 1; /* num of entries in the row with column index <= row */
398: ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */
399: PetscCall(PetscLLAddSortedLU(ajtmp, row, &nlnk, lnk, lnkbt, i, nzbd, im));
400: nzi += nlnk;
401: row = lnk[row];
402: }
403: bi[i + 1] = bi[i] + nzi;
404: im[i] = nzi;
406: /* mark bdiag */
407: nzbd = 0;
408: nnz = nzi;
409: k = lnk[n];
410: while (nnz-- && k < i) {
411: nzbd++;
412: k = lnk[k];
413: }
414: bdiag[i] = bi[i] + nzbd;
416: /* if free space is not available, make more free space */
417: if (current_space->local_remaining < nzi) {
418: nnz = PetscIntMultTruncate(n - i, nzi); /* estimated and max additional space needed */
419: PetscCall(PetscFreeSpaceGet(nnz, ¤t_space));
420: reallocs++;
421: }
423: /* copy data into free space, then initialize lnk */
424: PetscCall(PetscLLClean(n, n, nzi, lnk, current_space->array, lnkbt));
426: bi_ptr[i] = current_space->array;
427: current_space->array += nzi;
428: current_space->local_used += nzi;
429: current_space->local_remaining -= nzi;
430: }
431: #if defined(PETSC_USE_INFO)
432: if (ai[n] != 0) {
433: PetscReal af = ((PetscReal)bi[n]) / ((PetscReal)ai[n]);
434: PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)f, (double)af));
435: PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af));
436: PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g);\n", (double)af));
437: PetscCall(PetscInfo(A, "for best performance.\n"));
438: } else {
439: PetscCall(PetscInfo(A, "Empty matrix\n"));
440: }
441: #endif
443: PetscCall(ISRestoreIndices(isrow, &r));
444: PetscCall(ISRestoreIndices(isicol, &ic));
446: /* destroy list of free space and other temporary array(s) */
447: PetscCall(PetscMalloc1(bi[n] + 1, &bj));
448: PetscCall(PetscFreeSpaceContiguous(&free_space, bj));
449: PetscCall(PetscLLDestroy(lnk, lnkbt));
450: PetscCall(PetscFree2(bi_ptr, im));
452: /* put together the new matrix */
453: PetscCall(MatSeqBAIJSetPreallocation(B, bs, MAT_SKIP_ALLOCATION, NULL));
454: b = (Mat_SeqBAIJ *)(B)->data;
456: b->free_a = PETSC_TRUE;
457: b->free_ij = PETSC_TRUE;
458: b->singlemalloc = PETSC_FALSE;
460: PetscCall(PetscMalloc1((bi[n] + 1) * bs2, &b->a));
461: b->j = bj;
462: b->i = bi;
463: b->diag = bdiag;
464: b->free_diag = PETSC_TRUE;
465: b->ilen = NULL;
466: b->imax = NULL;
467: b->row = isrow;
468: b->col = iscol;
469: b->pivotinblocks = (info->pivotinblocks) ? PETSC_TRUE : PETSC_FALSE;
471: PetscCall(PetscObjectReference((PetscObject)isrow));
472: PetscCall(PetscObjectReference((PetscObject)iscol));
473: b->icol = isicol;
475: PetscCall(PetscMalloc1(bs * n + bs, &b->solve_work));
477: b->maxnz = b->nz = bi[n];
479: (B)->factortype = MAT_FACTOR_LU;
480: (B)->info.factor_mallocs = reallocs;
481: (B)->info.fill_ratio_given = f;
483: if (ai[n] != 0) {
484: (B)->info.fill_ratio_needed = ((PetscReal)bi[n]) / ((PetscReal)ai[n]);
485: } else {
486: (B)->info.fill_ratio_needed = 0.0;
487: }
489: PetscCall(ISIdentity(isrow, &row_identity));
490: PetscCall(ISIdentity(iscol, &col_identity));
492: both_identity = (PetscBool)(row_identity && col_identity);
494: PetscCall(MatSeqBAIJSetNumericFactorization_inplace(B, both_identity));
495: PetscFunctionReturn(PETSC_SUCCESS);
496: }
497: #endif