Actual source code: pbjacobi.c
1: #include <../src/ksp/pc/impls/pbjacobi/pbjacobi.h>
3: static PetscErrorCode PCApply_PBJacobi(PC pc, Vec x, Vec y)
4: {
5: PC_PBJacobi *jac = (PC_PBJacobi *)pc->data;
6: PetscInt i, ib, jb;
7: const PetscInt m = jac->mbs;
8: const PetscInt bs = jac->bs;
9: const MatScalar *diag = jac->diag;
10: PetscScalar *yy, x0, x1, x2, x3, x4, x5, x6;
11: const PetscScalar *xx;
13: PetscFunctionBegin;
14: PetscCall(VecGetArrayRead(x, &xx));
15: PetscCall(VecGetArray(y, &yy));
16: switch (bs) {
17: case 1:
18: for (i = 0; i < m; i++) yy[i] = diag[i] * xx[i];
19: break;
20: case 2:
21: for (i = 0; i < m; i++) {
22: x0 = xx[2 * i];
23: x1 = xx[2 * i + 1];
24: yy[2 * i] = diag[0] * x0 + diag[2] * x1;
25: yy[2 * i + 1] = diag[1] * x0 + diag[3] * x1;
26: diag += 4;
27: }
28: break;
29: case 3:
30: for (i = 0; i < m; i++) {
31: x0 = xx[3 * i];
32: x1 = xx[3 * i + 1];
33: x2 = xx[3 * i + 2];
35: yy[3 * i] = diag[0] * x0 + diag[3] * x1 + diag[6] * x2;
36: yy[3 * i + 1] = diag[1] * x0 + diag[4] * x1 + diag[7] * x2;
37: yy[3 * i + 2] = diag[2] * x0 + diag[5] * x1 + diag[8] * x2;
38: diag += 9;
39: }
40: break;
41: case 4:
42: for (i = 0; i < m; i++) {
43: x0 = xx[4 * i];
44: x1 = xx[4 * i + 1];
45: x2 = xx[4 * i + 2];
46: x3 = xx[4 * i + 3];
48: yy[4 * i] = diag[0] * x0 + diag[4] * x1 + diag[8] * x2 + diag[12] * x3;
49: yy[4 * i + 1] = diag[1] * x0 + diag[5] * x1 + diag[9] * x2 + diag[13] * x3;
50: yy[4 * i + 2] = diag[2] * x0 + diag[6] * x1 + diag[10] * x2 + diag[14] * x3;
51: yy[4 * i + 3] = diag[3] * x0 + diag[7] * x1 + diag[11] * x2 + diag[15] * x3;
52: diag += 16;
53: }
54: break;
55: case 5:
56: for (i = 0; i < m; i++) {
57: x0 = xx[5 * i];
58: x1 = xx[5 * i + 1];
59: x2 = xx[5 * i + 2];
60: x3 = xx[5 * i + 3];
61: x4 = xx[5 * i + 4];
63: yy[5 * i] = diag[0] * x0 + diag[5] * x1 + diag[10] * x2 + diag[15] * x3 + diag[20] * x4;
64: yy[5 * i + 1] = diag[1] * x0 + diag[6] * x1 + diag[11] * x2 + diag[16] * x3 + diag[21] * x4;
65: yy[5 * i + 2] = diag[2] * x0 + diag[7] * x1 + diag[12] * x2 + diag[17] * x3 + diag[22] * x4;
66: yy[5 * i + 3] = diag[3] * x0 + diag[8] * x1 + diag[13] * x2 + diag[18] * x3 + diag[23] * x4;
67: yy[5 * i + 4] = diag[4] * x0 + diag[9] * x1 + diag[14] * x2 + diag[19] * x3 + diag[24] * x4;
68: diag += 25;
69: }
70: break;
71: case 6:
72: for (i = 0; i < m; i++) {
73: x0 = xx[6 * i];
74: x1 = xx[6 * i + 1];
75: x2 = xx[6 * i + 2];
76: x3 = xx[6 * i + 3];
77: x4 = xx[6 * i + 4];
78: x5 = xx[6 * i + 5];
80: yy[6 * i] = diag[0] * x0 + diag[6] * x1 + diag[12] * x2 + diag[18] * x3 + diag[24] * x4 + diag[30] * x5;
81: yy[6 * i + 1] = diag[1] * x0 + diag[7] * x1 + diag[13] * x2 + diag[19] * x3 + diag[25] * x4 + diag[31] * x5;
82: yy[6 * i + 2] = diag[2] * x0 + diag[8] * x1 + diag[14] * x2 + diag[20] * x3 + diag[26] * x4 + diag[32] * x5;
83: yy[6 * i + 3] = diag[3] * x0 + diag[9] * x1 + diag[15] * x2 + diag[21] * x3 + diag[27] * x4 + diag[33] * x5;
84: yy[6 * i + 4] = diag[4] * x0 + diag[10] * x1 + diag[16] * x2 + diag[22] * x3 + diag[28] * x4 + diag[34] * x5;
85: yy[6 * i + 5] = diag[5] * x0 + diag[11] * x1 + diag[17] * x2 + diag[23] * x3 + diag[29] * x4 + diag[35] * x5;
86: diag += 36;
87: }
88: break;
89: case 7:
90: for (i = 0; i < m; i++) {
91: x0 = xx[7 * i];
92: x1 = xx[7 * i + 1];
93: x2 = xx[7 * i + 2];
94: x3 = xx[7 * i + 3];
95: x4 = xx[7 * i + 4];
96: x5 = xx[7 * i + 5];
97: x6 = xx[7 * i + 6];
99: yy[7 * i] = diag[0] * x0 + diag[7] * x1 + diag[14] * x2 + diag[21] * x3 + diag[28] * x4 + diag[35] * x5 + diag[42] * x6;
100: yy[7 * i + 1] = diag[1] * x0 + diag[8] * x1 + diag[15] * x2 + diag[22] * x3 + diag[29] * x4 + diag[36] * x5 + diag[43] * x6;
101: yy[7 * i + 2] = diag[2] * x0 + diag[9] * x1 + diag[16] * x2 + diag[23] * x3 + diag[30] * x4 + diag[37] * x5 + diag[44] * x6;
102: yy[7 * i + 3] = diag[3] * x0 + diag[10] * x1 + diag[17] * x2 + diag[24] * x3 + diag[31] * x4 + diag[38] * x5 + diag[45] * x6;
103: yy[7 * i + 4] = diag[4] * x0 + diag[11] * x1 + diag[18] * x2 + diag[25] * x3 + diag[32] * x4 + diag[39] * x5 + diag[46] * x6;
104: yy[7 * i + 5] = diag[5] * x0 + diag[12] * x1 + diag[19] * x2 + diag[26] * x3 + diag[33] * x4 + diag[40] * x5 + diag[47] * x6;
105: yy[7 * i + 6] = diag[6] * x0 + diag[13] * x1 + diag[20] * x2 + diag[27] * x3 + diag[34] * x4 + diag[41] * x5 + diag[48] * x6;
106: diag += 49;
107: }
108: break;
109: default:
110: for (i = 0; i < m; i++) {
111: for (ib = 0; ib < bs; ib++) {
112: PetscScalar rowsum = 0;
113: for (jb = 0; jb < bs; jb++) rowsum += diag[ib + jb * bs] * xx[bs * i + jb];
114: yy[bs * i + ib] = rowsum;
115: }
116: diag += bs * bs;
117: }
118: }
119: PetscCall(VecRestoreArrayRead(x, &xx));
120: PetscCall(VecRestoreArray(y, &yy));
121: PetscCall(PetscLogFlops((2.0 * bs * bs - bs) * m)); /* 2*bs2 - bs */
122: PetscFunctionReturn(PETSC_SUCCESS);
123: }
125: static PetscErrorCode PCApplyTranspose_PBJacobi(PC pc, Vec x, Vec y)
126: {
127: PC_PBJacobi *jac = (PC_PBJacobi *)pc->data;
128: PetscInt i, ib, jb;
129: const PetscInt m = jac->mbs;
130: const PetscInt bs = jac->bs;
131: const MatScalar *diag = jac->diag;
132: PetscScalar *yy, x0, x1, x2, x3, x4, x5, x6;
133: const PetscScalar *xx;
135: PetscFunctionBegin;
136: PetscCall(VecGetArrayRead(x, &xx));
137: PetscCall(VecGetArray(y, &yy));
138: switch (bs) {
139: case 1:
140: for (i = 0; i < m; i++) yy[i] = diag[i] * xx[i];
141: break;
142: case 2:
143: for (i = 0; i < m; i++) {
144: x0 = xx[2 * i];
145: x1 = xx[2 * i + 1];
146: yy[2 * i] = diag[0] * x0 + diag[1] * x1;
147: yy[2 * i + 1] = diag[2] * x0 + diag[3] * x1;
148: diag += 4;
149: }
150: break;
151: case 3:
152: for (i = 0; i < m; i++) {
153: x0 = xx[3 * i];
154: x1 = xx[3 * i + 1];
155: x2 = xx[3 * i + 2];
157: yy[3 * i] = diag[0] * x0 + diag[1] * x1 + diag[2] * x2;
158: yy[3 * i + 1] = diag[3] * x0 + diag[4] * x1 + diag[5] * x2;
159: yy[3 * i + 2] = diag[6] * x0 + diag[7] * x1 + diag[8] * x2;
160: diag += 9;
161: }
162: break;
163: case 4:
164: for (i = 0; i < m; i++) {
165: x0 = xx[4 * i];
166: x1 = xx[4 * i + 1];
167: x2 = xx[4 * i + 2];
168: x3 = xx[4 * i + 3];
170: yy[4 * i] = diag[0] * x0 + diag[1] * x1 + diag[2] * x2 + diag[3] * x3;
171: yy[4 * i + 1] = diag[4] * x0 + diag[5] * x1 + diag[6] * x2 + diag[7] * x3;
172: yy[4 * i + 2] = diag[8] * x0 + diag[9] * x1 + diag[10] * x2 + diag[11] * x3;
173: yy[4 * i + 3] = diag[12] * x0 + diag[13] * x1 + diag[14] * x2 + diag[15] * x3;
174: diag += 16;
175: }
176: break;
177: case 5:
178: for (i = 0; i < m; i++) {
179: x0 = xx[5 * i];
180: x1 = xx[5 * i + 1];
181: x2 = xx[5 * i + 2];
182: x3 = xx[5 * i + 3];
183: x4 = xx[5 * i + 4];
185: yy[5 * i] = diag[0] * x0 + diag[1] * x1 + diag[2] * x2 + diag[3] * x3 + diag[4] * x4;
186: yy[5 * i + 1] = diag[5] * x0 + diag[6] * x1 + diag[7] * x2 + diag[8] * x3 + diag[9] * x4;
187: yy[5 * i + 2] = diag[10] * x0 + diag[11] * x1 + diag[12] * x2 + diag[13] * x3 + diag[14] * x4;
188: yy[5 * i + 3] = diag[15] * x0 + diag[16] * x1 + diag[17] * x2 + diag[18] * x3 + diag[19] * x4;
189: yy[5 * i + 4] = diag[20] * x0 + diag[21] * x1 + diag[22] * x2 + diag[23] * x3 + diag[24] * x4;
190: diag += 25;
191: }
192: break;
193: case 6:
194: for (i = 0; i < m; i++) {
195: x0 = xx[6 * i];
196: x1 = xx[6 * i + 1];
197: x2 = xx[6 * i + 2];
198: x3 = xx[6 * i + 3];
199: x4 = xx[6 * i + 4];
200: x5 = xx[6 * i + 5];
202: yy[6 * i] = diag[0] * x0 + diag[1] * x1 + diag[2] * x2 + diag[3] * x3 + diag[4] * x4 + diag[5] * x5;
203: yy[6 * i + 1] = diag[6] * x0 + diag[7] * x1 + diag[8] * x2 + diag[9] * x3 + diag[10] * x4 + diag[11] * x5;
204: yy[6 * i + 2] = diag[12] * x0 + diag[13] * x1 + diag[14] * x2 + diag[15] * x3 + diag[16] * x4 + diag[17] * x5;
205: yy[6 * i + 3] = diag[18] * x0 + diag[19] * x1 + diag[20] * x2 + diag[21] * x3 + diag[22] * x4 + diag[23] * x5;
206: yy[6 * i + 4] = diag[24] * x0 + diag[25] * x1 + diag[26] * x2 + diag[27] * x3 + diag[28] * x4 + diag[29] * x5;
207: yy[6 * i + 5] = diag[30] * x0 + diag[31] * x1 + diag[32] * x2 + diag[33] * x3 + diag[34] * x4 + diag[35] * x5;
208: diag += 36;
209: }
210: break;
211: case 7:
212: for (i = 0; i < m; i++) {
213: x0 = xx[7 * i];
214: x1 = xx[7 * i + 1];
215: x2 = xx[7 * i + 2];
216: x3 = xx[7 * i + 3];
217: x4 = xx[7 * i + 4];
218: x5 = xx[7 * i + 5];
219: x6 = xx[7 * i + 6];
221: yy[7 * i] = diag[0] * x0 + diag[1] * x1 + diag[2] * x2 + diag[3] * x3 + diag[4] * x4 + diag[5] * x5 + diag[6] * x6;
222: yy[7 * i + 1] = diag[7] * x0 + diag[8] * x1 + diag[9] * x2 + diag[10] * x3 + diag[11] * x4 + diag[12] * x5 + diag[13] * x6;
223: yy[7 * i + 2] = diag[14] * x0 + diag[15] * x1 + diag[16] * x2 + diag[17] * x3 + diag[18] * x4 + diag[19] * x5 + diag[20] * x6;
224: yy[7 * i + 3] = diag[21] * x0 + diag[22] * x1 + diag[23] * x2 + diag[24] * x3 + diag[25] * x4 + diag[26] * x5 + diag[27] * x6;
225: yy[7 * i + 4] = diag[28] * x0 + diag[29] * x1 + diag[30] * x2 + diag[31] * x3 + diag[32] * x4 + diag[33] * x5 + diag[34] * x6;
226: yy[7 * i + 5] = diag[35] * x0 + diag[36] * x1 + diag[37] * x2 + diag[38] * x3 + diag[39] * x4 + diag[40] * x5 + diag[41] * x6;
227: yy[7 * i + 6] = diag[42] * x0 + diag[43] * x1 + diag[44] * x2 + diag[45] * x3 + diag[46] * x4 + diag[47] * x5 + diag[48] * x6;
228: diag += 49;
229: }
230: break;
231: default:
232: for (i = 0; i < m; i++) {
233: for (ib = 0; ib < bs; ib++) {
234: PetscScalar rowsum = 0;
235: for (jb = 0; jb < bs; jb++) rowsum += diag[ib * bs + jb] * xx[bs * i + jb];
236: yy[bs * i + ib] = rowsum;
237: }
238: diag += bs * bs;
239: }
240: }
241: PetscCall(VecRestoreArrayRead(x, &xx));
242: PetscCall(VecRestoreArray(y, &yy));
243: PetscCall(PetscLogFlops((2.0 * bs * bs - bs) * m));
244: PetscFunctionReturn(PETSC_SUCCESS);
245: }
247: PETSC_INTERN PetscErrorCode PCSetUp_PBJacobi_Host(PC pc)
248: {
249: PC_PBJacobi *jac = (PC_PBJacobi *)pc->data;
250: Mat A = pc->pmat;
251: MatFactorError err;
252: PetscInt nlocal;
254: PetscFunctionBegin;
255: PetscCall(MatInvertBlockDiagonal(A, &jac->diag));
256: PetscCall(MatFactorGetError(A, &err));
257: if (err) pc->failedreason = (PCFailedReason)err;
259: PetscCall(MatGetBlockSize(A, &jac->bs));
260: PetscCall(MatGetLocalSize(A, &nlocal, NULL));
261: jac->mbs = nlocal / jac->bs;
262: PetscFunctionReturn(PETSC_SUCCESS);
263: }
265: static PetscErrorCode PCSetUp_PBJacobi(PC pc)
266: {
267: PetscFunctionBegin;
268: /* In PCCreate_PBJacobi() pmat might have not been set, so we wait to the last minute to do the dispatch */
269: #if defined(PETSC_HAVE_CUDA)
270: PetscBool isCuda;
271: PetscCall(PetscObjectTypeCompareAny((PetscObject)pc->pmat, &isCuda, MATSEQAIJCUSPARSE, MATMPIAIJCUSPARSE, ""));
272: #endif
273: #if defined(PETSC_HAVE_KOKKOS_KERNELS)
274: PetscBool isKok;
275: PetscCall(PetscObjectTypeCompareAny((PetscObject)pc->pmat, &isKok, MATSEQAIJKOKKOS, MATMPIAIJKOKKOS, ""));
276: #endif
278: #if defined(PETSC_HAVE_CUDA)
279: if (isCuda) PetscCall(PCSetUp_PBJacobi_CUDA(pc));
280: else
281: #endif
282: #if defined(PETSC_HAVE_KOKKOS_KERNELS)
283: if (isKok)
284: PetscCall(PCSetUp_PBJacobi_Kokkos(pc));
285: else
286: #endif
287: {
288: PetscCall(PCSetUp_PBJacobi_Host(pc));
289: }
290: PetscFunctionReturn(PETSC_SUCCESS);
291: }
293: PetscErrorCode PCDestroy_PBJacobi(PC pc)
294: {
295: PetscFunctionBegin;
296: /*
297: Free the private data structure that was hanging off the PC
298: */
299: PetscCall(PetscFree(pc->data));
300: PetscFunctionReturn(PETSC_SUCCESS);
301: }
303: static PetscErrorCode PCView_PBJacobi(PC pc, PetscViewer viewer)
304: {
305: PC_PBJacobi *jac = (PC_PBJacobi *)pc->data;
306: PetscBool iascii;
308: PetscFunctionBegin;
309: PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
310: if (iascii) PetscCall(PetscViewerASCIIPrintf(viewer, " point-block size %" PetscInt_FMT "\n", jac->bs));
311: PetscFunctionReturn(PETSC_SUCCESS);
312: }
314: /*MC
315: PCPBJACOBI - Point block Jacobi preconditioner
317: Notes:
318: See `PCJACOBI` for diagonal Jacobi, `PCVPBJACOBI` for variable-size point block, and `PCBJACOBI` for large size blocks
320: This works for `MATAIJ` and `MATBAIJ` matrices and uses the blocksize provided to the matrix
322: Uses dense LU factorization with partial pivoting to invert the blocks; if a zero pivot
323: is detected a PETSc error is generated.
325: Developer Notes:
326: This should support the `PCSetErrorIfFailure()` flag set to `PETSC_TRUE` to allow
327: the factorization to continue even after a zero pivot is found resulting in a Nan and hence
328: terminating `KSP` with a `KSP_DIVERGED_NANORINF` allowing
329: a nonlinear solver/ODE integrator to recover without stopping the program as currently happens.
331: Perhaps should provide an option that allows generation of a valid preconditioner
332: even if a block is singular as the `PCJACOBI` does.
334: Level: beginner
336: .seealso: [](ch_ksp), `PCCreate()`, `PCSetType()`, `PCType`, `PC`, `PCJACOBI`, `PCVPBJACOBI`, `PCBJACOBI`
337: M*/
339: PETSC_EXTERN PetscErrorCode PCCreate_PBJacobi(PC pc)
340: {
341: PC_PBJacobi *jac;
343: PetscFunctionBegin;
344: /*
345: Creates the private data structure for this preconditioner and
346: attach it to the PC object.
347: */
348: PetscCall(PetscNew(&jac));
349: pc->data = (void *)jac;
351: /*
352: Initialize the pointers to vectors to ZERO; these will be used to store
353: diagonal entries of the matrix for fast preconditioner application.
354: */
355: jac->diag = NULL;
357: /*
358: Set the pointers for the functions that are provided above.
359: Now when the user-level routines (such as PCApply(), PCDestroy(), etc.)
360: are called, they will automatically call these functions. Note we
361: choose not to provide a couple of these functions since they are
362: not needed.
363: */
364: pc->ops->apply = PCApply_PBJacobi;
365: pc->ops->applytranspose = PCApplyTranspose_PBJacobi;
366: pc->ops->setup = PCSetUp_PBJacobi;
367: pc->ops->destroy = PCDestroy_PBJacobi;
368: pc->ops->setfromoptions = NULL;
369: pc->ops->view = PCView_PBJacobi;
370: pc->ops->applyrichardson = NULL;
371: pc->ops->applysymmetricleft = NULL;
372: pc->ops->applysymmetricright = NULL;
373: PetscFunctionReturn(PETSC_SUCCESS);
374: }