Actual source code: ex19.c
static char help[] = "Solve a 2D 5-point stencil in parallel with Kokkos batched KSP and ASM solvers.\n\
Input parameters include:\n\
  -n                : number of mesh points in x direction\n\
  -m                : number of mesh points in y direction\n\
  -num_local_blocks : number of local subdomains for block Jacobi\n\n";
/*
  Include "petscksp.h" so that we can use KSP solvers.
*/
#include <petscksp.h>
#include <petscmat.h>
int main(int argc, char **args)
{
  Vec         x, b, u;           /* approx solution, RHS, exact solution */
  Mat         A, Pmat, Aseq, AA; /* linear system matrix */
  KSP         ksp;               /* linear solver context */
  PetscReal   norm, norm0;       /* norm of solution error */
  PetscInt    i, j, Ii, J, Istart, Iend, n = 7, m = 8, its, nblocks = 2;
  PetscBool   flg, ismpi;
  PetscScalar v;
  PetscMPIInt size, rank;
  IS         *loc_blocks = NULL;
  PC          pc;

  PetscFunctionBeginUser;
  PetscCall(PetscInitialize(&argc, &args, (char *)0, help));
  PetscCallMPI(MPI_Comm_size(PETSC_COMM_WORLD, &size));
  PetscCallMPI(MPI_Comm_rank(PETSC_COMM_WORLD, &rank));
  PetscCall(PetscOptionsGetInt(NULL, NULL, "-n", &n, NULL));
  PetscCall(PetscOptionsGetInt(NULL, NULL, "-m", &m, NULL));
  PetscCall(PetscOptionsGetInt(NULL, NULL, "-num_local_blocks", &nblocks, NULL));
  /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
        Compute the matrix and right-hand-side vector that define
        the linear system, Ax = b.
     - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
  PetscCall(MatCreate(PETSC_COMM_WORLD, &A));
  PetscCall(MatSetSizes(A, PETSC_DECIDE, PETSC_DECIDE, n * m, n * m));
  PetscCall(MatSetFromOptions(A));
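  /*
     Preallocate for the 5-point stencil: at most 5 nonzeros per row in the
     local (diagonal) block and, for rows coupled to neighboring processes,
     an estimated 3 nonzeros in the off-process block.
  */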
  PetscCall(MatSeqAIJSetPreallocation(A, 5, NULL));
  PetscCall(MatMPIAIJSetPreallocation(A, 5, NULL, 3, NULL));
  /*
     Currently, all PETSc parallel matrix formats are partitioned by
     contiguous chunks of rows across the processors. Determine which
     rows of the matrix are locally owned.
  */
  PetscCall(MatGetOwnershipRange(A, &Istart, &Iend));
  /*
     Set matrix elements for the 2-D, five-point stencil.
  */
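  /*
     Each row of the discrete Laplacian gets -1 for each of its (up to four)
     neighbors and 4 on the diagonal; rows on the domain boundary simply omit
     the missing neighbors.
  */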
  for (Ii = Istart; Ii < Iend; Ii++) {
    v = -1.0;
    i = Ii / n;
    j = Ii - i * n;
    if (i > 0) {
      J = Ii - n;
      PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, ADD_VALUES));
    }
    if (i < m - 1) {
      J = Ii + n;
      PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, ADD_VALUES));
    }
    if (j > 0) {
      J = Ii - 1;
      PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, ADD_VALUES));
    }
    if (j < n - 1) {
      J = Ii + 1;
      PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, ADD_VALUES));
    }
    v = 4.0;
    PetscCall(MatSetValues(A, 1, &Ii, 1, &Ii, &v, ADD_VALUES));
  }
  PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
  /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
        Set up the ASM solver and the batched KSP solver data
     - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
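  /*
     Plan: build a block-diagonal preconditioning matrix Pmat by partitioning
     the local (diagonal) part of A into nblocks subdomains, extracting the
     corresponding diagonal blocks, and assembling them into a single AIJ
     Kokkos matrix with the same parallel layout as A.
  */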
  /* make explicit block matrix for batch solver */
  PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATMPIAIJ, &ismpi));
  if (!ismpi) {
    Aseq = A;
  } else {
    PetscCall(MatMPIAIJGetSeqAIJ(A, &Aseq, NULL, NULL));
  }
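  /*
     For an MPIAIJ matrix, Aseq is its local diagonal block, so the subdomain
     index sets created next are in the local row numbering.
     PCASMCreateSubdomains() splits those rows into nblocks pieces (a graph
     partitioner such as ParMETIS may be used when available, as the tests at
     the end of this file suggest).
  */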
  PetscCall(PCASMCreateSubdomains(Aseq, nblocks, &loc_blocks)); /* index sets over the rows of the local (diagonal) part of A */
  Mat nest, array[10000]; /* assumes nblocks * nblocks <= 10000 */
  for (Ii = 0; Ii < 10000; Ii++) array[Ii] = NULL;
  for (PetscInt bid = 0; bid < nblocks; bid++) {
    Mat matblock;
    PetscCall(MatCreateSubMatrix(Aseq, loc_blocks[bid], loc_blocks[bid], MAT_INITIAL_MATRIX, &matblock));
    // PetscCall(MatViewFromOptions(matblock, NULL, "-view_b"));
    array[bid * nblocks + bid] = matblock;
  }
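  /*
     Pack the diagonal blocks into an nblocks x nblocks MATNEST (off-diagonal
     blocks left NULL), then flatten it into a single sequential AIJ Kokkos
     matrix AA whose rows follow the block ordering.
  */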
  PetscCall(MatCreate(PETSC_COMM_SELF, &nest));
  PetscCall(MatSetFromOptions(nest));
  PetscCall(MatSetType(nest, MATNEST));
  PetscCall(MatNestSetSubMats(nest, nblocks, NULL, nblocks, NULL, array));
  PetscCall(MatSetUp(nest));
  PetscCall(MatConvert(nest, MATAIJKOKKOS, MAT_INITIAL_MATRIX, &AA));
  PetscCall(MatDestroy(&nest));
  for (PetscInt bid = 0; bid < nblocks; bid++) PetscCall(MatDestroy(&array[bid * nblocks + bid]));
  if (ismpi) {
    Mat AAseq;
    PetscCall(MatCreate(PETSC_COMM_WORLD, &Pmat));
    PetscCall(MatSetSizes(Pmat, Iend - Istart, Iend - Istart, n * m, n * m));
    PetscCall(MatSetFromOptions(Pmat));
    PetscCall(MatSeqAIJSetPreallocation(Pmat, 5, NULL));
    PetscCall(MatMPIAIJSetPreallocation(Pmat, 5, NULL, 3, NULL));
    PetscCall(MatAssemblyBegin(Pmat, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(Pmat, MAT_FINAL_ASSEMBLY));
    PetscCall(MatMPIAIJGetSeqAIJ(Pmat, &AAseq, NULL, NULL));
    PetscCheck(AAseq, PETSC_COMM_WORLD, PETSC_ERR_ARG_WRONG, "No A mat");
    PetscCall(MatAXPY(AAseq, 1.0, AA, DIFFERENT_NONZERO_PATTERN));
    PetscCall(MatDestroy(&AA));
  } else {
    Pmat = AA;
  }
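  /*
     Pmat now holds the subdomain diagonal blocks of A (each process's rows
     permuted into block order) and shares A's parallel row distribution; it
     is used below as the preconditioning matrix for both the batched Kokkos
     and ASM solvers.
  */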
  PetscCall(MatViewFromOptions(Pmat, NULL, "-view_p"));
  PetscCall(MatViewFromOptions(A, NULL, "-view_a"));

  /* A is symmetric. Set the symmetric flag to enable ICC/Cholesky preconditioners */
  PetscCall(MatSetOption(A, MAT_SYMMETRIC, PETSC_TRUE));
  PetscCall(MatCreateVecs(A, &u, &b));
  PetscCall(MatCreateVecs(A, &x, NULL));

  /*
     Set the exact solution; then compute the right-hand-side vector.
     By default the exact solution is a vector with all elements equal to 1.0.
  */
  PetscCall(VecSet(u, 1.0));
  PetscCall(MatMult(A, u, b));

  /*
     View the exact solution vector if desired.
  */
  flg = PETSC_FALSE;
  PetscCall(PetscOptionsGetBool(NULL, NULL, "-view_exact_sol", &flg, NULL));
  if (flg) PetscCall(VecView(u, PETSC_VIEWER_STDOUT_WORLD));
  /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
        Create the linear solver and set various options
     - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
  PetscCall(KSPCreate(PETSC_COMM_WORLD, &ksp));
  PetscCall(KSPSetOperators(ksp, A, Pmat));
  PetscCall(KSPSetFromOptions(ksp));
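  /*
     The actual solver is chosen at runtime, e.g. -pc_type bjkokkos for the
     batched Kokkos block solver or -pc_type asm for additive Schwarz; see the
     TEST block at the end of this file for complete option sets.
  */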
  /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
        Set up the ASM solver
     - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
  PetscCall(KSPGetPC(ksp, &pc));
  PetscCall(PetscObjectTypeCompare((PetscObject)pc, PCASM, &flg));
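  /*
     PCASMCreateSubdomains() returned index sets in the local numbering of
     Aseq; replace each one with a contiguous stride of global row indices of
     the same size (starting at Istart) so that the subdomains line up with
     the diagonal blocks packed into Pmat, then hand them to
     PCASMSetLocalSubdomains().
  */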
  if (flg && nblocks > 0) {
    for (PetscInt bid = 0, gid0 = Istart; bid < nblocks; bid++) {
      PetscInt nn;
      IS       new_loc_blocks;
      PetscCall(ISGetSize(loc_blocks[bid], &nn)); // size only
      PetscCall(ISCreateStride(PETSC_COMM_SELF, nn, gid0, 1, &new_loc_blocks));
      PetscCall(ISDestroy(&loc_blocks[bid]));
      loc_blocks[bid] = new_loc_blocks;
      gid0 += nn; // start of next block
    }
    PetscCall(PCASMSetLocalSubdomains(pc, nblocks, loc_blocks, NULL));
  }
  /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
        Solve the linear system
     - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
  PetscCall(KSPSolve(ksp, b, x));
  /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
        Check the solution and clean up
     - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
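  /*
     u is the exact solution, so after the VecAXPY() below x holds the error
     x - u; the printed quantity is ||x - u||_2 / ||b||_2.
  */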
  PetscCall(VecAXPY(x, -1.0, u));
  PetscCall(VecNorm(x, NORM_2, &norm));
  PetscCall(VecNorm(b, NORM_2, &norm0));
  PetscCall(KSPGetIterationNumber(ksp, &its));

  /*
     Print convergence information.
  */
  PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Relative norm of error %g iterations %" PetscInt_FMT "\n", (double)(norm / norm0), its));

  /*
     cleanup
  */
  PetscCall(KSPDestroy(&ksp));
  if (loc_blocks) {
    /* destroys each subdomain IS and frees the array itself (equivalent to ISDestroy() on each block plus PetscFree()) */
    PetscCall(PCASMDestroySubdomains(nblocks, loc_blocks, NULL));
  }
  PetscCall(VecDestroy(&u));
  PetscCall(VecDestroy(&x));
  PetscCall(VecDestroy(&b));
  PetscCall(MatDestroy(&A));
  PetscCall(MatDestroy(&Pmat));
  PetscCall(PetscFinalize());
  return 0;
}
/*TEST

   build:
     requires: kokkos_kernels

   testset:
     requires: parmetis
     args: -ksp_converged_reason -ksp_norm_type unpreconditioned -ksp_rtol 1e-4 -m 37 -n 23 -num_local_blocks 4
     nsize: 4
     output_file: output/ex19_0.out
     test:
       suffix: batch
       args: -ksp_type cg -pc_type bjkokkos -pc_bjkokkos_ksp_max_it 60 -pc_bjkokkos_ksp_type tfqmr -pc_bjkokkos_pc_type jacobi -pc_bjkokkos_ksp_rtol 1e-3 -mat_type aijkokkos
     test:
       suffix: asm
       args: -ksp_type cg -pc_type asm -sub_pc_type jacobi -sub_ksp_type tfqmr -sub_ksp_rtol 1e-3
     test:
       suffix: batch_bicg
       args: -ksp_type cg -pc_type bjkokkos -pc_bjkokkos_ksp_max_it 60 -pc_bjkokkos_ksp_type bicg -pc_bjkokkos_pc_type jacobi -pc_bjkokkos_ksp_rtol 1e-3 -mat_type aijkokkos

   test:
     nsize: 4
     suffix: no_metis_batch
     args: -ksp_converged_reason -ksp_norm_type unpreconditioned -ksp_rtol 1e-6 -m 37 -n 23 -num_local_blocks 4 -ksp_type cg -pc_type bjkokkos -pc_bjkokkos_ksp_max_it 60 -pc_bjkokkos_ksp_type tfqmr -pc_bjkokkos_pc_type jacobi -pc_bjkokkos_ksp_rtol 1e-3 -mat_type aijkokkos

   test:
     nsize: 1
     suffix: serial_batch
     args: -ksp_monitor -ksp_converged_reason -ksp_norm_type unpreconditioned -ksp_rtol 1e-4 -m 37 -n 23 -num_local_blocks 16 -ksp_type cg -pc_type bjkokkos -pc_bjkokkos_ksp_max_it 60 -pc_bjkokkos_ksp_type tfqmr -pc_bjkokkos_pc_type jacobi -pc_bjkokkos_ksp_rtol 1e-6 -mat_type aijkokkos -pc_bjkokkos_ksp_converged_reason

TEST*/