Actual source code: ex5.c
static char help[] = "Solves two linear systems in parallel with KSP. The code\n\
illustrates repeated solution of linear systems with the same preconditioner\n\
method but different matrices (having the same nonzero structure). The code\n\
also uses multiple profiling stages. Input arguments are\n\
  -m <size>   : problem size\n\
  -mat_nonsym : use nonsymmetric matrix (default is symmetric)\n\n";

/*
   Include "petscksp.h" so that we can use KSP solvers. Note that this file
   automatically includes:
     petscsys.h    - base PETSc routines    petscvec.h - vectors
     petscmat.h    - matrices
     petscis.h     - index sets             petscksp.h - Krylov subspace methods
     petscviewer.h - viewers                petscpc.h  - preconditioners
*/
#include <petscksp.h>
int main(int argc, char **args)
{
  KSP           ksp;         /* linear solver context */
  Mat           C;           /* matrix */
  Vec           x, u, b;     /* approx solution, RHS, exact solution */
  PetscReal     norm, bnorm; /* norms of the solution error and right-hand side */
  PetscScalar   v, none = -1.0;
  PetscInt      Ii, J, ldim, low, high, iglobal, Istart, Iend;
  PetscInt      i, j, m = 3, n = 2, its;
  PetscMPIInt   size, rank;
  PetscBool     mat_nonsymmetric = PETSC_FALSE;
  PetscBool     testnewC = PETSC_FALSE, testscaledMat = PETSC_FALSE, single = PETSC_FALSE;
  PetscLogStage stages[2];

  PetscFunctionBeginUser;
  PetscCall(PetscInitialize(&argc, &args, (char *)0, help));
  PetscCall(PetscOptionsGetInt(NULL, NULL, "-m", &m, NULL));
  PetscCall(PetscOptionsHasName(NULL, NULL, "-pc_precision", &single));
  PetscCallMPI(MPI_Comm_rank(PETSC_COMM_WORLD, &rank));
  PetscCallMPI(MPI_Comm_size(PETSC_COMM_WORLD, &size));
  n = 2 * size;

  /*
     Set flag if we are doing a nonsymmetric problem; the default is symmetric.
  */
  PetscCall(PetscOptionsGetBool(NULL, NULL, "-mat_nonsym", &mat_nonsymmetric, NULL));

  PetscCall(PetscOptionsGetBool(NULL, NULL, "-test_scaledMat", &testscaledMat, NULL));
  /*
     Register two stages for separate profiling of the two linear solves.
     Use the runtime option -log_view for a printout of performance
     statistics at the program's conclusion.
  */
  PetscCall(PetscLogStageRegister("Original Solve", &stages[0]));
  PetscCall(PetscLogStageRegister("Second Solve", &stages[1]));

  /* -------------- Stage 0: Solve Original System ---------------------- */
  /*
     Indicate to PETSc profiling that we're beginning the first stage
  */
  PetscCall(PetscLogStagePush(stages[0]));

  /*
     Create parallel matrix, specifying only its global dimensions.
     When using MatCreate(), the matrix format can be specified at
     runtime. Also, the parallel partitioning of the matrix is
     determined by PETSc at runtime.
  */
  PetscCall(MatCreate(PETSC_COMM_WORLD, &C));
  PetscCall(MatSetSizes(C, PETSC_DECIDE, PETSC_DECIDE, m * n, m * n));
  PetscCall(MatSetFromOptions(C));
  PetscCall(MatSetUp(C));
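  /*
     A possible refinement (not used in this example): MatSetUp() lets PETSc
     allocate matrix storage on demand. For the default AIJ format, assembly
     performance could be improved by preallocating space for the stencil set
     up below (at most 5 nonzeros per row, 6 with -mat_nonsym), e.g.:

       PetscCall(MatSeqAIJSetPreallocation(C, 6, NULL));
       PetscCall(MatMPIAIJSetPreallocation(C, 6, NULL, 6, NULL));

     This is only a sketch; the example as distributed simply calls MatSetUp().
  */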
  /*
     Currently, all PETSc parallel matrix formats are partitioned by
     contiguous chunks of rows across the processors. Determine which
     rows of the matrix are locally owned.
  */
  PetscCall(MatGetOwnershipRange(C, &Istart, &Iend));
  /*
     Set matrix entries in parallel.
      - Each processor needs to insert only elements that it owns
        locally (but any non-local elements will be sent to the
        appropriate processor during matrix assembly).
      - Always specify global rows and columns of matrix entries.
  */
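  /*
     The loop below fills the matrix with the standard 5-point stencil on an
     m x n grid: global row Ii corresponds to grid point (i, j) with i = Ii / n
     and j = Ii % n; each existing neighbor contributes -1 off the diagonal and
     the diagonal entry is 4.
  */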
  for (Ii = Istart; Ii < Iend; Ii++) {
    v = -1.0;
    i = Ii / n;
    j = Ii - i * n;
    if (i > 0) {
      J = Ii - n;
      PetscCall(MatSetValues(C, 1, &Ii, 1, &J, &v, ADD_VALUES));
    }
    if (i < m - 1) {
      J = Ii + n;
      PetscCall(MatSetValues(C, 1, &Ii, 1, &J, &v, ADD_VALUES));
    }
    if (j > 0) {
      J = Ii - 1;
      PetscCall(MatSetValues(C, 1, &Ii, 1, &J, &v, ADD_VALUES));
    }
    if (j < n - 1) {
      J = Ii + 1;
      PetscCall(MatSetValues(C, 1, &Ii, 1, &J, &v, ADD_VALUES));
    }
    v = 4.0;
    PetscCall(MatSetValues(C, 1, &Ii, 1, &Ii, &v, ADD_VALUES));
  }

  /*
     Make the matrix nonsymmetric if desired
  */
  if (mat_nonsymmetric) {
    for (Ii = Istart; Ii < Iend; Ii++) {
      v = -1.5;
      i = Ii / n;
      if (i > 1) {
        J = Ii - n - 1;
        PetscCall(MatSetValues(C, 1, &Ii, 1, &J, &v, ADD_VALUES));
      }
    }
  } else {
    PetscCall(MatSetOption(C, MAT_SYMMETRIC, PETSC_TRUE));
    PetscCall(MatSetOption(C, MAT_SYMMETRY_ETERNAL, PETSC_TRUE));
  }
  /*
     Assemble matrix, using the 2-step process:
       MatAssemblyBegin(), MatAssemblyEnd()
     Computations can be done while messages are in transit
     by placing code between these two statements.
  */
  PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));
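  /*
     For debugging (not done in this example), the assembled matrix can be
     inspected at this point, either with the runtime option -mat_view or
     with an explicit call such as:

       PetscCall(MatView(C, PETSC_VIEWER_STDOUT_WORLD));
  */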
  /*
     Create parallel vectors.
      - When using VecSetSizes(), we specify only the vector's global
        dimension; the parallel partitioning is determined at runtime.
      - Note: We form 1 vector from scratch and then duplicate as needed.
  */
  PetscCall(VecCreate(PETSC_COMM_WORLD, &u));
  PetscCall(VecSetSizes(u, PETSC_DECIDE, m * n));
  PetscCall(VecSetFromOptions(u));
  PetscCall(VecDuplicate(u, &b));
  PetscCall(VecDuplicate(b, &x));
  /*
     Currently, all parallel PETSc vectors are partitioned by
     contiguous chunks across the processors. Determine which
     range of entries is locally owned.
  */
  PetscCall(VecGetOwnershipRange(x, &low, &high));
  /*
     Set elements within the exact solution vector in parallel.
      - Each processor needs to insert only elements that it owns
        locally (but any non-local entries will be sent to the
        appropriate processor during vector assembly).
      - Always specify global locations of vector entries.
  */
  PetscCall(VecGetLocalSize(x, &ldim));
  for (i = 0; i < ldim; i++) {
    iglobal = i + low;
    v = (PetscScalar)(i + 100 * rank);
    PetscCall(VecSetValues(u, 1, &iglobal, &v, INSERT_VALUES));
  }
  /*
     Assemble vector, using the 2-step process:
       VecAssemblyBegin(), VecAssemblyEnd()
     Computations can be done while messages are in transit
     by placing code between these two statements.
  */
  PetscCall(VecAssemblyBegin(u));
  PetscCall(VecAssemblyEnd(u));
  /*
     Compute right-hand-side vector
  */
  PetscCall(MatMult(C, u, b));

  /*
     Create linear solver context
  */
  PetscCall(KSPCreate(PETSC_COMM_WORLD, &ksp));

  /*
     Set operators. Here the matrix that defines the linear system
     also serves as the preconditioning matrix.
  */
  PetscCall(KSPSetOperators(ksp, C, C));

  /*
     Set runtime options (e.g., -ksp_type <type> -pc_type <type>)
  */
  PetscCall(KSPSetFromOptions(ksp));
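  /*
     Any KSP/PC options can be supplied on the command line at this point;
     for instance, one might run

       mpiexec -n 2 ./ex5 -ksp_type gmres -pc_type bjacobi -ksp_monitor

     The TEST section at the end of this file lists the option sets that are
     exercised by the PETSc test harness.
  */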
  /*
     Solve linear system. Here we explicitly call KSPSetUp() for more
     detailed performance monitoring of certain preconditioners, such
     as ICC and ILU. This call is optional, as KSPSetUp() will
     automatically be called within KSPSolve() if it hasn't been
     called already.
  */
  PetscCall(KSPSetUp(ksp));
  PetscCall(KSPSolve(ksp, b, x));
  /*
     Check the result: compute the norm of x - u (the difference between the
     computed and exact solutions) relative to the norm of the right-hand side
  */
  PetscCall(VecAXPY(x, none, u));
  PetscCall(VecNorm(x, NORM_2, &norm));
  PetscCall(VecNorm(b, NORM_2, &bnorm));
  PetscCall(KSPGetIterationNumber(ksp, &its));
  if (!testscaledMat || (!single && norm / bnorm > 1.e-5)) PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Relative norm of the residual %g, Iterations %" PetscInt_FMT "\n", (double)norm / (double)bnorm, its));
  /* -------------- Stage 1: Solve Second System ---------------------- */
  /*
     Solve another linear system with the same method. We reuse the KSP
     context, matrix and vector data structures, and hence save the
     overhead of creating new ones.

     Indicate to PETSc profiling that we're concluding the first
     stage with PetscLogStagePop(), and beginning the second stage with
     PetscLogStagePush().
  */
  PetscCall(PetscLogStagePop());
  PetscCall(PetscLogStagePush(stages[1]));

  /*
     Initialize all matrix entries to zero. MatZeroEntries() retains the
     nonzero structure of the matrix for sparse formats.
  */
  PetscCall(MatZeroEntries(C));

  /*
     Assemble matrix again. Note that we retain the same matrix data
     structure and the same nonzero pattern; we just change the values
     of the matrix entries.
  */
  for (i = 0; i < m; i++) {
    for (j = 2 * rank; j < 2 * rank + 2; j++) {
      v = -1.0;
      Ii = j + n * i;
      if (i > 0) {
        J = Ii - n;
        PetscCall(MatSetValues(C, 1, &Ii, 1, &J, &v, ADD_VALUES));
      }
      if (i < m - 1) {
        J = Ii + n;
        PetscCall(MatSetValues(C, 1, &Ii, 1, &J, &v, ADD_VALUES));
      }
      if (j > 0) {
        J = Ii - 1;
        PetscCall(MatSetValues(C, 1, &Ii, 1, &J, &v, ADD_VALUES));
      }
      if (j < n - 1) {
        J = Ii + 1;
        PetscCall(MatSetValues(C, 1, &Ii, 1, &J, &v, ADD_VALUES));
      }
      v = 6.0;
      PetscCall(MatSetValues(C, 1, &Ii, 1, &Ii, &v, ADD_VALUES));
    }
  }
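  /*
     Note the differences from the first assembly loop above: the diagonal
     value is now 6.0 instead of 4.0, and each process sets the entries for
     two "columns" of the grid (j = 2*rank and j = 2*rank + 1) rather than for
     its locally owned rows, so many values may belong to other processes and
     are communicated to their owners during the assembly below.
  */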
  if (mat_nonsymmetric) {
    for (Ii = Istart; Ii < Iend; Ii++) {
      v = -1.5;
      i = Ii / n;
      if (i > 1) {
        J = Ii - n - 1;
        PetscCall(MatSetValues(C, 1, &Ii, 1, &J, &v, ADD_VALUES));
      }
    }
  }
  PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));

  if (testscaledMat) {
    PetscRandom rctx;

    /* Scale a(0,0) and a(M-1,M-1) */
    if (rank == 0) {
      v = 6.0 * 0.00001;
      Ii = 0;
      J = 0;
      PetscCall(MatSetValues(C, 1, &Ii, 1, &J, &v, INSERT_VALUES));
    } else if (rank == size - 1) {
      v = 6.0 * 0.00001;
      Ii = m * n - 1;
      J = m * n - 1;
      PetscCall(MatSetValues(C, 1, &Ii, 1, &J, &v, INSERT_VALUES));
    }
    PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));

    /* Compute a new right-hand-side vector */
    PetscCall(VecDestroy(&u));
    PetscCall(VecCreate(PETSC_COMM_WORLD, &u));
    PetscCall(VecSetSizes(u, PETSC_DECIDE, m * n));
    PetscCall(VecSetFromOptions(u));

    PetscCall(PetscRandomCreate(PETSC_COMM_WORLD, &rctx));
    PetscCall(PetscRandomSetFromOptions(rctx));
    PetscCall(VecSetRandom(u, rctx));
    PetscCall(PetscRandomDestroy(&rctx));
    PetscCall(VecAssemblyBegin(u));
    PetscCall(VecAssemblyEnd(u));
  }

  PetscCall(PetscOptionsGetBool(NULL, NULL, "-test_newMat", &testnewC, NULL));
  if (testnewC) {
    /*
       The user may use a new matrix C with the same nonzero pattern, e.g.
         ./ex5 -ksp_monitor -mat_type sbaij -pc_type cholesky -pc_factor_mat_solver_type mumps -test_newMat
    */
    Mat Ctmp;
    PetscCall(MatDuplicate(C, MAT_COPY_VALUES, &Ctmp));
    PetscCall(MatDestroy(&C));
    PetscCall(MatDuplicate(Ctmp, MAT_COPY_VALUES, &C));
    PetscCall(MatDestroy(&Ctmp));
  }
  PetscCall(MatMult(C, u, b));

  /*
     Set operators. Here the matrix that defines the linear system
     also serves as the preconditioning matrix.
  */
  PetscCall(KSPSetOperators(ksp, C, C));

  /*
     Solve linear system
  */
  PetscCall(KSPSetUp(ksp));
  PetscCall(KSPSolve(ksp, b, x));
  /*
     Check the result again: compute the norm of x - u relative to bnorm
     (note that bnorm still holds the norm of the first right-hand side)
  */
  PetscCall(VecAXPY(x, none, u));
  PetscCall(VecNorm(x, NORM_2, &norm));
  PetscCall(KSPGetIterationNumber(ksp, &its));
  if (!testscaledMat || (!single && norm / bnorm > PETSC_SMALL)) PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Relative norm of the residual %g, Iterations %" PetscInt_FMT "\n", (double)norm / (double)bnorm, its));
  /*
     Free work space. All PETSc objects should be destroyed when they
     are no longer needed.
  */
  PetscCall(KSPDestroy(&ksp));
  PetscCall(VecDestroy(&u));
  PetscCall(VecDestroy(&x));
  PetscCall(VecDestroy(&b));
  PetscCall(MatDestroy(&C));

  /*
     Indicate to PETSc profiling that we're concluding the second stage
  */
  PetscCall(PetscLogStagePop());

  PetscCall(PetscFinalize());
  return 0;
}
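/*
   Assuming a configured PETSc installation with PETSC_DIR (and, for a
   non-prefix install, PETSC_ARCH) set, this tutorial can typically be built
   and run from its source directory roughly as follows:

     make ex5
     mpiexec -n 2 ./ex5 -m 4 -ksp_monitor

   The exact build and launch commands depend on how PETSc and MPI were
   installed.
*/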
/*TEST

   test:
      args: -pc_type jacobi -ksp_monitor_short -ksp_gmres_cgs_refinement_type refine_always

   test:
      suffix: 2
      nsize: 2
      args: -pc_type jacobi -ksp_monitor_short -ksp_gmres_cgs_refinement_type refine_always -ksp_rtol .000001

   test:
      suffix: 5
      nsize: 2
      args: -ksp_gmres_cgs_refinement_type refine_always -ksp_monitor draw::draw_lg -ksp_monitor_true_residual draw::draw_lg

   test:
      suffix: asm
      nsize: 4
      args: -pc_type asm

   test:
      suffix: asm_baij
      nsize: 4
      args: -pc_type asm -mat_type baij
      output_file: output/ex5_asm.out

   test:
      suffix: redundant_0
      args: -m 1000 -pc_type redundant -pc_redundant_number 1 -redundant_ksp_type gmres -redundant_pc_type jacobi

   test:
      suffix: redundant_1
      nsize: 5
      args: -pc_type redundant -pc_redundant_number 1 -redundant_ksp_type gmres -redundant_pc_type jacobi

   test:
      suffix: redundant_2
      nsize: 5
      args: -pc_type redundant -pc_redundant_number 3 -redundant_ksp_type gmres -redundant_pc_type jacobi

   test:
      suffix: redundant_3
      nsize: 5
      args: -pc_type redundant -pc_redundant_number 5 -redundant_ksp_type gmres -redundant_pc_type jacobi

   test:
      suffix: redundant_4
      nsize: 5
      args: -pc_type redundant -pc_redundant_number 3 -redundant_ksp_type gmres -redundant_pc_type jacobi -psubcomm_type interlaced

   test:
      suffix: superlu_dist
      nsize: 15
      requires: superlu_dist
      args: -pc_type lu -pc_factor_mat_solver_type superlu_dist -mat_superlu_dist_equil false -m 150 -mat_superlu_dist_r 3 -mat_superlu_dist_c 5 -test_scaledMat

   test:
      suffix: superlu_dist_2
      nsize: 15
      requires: superlu_dist
      args: -pc_type lu -pc_factor_mat_solver_type superlu_dist -mat_superlu_dist_equil false -m 150 -mat_superlu_dist_r 3 -mat_superlu_dist_c 5 -test_scaledMat -mat_superlu_dist_fact SamePattern_SameRowPerm
      output_file: output/ex5_superlu_dist.out

   test:
      suffix: superlu_dist_3
      nsize: 15
      requires: superlu_dist
      args: -pc_type lu -pc_factor_mat_solver_type superlu_dist -mat_superlu_dist_equil false -m 500 -mat_superlu_dist_r 3 -mat_superlu_dist_c 5 -test_scaledMat -mat_superlu_dist_fact DOFACT
      output_file: output/ex5_superlu_dist.out

   test:
      suffix: superlu_dist_3s
      nsize: 15
      requires: superlu_dist defined(PETSC_HAVE_SUPERLU_DIST_SINGLE)
      args: -pc_type lu -pc_factor_mat_solver_type superlu_dist -mat_superlu_dist_equil false -m 500 -mat_superlu_dist_r 3 -mat_superlu_dist_c 5 -test_scaledMat -mat_superlu_dist_fact DOFACT -pc_precision single
      output_file: output/ex5_superlu_dist.out

   test:
      suffix: superlu_dist_0
      nsize: 1
      requires: superlu_dist
      args: -pc_type lu -pc_factor_mat_solver_type superlu_dist -test_scaledMat
      output_file: output/ex5_superlu_dist.out

TEST*/