Actual source code: ex3.c
/*
   Help text for this example; main() passes it to PetscInitialize().
   Lists every command-line option that main() reads directly.
*/
static char help[] =
  "Test PC redistribute on matrix with load imbalance. \n"
  "Modified from src/ksp/ksp/tutorials/ex2.c.\n"
  "Input parameters include:\n"
  "-random_exact_sol : use a random exact solution vector\n"
  "-view_exact_sol : write exact solution vector to stdout\n"
  "-n <mesh_y> : number of mesh points\n"
  "-matdistribute <k> : row layout of A; 1 = rank r owns (r+1)*n rows and the last rank owns the remainder, otherwise ranks 0 and 1 own n rows and every other rank owns 10*n rows\n\n";
8: /*
9: Example:
10: mpiexec -n 8 ./ex3 -n 10000 -ksp_type cg -pc_type bjacobi -sub_pc_type icc -ksp_rtol 1.e-8 -log_view
11: mpiexec -n 8 ./ex3 -n 10000 -ksp_type preonly -pc_type redistribute -redistribute_ksp_type cg -redistribute_pc_type bjacobi -redistribute_sub_pc_type icc -redistribute_ksp_rtol 1.e-8 -log_view
12: */
14: #include <petscksp.h>
16: int main(int argc,char **args)
17: {
18: Vec x,b,u; /* approx solution, RHS, exact solution */
19: Mat A; /* linear system matrix */
20: KSP ksp; /* linear solver context */
21: PetscRandom rctx; /* random number generator context */
22: PetscReal norm; /* norm of solution error */
23: PetscInt i,j,Ii,J,Istart,Iend,m,n = 7,its,nloc,matdistribute=0;
25: PetscBool flg = PETSC_FALSE;
26: PetscScalar v;
27: PetscMPIInt rank,size;
28: #if defined(PETSC_USE_LOG)
29: PetscLogStage stage;
30: #endif
32: PetscInitialize(&argc,&args,(char*)0,help);if (ierr) return ierr;
33: MPI_Comm_size(PETSC_COMM_WORLD,&size);
34: MPI_Comm_rank(PETSC_COMM_WORLD,&rank);
35: if (size < 2) SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_WRONG_MPI_SIZE,"This example requires at least 2 MPI processes!");
37: PetscOptionsGetInt(NULL,NULL,"-n",&n,NULL);
38: PetscOptionsGetInt(NULL,NULL,"-matdistribute",&matdistribute,NULL);
39: /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
40: Compute the matrix and right-hand-side vector that define
41: the linear system, Ax = b.
42: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
43: switch(matdistribute) {
44: case 1: /* very imbalanced process load for matrix A */
45: m = (1+size)*size;
46: nloc = (rank+1)*n;
47: if (rank == size-1) { /* proc[size-1] stores all remaining rows */
48: nloc = m*n;
49: for (i=0; i<size-1; i++){
50: nloc -= (i+1)*n;
51: }
52: }
53: break;
54: default: /* proc[0] and proc[1] load much smaller row blocks, the rest processes have same loads */
55: if (rank == 0 || rank == 1) {
56: nloc = n;
57: } else {
58: nloc = 10*n; /* 10x larger load */
59: }
60: m = 2 + (size-2)*10;
61: break;
62: }
63: MatCreate(PETSC_COMM_WORLD,&A);
64: MatSetSizes(A,nloc,nloc,PETSC_DECIDE,PETSC_DECIDE);
65: MatSetFromOptions(A);
66: MatMPIAIJSetPreallocation(A,5,NULL,5,NULL);
67: MatSeqAIJSetPreallocation(A,5,NULL);
68: MatSetUp(A);
70: MatGetOwnershipRange(A,&Istart,&Iend);
71: nloc = Iend-Istart;
72: PetscSynchronizedPrintf(PETSC_COMM_WORLD,"[%d] A Istart,Iend: %D %D; nloc %D\n",rank,Istart,Iend,nloc);
73: PetscSynchronizedFlush(PETSC_COMM_WORLD,PETSC_STDOUT);
75: PetscLogStageRegister("Assembly", &stage);
76: PetscLogStagePush(stage);
77: for (Ii=Istart; Ii<Iend; Ii++) {
78: v = -1.0; i = Ii/n; j = Ii - i*n;
79: if (i>0) {J = Ii - n; MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);}
80: if (i<m-1) {J = Ii + n; MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);}
81: if (j>0) {J = Ii - 1; MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);}
82: if (j<n-1) {J = Ii + 1; MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);}
83: v = 4.0; MatSetValues(A,1,&Ii,1,&Ii,&v,INSERT_VALUES);
84: }
85: MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
86: MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
87: PetscLogStagePop();
89: /* A is symmetric. Set symmetric flag to enable ICC/Cholesky preconditioner */
90: MatSetOption(A,MAT_SYMMETRIC,PETSC_TRUE);
92: /* Create parallel vectors. */
93: VecCreate(PETSC_COMM_WORLD,&u);
94: VecSetSizes(u,nloc,PETSC_DECIDE);
95: VecSetFromOptions(u);
96: VecDuplicate(u,&b);
97: VecDuplicate(b,&x);
99: /* Set exact solution; then compute right-hand-side vector. */
100: PetscOptionsGetBool(NULL,NULL,"-random_exact_sol",&flg,NULL);
101: if (flg) {
102: PetscRandomCreate(PETSC_COMM_WORLD,&rctx);
103: PetscRandomSetFromOptions(rctx);
104: VecSetRandom(u,rctx);
105: PetscRandomDestroy(&rctx);
106: } else {
107: VecSet(u,1.0);
108: }
109: MatMult(A,u,b);
111: /* View the exact solution vector if desired */
112: flg = PETSC_FALSE;
113: PetscOptionsGetBool(NULL,NULL,"-view_exact_sol",&flg,NULL);
114: if (flg) {VecView(u,PETSC_VIEWER_STDOUT_WORLD);}
116: /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
117: Create the linear solver and set various options
118: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
119: KSPCreate(PETSC_COMM_WORLD,&ksp);
120: KSPSetOperators(ksp,A,A);
121: KSPSetTolerances(ksp,1.e-2/((m+1)*(n+1)),PETSC_DEFAULT,PETSC_DEFAULT,
122: PETSC_DEFAULT);
123: KSPSetFromOptions(ksp);
125: /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
126: Solve the linear system
127: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
128: KSPSolve(ksp,b,x);
130: /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
131: Check solution and clean up
132: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
133: VecAXPY(x,-1.0,u);
134: VecNorm(x,NORM_2,&norm);
135: KSPGetIterationNumber(ksp,&its);
136: PetscPrintf(PETSC_COMM_WORLD,"Norm of error %g iterations %D\n",(double)norm,its);
138: /* Free work space. */
139: KSPDestroy(&ksp);
140: VecDestroy(&u); VecDestroy(&x);
141: VecDestroy(&b); MatDestroy(&A);
142: PetscFinalize();
143: return ierr;
144: }
147: /*TEST
149: test:
150: nsize: 8
151: args: -n 100 -ksp_type cg -pc_type bjacobi -sub_pc_type icc -ksp_rtol 1.e-8
153: test:
154: suffix: 2
155: nsize: 8
156: args: -n 100 -ksp_type preonly -pc_type redistribute -redistribute_ksp_type cg -redistribute_pc_type bjacobi -redistribute_sub_pc_type icc -redistribute_ksp_rtol 1.e-8
158: TEST*/