Actual source code: ex3.c


  2: static char help[] = "Test PC redistribute on matrix with load imbalance. \n\
  3:                       Modified from src/ksp/ksp/tutorials/ex2.c.\n\
  4: Input parameters include:\n\
  5:   -random_exact_sol : use a random exact solution vector\n\
  6:   -view_exact_sol   : write exact solution vector to stdout\n\
  7:   -n <mesh_y>       : number of mesh points\n\n";
  8: /*
  9: Example:
 10:   mpiexec -n 8 ./ex3 -n 10000 -ksp_type cg -pc_type bjacobi -sub_pc_type icc -ksp_rtol 1.e-8 -log_view
 11:   mpiexec -n 8 ./ex3 -n 10000 -ksp_type preonly -pc_type redistribute -redistribute_ksp_type cg -redistribute_pc_type bjacobi -redistribute_sub_pc_type icc -redistribute_ksp_rtol 1.e-8 -log_view
 12: */

 14: #include <petscksp.h>

 16: int main(int argc,char **args)
 17: {
 18:   Vec            x,b,u;    /* approx solution, RHS, exact solution */
 19:   Mat            A;        /* linear system matrix */
 20:   KSP            ksp;      /* linear solver context */
 21:   PetscRandom    rctx;     /* random number generator context */
 22:   PetscReal      norm;     /* norm of solution error */
 23:   PetscInt       i,j,Ii,J,Istart,Iend,m,n = 7,its,nloc,matdistribute=0;
 25:   PetscBool      flg = PETSC_FALSE;
 26:   PetscScalar    v;
 27:   PetscMPIInt    rank,size;
 28: #if defined(PETSC_USE_LOG)
 29:   PetscLogStage stage;
 30: #endif

 32:   PetscInitialize(&argc,&args,(char*)0,help);if (ierr) return ierr;
 33:   MPI_Comm_size(PETSC_COMM_WORLD,&size);
 34:   MPI_Comm_rank(PETSC_COMM_WORLD,&rank);
 35:   if (size < 2) SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_WRONG_MPI_SIZE,"This example requires at least 2 MPI processes!");

 37:   PetscOptionsGetInt(NULL,NULL,"-n",&n,NULL);
 38:   PetscOptionsGetInt(NULL,NULL,"-matdistribute",&matdistribute,NULL);
 39:   /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 40:          Compute the matrix and right-hand-side vector that define
 41:          the linear system, Ax = b.
 42:      - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
 43:   switch(matdistribute) {
 44:   case 1: /* very imbalanced process load for matrix A */
 45:     m    = (1+size)*size;
 46:     nloc = (rank+1)*n;
 47:     if (rank == size-1) { /* proc[size-1] stores all remaining rows */
 48:       nloc = m*n;
 49:       for (i=0; i<size-1; i++){
 50:         nloc -= (i+1)*n;
 51:       }
 52:     }
 53:     break;
 54:   default: /* proc[0] and proc[1] load much smaller row blocks, the rest processes have same loads */
 55:     if (rank == 0 || rank == 1) {
 56:       nloc = n;
 57:     } else {
 58:       nloc = 10*n; /* 10x larger load */
 59:     }
 60:     m = 2 + (size-2)*10;
 61:     break;
 62:   }
 63:   MatCreate(PETSC_COMM_WORLD,&A);
 64:   MatSetSizes(A,nloc,nloc,PETSC_DECIDE,PETSC_DECIDE);
 65:   MatSetFromOptions(A);
 66:   MatMPIAIJSetPreallocation(A,5,NULL,5,NULL);
 67:   MatSeqAIJSetPreallocation(A,5,NULL);
 68:   MatSetUp(A);

 70:   MatGetOwnershipRange(A,&Istart,&Iend);
 71:   nloc = Iend-Istart;
 72:   PetscSynchronizedPrintf(PETSC_COMM_WORLD,"[%d] A Istart,Iend: %D %D; nloc %D\n",rank,Istart,Iend,nloc);
 73:   PetscSynchronizedFlush(PETSC_COMM_WORLD,PETSC_STDOUT);

 75:   PetscLogStageRegister("Assembly", &stage);
 76:   PetscLogStagePush(stage);
 77:   for (Ii=Istart; Ii<Iend; Ii++) {
 78:     v = -1.0; i = Ii/n; j = Ii - i*n;
 79:     if (i>0)   {J = Ii - n; MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);}
 80:     if (i<m-1) {J = Ii + n; MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);}
 81:     if (j>0)   {J = Ii - 1; MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);}
 82:     if (j<n-1) {J = Ii + 1; MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);}
 83:     v = 4.0; MatSetValues(A,1,&Ii,1,&Ii,&v,INSERT_VALUES);
 84:   }
 85:   MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
 86:   MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
 87:   PetscLogStagePop();

 89:   /* A is symmetric. Set symmetric flag to enable ICC/Cholesky preconditioner */
 90:   MatSetOption(A,MAT_SYMMETRIC,PETSC_TRUE);

 92:   /* Create parallel vectors. */
 93:   VecCreate(PETSC_COMM_WORLD,&u);
 94:   VecSetSizes(u,nloc,PETSC_DECIDE);
 95:   VecSetFromOptions(u);
 96:   VecDuplicate(u,&b);
 97:   VecDuplicate(b,&x);

 99:   /* Set exact solution; then compute right-hand-side vector. */
100:   PetscOptionsGetBool(NULL,NULL,"-random_exact_sol",&flg,NULL);
101:   if (flg) {
102:     PetscRandomCreate(PETSC_COMM_WORLD,&rctx);
103:     PetscRandomSetFromOptions(rctx);
104:     VecSetRandom(u,rctx);
105:     PetscRandomDestroy(&rctx);
106:   } else {
107:     VecSet(u,1.0);
108:   }
109:   MatMult(A,u,b);

111:   /* View the exact solution vector if desired */
112:   flg  = PETSC_FALSE;
113:   PetscOptionsGetBool(NULL,NULL,"-view_exact_sol",&flg,NULL);
114:   if (flg) {VecView(u,PETSC_VIEWER_STDOUT_WORLD);}

116:   /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
117:                 Create the linear solver and set various options
118:      - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
119:   KSPCreate(PETSC_COMM_WORLD,&ksp);
120:   KSPSetOperators(ksp,A,A);
121:   KSPSetTolerances(ksp,1.e-2/((m+1)*(n+1)),PETSC_DEFAULT,PETSC_DEFAULT,
122:                           PETSC_DEFAULT);
123:   KSPSetFromOptions(ksp);

125:   /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
126:                       Solve the linear system
127:      - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
128:   KSPSolve(ksp,b,x);

130:   /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
131:                       Check solution and clean up
132:      - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
133:   VecAXPY(x,-1.0,u);
134:   VecNorm(x,NORM_2,&norm);
135:   KSPGetIterationNumber(ksp,&its);
136:   PetscPrintf(PETSC_COMM_WORLD,"Norm of error %g iterations %D\n",(double)norm,its);

138:   /* Free work space. */
139:   KSPDestroy(&ksp);
140:   VecDestroy(&u);  VecDestroy(&x);
141:   VecDestroy(&b);  MatDestroy(&A);
142:   PetscFinalize();
143:   return ierr;
144: }


147: /*TEST

149:    test:
150:       nsize: 8
151:       args: -n 100 -ksp_type cg -pc_type bjacobi -sub_pc_type icc -ksp_rtol 1.e-8

153:    test:
154:       suffix: 2
155:       nsize: 8
156:       args: -n 100 -ksp_type preonly -pc_type redistribute -redistribute_ksp_type cg -redistribute_pc_type bjacobi -redistribute_sub_pc_type icc -redistribute_ksp_rtol 1.e-8

158: TEST*/