Actual source code: ex52.c


  2: static char help[] = "Solves a linear system in parallel with KSP. Modified from ex2.c \n\
  3:                       Illustrate how to use external packages MUMPS, SUPERLU and STRUMPACK \n\
  4: Input parameters include:\n\
  5:   -random_exact_sol : use a random exact solution vector\n\
  6:   -view_exact_sol   : write exact solution vector to stdout\n\
  7:   -m <mesh_x>       : number of mesh points in x-direction\n\
  8:   -n <mesh_y>       : number of mesh points in y-direction\n\n";

 10: #include <petscksp.h>

 12: int main(int argc,char **args)
 13: {
 14:   Vec            x,b,u;    /* approx solution, RHS, exact solution */
 15:   Mat            A,F;
 16:   KSP            ksp;      /* linear solver context */
 17:   PC             pc;
 18:   PetscRandom    rctx;     /* random number generator context */
 19:   PetscReal      norm;     /* norm of solution error */
 20:   PetscInt       i,j,Ii,J,Istart,Iend,m = 8,n = 7,its;
 22:   PetscBool      flg=PETSC_FALSE,flg_ilu=PETSC_FALSE,flg_ch=PETSC_FALSE;
 23: #if defined(PETSC_HAVE_MUMPS)
 24:   PetscBool      flg_mumps=PETSC_FALSE,flg_mumps_ch=PETSC_FALSE;
 25: #endif
 26: #if defined(PETSC_HAVE_SUPERLU) || defined(PETSC_HAVE_SUPERLU_DIST)
 27:   PetscBool      flg_superlu=PETSC_FALSE;
 28: #endif
 29: #if defined(PETSC_HAVE_STRUMPACK)
 30:   PetscBool      flg_strumpack=PETSC_FALSE;
 31: #endif
 32:   PetscScalar    v;
 33:   PetscMPIInt    rank,size;
 34: #if defined(PETSC_USE_LOG)
 35:   PetscLogStage  stage;
 36: #endif

 38:   PetscInitialize(&argc,&args,(char*)0,help);if (ierr) return ierr;
 39:   MPI_Comm_rank(PETSC_COMM_WORLD,&rank);
 40:   MPI_Comm_size(PETSC_COMM_WORLD,&size);
 41:   PetscOptionsGetInt(NULL,NULL,"-m",&m,NULL);
 42:   PetscOptionsGetInt(NULL,NULL,"-n",&n,NULL);
 43:   /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 44:          Compute the matrix and right-hand-side vector that define
 45:          the linear system, Ax = b.
 46:      - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
 47:   MatCreate(PETSC_COMM_WORLD,&A);
 48:   MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,m*n,m*n);
 49:   MatSetFromOptions(A);
 50:   MatMPIAIJSetPreallocation(A,5,NULL,5,NULL);
 51:   MatSeqAIJSetPreallocation(A,5,NULL);
 52:   MatSetUp(A);

 54:   /*
 55:      Currently, all PETSc parallel matrix formats are partitioned by
 56:      contiguous chunks of rows across the processors.  Determine which
 57:      rows of the matrix are locally owned.
 58:   */
 59:   MatGetOwnershipRange(A,&Istart,&Iend);

 61:   /*
 62:      Set matrix elements for the 2-D, five-point stencil in parallel.
 63:       - Each processor needs to insert only elements that it owns
 64:         locally (but any non-local elements will be sent to the
 65:         appropriate processor during matrix assembly).
 66:       - Always specify global rows and columns of matrix entries.

 68:      Note: this uses the less common natural ordering that orders first
 69:      all the unknowns for x = h then for x = 2h etc; Hence you see J = Ii +- n
 70:      instead of J = I +- m as you might expect. The more standard ordering
 71:      would first do all variables for y = h, then y = 2h etc.

 73:    */
 74:   PetscLogStageRegister("Assembly", &stage);
 75:   PetscLogStagePush(stage);
 76:   for (Ii=Istart; Ii<Iend; Ii++) {
 77:     v = -1.0; i = Ii/n; j = Ii - i*n;
 78:     if (i>0)   {J = Ii - n; MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);}
 79:     if (i<m-1) {J = Ii + n; MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);}
 80:     if (j>0)   {J = Ii - 1; MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);}
 81:     if (j<n-1) {J = Ii + 1; MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);}
 82:     v = 4.0; MatSetValues(A,1,&Ii,1,&Ii,&v,INSERT_VALUES);
 83:   }

 85:   /*
 86:      Assemble matrix, using the 2-step process:
 87:        MatAssemblyBegin(), MatAssemblyEnd()
 88:      Computations can be done while messages are in transition
 89:      by placing code between these two statements.
 90:   */
 91:   MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
 92:   MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
 93:   PetscLogStagePop();

 95:   /* A is symmetric. Set symmetric flag to enable ICC/Cholesky preconditioner */
 96:   MatSetOption(A,MAT_SYMMETRIC,PETSC_TRUE);

 98:   /*
 99:      Create parallel vectors.
100:       - We form 1 vector from scratch and then duplicate as needed.
101:       - When using VecCreate(), VecSetSizes and VecSetFromOptions()
102:         in this example, we specify only the
103:         vector's global dimension; the parallel partitioning is determined
104:         at runtime.
105:       - When solving a linear system, the vectors and matrices MUST
106:         be partitioned accordingly.  PETSc automatically generates
107:         appropriately partitioned matrices and vectors when MatCreate()
108:         and VecCreate() are used with the same communicator.
109:       - The user can alternatively specify the local vector and matrix
110:         dimensions when more sophisticated partitioning is needed
111:         (replacing the PETSC_DECIDE argument in the VecSetSizes() statement
112:         below).
113:   */
114:   VecCreate(PETSC_COMM_WORLD,&u);
115:   VecSetSizes(u,PETSC_DECIDE,m*n);
116:   VecSetFromOptions(u);
117:   VecDuplicate(u,&b);
118:   VecDuplicate(b,&x);

120:   /*
121:      Set exact solution; then compute right-hand-side vector.
122:      By default we use an exact solution of a vector with all
123:      elements of 1.0;  Alternatively, using the runtime option
124:      -random_sol forms a solution vector with random components.
125:   */
126:   PetscOptionsGetBool(NULL,NULL,"-random_exact_sol",&flg,NULL);
127:   if (flg) {
128:     PetscRandomCreate(PETSC_COMM_WORLD,&rctx);
129:     PetscRandomSetFromOptions(rctx);
130:     VecSetRandom(u,rctx);
131:     PetscRandomDestroy(&rctx);
132:   } else {
133:     VecSet(u,1.0);
134:   }
135:   MatMult(A,u,b);

137:   /*
138:      View the exact solution vector if desired
139:   */
140:   flg  = PETSC_FALSE;
141:   PetscOptionsGetBool(NULL,NULL,"-view_exact_sol",&flg,NULL);
142:   if (flg) {VecView(u,PETSC_VIEWER_STDOUT_WORLD);}

144:   /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
145:                 Create the linear solver and set various options
146:      - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */

148:   /*
149:      Create linear solver context
150:   */
151:   KSPCreate(PETSC_COMM_WORLD,&ksp);
152:   KSPSetOperators(ksp,A,A);

154:   /*
155:     Example of how to use external package MUMPS
156:     Note: runtime options
157:           '-ksp_type preonly -pc_type lu -pc_factor_mat_solver_type mumps -mat_mumps_icntl_7 3 -mat_mumps_icntl_1 0.0'
158:           are equivalent to these procedural calls
159:   */
160: #if defined(PETSC_HAVE_MUMPS)
161:   flg_mumps    = PETSC_FALSE;
162:   flg_mumps_ch = PETSC_FALSE;
163:   PetscOptionsGetBool(NULL,NULL,"-use_mumps_lu",&flg_mumps,NULL);
164:   PetscOptionsGetBool(NULL,NULL,"-use_mumps_ch",&flg_mumps_ch,NULL);
165:   if (flg_mumps || flg_mumps_ch) {
166:     KSPSetType(ksp,KSPPREONLY);
167:     PetscInt  ival,icntl;
168:     PetscReal val;
169:     KSPGetPC(ksp,&pc);
170:     if (flg_mumps) {
171:       PCSetType(pc,PCLU);
172:     } else if (flg_mumps_ch) {
173:       MatSetOption(A,MAT_SPD,PETSC_TRUE); /* set MUMPS id%SYM=1 */
174:       PCSetType(pc,PCCHOLESKY);
175:     }
176:     PCFactorSetMatSolverType(pc,MATSOLVERMUMPS);
177:     PCFactorSetUpMatSolverType(pc); /* call MatGetFactor() to create F */
178:     PCFactorGetMatrix(pc,&F);

180:     /* sequential ordering */
181:     icntl = 7; ival = 2;
182:     MatMumpsSetIcntl(F,icntl,ival);

184:     /* threshold for row pivot detection */
185:     MatMumpsSetIcntl(F,24,1);
186:     icntl = 3; val = 1.e-6;
187:     MatMumpsSetCntl(F,icntl,val);

189:     /* compute determinant of A */
190:     MatMumpsSetIcntl(F,33,1);
191:   }
192: #endif

194:   /*
195:     Example of how to use external package SuperLU
196:     Note: runtime options
197:           '-ksp_type preonly -pc_type ilu -pc_factor_mat_solver_type superlu -mat_superlu_ilu_droptol 1.e-8'
198:           are equivalent to these procedual calls
199:   */
200: #if defined(PETSC_HAVE_SUPERLU) || defined(PETSC_HAVE_SUPERLU_DIST)
201:   flg_ilu     = PETSC_FALSE;
202:   flg_superlu = PETSC_FALSE;
203:   PetscOptionsGetBool(NULL,NULL,"-use_superlu_lu",&flg_superlu,NULL);
204:   PetscOptionsGetBool(NULL,NULL,"-use_superlu_ilu",&flg_ilu,NULL);
205:   if (flg_superlu || flg_ilu) {
206:     KSPSetType(ksp,KSPPREONLY);
207:     KSPGetPC(ksp,&pc);
208:     if (flg_superlu) {
209:       PCSetType(pc,PCLU);
210:     } else if (flg_ilu) {
211:       PCSetType(pc,PCILU);
212:     }
213:     if (size == 1) {
214: #if !defined(PETSC_HAVE_SUPERLU)
215:       SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"This test requires SUPERLU");
216: #else
217:       PCFactorSetMatSolverType(pc,MATSOLVERSUPERLU);
218: #endif
219:     } else {
220: #if !defined(PETSC_HAVE_SUPERLU_DIST)
221:       SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"This test requires SUPERLU_DIST");
222: #else
223:       PCFactorSetMatSolverType(pc,MATSOLVERSUPERLU_DIST);
224: #endif
225:     }
226:     PCFactorSetUpMatSolverType(pc); /* call MatGetFactor() to create F */
227:     PCFactorGetMatrix(pc,&F);
228: #if defined(PETSC_HAVE_SUPERLU)
229:     if (size == 1) {
230:       MatSuperluSetILUDropTol(F,1.e-8);
231:     }
232: #endif
233:   }
234: #endif

236:   /*
237:     Example of how to use external package STRUMPACK
238:     Note: runtime options
239:           '-pc_type lu/ilu \
240:            -pc_factor_mat_solver_type strumpack \
241:            -mat_strumpack_reordering METIS \
242:            -mat_strumpack_colperm 0 \
243:            -mat_strumpack_hss_rel_tol 1.e-3 \
244:            -mat_strumpack_hss_min_sep_size 50 \
245:            -mat_strumpack_max_rank 100 \
246:            -mat_strumpack_leaf_size 4'
247:        are equivalent to these procedural calls

249:     We refer to the STRUMPACK-sparse manual, section 5, for more info on
250:     how to tune the preconditioner.
251:   */
252: #if defined(PETSC_HAVE_STRUMPACK)
253:   flg_ilu       = PETSC_FALSE;
254:   flg_strumpack = PETSC_FALSE;
255:   PetscOptionsGetBool(NULL,NULL,"-use_strumpack_lu",&flg_strumpack,NULL);
256:   PetscOptionsGetBool(NULL,NULL,"-use_strumpack_ilu",&flg_ilu,NULL);
257:   if (flg_strumpack || flg_ilu) {
258:     KSPSetType(ksp,KSPPREONLY);
259:     KSPGetPC(ksp,&pc);
260:     if (flg_strumpack) {
261:       PCSetType(pc,PCLU);
262:     } else if (flg_ilu) {
263:       PCSetType(pc,PCILU);
264:     }
265: #if !defined(PETSC_HAVE_STRUMPACK)
266:     SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"This test requires STRUMPACK");
267: #endif
268:     PCFactorSetMatSolverType(pc,MATSOLVERSTRUMPACK);
269:     PCFactorSetUpMatSolverType(pc); /* call MatGetFactor() to create F */
270:     PCFactorGetMatrix(pc,&F);
271: #if defined(PETSC_HAVE_STRUMPACK)
272:     /* Set the fill-reducing reordering.                              */
273:     MatSTRUMPACKSetReordering(F,MAT_STRUMPACK_METIS);
274:     /* Since this is a simple discretization, the diagonal is always  */
275:     /* nonzero, and there is no need for the extra MC64 permutation.  */
276:     MatSTRUMPACKSetColPerm(F,PETSC_FALSE);
277:     /* The compression tolerance used when doing low-rank compression */
278:     /* in the preconditioner. This is problem specific!               */
279:     MatSTRUMPACKSetHSSRelTol(F,1.e-3);
280:     /* Set minimum matrix size for HSS compression to 15 in order to  */
281:     /* demonstrate preconditioner on small problems. For performance  */
282:     /* a value of say 500 is better.                                  */
283:     MatSTRUMPACKSetHSSMinSepSize(F,15);
284:     /* You can further limit the fill in the preconditioner by        */
285:     /* setting a maximum rank                                         */
286:     MatSTRUMPACKSetHSSMaxRank(F,100);
287:     /* Set the size of the diagonal blocks (the leafs) in the HSS     */
288:     /* approximation. The default value should be better for real     */
289:     /* problems. This is mostly for illustration on a small problem.  */
290:     MatSTRUMPACKSetHSSLeafSize(F,4);
291: #endif
292:   }
293: #endif

295:   /*
296:     Example of how to use procedural calls that are equivalent to
297:           '-ksp_type preonly -pc_type lu/ilu -pc_factor_mat_solver_type petsc'
298:   */
299:   flg     = PETSC_FALSE;
300:   flg_ilu = PETSC_FALSE;
301:   flg_ch  = PETSC_FALSE;
302:   PetscOptionsGetBool(NULL,NULL,"-use_petsc_lu",&flg,NULL);
303:   PetscOptionsGetBool(NULL,NULL,"-use_petsc_ilu",&flg_ilu,NULL);
304:   PetscOptionsGetBool(NULL,NULL,"-use_petsc_ch",&flg_ch,NULL);
305:   if (flg || flg_ilu || flg_ch) {
306:     Vec diag;

308:     KSPSetType(ksp,KSPPREONLY);
309:     KSPGetPC(ksp,&pc);
310:     if (flg) {
311:       PCSetType(pc,PCLU);
312:     } else if (flg_ilu) {
313:       PCSetType(pc,PCILU);
314:     } else if (flg_ch) {
315:       PCSetType(pc,PCCHOLESKY);
316:     }
317:     PCFactorSetMatSolverType(pc,MATSOLVERPETSC);
318:     PCFactorSetUpMatSolverType(pc); /* call MatGetFactor() to create F */
319:     PCFactorGetMatrix(pc,&F);

321:     /* Test MatGetDiagonal() */
322:     KSPSetUp(ksp);
323:     VecDuplicate(x,&diag);
324:     MatGetDiagonal(F,diag);
325:     /* VecView(diag,PETSC_VIEWER_STDOUT_WORLD); */
326:     VecDestroy(&diag);
327:   }

329:   KSPSetFromOptions(ksp);

331:   /* Get info from matrix factors */
332:   KSPSetUp(ksp);

334: #if defined(PETSC_HAVE_MUMPS)
335:   if (flg_mumps || flg_mumps_ch) {
336:     PetscInt  icntl,infog34;
337:     PetscReal cntl,rinfo12,rinfo13;
338:     icntl = 3;
339:     MatMumpsGetCntl(F,icntl,&cntl);

341:     /* compute determinant */
342:     if (rank == 0) {
343:       MatMumpsGetInfog(F,34,&infog34);
344:       MatMumpsGetRinfog(F,12,&rinfo12);
345:       MatMumpsGetRinfog(F,13,&rinfo13);
346:       PetscPrintf(PETSC_COMM_SELF,"  Mumps row pivot threshold = %g\n",cntl);
347:       PetscPrintf(PETSC_COMM_SELF,"  Mumps determinant = (%g, %g) * 2^%D \n",(double)rinfo12,(double)rinfo13,infog34);
348:     }
349:   }
350: #endif

352:   /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
353:                       Solve the linear system
354:      - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
355:   KSPSolve(ksp,b,x);

357:   /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
358:                       Check solution and clean up
359:      - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
360:   VecAXPY(x,-1.0,u);
361:   VecNorm(x,NORM_2,&norm);
362:   KSPGetIterationNumber(ksp,&its);

364:   /*
365:      Print convergence information.  PetscPrintf() produces a single
366:      print statement from all processes that share a communicator.
367:      An alternative is PetscFPrintf(), which prints to a file.
368:   */
369:   if (norm < 1.e-12) {
370:     PetscPrintf(PETSC_COMM_WORLD,"Norm of error < 1.e-12 iterations %D\n",its);
371:   } else {
372:     PetscPrintf(PETSC_COMM_WORLD,"Norm of error %g iterations %D\n",(double)norm,its);
373:  }

375:   /*
376:      Free work space.  All PETSc objects should be destroyed when they
377:      are no longer needed.
378:   */
379:   KSPDestroy(&ksp);
380:   VecDestroy(&u);  VecDestroy(&x);
381:   VecDestroy(&b);  MatDestroy(&A);

383:   /*
384:      Always call PetscFinalize() before exiting a program.  This routine
385:        - finalizes the PETSc libraries as well as MPI
386:        - provides summary and diagnostic information if certain runtime
387:          options are chosen (e.g., -log_view).
388:   */
389:   PetscFinalize();
390:   return ierr;
391: }

393: /*TEST

395:    test:
396:       args: -use_petsc_lu
397:       output_file: output/ex52_2.out

399:    test:
400:       suffix: mumps
401:       nsize: 3
402:       requires: mumps
403:       args: -use_mumps_lu
404:       output_file: output/ex52_1.out

406:    test:
407:       suffix: mumps_2
408:       nsize: 3
409:       requires: mumps
410:       args: -use_mumps_ch
411:       output_file: output/ex52_1.out

413:    test:
414:       suffix: mumps_3
415:       nsize: 3
416:       requires: mumps
417:       args: -use_mumps_ch -mat_type sbaij
418:       output_file: output/ex52_1.out

420:    test:
421:       suffix: mumps_omp_2
422:       nsize: 4
423:       requires: mumps hwloc openmp pthread defined(PETSC_HAVE_MPI_PROCESS_SHARED_MEMORY)
424:       args: -use_mumps_lu -mat_mumps_use_omp_threads 2
425:       output_file: output/ex52_1.out

427:    test:
428:       suffix: mumps_omp_3
429:       nsize: 4
430:       requires: mumps hwloc openmp pthread defined(PETSC_HAVE_MPI_PROCESS_SHARED_MEMORY)
431:       args: -use_mumps_ch -mat_mumps_use_omp_threads 3
432:       # Ignore the warning since we are intentionally testing the imbalanced case
433:       filter: grep -v "Warning: number of OpenMP threads"
434:       output_file: output/ex52_1.out

436:    test:
437:       suffix: mumps_omp_4
438:       nsize: 4
439:       requires: mumps hwloc openmp pthread defined(PETSC_HAVE_MPI_PROCESS_SHARED_MEMORY)
440:       # let petsc guess a proper number for threads
441:       args: -use_mumps_ch -mat_type sbaij -mat_mumps_use_omp_threads
442:       output_file: output/ex52_1.out

444:    test:
445:       suffix: strumpack
446:       requires: strumpack
447:       args: -use_strumpack_lu
448:       output_file: output/ex52_3.out

450:    test:
451:       suffix: strumpack_2
452:       nsize: 2
453:       requires: strumpack
454:       args: -use_strumpack_lu
455:       output_file: output/ex52_3.out

457:    test:
458:       suffix: strumpack_ilu
459:       requires: strumpack
460:       args: -use_strumpack_ilu
461:       output_file: output/ex52_3.out

463:    test:
464:       suffix: strumpack_ilu_2
465:       nsize: 2
466:       requires: strumpack
467:       args: -use_strumpack_ilu
468:       output_file: output/ex52_3.out

470:    test:
471:       suffix: superlu
472:       requires: superlu superlu_dist
473:       args: -use_superlu_lu
474:       output_file: output/ex52_2.out

476:    test:
477:       suffix: superlu_dist
478:       nsize: 2
479:       requires: superlu superlu_dist
480:       args: -use_superlu_lu
481:       output_file: output/ex52_2.out

483:    test:
484:       suffix: superlu_ilu
485:       requires: superlu superlu_dist
486:       args: -use_superlu_ilu
487:       output_file: output/ex52_2.out

489: TEST*/