Actual source code: ex5.c

petsc-3.11.4 2019-09-28

static char help[] = "Solves two linear systems in parallel with KSP.  The code\n\
illustrates repeated solution of linear systems with the same preconditioner\n\
method but different matrices (having the same nonzero structure).  The code\n\
also uses multiple profiling stages.  Input arguments are\n\
  -m <size> : problem size\n\
  -mat_nonsym : use nonsymmetric matrix (default is symmetric)\n\n";

/*T
   Concepts: KSP^repeatedly solving linear systems;
   Concepts: PetscLog^profiling multiple stages of code;
   Processors: n
T*/



/*
  Include "petscksp.h" so that we can use KSP solvers.  Note that this file
  automatically includes:
     petscsys.h    - base PETSc routines      petscvec.h    - vectors
     petscmat.h    - matrices                 petscis.h     - index sets
     petscksp.h    - Krylov subspace methods  petscviewer.h - viewers
     petscpc.h     - preconditioners
*/
#include <petscksp.h>

int main(int argc,char **args)
{
  KSP            ksp;              /* linear solver context */
  Mat            C;                /* matrix */
  Vec            x,u,b;            /* approx solution, RHS, exact solution */
  PetscReal      norm;             /* norm of solution error */
  PetscScalar    v,none = -1.0;
  PetscInt       Ii,J,ldim,low,high,iglobal,Istart,Iend;
  PetscErrorCode ierr;
  PetscInt       i,j,m = 3,n = 2,its;
  PetscMPIInt    size,rank;
  PetscBool      mat_nonsymmetric = PETSC_FALSE;
  PetscBool      testnewC = PETSC_FALSE,testscaledMat = PETSC_FALSE;
#if defined(PETSC_USE_LOG)
  PetscLogStage stages[2];
#endif

  ierr = PetscInitialize(&argc,&args,(char*)0,help);if (ierr) return ierr;
  PetscOptionsGetInt(NULL,NULL,"-m",&m,NULL);
  MPI_Comm_rank(PETSC_COMM_WORLD,&rank);
  MPI_Comm_size(PETSC_COMM_WORLD,&size);
  n    = 2*size;

  /*
     Set flag if we are doing a nonsymmetric problem; the default is symmetric.
  */
  PetscOptionsGetBool(NULL,NULL,"-mat_nonsym",&mat_nonsymmetric,NULL);

  PetscOptionsGetBool(NULL,NULL,"-test_scaledMat",&testscaledMat,NULL);

  /*
     Register two stages for separate profiling of the two linear solves.
     Use the runtime option -log_view for a printout of performance
     statistics at the program's conclusion.
  */
  PetscLogStageRegister("Original Solve",&stages[0]);
  PetscLogStageRegister("Second Solve",&stages[1]);
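  /*
     For example, a run such as
         mpiexec -n 2 ./ex5 -log_view
     produces a performance summary in which "Original Solve" and "Second Solve"
     appear as separately timed stages.
  */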

  /* -------------- Stage 0: Solve Original System ---------------------- */
  /*
     Indicate to PETSc profiling that we're beginning the first stage
  */
  PetscLogStagePush(stages[0]);

  /*
     Create parallel matrix, specifying only its global dimensions.
     When using MatCreate(), the matrix format can be specified at
     runtime. Also, the parallel partitioning of the matrix is
     determined by PETSc at runtime.
  */
  MatCreate(PETSC_COMM_WORLD,&C);
  MatSetSizes(C,PETSC_DECIDE,PETSC_DECIDE,m*n,m*n);
  MatSetFromOptions(C);
  MatSetUp(C);
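  /*
     For example, the default AIJ storage format can be replaced at runtime
     with a blocked format via an option such as
         ./ex5 -mat_type baij
     (the same option is exercised by the test cases at the end of this file).
  */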

  /*
     Currently, all PETSc parallel matrix formats are partitioned by
     contiguous chunks of rows across the processors.  Determine which
     rows of the matrix are locally owned.
  */
  MatGetOwnershipRange(C,&Istart,&Iend);
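  /*
     For illustration: with the default m = 3 and two MPI processes (so n = 4
     and 12 rows in total), rank 0 typically owns rows 0..5 and rank 1 rows
     6..11.  The local range could be printed per rank with, e.g.,
         PetscSynchronizedPrintf(PETSC_COMM_WORLD,"[%d] rows %D..%D\n",rank,Istart,Iend-1);
         PetscSynchronizedFlush(PETSC_COMM_WORLD,PETSC_STDOUT);
  */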

  /*
     Set matrix entries in parallel.
      - Each processor needs to insert only elements that it owns
        locally (but any non-local elements will be sent to the
        appropriate processor during matrix assembly).
      - Always specify global rows and columns of matrix entries.
  */
  for (Ii=Istart; Ii<Iend; Ii++) {
    v = -1.0; i = Ii/n; j = Ii - i*n;
    if (i>0)   {J = Ii - n; MatSetValues(C,1,&Ii,1,&J,&v,ADD_VALUES);}
    if (i<m-1) {J = Ii + n; MatSetValues(C,1,&Ii,1,&J,&v,ADD_VALUES);}
    if (j>0)   {J = Ii - 1; MatSetValues(C,1,&Ii,1,&J,&v,ADD_VALUES);}
    if (j<n-1) {J = Ii + 1; MatSetValues(C,1,&Ii,1,&J,&v,ADD_VALUES);}
    v = 4.0; MatSetValues(C,1,&Ii,1,&Ii,&v,ADD_VALUES);
  }
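  /*
     The loop above builds the familiar 5-point stencil on an m x n grid:
     row Ii couples to its vertical neighbors Ii-n and Ii+n and to its
     horizontal neighbors Ii-1 and Ii+1 with value -1.0, and carries 4.0
     on the diagonal.
  */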

  /*
     Make the matrix nonsymmetric if desired
  */
  if (mat_nonsymmetric) {
    for (Ii=Istart; Ii<Iend; Ii++) {
      v = -1.5; i = Ii/n;
      if (i>1)   {J = Ii-n-1; MatSetValues(C,1,&Ii,1,&J,&v,ADD_VALUES);}
    }
  } else {
    MatSetOption(C,MAT_SYMMETRIC,PETSC_TRUE);
    MatSetOption(C,MAT_SYMMETRY_ETERNAL,PETSC_TRUE);
  }

  /*
     Assemble matrix, using the 2-step process:
       MatAssemblyBegin(), MatAssemblyEnd()
     Computations can be done while messages are in transition
     by placing code between these two statements.
  */
  MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);
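  /* Independent local computation could be placed here to overlap with the
     communication started by MatAssemblyBegin(). */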
  MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);

  /*
     Create parallel vectors.
      - When using VecSetSizes(), we specify only the vector's global
        dimension; the parallel partitioning is determined at runtime.
      - Note: We form 1 vector from scratch and then duplicate as needed.
  */
  VecCreate(PETSC_COMM_WORLD,&u);
  VecSetSizes(u,PETSC_DECIDE,m*n);
  VecSetFromOptions(u);
  VecDuplicate(u,&b);
  VecDuplicate(b,&x);

  /*
     Currently, all parallel PETSc vectors are partitioned by
     contiguous chunks across the processors.  Determine which
     range of entries is locally owned.
  */
  VecGetOwnershipRange(x,&low,&high);

  /*
     Set elements within the exact solution vector in parallel.
      - Each processor needs to insert only elements that it owns
        locally (but any non-local entries will be sent to the
        appropriate processor during vector assembly).
      - Always specify global locations of vector entries.
  */
  VecGetLocalSize(x,&ldim);
  for (i=0; i<ldim; i++) {
    iglobal = i + low;
    v       = (PetscScalar)(i + 100*rank);
    VecSetValues(u,1,&iglobal,&v,INSERT_VALUES);
  }

  /*
     Assemble vector, using the 2-step process:
       VecAssemblyBegin(), VecAssemblyEnd()
     Computations can be done while messages are in transition
     by placing code between these two statements.
  */
  VecAssemblyBegin(u);
  VecAssemblyEnd(u);

  /*
     Compute right-hand-side vector
  */
  MatMult(C,u,b);
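  /* Since b = C*u, the vector u is the exact solution of the linear system
     solved below, which is what makes the error check meaningful. */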

  /*
     Create linear solver context
  */
  KSPCreate(PETSC_COMM_WORLD,&ksp);

  /*
     Set operators. Here the matrix that defines the linear system
     also serves as the preconditioning matrix.
  */
  KSPSetOperators(ksp,C,C);
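  /* A different matrix could be supplied as the third argument to
     KSPSetOperators() if the preconditioner were to be built from an
     approximation of C instead. */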

  /*
     Set runtime options (e.g., -ksp_type <type> -pc_type <type>)
  */
  KSPSetFromOptions(ksp);
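  /*
     For example, a run such as
         ./ex5 -ksp_type gmres -pc_type jacobi -ksp_monitor_short
     selects the Krylov method and preconditioner at runtime; the test cases
     at the end of this file exercise several such combinations.
  */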

  /*
     Solve linear system.  Here we explicitly call KSPSetUp() for more
     detailed performance monitoring of certain preconditioners, such
     as ICC and ILU.  This call is optional, as KSPSetUp() will
     automatically be called within KSPSolve() if it hasn't been
     called already.
  */
  KSPSetUp(ksp);
  KSPSolve(ksp,b,x);

  /*
     Check the error
  */
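  /* VecAXPY() below overwrites x with x + none*u = x - u, so norm is the
     2-norm of the solution error. */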
  VecAXPY(x,none,u);
  VecNorm(x,NORM_2,&norm);
  KSPGetIterationNumber(ksp,&its);
  if (!testscaledMat || norm > 1.e-7) {
    PetscPrintf(PETSC_COMM_WORLD,"Norm of error %g, Iterations %D\n",(double)norm,its);
  }

  /* -------------- Stage 1: Solve Second System ---------------------- */
  /*
     Solve another linear system with the same method.  We reuse the KSP
     context, matrix and vector data structures, and hence save the
     overhead of creating new ones.

     Indicate to PETSc profiling that we're concluding the first
     stage with PetscLogStagePop(), and beginning the second stage with
     PetscLogStagePush().
  */
  PetscLogStagePop();
  PetscLogStagePush(stages[1]);

  /*
     Initialize all matrix entries to zero.  MatZeroEntries() retains the
     nonzero structure of the matrix for sparse formats.
  */
  MatZeroEntries(C);

  /*
     Assemble matrix again.  Note that we retain the same matrix data
     structure and the same nonzero pattern; we just change the values
     of the matrix entries.
  */
  for (i=0; i<m; i++) {
    for (j=2*rank; j<2*rank+2; j++) {
      v = -1.0;  Ii = j + n*i;
      if (i>0)   {J = Ii - n; MatSetValues(C,1,&Ii,1,&J,&v,ADD_VALUES);}
      if (i<m-1) {J = Ii + n; MatSetValues(C,1,&Ii,1,&J,&v,ADD_VALUES);}
      if (j>0)   {J = Ii - 1; MatSetValues(C,1,&Ii,1,&J,&v,ADD_VALUES);}
      if (j<n-1) {J = Ii + 1; MatSetValues(C,1,&Ii,1,&J,&v,ADD_VALUES);}
      v = 6.0; MatSetValues(C,1,&Ii,1,&Ii,&v,ADD_VALUES);
    }
  }
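  /*
     Unlike the first assembly, this loop visits two grid columns per process
     (j = 2*rank and 2*rank+1) rather than the locally owned matrix rows, so
     many of the entries set here belong to rows owned by other processes and
     are communicated during the assembly below.  The diagonal value is now
     6.0 instead of 4.0.
  */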
  if (mat_nonsymmetric) {
    for (Ii=Istart; Ii<Iend; Ii++) {
      v = -1.5; i = Ii/n;
      if (i>1)   {J = Ii-n-1; MatSetValues(C,1,&Ii,1,&J,&v,ADD_VALUES);}
    }
  }
  MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);
  MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);

  if (testscaledMat) {
    PetscRandom rctx;

    /* Scale a(0,0) and a(M-1,M-1) */
    if (!rank) {
      v = 6.0*0.00001; Ii = 0; J = 0;
      MatSetValues(C,1,&Ii,1,&J,&v,INSERT_VALUES);
    } else if (rank == size-1) {
      v = 6.0*0.00001; Ii = m*n-1; J = m*n-1;
      MatSetValues(C,1,&Ii,1,&J,&v,INSERT_VALUES);
    }
    MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);
    MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);

    /* Compute a new right-hand-side vector */
    VecDestroy(&u);
    VecCreate(PETSC_COMM_WORLD,&u);
    VecSetSizes(u,PETSC_DECIDE,m*n);
    VecSetFromOptions(u);

    PetscRandomCreate(PETSC_COMM_WORLD,&rctx);
    PetscRandomSetFromOptions(rctx);
    VecSetRandom(u,rctx);
    PetscRandomDestroy(&rctx);
    VecAssemblyBegin(u);
    VecAssemblyEnd(u);
  }

  PetscOptionsGetBool(NULL,NULL,"-test_newMat",&testnewC,NULL);
  if (testnewC) {
    /*
     User may use a new matrix C with the same nonzero pattern, e.g.
      ./ex5 -ksp_monitor -mat_type sbaij -pc_type cholesky -pc_factor_mat_solver_type mumps -test_newMat
    */
    Mat Ctmp;
    MatDuplicate(C,MAT_COPY_VALUES,&Ctmp);
    MatDestroy(&C);
    MatDuplicate(Ctmp,MAT_COPY_VALUES,&C);
    MatDestroy(&Ctmp);
  }
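  /* If -test_newMat was given, C now refers to a brand-new Mat object with the
     same nonzero pattern and values, so the KSPSetOperators() call below hands
     a different matrix object to the existing KSP context. */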

  MatMult(C,u,b);

  /*
     Set operators. Here the matrix that defines the linear system
     also serves as the preconditioning matrix.
  */
  KSPSetOperators(ksp,C,C);

  /*
     Solve linear system
  */
  KSPSetUp(ksp);
  KSPSolve(ksp,b,x);

  /*
     Check the error
  */
  VecAXPY(x,none,u);
  VecNorm(x,NORM_2,&norm);
  KSPGetIterationNumber(ksp,&its);
  if (!testscaledMat || norm > 1.e-7) {
    PetscPrintf(PETSC_COMM_WORLD,"Norm of error %g, Iterations %D\n",(double)norm,its);
  }

  /*
     Free work space.  All PETSc objects should be destroyed when they
     are no longer needed.
  */
  KSPDestroy(&ksp);
  VecDestroy(&u);
  VecDestroy(&x);
  VecDestroy(&b);
  MatDestroy(&C);

  /*
     Indicate to PETSc profiling that we're concluding the second stage
  */
  PetscLogStagePop();

  ierr = PetscFinalize();
  return ierr;
}




/*TEST

   test:
      args: -pc_type jacobi -ksp_monitor_short -ksp_gmres_cgs_refinement_type refine_always

   test:
      suffix: 2
      nsize: 2
      args: -pc_type jacobi -ksp_monitor_short -ksp_gmres_cgs_refinement_type refine_always -ksp_rtol .000001

   test:
      suffix: 5
      nsize: 2
      args: -ksp_gmres_cgs_refinement_type refine_always -ksp_monitor_lg_residualnorm -ksp_monitor_lg_true_residualnorm

   test:
      suffix: asm
      nsize: 4
      args: -pc_type asm

   test:
      suffix: asm_baij
      nsize: 4
      args: -pc_type asm -mat_type baij
      output_file: output/ex5_asm.out

   test:
      suffix: redundant_0
      args: -m 1000 -pc_type redundant -pc_redundant_number 1 -redundant_ksp_type gmres -redundant_pc_type jacobi

   test:
      suffix: redundant_1
      nsize: 5
      args: -pc_type redundant -pc_redundant_number 1 -redundant_ksp_type gmres -redundant_pc_type jacobi

   test:
      suffix: redundant_2
      nsize: 5
      args: -pc_type redundant -pc_redundant_number 3 -redundant_ksp_type gmres -redundant_pc_type jacobi

   test:
      suffix: redundant_3
      nsize: 5
      args: -pc_type redundant -pc_redundant_number 5 -redundant_ksp_type gmres -redundant_pc_type jacobi

   test:
      suffix: redundant_4
      nsize: 5
      args: -pc_type redundant -pc_redundant_number 3 -redundant_ksp_type gmres -redundant_pc_type jacobi -psubcomm_type interlaced

   test:
      suffix: superlu_dist
      nsize: 15
      requires: superlu_dist
      args: -pc_type lu -pc_factor_mat_solver_type superlu_dist -mat_superlu_dist_equil false -m 5000 -mat_superlu_dist_r 3 -mat_superlu_dist_c 5 -test_scaledMat

   test:
      suffix: superlu_dist_2
      nsize: 15
      requires: superlu_dist
      args: -pc_type lu -pc_factor_mat_solver_type superlu_dist -mat_superlu_dist_equil false -m 5000 -mat_superlu_dist_r 3 -mat_superlu_dist_c 5 -test_scaledMat -mat_superlu_dist_fact SamePattern_SameRowPerm
      output_file: output/ex5_superlu_dist.out

   test:
      suffix: superlu_dist_3
      nsize: 15
      requires: superlu_dist
      args: -pc_type lu -pc_factor_mat_solver_type superlu_dist -mat_superlu_dist_equil false -m 500 -mat_superlu_dist_r 3 -mat_superlu_dist_c 5 -test_scaledMat -mat_superlu_dist_fact DOFACT
      output_file: output/ex5_superlu_dist.out

   test:
      suffix: superlu_dist_0
      nsize: 1
      requires: superlu_dist
      args: -pc_type lu -pc_factor_mat_solver_type superlu_dist -test_scaledMat
      output_file: output/ex5_superlu_dist.out

TEST*/