Actual source code: ex9.c
petsc-3.14.6 2021-03-30
static char help[] = "The solution of 2 different linear systems with different linear solvers.\n\
Also, this example illustrates the repeated\n\
solution of linear systems, while reusing matrix, vector, and solver data\n\
structures throughout the process.  Note the various stages of event logging.\n\n";
/*T
   Concepts: KSP^repeatedly solving linear systems;
   Concepts: PetscLog^profiling multiple stages of code;
   Concepts: PetscLog^user-defined event profiling;
   Processors: n
T*/
/*
   Include "petscksp.h" so that we can use KSP solvers.  Note that this file
   automatically includes:
     petscsys.h    - base PETSc routines
     petscvec.h    - vectors
     petscmat.h    - matrices
     petscis.h     - index sets
     petscksp.h    - Krylov subspace methods
     petscviewer.h - viewers
     petscpc.h     - preconditioners
*/
#include <petscksp.h>
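
/*
   Typical invocations (a sketch; the MPI launcher, process count, and
   executable name depend on the local build):
     mpiexec -n 2 ./ex9 -t 2 -m 5 -my_ksp_monitor
     mpiexec -n 2 ./ex9 -t 2 -pc_type jacobi -s2_ksp_type bcgs -s2_pc_type jacobi
   The -m, -t, -unsym, and -my_ksp_monitor options are read below; the
   -s2_* options reach the second solver through its options prefix.
*/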
/*
   Declare user-defined routines
*/
extern PetscErrorCode CheckError(Vec,Vec,Vec,PetscInt,PetscReal,PetscLogEvent);
extern PetscErrorCode MyKSPMonitor(KSP,PetscInt,PetscReal,void*);
int main(int argc,char **args)
{
  Vec            x1,b1,x2,b2;  /* solution and RHS vectors for systems #1 and #2 */
  Vec            u;            /* exact solution vector */
  Mat            C1,C2;        /* matrices for systems #1 and #2 */
  KSP            ksp1,ksp2;    /* KSP contexts for systems #1 and #2 */
  PetscInt       ntimes = 3;   /* number of times to solve the linear systems */
  PetscLogEvent  CHECK_ERROR;  /* event number for error checking */
  PetscInt       ldim,low,high,iglobal,Istart,Iend,Istart2,Iend2;
  PetscInt       Ii,J,i,j,m = 3,n = 2,its,t;
  PetscErrorCode ierr;         /* error code returned by PetscInitialize()/PetscFinalize() */
  PetscBool      flg = PETSC_FALSE, unsym = PETSC_TRUE;
  PetscScalar    v;
  PetscMPIInt    rank,size;
#if defined(PETSC_USE_LOG)
  PetscLogStage  stages[3];
#endif
  ierr = PetscInitialize(&argc,&args,(char*)0,help);if (ierr) return ierr;
  PetscOptionsGetInt(NULL,NULL,"-m",&m,NULL);
  PetscOptionsGetInt(NULL,NULL,"-t",&ntimes,NULL);
  PetscOptionsGetBool(NULL,NULL,"-unsym",&unsym,NULL);
  MPI_Comm_rank(PETSC_COMM_WORLD,&rank);
  MPI_Comm_size(PETSC_COMM_WORLD,&size);
  n    = 2*size;
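
  /* The grid is m x n with n = 2*size, so each MPI rank covers exactly
     two grid columns; the assembly of the second system below exploits
     this by setting values for columns 2*rank and 2*rank+1. */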
  /*
     Register various stages for profiling
  */
  PetscLogStageRegister("Prelim setup",&stages[0]);
  PetscLogStageRegister("Linear System 1",&stages[1]);
  PetscLogStageRegister("Linear System 2",&stages[2]);

  /*
     Register a user-defined event for profiling (error checking).
  */
  CHECK_ERROR = 0;
  PetscLogEventRegister("Check Error",KSP_CLASSID,&CHECK_ERROR);
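
  /* Running with the standard PETSc option -log_view prints a profiling
     summary at PetscFinalize() in which the three stages registered above
     and the "Check Error" event appear as separate entries. */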
  /* - - - - - - - - - - - - Stage 0: - - - - - - - - - - - - - -
                        Preliminary Setup
     - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
  PetscLogStagePush(stages[0]);

  /*
     Create data structures for first linear system.
      - Create parallel matrix, specifying only its global dimensions.
        When using MatCreate(), the matrix format can be specified at
        runtime. Also, the parallel partitioning of the matrix is
        determined by PETSc at runtime.
      - Create parallel vectors.
        When using VecSetSizes(), we specify only the vector's global
        dimension; the parallel partitioning is determined at runtime.
      - Note: We form 1 vector from scratch and then duplicate as needed.
  */
  MatCreate(PETSC_COMM_WORLD,&C1);
  MatSetSizes(C1,PETSC_DECIDE,PETSC_DECIDE,m*n,m*n);
  MatSetFromOptions(C1);
  MatSetUp(C1);
  MatGetOwnershipRange(C1,&Istart,&Iend);
  VecCreate(PETSC_COMM_WORLD,&u);
  VecSetSizes(u,PETSC_DECIDE,m*n);
  VecSetFromOptions(u);
  VecDuplicate(u,&b1);
  VecDuplicate(u,&x1);
  /*
     Create first linear solver context.
     Set runtime options (e.g., -pc_type <type>).
     Note that the first linear system uses the default option
     names, while the second linear system uses a different
     options prefix.
  */
  KSPCreate(PETSC_COMM_WORLD,&ksp1);
  KSPSetFromOptions(ksp1);

  /*
     Set user-defined monitoring routine for first linear system.
  */
  PetscOptionsGetBool(NULL,NULL,"-my_ksp_monitor",&flg,NULL);
  if (flg) {KSPMonitorSet(ksp1,MyKSPMonitor,NULL,0);}
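
  /* MyKSPMonitor() (defined at the bottom of this file) has the
     PetscErrorCode (*)(KSP,PetscInt,PetscReal,void*) signature that
     KSPMonitorSet() expects; the trailing NULL and 0 are the optional
     monitor context and its destroy routine, unused here. */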
  /*
     Create data structures for second linear system.
  */
  MatCreate(PETSC_COMM_WORLD,&C2);
  MatSetSizes(C2,PETSC_DECIDE,PETSC_DECIDE,m*n,m*n);
  MatSetFromOptions(C2);
  MatSetUp(C2);
  MatGetOwnershipRange(C2,&Istart2,&Iend2);
  VecDuplicate(u,&b2);
  VecDuplicate(u,&x2);

  /*
     Create second linear solver context
  */
  KSPCreate(PETSC_COMM_WORLD,&ksp2);

  /*
     Set different options prefix for second linear system.
     Set runtime options (e.g., -s2_pc_type <type>)
  */
  KSPAppendOptionsPrefix(ksp2,"s2_");
  KSPSetFromOptions(ksp2);
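
  /* With the "s2_" prefix appended, options such as -s2_ksp_type bcgs or
     -s2_ksp_monitor_short (see the tests at the bottom of this file)
     configure ksp2 only, while the unprefixed -ksp_type and -pc_type
     options continue to control ksp1. */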
  /*
     Assemble exact solution vector in parallel.  Note that each
     processor needs to set only its local part of the vector.
  */
  VecGetLocalSize(u,&ldim);
  VecGetOwnershipRange(u,&low,&high);
  for (i=0; i<ldim; i++) {
    iglobal = i + low;
    v       = (PetscScalar)(i + 100*rank);
    VecSetValues(u,1,&iglobal,&v,ADD_VALUES);
  }
  VecAssemblyBegin(u);
  VecAssemblyEnd(u);

  /*
     Log the number of flops for computing vector entries
  */
  PetscLogFlops(2.0*ldim);
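
  /* (a rough count: one multiply and one add per local entry in the
     loop above, hence 2.0*ldim) */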
  /*
     End current profiling stage
  */
  PetscLogStagePop();
  /* --------------------------------------------------------------
                        Linear solver loop:
      Solve 2 different linear systems several times in succession
     -------------------------------------------------------------- */
  for (t=0; t<ntimes; t++) {

    /* - - - - - - - - - - - - Stage 1: - - - - - - - - - - - - - -
                 Assemble and solve first linear system
       - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */

    /*
       Begin profiling stage #1
    */
    PetscLogStagePush(stages[1]);
    /*
       Initialize all matrix entries to zero.  MatZeroEntries() retains
       the nonzero structure of the matrix for sparse formats.
    */
    if (t > 0) {MatZeroEntries(C1);}
    /*
       Set matrix entries in parallel.  Also, log the number of flops
       for computing matrix entries.
        - Each processor needs to insert only elements that it owns
          locally (but any non-local elements will be sent to the
          appropriate processor during matrix assembly).
        - Always specify global rows and columns of matrix entries.
    */
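    /*
       The rows of C1 correspond to an m x n grid in row-major order:
       global row Ii maps to grid point (i,j) with i = Ii/n and
       j = Ii - i*n.  The loop below assembles a standard 5-point stencil
       with off-diagonal entries -1 and diagonal entry 4.
    */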
    for (Ii=Istart; Ii<Iend; Ii++) {
      v = -1.0; i = Ii/n; j = Ii - i*n;
      if (i>0)   {J = Ii - n; MatSetValues(C1,1,&Ii,1,&J,&v,ADD_VALUES);}
      if (i<m-1) {J = Ii + n; MatSetValues(C1,1,&Ii,1,&J,&v,ADD_VALUES);}
      if (j>0)   {J = Ii - 1; MatSetValues(C1,1,&Ii,1,&J,&v,ADD_VALUES);}
      if (j<n-1) {J = Ii + 1; MatSetValues(C1,1,&Ii,1,&J,&v,ADD_VALUES);}
      v = 4.0; MatSetValues(C1,1,&Ii,1,&Ii,&v,ADD_VALUES);
    }
    if (unsym) {
      for (Ii=Istart; Ii<Iend; Ii++) { /* Make matrix nonsymmetric */
        v = -1.0*(t+0.5); i = Ii/n;
        if (i>0) {J = Ii - n; MatSetValues(C1,1,&Ii,1,&J,&v,ADD_VALUES);}
      }
      PetscLogFlops(2.0*(Iend-Istart));
    }
    /*
       Assemble matrix, using the 2-step process:
         MatAssemblyBegin(), MatAssemblyEnd()
       Computations can be done while messages are in transit
       by placing code between these two statements.
    */
    MatAssemblyBegin(C1,MAT_FINAL_ASSEMBLY);
    MatAssemblyEnd(C1,MAT_FINAL_ASSEMBLY);
    /*
       Indicate whether new nonzero locations may be inserted during later
       assemblies; PETSC_TRUE leaves them allowed for this matrix (contrast
       with C2 below, where the pattern is frozen with PETSC_FALSE).
    */
    MatSetOption(C1,MAT_NEW_NONZERO_LOCATIONS,PETSC_TRUE);
    /*
       Compute right-hand-side vector
    */
    MatMult(C1,u,b1);

    /*
       Set operators. Here the matrix that defines the linear system
       also serves as the preconditioning matrix.
    */
    KSPSetOperators(ksp1,C1,C1);
    /*
       Use the previous solution of linear system #1 as the initial
       guess for the next solve of linear system #1.  The user MUST
       call KSPSetInitialGuessNonzero() to indicate use of an initial
       guess vector; otherwise, an initial guess of zero is used.
    */
    if (t>0) {
      KSPSetInitialGuessNonzero(ksp1,PETSC_TRUE);
    }
    /*
       Solve the first linear system.  Here we explicitly call
       KSPSetUp() for more detailed performance monitoring of
       certain preconditioners, such as ICC and ILU.  This call
       is optional, as KSPSetUp() will automatically be called
       within KSPSolve() if it hasn't been called already.
    */
    KSPSetUp(ksp1);
    KSPSolve(ksp1,b1,x1);
    KSPGetIterationNumber(ksp1,&its);
    /*
       Check error of solution to first linear system
    */
    CheckError(u,x1,b1,its,1.e-4,CHECK_ERROR);

    /* - - - - - - - - - - - - Stage 2: - - - - - - - - - - - - - -
                 Assemble and solve second linear system
       - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */

    /*
       Conclude profiling stage #1; begin profiling stage #2
    */
    PetscLogStagePop();
    PetscLogStagePush(stages[2]);

    /*
       Initialize all matrix entries to zero
    */
    if (t > 0) {MatZeroEntries(C2);}
    /*
       Assemble matrix in parallel.  Also, log the number of flops
       for computing matrix entries.
        - To illustrate the features of parallel matrix assembly, we
          intentionally set the values differently from the way in
          which the matrix is distributed across the processors.  Each
          entry that is not owned locally will be sent to the appropriate
          processor during MatAssemblyBegin() and MatAssemblyEnd().
        - For best efficiency the user should strive to set as many
          entries locally as possible.
    */
    for (i=0; i<m; i++) {
      for (j=2*rank; j<2*rank+2; j++) {
        v = -1.0; Ii = j + n*i;
        if (i>0)   {J = Ii - n; MatSetValues(C2,1,&Ii,1,&J,&v,ADD_VALUES);}
        if (i<m-1) {J = Ii + n; MatSetValues(C2,1,&Ii,1,&J,&v,ADD_VALUES);}
        if (j>0)   {J = Ii - 1; MatSetValues(C2,1,&Ii,1,&J,&v,ADD_VALUES);}
        if (j<n-1) {J = Ii + 1; MatSetValues(C2,1,&Ii,1,&J,&v,ADD_VALUES);}
        v = 6.0 + t*0.5; MatSetValues(C2,1,&Ii,1,&Ii,&v,ADD_VALUES);
      }
    }
    if (unsym) {
      for (Ii=Istart2; Ii<Iend2; Ii++) { /* Make matrix nonsymmetric */
        v = -1.0*(t+0.5); i = Ii/n;
        if (i>0) {J = Ii - n; MatSetValues(C2,1,&Ii,1,&J,&v,ADD_VALUES);}
      }
    }
    MatAssemblyBegin(C2,MAT_FINAL_ASSEMBLY);
    MatAssemblyEnd(C2,MAT_FINAL_ASSEMBLY);
    PetscLogFlops(2.0*(Iend2-Istart2));
    /*
       Indicate same nonzero structure of successive linear system matrices
    */
    MatSetOption(C2,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);
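
    /* With MAT_NEW_NONZERO_LOCATIONS set to PETSC_FALSE, any later
       insertion that would create a new nonzero location in C2 is
       ignored, so the communication pattern established during the
       first assembly can be reused on subsequent iterations. */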
    /*
       Compute right-hand-side vector
    */
    MatMult(C2,u,b2);
    /*
       Set operators. Here the matrix that defines the linear system
       also serves as the preconditioning matrix.  The same-nonzero-structure
       hint for C2 was already given via MatSetOption() above.
    */
    KSPSetOperators(ksp2,C2,C2);
    /*
       Solve the second linear system
    */
    KSPSetUp(ksp2);
    KSPSolve(ksp2,b2,x2);
    KSPGetIterationNumber(ksp2,&its);

    /*
       Check error of solution to second linear system
    */
    CheckError(u,x2,b2,its,1.e-4,CHECK_ERROR);

    /*
       Conclude profiling stage #2
    */
    PetscLogStagePop();
  }
  /* --------------------------------------------------------------
                       End of linear solver loop
     -------------------------------------------------------------- */
  /*
     Free work space.  All PETSc objects should be destroyed when they
     are no longer needed.
  */
  KSPDestroy(&ksp1); KSPDestroy(&ksp2);
  VecDestroy(&x1);   VecDestroy(&x2);
  VecDestroy(&b1);   VecDestroy(&b2);
  MatDestroy(&C1);   MatDestroy(&C2);
  VecDestroy(&u);
  ierr = PetscFinalize();
  return ierr;
}
/* ------------------------------------------------------------- */
/*
   CheckError - Checks the error of the solution.

   Input Parameters:
     u           - exact solution
     x           - approximate solution
     b           - work vector
     its         - number of iterations for convergence
     tol         - tolerance
     CHECK_ERROR - the event number for error checking
                   (for use with profiling)

   Notes:
   In order to profile this section of code separately from the
   rest of the program, we register it as an "event" with
   PetscLogEventRegister() in the main program.  Then, we indicate
   the start and end of this event by respectively calling
      PetscLogEventBegin(CHECK_ERROR,u,x,b,0);
      PetscLogEventEnd(CHECK_ERROR,u,x,b,0);
   Here, we specify the objects most closely associated with
   the event (the vectors u, x, b).  Such information is optional;
   we could instead just use 0 for all objects.
*/
PetscErrorCode CheckError(Vec u,Vec x,Vec b,PetscInt its,PetscReal tol,PetscLogEvent CHECK_ERROR)
{
  PetscScalar none = -1.0;
  PetscReal   norm;

  PetscLogEventBegin(CHECK_ERROR,u,x,b,0);

  /*
     Compute error of the solution, using b as a work vector.
  */
  VecCopy(x,b);
  VecAXPY(b,none,u);
  VecNorm(b,NORM_2,&norm);
  if (norm > tol) {
    PetscPrintf(PETSC_COMM_WORLD,"Norm of error %g, Iterations %D\n",(double)norm,its);
  }
  PetscLogEventEnd(CHECK_ERROR,u,x,b,0);
  return 0;
}
/* ------------------------------------------------------------- */
/*
   MyKSPMonitor - This is a user-defined routine for monitoring
   the KSP iterative solvers.

   Input Parameters:
     ksp   - iterative context
     n     - iteration number
     rnorm - 2-norm (preconditioned) residual value (may be estimated)
     dummy - optional user-defined monitor context (unused here)
*/
PetscErrorCode MyKSPMonitor(KSP ksp,PetscInt n,PetscReal rnorm,void *dummy)
{
  Vec x;

  /*
     Build the solution vector
  */
  KSPBuildSolution(ksp,NULL,&x);

  /*
     Write the solution vector and residual norm to stdout.
      - PetscPrintf() handles output for multiprocessor jobs
        by printing from only one processor in the communicator.
      - The parallel viewer PETSC_VIEWER_STDOUT_WORLD handles
        data from multiple processors so that the output
        is not jumbled.
  */
  PetscPrintf(PETSC_COMM_WORLD,"iteration %D solution vector:\n",n);
  VecView(x,PETSC_VIEWER_STDOUT_WORLD);
  PetscPrintf(PETSC_COMM_WORLD,"iteration %D KSP Residual norm %14.12e \n",n,(double)rnorm);
  return 0;
}
/*TEST

   test:
      args: -t 2 -pc_type jacobi -ksp_monitor_short -ksp_type gmres -ksp_gmres_cgs_refinement_type refine_always -s2_ksp_type bcgs -s2_pc_type jacobi -s2_ksp_monitor_short

   test:
      requires: hpddm
      suffix: hpddm
      args: -t 2 -pc_type jacobi -ksp_monitor_short -ksp_type {{gmres hpddm}} -s2_ksp_type {{gmres hpddm}} -s2_pc_type jacobi -s2_ksp_monitor_short

   test:
      requires: hpddm
      suffix: hpddm_2
      args: -t 2 -pc_type jacobi -ksp_monitor_short -ksp_type gmres -s2_ksp_type hpddm -s2_ksp_hpddm_type gcrodr -s2_ksp_hpddm_recycle 10 -s2_pc_type jacobi -s2_ksp_monitor_short

   testset:
      requires: hpddm
      output_file: output/ex9_hpddm_cg.out
      args: -unsym 0 -t 2 -pc_type jacobi -ksp_monitor_short -s2_pc_type jacobi -s2_ksp_monitor_short -ksp_rtol 1.e-2 -s2_ksp_rtol 1.e-2
      test:
         suffix: hpddm_cg_p_p
         args: -ksp_type cg -s2_ksp_type cg
      test:
         suffix: hpddm_cg_p_h
         args: -ksp_type cg -s2_ksp_type hpddm -s2_ksp_hpddm_type {{cg bcg bfbcg}shared output}
      test:
         suffix: hpddm_cg_h_h
         args: -ksp_type hpddm -ksp_hpddm_type cg -s2_ksp_type hpddm -s2_ksp_hpddm_type {{cg bcg bfbcg}shared output}

TEST*/