Actual source code: ex15.c

  1: static const char help[] = "p-Bratu nonlinear PDE in 2d.\n\
  2: We solve the  p-Laplacian (nonlinear diffusion) combined with\n\
  3: the Bratu (solid fuel ignition) nonlinearity in a 2D rectangular\n\
  4: domain, using distributed arrays (DAs) to partition the parallel grid.\n\
  5: The command line options include:\n\
  6:   -p <2>: `p' in p-Laplacian term\n\
  7:   -epsilon <1e-05>: Strain-regularization in p-Laplacian\n\
  8:   -lambda <6>: Bratu parameter\n\
  9:   -blocks <bx,by>: number of coefficient interfaces in x and y direction\n\
 10:   -kappa <1e-3>: diffusivity in odd regions\n\
 11: \n";

 13: /*F
 14:     The $p$-Bratu problem is a combination of the $p$-Laplacian (nonlinear diffusion) and the Brutu solid fuel ignition problem.
 15:     This problem is modeled by the partial differential equation

 17: \begin{equation*}
 18:         -\nabla\cdot (\eta \nabla u) - \lambda \exp(u) = 0
 19: \end{equation*}

 21:     on $\Omega = (-1,1)^2$ with closure

 23: \begin{align*}
 24:         \eta(\gamma) &= (\epsilon^2 + \gamma)^{(p-2)/2} & \gamma &= \frac 1 2 |\nabla u|^2
 25: \end{align*}

 27:     and boundary conditions $u = 0$ for $(x,y) \in \partial \Omega$

 29:     A 9-point finite difference stencil is used to discretize
 30:     the boundary value problem to obtain a nonlinear system of equations.
 31:     This would be a 5-point stencil if not for the $p$-Laplacian's nonlinearity.
 32: F*/

 34: /*
 35:       mpiexec -n 2 ./ex15 -snes_monitor -ksp_monitor log_summary
 36: */

 38: /*
 39:    Include "petscdmda.h" so that we can use distributed arrays (DMDAs).
 40:    Include "petscsnes.h" so that we can use SNES solvers.  Note that this
 41:    file automatically includes:
 42:      petsc.h       - base PETSc routines   petscvec.h - vectors
 43:      petscsys.h    - system routines       petscmat.h - matrices
 44:      petscis.h     - index sets            petscksp.h - Krylov subspace methods
 45:      petscviewer.h - viewers               petscpc.h  - preconditioners
 46:      petscksp.h   - linear solvers
 47: */

 49: #include <petscdm.h>
 50: #include <petscdmda.h>
 51: #include <petscsnes.h>

 53: typedef enum {
 54:   JAC_BRATU,
 55:   JAC_PICARD,
 56:   JAC_STAR,
 57:   JAC_NEWTON
 58: } JacType;
 59: static const char *const JacTypes[] = {"BRATU", "PICARD", "STAR", "NEWTON", "JacType", "JAC_", 0};

 61: /*
 62:    User-defined application context - contains data needed by the
 63:    application-provided call-back routines, FormJacobianLocal() and
 64:    FormFunctionLocal().
 65: */
 66: typedef struct {
 67:   PetscReal lambda;  /* Bratu parameter */
 68:   PetscReal p;       /* Exponent in p-Laplacian */
 69:   PetscReal epsilon; /* Regularization */
 70:   PetscReal source;  /* Source term */
 71:   JacType   jtype;   /* What type of Jacobian to assemble */
 72:   PetscBool picard;
 73:   PetscInt  blocks[2];
 74:   PetscReal kappa;
 75:   PetscInt  initial; /* initial conditions type */
 76: } AppCtx;

 78: /*
 79:    User-defined routines
 80: */
 81: static PetscErrorCode FormRHS(AppCtx *, DM, Vec);
 82: static PetscErrorCode FormInitialGuess(AppCtx *, DM, Vec);
 83: static PetscErrorCode FormFunctionLocal(DMDALocalInfo *, PetscScalar **, PetscScalar **, AppCtx *);
 84: static PetscErrorCode FormFunctionPicardLocal(DMDALocalInfo *, PetscScalar **, PetscScalar **, AppCtx *);
 85: static PetscErrorCode FormJacobianLocal(DMDALocalInfo *, PetscScalar **, Mat, Mat, AppCtx *);
 86: static PetscErrorCode NonlinearGS(SNES, Vec, Vec, void *);

 88: typedef struct _n_PreCheck *PreCheck;
 89: struct _n_PreCheck {
 90:   MPI_Comm    comm;
 91:   PetscReal   angle;
 92:   Vec         Ylast;
 93:   PetscViewer monitor;
 94: };
 95: PetscErrorCode PreCheckCreate(MPI_Comm, PreCheck *);
 96: PetscErrorCode PreCheckDestroy(PreCheck *);
 97: PetscErrorCode PreCheckFunction(SNESLineSearch, Vec, Vec, PetscBool *, void *);
 98: PetscErrorCode PreCheckSetFromOptions(PreCheck);

100: int main(int argc, char **argv)
101: {
102:   SNES                snes;       /* nonlinear solver */
103:   Vec                 x, r, b;    /* solution, residual, rhs vectors */
104:   AppCtx              user;       /* user-defined work context */
105:   PetscInt            its;        /* iterations for convergence */
106:   SNESConvergedReason reason;     /* Check convergence */
107:   PetscBool           alloc_star; /* Only allocate for the STAR stencil  */
108:   PetscBool           write_output;
109:   char                filename[PETSC_MAX_PATH_LEN] = "ex15.vts";
110:   PetscReal           bratu_lambda_max = 6.81, bratu_lambda_min = 0.;
111:   DM                  da;              /* distributed array data structure */
112:   PreCheck            precheck = NULL; /* precheck context for version in this file */
113:   PetscInt            use_precheck;    /* 0=none, 1=version in this file, 2=SNES-provided version */
114:   PetscReal           precheck_angle;  /* When manually setting the SNES-provided precheck function */
115:   SNESLineSearch      linesearch;

117:   /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
118:      Initialize program
119:      - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */

122:   PetscInitialize(&argc, &argv, (char *)0, help);

124:   /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
125:      Initialize problem parameters
126:   - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
127:   user.lambda    = 0.0;
128:   user.p         = 2.0;
129:   user.epsilon   = 1e-5;
130:   user.source    = 0.1;
131:   user.jtype     = JAC_NEWTON;
132:   user.initial   = -1;
133:   user.blocks[0] = 1;
134:   user.blocks[1] = 1;
135:   user.kappa     = 1e-3;
136:   alloc_star     = PETSC_FALSE;
137:   use_precheck   = 0;
138:   precheck_angle = 10.;
139:   user.picard    = PETSC_FALSE;
140:   PetscOptionsBegin(PETSC_COMM_WORLD, NULL, "p-Bratu options", __FILE__);
141:   {
142:     PetscInt two = 2;
143:     PetscOptionsReal("-lambda", "Bratu parameter", "", user.lambda, &user.lambda, NULL);
144:     PetscOptionsReal("-p", "Exponent `p' in p-Laplacian", "", user.p, &user.p, NULL);
145:     PetscOptionsReal("-epsilon", "Strain-regularization in p-Laplacian", "", user.epsilon, &user.epsilon, NULL);
146:     PetscOptionsReal("-source", "Constant source term", "", user.source, &user.source, NULL);
147:     PetscOptionsEnum("-jtype", "Jacobian approximation to assemble", "", JacTypes, (PetscEnum)user.jtype, (PetscEnum *)&user.jtype, NULL);
148:     PetscOptionsName("-picard", "Solve with defect-correction Picard iteration", "", &user.picard);
149:     if (user.picard) {
150:       user.jtype = JAC_PICARD;
152:       /* the Picard linearization only requires a 5 point stencil, while the Newton linearization requires a 9 point stencil */
153:       /* hence allocating the 5 point stencil gives the same convergence as the 9 point stencil since the extra stencil points are not used */
154:       PetscOptionsBool("-alloc_star", "Allocate for STAR stencil (5-point)", "", alloc_star, &alloc_star, NULL);
155:     }
156:     PetscOptionsInt("-precheck", "Use a pre-check correction intended for use with Picard iteration 1=this version, 2=library", "", use_precheck, &use_precheck, NULL);
157:     PetscOptionsInt("-initial", "Initial conditions type (-1: default, 0: zero-valued, 1: peaked guess)", "", user.initial, &user.initial, NULL);
158:     if (use_precheck == 2) { /* Using library version, get the angle */
159:       PetscOptionsReal("-precheck_angle", "Angle in degrees between successive search directions necessary to activate step correction", "", precheck_angle, &precheck_angle, NULL);
160:     }
161:     PetscOptionsIntArray("-blocks", "number of coefficient interfaces in x and y direction", "", user.blocks, &two, NULL);
162:     PetscOptionsReal("-kappa", "diffusivity in odd regions", "", user.kappa, &user.kappa, NULL);
163:     PetscOptionsString("-o", "Output solution in vts format", "", filename, filename, sizeof(filename), &write_output);
164:   }
165:   PetscOptionsEnd();
166:   if (user.lambda > bratu_lambda_max || user.lambda < bratu_lambda_min) PetscPrintf(PETSC_COMM_WORLD, "WARNING: lambda %g out of range for p=2\n", (double)user.lambda);

168:   /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
169:      Create nonlinear solver context
170:      - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
171:   SNESCreate(PETSC_COMM_WORLD, &snes);

173:   /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
174:      Create distributed array (DMDA) to manage parallel grid and vectors
175:   - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
176:   DMDACreate2d(PETSC_COMM_WORLD, DM_BOUNDARY_NONE, DM_BOUNDARY_NONE, alloc_star ? DMDA_STENCIL_STAR : DMDA_STENCIL_BOX, 4, 4, PETSC_DECIDE, PETSC_DECIDE, 1, 1, NULL, NULL, &da);
177:   DMSetFromOptions(da);
178:   DMSetUp(da);

180:   /*  - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
181:      Extract global vectors from DM; then duplicate for remaining
182:      vectors that are the same types
183:    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
184:   DMCreateGlobalVector(da, &x);
185:   VecDuplicate(x, &r);
186:   VecDuplicate(x, &b);

188:   /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
189:      User can override with:
190:      -snes_mf : matrix-free Newton-Krylov method with no preconditioning
191:                 (unless user explicitly sets preconditioner)
192:      -snes_mf_operator : form preconditioning matrix as set by the user,
193:                          but use matrix-free approx for Jacobian-vector
194:                          products within Newton-Krylov method

196:      - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */

198:   /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
199:      Set local function evaluation routine
200:   - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
201:   DMSetApplicationContext(da, &user);
202:   SNESSetDM(snes, da);
203:   if (user.picard) {
204:     /*
205:         This is not really right requiring the user to call SNESSetFunction/Jacobian but the DMDASNESSetPicardLocal() cannot access
206:         the SNES to set it
207:     */
208:     DMDASNESSetPicardLocal(da, INSERT_VALUES, (PetscErrorCode(*)(DMDALocalInfo *, void *, void *, void *))FormFunctionPicardLocal, (PetscErrorCode(*)(DMDALocalInfo *, void *, Mat, Mat, void *))FormJacobianLocal, &user);
209:     SNESSetFunction(snes, NULL, SNESPicardComputeFunction, &user);
210:     SNESSetJacobian(snes, NULL, NULL, SNESPicardComputeJacobian, &user);
211:   } else {
212:     DMDASNESSetFunctionLocal(da, INSERT_VALUES, (PetscErrorCode(*)(DMDALocalInfo *, void *, void *, void *))FormFunctionLocal, &user);
213:     DMDASNESSetJacobianLocal(da, (PetscErrorCode(*)(DMDALocalInfo *, void *, Mat, Mat, void *))FormJacobianLocal, &user);
214:   }

216:   /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
217:      Customize nonlinear solver; set runtime options
218:    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
219:   SNESSetFromOptions(snes);
220:   SNESSetNGS(snes, NonlinearGS, &user);
221:   SNESGetLineSearch(snes, &linesearch);
222:   /* Set up the precheck context if requested */
223:   if (use_precheck == 1) { /* Use the precheck routines in this file */
224:     PreCheckCreate(PETSC_COMM_WORLD, &precheck);
225:     PreCheckSetFromOptions(precheck);
226:     SNESLineSearchSetPreCheck(linesearch, PreCheckFunction, precheck);
227:   } else if (use_precheck == 2) { /* Use the version provided by the library */
228:     SNESLineSearchSetPreCheck(linesearch, SNESLineSearchPreCheckPicard, &precheck_angle);
229:   }

231:   /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
232:      Evaluate initial guess
233:      Note: The user should initialize the vector, x, with the initial guess
234:      for the nonlinear solver prior to calling SNESSolve().  In particular,
235:      to employ an initial guess of zero, the user should explicitly set
236:      this vector to zero by calling VecSet().
237:   */

239:   FormInitialGuess(&user, da, x);
240:   FormRHS(&user, da, b);

242:   /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
243:      Solve nonlinear system
244:      - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
245:   SNESSolve(snes, b, x);
246:   SNESGetIterationNumber(snes, &its);
247:   SNESGetConvergedReason(snes, &reason);

249:   PetscPrintf(PETSC_COMM_WORLD, "%s Number of nonlinear iterations = %" PetscInt_FMT "\n", SNESConvergedReasons[reason], its);

251:   if (write_output) {
252:     PetscViewer viewer;
253:     PetscViewerVTKOpen(PETSC_COMM_WORLD, filename, FILE_MODE_WRITE, &viewer);
254:     VecView(x, viewer);
255:     PetscViewerDestroy(&viewer);
256:   }

258:   /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
259:      Free work space.  All PETSc objects should be destroyed when they
260:      are no longer needed.
261:    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */

263:   VecDestroy(&x);
264:   VecDestroy(&r);
265:   VecDestroy(&b);
266:   SNESDestroy(&snes);
267:   DMDestroy(&da);
268:   PreCheckDestroy(&precheck);
269:   PetscFinalize();
270:   return 0;
271: }

273: /* ------------------------------------------------------------------- */
274: /*
275:    FormInitialGuess - Forms initial approximation.

277:    Input Parameters:
278:    user - user-defined application context
279:    X - vector

281:    Output Parameter:
282:    X - vector
283:  */
284: static PetscErrorCode FormInitialGuess(AppCtx *user, DM da, Vec X)
285: {
286:   PetscInt      i, j, Mx, My, xs, ys, xm, ym;
287:   PetscReal     temp1, temp, hx, hy;
288:   PetscScalar **x;

291:   DMDAGetInfo(da, PETSC_IGNORE, &Mx, &My, PETSC_IGNORE, PETSC_IGNORE, PETSC_IGNORE, PETSC_IGNORE, PETSC_IGNORE, PETSC_IGNORE, PETSC_IGNORE, PETSC_IGNORE, PETSC_IGNORE, PETSC_IGNORE);

293:   hx    = 1.0 / (PetscReal)(Mx - 1);
294:   hy    = 1.0 / (PetscReal)(My - 1);
295:   temp1 = user->lambda / (user->lambda + 1.);

297:   /*
298:      Get a pointer to vector data.
299:        - For default PETSc vectors, VecGetArray() returns a pointer to
300:          the data array.  Otherwise, the routine is implementation dependent.
301:        - You MUST call VecRestoreArray() when you no longer need access to
302:          the array.
303:   */
304:   DMDAVecGetArray(da, X, &x);

306:   /*
307:      Get local grid boundaries (for 2-dimensional DA):
308:        xs, ys   - starting grid indices (no ghost points)
309:        xm, ym   - widths of local grid (no ghost points)

311:   */
312:   DMDAGetCorners(da, &xs, &ys, NULL, &xm, &ym, NULL);

314:   /*
315:      Compute initial guess over the locally owned part of the grid
316:   */
317:   for (j = ys; j < ys + ym; j++) {
318:     temp = (PetscReal)(PetscMin(j, My - j - 1)) * hy;
319:     for (i = xs; i < xs + xm; i++) {
320:       if (i == 0 || j == 0 || i == Mx - 1 || j == My - 1) {
321:         /* boundary conditions are all zero Dirichlet */
322:         x[j][i] = 0.0;
323:       } else {
324:         if (user->initial == -1) {
325:           if (user->lambda != 0) {
326:             x[j][i] = temp1 * PetscSqrtReal(PetscMin((PetscReal)(PetscMin(i, Mx - i - 1)) * hx, temp));
327:           } else {
328:             /* The solution above is an exact solution for lambda=0, this avoids "accidentally" starting
329:              * with an exact solution. */
330:             const PetscReal xx = 2 * (PetscReal)i / (Mx - 1) - 1, yy = 2 * (PetscReal)j / (My - 1) - 1;
331:             x[j][i] = (1 - xx * xx) * (1 - yy * yy) * xx * yy;
332:           }
333:         } else if (user->initial == 0) {
334:           x[j][i] = 0.;
335:         } else if (user->initial == 1) {
336:           const PetscReal xx = 2 * (PetscReal)i / (Mx - 1) - 1, yy = 2 * (PetscReal)j / (My - 1) - 1;
337:           x[j][i] = (1 - xx * xx) * (1 - yy * yy) * xx * yy;
338:         } else {
339:           if (user->lambda != 0) {
340:             x[j][i] = temp1 * PetscSqrtReal(PetscMin((PetscReal)(PetscMin(i, Mx - i - 1)) * hx, temp));
341:           } else {
342:             x[j][i] = 0.5 * PetscSqrtReal(PetscMin((PetscReal)(PetscMin(i, Mx - i - 1)) * hx, temp));
343:           }
344:         }
345:       }
346:     }
347:   }
348:   /*
349:      Restore vector
350:   */
351:   DMDAVecRestoreArray(da, X, &x);
352:   return 0;
353: }

355: /*
356:    FormRHS - Forms constant RHS for the problem.

358:    Input Parameters:
359:    user - user-defined application context
360:    B - RHS vector

362:    Output Parameter:
363:    B - vector
364:  */
365: static PetscErrorCode FormRHS(AppCtx *user, DM da, Vec B)
366: {
367:   PetscInt      i, j, Mx, My, xs, ys, xm, ym;
368:   PetscReal     hx, hy;
369:   PetscScalar **b;

372:   DMDAGetInfo(da, PETSC_IGNORE, &Mx, &My, PETSC_IGNORE, PETSC_IGNORE, PETSC_IGNORE, PETSC_IGNORE, PETSC_IGNORE, PETSC_IGNORE, PETSC_IGNORE, PETSC_IGNORE, PETSC_IGNORE, PETSC_IGNORE);

374:   hx = 1.0 / (PetscReal)(Mx - 1);
375:   hy = 1.0 / (PetscReal)(My - 1);
376:   DMDAVecGetArray(da, B, &b);
377:   DMDAGetCorners(da, &xs, &ys, NULL, &xm, &ym, NULL);
378:   for (j = ys; j < ys + ym; j++) {
379:     for (i = xs; i < xs + xm; i++) {
380:       if (i == 0 || j == 0 || i == Mx - 1 || j == My - 1) {
381:         b[j][i] = 0.0;
382:       } else {
383:         b[j][i] = hx * hy * user->source;
384:       }
385:     }
386:   }
387:   DMDAVecRestoreArray(da, B, &b);
388:   return 0;
389: }

391: static inline PetscReal kappa(const AppCtx *ctx, PetscReal x, PetscReal y)
392: {
393:   return (((PetscInt)(x * ctx->blocks[0])) + ((PetscInt)(y * ctx->blocks[1]))) % 2 ? ctx->kappa : 1.0;
394: }
395: /* p-Laplacian diffusivity */
396: static inline PetscScalar eta(const AppCtx *ctx, PetscReal x, PetscReal y, PetscScalar ux, PetscScalar uy)
397: {
398:   return kappa(ctx, x, y) * PetscPowScalar(PetscSqr(ctx->epsilon) + 0.5 * (ux * ux + uy * uy), 0.5 * (ctx->p - 2.));
399: }
400: static inline PetscScalar deta(const AppCtx *ctx, PetscReal x, PetscReal y, PetscScalar ux, PetscScalar uy)
401: {
402:   return (ctx->p == 2) ? 0 : kappa(ctx, x, y) * PetscPowScalar(PetscSqr(ctx->epsilon) + 0.5 * (ux * ux + uy * uy), 0.5 * (ctx->p - 4)) * 0.5 * (ctx->p - 2.);
403: }

405: /* ------------------------------------------------------------------- */
406: /*
407:    FormFunctionLocal - Evaluates nonlinear function, F(x).
408:  */
409: static PetscErrorCode FormFunctionLocal(DMDALocalInfo *info, PetscScalar **x, PetscScalar **f, AppCtx *user)
410: {
411:   PetscReal   hx, hy, dhx, dhy, sc;
412:   PetscInt    i, j;
413:   PetscScalar eu;

416:   hx  = 1.0 / (PetscReal)(info->mx - 1);
417:   hy  = 1.0 / (PetscReal)(info->my - 1);
418:   sc  = hx * hy * user->lambda;
419:   dhx = 1 / hx;
420:   dhy = 1 / hy;
421:   /*
422:      Compute function over the locally owned part of the grid
423:   */
424:   for (j = info->ys; j < info->ys + info->ym; j++) {
425:     for (i = info->xs; i < info->xs + info->xm; i++) {
426:       PetscReal xx = i * hx, yy = j * hy;
427:       if (i == 0 || j == 0 || i == info->mx - 1 || j == info->my - 1) {
428:         f[j][i] = x[j][i];
429:       } else {
430:         const PetscScalar u = x[j][i], ux_E = dhx * (x[j][i + 1] - x[j][i]), uy_E = 0.25 * dhy * (x[j + 1][i] + x[j + 1][i + 1] - x[j - 1][i] - x[j - 1][i + 1]), ux_W = dhx * (x[j][i] - x[j][i - 1]), uy_W = 0.25 * dhy * (x[j + 1][i - 1] + x[j + 1][i] - x[j - 1][i - 1] - x[j - 1][i]), ux_N = 0.25 * dhx * (x[j][i + 1] + x[j + 1][i + 1] - x[j][i - 1] - x[j + 1][i - 1]), uy_N = dhy * (x[j + 1][i] - x[j][i]), ux_S = 0.25 * dhx * (x[j - 1][i + 1] + x[j][i + 1] - x[j - 1][i - 1] - x[j][i - 1]), uy_S = dhy * (x[j][i] - x[j - 1][i]), e_E = eta(user, xx, yy, ux_E, uy_E), e_W = eta(user, xx, yy, ux_W, uy_W), e_N = eta(user, xx, yy, ux_N, uy_N), e_S = eta(user, xx, yy, ux_S, uy_S), uxx = -hy * (e_E * ux_E - e_W * ux_W), uyy = -hx * (e_N * uy_N - e_S * uy_S);
431:         if (sc) eu = PetscExpScalar(u);
432:         else eu = 0.;
433:         /* For p=2, these terms decay to:
434:          uxx = (2.0*u - x[j][i-1] - x[j][i+1])*hydhx
435:          uyy = (2.0*u - x[j-1][i] - x[j+1][i])*hxdhy
436:         */
437:         f[j][i] = uxx + uyy - sc * eu;
438:       }
439:     }
440:   }
441:   PetscLogFlops(info->xm * info->ym * (72.0));
442:   return 0;
443: }

445: /*
446:     This is the opposite sign of the part of FormFunctionLocal that excludes the A(x) x part of the operation,
447:     that is FormFunction applies A(x) x - b(x) while this applies b(x) because for Picard we think of it as solving A(x) x = b(x)

449: */
450: static PetscErrorCode FormFunctionPicardLocal(DMDALocalInfo *info, PetscScalar **x, PetscScalar **f, AppCtx *user)
451: {
452:   PetscReal hx, hy, sc;
453:   PetscInt  i, j;

456:   hx = 1.0 / (PetscReal)(info->mx - 1);
457:   hy = 1.0 / (PetscReal)(info->my - 1);
458:   sc = hx * hy * user->lambda;
459:   /*
460:      Compute function over the locally owned part of the grid
461:   */
462:   for (j = info->ys; j < info->ys + info->ym; j++) {
463:     for (i = info->xs; i < info->xs + info->xm; i++) {
464:       if (!(i == 0 || j == 0 || i == info->mx - 1 || j == info->my - 1)) {
465:         const PetscScalar u = x[j][i];
466:         f[j][i]             = sc * PetscExpScalar(u);
467:       } else {
468:         f[j][i] = 0.0; /* this is zero because the A(x) x term forces the x to be zero on the boundary */
469:       }
470:     }
471:   }
472:   PetscLogFlops(info->xm * info->ym * 2.0);
473:   return 0;
474: }

476: /*
477:    FormJacobianLocal - Evaluates Jacobian matrix.
478: */
479: static PetscErrorCode FormJacobianLocal(DMDALocalInfo *info, PetscScalar **x, Mat J, Mat B, AppCtx *user)
480: {
481:   PetscInt    i, j;
482:   MatStencil  col[9], row;
483:   PetscScalar v[9];
484:   PetscReal   hx, hy, hxdhy, hydhx, dhx, dhy, sc;

487:   MatZeroEntries(B);
488:   hx    = 1.0 / (PetscReal)(info->mx - 1);
489:   hy    = 1.0 / (PetscReal)(info->my - 1);
490:   sc    = hx * hy * user->lambda;
491:   dhx   = 1 / hx;
492:   dhy   = 1 / hy;
493:   hxdhy = hx / hy;
494:   hydhx = hy / hx;

496:   /*
497:      Compute entries for the locally owned part of the Jacobian.
498:       - PETSc parallel matrix formats are partitioned by
499:         contiguous chunks of rows across the processors.
500:       - Each processor needs to insert only elements that it owns
501:         locally (but any non-local elements will be sent to the
502:         appropriate processor during matrix assembly).
503:       - Here, we set all entries for a particular row at once.
504:   */
505:   for (j = info->ys; j < info->ys + info->ym; j++) {
506:     for (i = info->xs; i < info->xs + info->xm; i++) {
507:       PetscReal xx = i * hx, yy = j * hy;
508:       row.j = j;
509:       row.i = i;
510:       /* boundary points */
511:       if (i == 0 || j == 0 || i == info->mx - 1 || j == info->my - 1) {
512:         v[0] = 1.0;
513:         MatSetValuesStencil(B, 1, &row, 1, &row, v, INSERT_VALUES);
514:       } else {
515:         /* interior grid points */
516:         const PetscScalar ux_E = dhx * (x[j][i + 1] - x[j][i]), uy_E = 0.25 * dhy * (x[j + 1][i] + x[j + 1][i + 1] - x[j - 1][i] - x[j - 1][i + 1]), ux_W = dhx * (x[j][i] - x[j][i - 1]), uy_W = 0.25 * dhy * (x[j + 1][i - 1] + x[j + 1][i] - x[j - 1][i - 1] - x[j - 1][i]), ux_N = 0.25 * dhx * (x[j][i + 1] + x[j + 1][i + 1] - x[j][i - 1] - x[j + 1][i - 1]), uy_N = dhy * (x[j + 1][i] - x[j][i]), ux_S = 0.25 * dhx * (x[j - 1][i + 1] + x[j][i + 1] - x[j - 1][i - 1] - x[j][i - 1]), uy_S = dhy * (x[j][i] - x[j - 1][i]), u = x[j][i], e_E = eta(user, xx, yy, ux_E, uy_E), e_W = eta(user, xx, yy, ux_W, uy_W), e_N = eta(user, xx, yy, ux_N, uy_N), e_S = eta(user, xx, yy, ux_S, uy_S), de_E = deta(user, xx, yy, ux_E, uy_E), de_W = deta(user, xx, yy, ux_W, uy_W), de_N = deta(user, xx, yy, ux_N, uy_N), de_S = deta(user, xx, yy, ux_S, uy_S), skew_E = de_E * ux_E * uy_E, skew_W = de_W * ux_W * uy_W, skew_N = de_N * ux_N * uy_N, skew_S = de_S * ux_S * uy_S, cross_EW = 0.25 * (skew_E - skew_W), cross_NS = 0.25 * (skew_N - skew_S), newt_E = e_E + de_E * PetscSqr(ux_E), newt_W = e_W + de_W * PetscSqr(ux_W), newt_N = e_N + de_N * PetscSqr(uy_N), newt_S = e_S + de_S * PetscSqr(uy_S);
517:         /* interior grid points */
518:         switch (user->jtype) {
519:         case JAC_BRATU:
520:           /* Jacobian from p=2 */
521:           v[0]     = -hxdhy;
522:           col[0].j = j - 1;
523:           col[0].i = i;
524:           v[1]     = -hydhx;
525:           col[1].j = j;
526:           col[1].i = i - 1;
527:           v[2]     = 2.0 * (hydhx + hxdhy) - sc * PetscExpScalar(u);
528:           col[2].j = row.j;
529:           col[2].i = row.i;
530:           v[3]     = -hydhx;
531:           col[3].j = j;
532:           col[3].i = i + 1;
533:           v[4]     = -hxdhy;
534:           col[4].j = j + 1;
535:           col[4].i = i;
536:           MatSetValuesStencil(B, 1, &row, 5, col, v, INSERT_VALUES);
537:           break;
538:         case JAC_PICARD:
539:           /* Jacobian arising from Picard linearization */
540:           v[0]     = -hxdhy * e_S;
541:           col[0].j = j - 1;
542:           col[0].i = i;
543:           v[1]     = -hydhx * e_W;
544:           col[1].j = j;
545:           col[1].i = i - 1;
546:           v[2]     = (e_W + e_E) * hydhx + (e_S + e_N) * hxdhy;
547:           col[2].j = row.j;
548:           col[2].i = row.i;
549:           v[3]     = -hydhx * e_E;
550:           col[3].j = j;
551:           col[3].i = i + 1;
552:           v[4]     = -hxdhy * e_N;
553:           col[4].j = j + 1;
554:           col[4].i = i;
555:           MatSetValuesStencil(B, 1, &row, 5, col, v, INSERT_VALUES);
556:           break;
557:         case JAC_STAR:
558:           /* Full Jacobian, but only a star stencil */
559:           col[0].j = j - 1;
560:           col[0].i = i;
561:           col[1].j = j;
562:           col[1].i = i - 1;
563:           col[2].j = j;
564:           col[2].i = i;
565:           col[3].j = j;
566:           col[3].i = i + 1;
567:           col[4].j = j + 1;
568:           col[4].i = i;
569:           v[0]     = -hxdhy * newt_S + cross_EW;
570:           v[1]     = -hydhx * newt_W + cross_NS;
571:           v[2]     = hxdhy * (newt_N + newt_S) + hydhx * (newt_E + newt_W) - sc * PetscExpScalar(u);
572:           v[3]     = -hydhx * newt_E - cross_NS;
573:           v[4]     = -hxdhy * newt_N - cross_EW;
574:           MatSetValuesStencil(B, 1, &row, 5, col, v, INSERT_VALUES);
575:           break;
576:         case JAC_NEWTON:
577:           /* The Jacobian is

579:            -div [ eta (grad u) + deta (grad u0 . grad u) grad u0 ] - (eE u0) u

581:           */
582:           col[0].j = j - 1;
583:           col[0].i = i - 1;
584:           col[1].j = j - 1;
585:           col[1].i = i;
586:           col[2].j = j - 1;
587:           col[2].i = i + 1;
588:           col[3].j = j;
589:           col[3].i = i - 1;
590:           col[4].j = j;
591:           col[4].i = i;
592:           col[5].j = j;
593:           col[5].i = i + 1;
594:           col[6].j = j + 1;
595:           col[6].i = i - 1;
596:           col[7].j = j + 1;
597:           col[7].i = i;
598:           col[8].j = j + 1;
599:           col[8].i = i + 1;
600:           v[0]     = -0.25 * (skew_S + skew_W);
601:           v[1]     = -hxdhy * newt_S + cross_EW;
602:           v[2]     = 0.25 * (skew_S + skew_E);
603:           v[3]     = -hydhx * newt_W + cross_NS;
604:           v[4]     = hxdhy * (newt_N + newt_S) + hydhx * (newt_E + newt_W) - sc * PetscExpScalar(u);
605:           v[5]     = -hydhx * newt_E - cross_NS;
606:           v[6]     = 0.25 * (skew_N + skew_W);
607:           v[7]     = -hxdhy * newt_N - cross_EW;
608:           v[8]     = -0.25 * (skew_N + skew_E);
609:           MatSetValuesStencil(B, 1, &row, 9, col, v, INSERT_VALUES);
610:           break;
611:         default:
612:           SETERRQ(PetscObjectComm((PetscObject)info->da), PETSC_ERR_SUP, "Jacobian type %d not implemented", user->jtype);
613:         }
614:       }
615:     }
616:   }

618:   /*
619:      Assemble matrix, using the 2-step process:
620:        MatAssemblyBegin(), MatAssemblyEnd().
621:   */
622:   MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY);
623:   MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY);

625:   if (J != B) {
626:     MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY);
627:     MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY);
628:   }

630:   /*
631:      Tell the matrix we will never add a new nonzero location to the
632:      matrix. If we do, it will generate an error.
633:   */
634:   if (user->jtype == JAC_NEWTON) PetscLogFlops(info->xm * info->ym * (131.0));
635:   return 0;
636: }

638: /***********************************************************
639:  * PreCheck implementation
640:  ***********************************************************/
641: PetscErrorCode PreCheckSetFromOptions(PreCheck precheck)
642: {
643:   PetscBool flg;

646:   PetscOptionsBegin(precheck->comm, NULL, "PreCheck Options", "none");
647:   PetscOptionsReal("-precheck_angle", "Angle in degrees between successive search directions necessary to activate step correction", "", precheck->angle, &precheck->angle, NULL);
648:   flg = PETSC_FALSE;
649:   PetscOptionsBool("-precheck_monitor", "Monitor choices made by precheck routine", "", flg, &flg, NULL);
650:   if (flg) PetscViewerASCIIOpen(precheck->comm, "stdout", &precheck->monitor);
651:   PetscOptionsEnd();
652:   return 0;
653: }

655: /*
656:   Compare the direction of the current and previous step, modify the current step accordingly
657: */
658: PetscErrorCode PreCheckFunction(SNESLineSearch linesearch, Vec X, Vec Y, PetscBool *changed, void *ctx)
659: {
660:   PreCheck    precheck;
661:   Vec         Ylast;
662:   PetscScalar dot;
663:   PetscInt    iter;
664:   PetscReal   ynorm, ylastnorm, theta, angle_radians;
665:   SNES        snes;

668:   SNESLineSearchGetSNES(linesearch, &snes);
669:   precheck = (PreCheck)ctx;
670:   if (!precheck->Ylast) VecDuplicate(Y, &precheck->Ylast);
671:   Ylast = precheck->Ylast;
672:   SNESGetIterationNumber(snes, &iter);
673:   if (iter < 1) {
674:     VecCopy(Y, Ylast);
675:     *changed = PETSC_FALSE;
676:     return 0;
677:   }

679:   VecDot(Y, Ylast, &dot);
680:   VecNorm(Y, NORM_2, &ynorm);
681:   VecNorm(Ylast, NORM_2, &ylastnorm);
682:   /* Compute the angle between the vectors Y and Ylast, clip to keep inside the domain of acos() */
683:   theta         = PetscAcosReal((PetscReal)PetscClipInterval(PetscAbsScalar(dot) / (ynorm * ylastnorm), -1.0, 1.0));
684:   angle_radians = precheck->angle * PETSC_PI / 180.;
685:   if (PetscAbsReal(theta) < angle_radians || PetscAbsReal(theta - PETSC_PI) < angle_radians) {
686:     /* Modify the step Y */
687:     PetscReal alpha, ydiffnorm;
688:     VecAXPY(Ylast, -1.0, Y);
689:     VecNorm(Ylast, NORM_2, &ydiffnorm);
690:     alpha = ylastnorm / ydiffnorm;
691:     VecCopy(Y, Ylast);
692:     VecScale(Y, alpha);
693:     if (precheck->monitor) PetscViewerASCIIPrintf(precheck->monitor, "Angle %g degrees less than threshold %g, corrected step by alpha=%g\n", (double)(theta * 180. / PETSC_PI), (double)precheck->angle, (double)alpha);
694:   } else {
695:     VecCopy(Y, Ylast);
696:     *changed = PETSC_FALSE;
697:     if (precheck->monitor) PetscViewerASCIIPrintf(precheck->monitor, "Angle %g degrees exceeds threshold %g, no correction applied\n", (double)(theta * 180. / PETSC_PI), (double)precheck->angle);
698:   }
699:   return 0;
700: }

702: PetscErrorCode PreCheckDestroy(PreCheck *precheck)
703: {
705:   if (!*precheck) return 0;
706:   VecDestroy(&(*precheck)->Ylast);
707:   PetscViewerDestroy(&(*precheck)->monitor);
708:   PetscFree(*precheck);
709:   return 0;
710: }

712: PetscErrorCode PreCheckCreate(MPI_Comm comm, PreCheck *precheck)
713: {
715:   PetscNew(precheck);

717:   (*precheck)->comm  = comm;
718:   (*precheck)->angle = 10.; /* only active if angle is less than 10 degrees */
719:   return 0;
720: }

722: /*
723:       Applies some sweeps on nonlinear Gauss-Seidel on each process

725:  */
726: PetscErrorCode NonlinearGS(SNES snes, Vec X, Vec B, void *ctx)
727: {
728:   PetscInt      i, j, k, xs, ys, xm, ym, its, tot_its, sweeps, l, m;
729:   PetscReal     hx, hy, hxdhy, hydhx, dhx, dhy, sc;
730:   PetscScalar **x, **b, bij, F, F0 = 0, J, y, u, eu;
731:   PetscReal     atol, rtol, stol;
732:   DM            da;
733:   AppCtx       *user = (AppCtx *)ctx;
734:   Vec           localX, localB;
735:   DMDALocalInfo info;

738:   SNESGetDM(snes, &da);
739:   DMDAGetLocalInfo(da, &info);

741:   hx    = 1.0 / (PetscReal)(info.mx - 1);
742:   hy    = 1.0 / (PetscReal)(info.my - 1);
743:   sc    = hx * hy * user->lambda;
744:   dhx   = 1 / hx;
745:   dhy   = 1 / hy;
746:   hxdhy = hx / hy;
747:   hydhx = hy / hx;

749:   tot_its = 0;
750:   SNESNGSGetSweeps(snes, &sweeps);
751:   SNESNGSGetTolerances(snes, &atol, &rtol, &stol, &its);
752:   DMGetLocalVector(da, &localX);
753:   if (B) DMGetLocalVector(da, &localB);
754:   if (B) {
755:     DMGlobalToLocalBegin(da, B, INSERT_VALUES, localB);
756:     DMGlobalToLocalEnd(da, B, INSERT_VALUES, localB);
757:   }
758:   if (B) DMDAVecGetArrayRead(da, localB, &b);
759:   DMGlobalToLocalBegin(da, X, INSERT_VALUES, localX);
760:   DMGlobalToLocalEnd(da, X, INSERT_VALUES, localX);
761:   DMDAVecGetArray(da, localX, &x);
762:   for (l = 0; l < sweeps; l++) {
763:     /*
764:      Get local grid boundaries (for 2-dimensional DMDA):
765:      xs, ys   - starting grid indices (no ghost points)
766:      xm, ym   - widths of local grid (no ghost points)
767:      */
768:     DMDAGetCorners(da, &xs, &ys, NULL, &xm, &ym, NULL);
769:     for (m = 0; m < 2; m++) {
770:       for (j = ys; j < ys + ym; j++) {
771:         for (i = xs + (m + j) % 2; i < xs + xm; i += 2) {
772:           PetscReal xx = i * hx, yy = j * hy;
773:           if (B) bij = b[j][i];
774:           else bij = 0.;

776:           if (i == 0 || j == 0 || i == info.mx - 1 || j == info.my - 1) {
777:             /* boundary conditions are all zero Dirichlet */
778:             x[j][i] = 0.0 + bij;
779:           } else {
780:             const PetscScalar u_E = x[j][i + 1], u_W = x[j][i - 1], u_N = x[j + 1][i], u_S = x[j - 1][i];
781:             const PetscScalar uy_E = 0.25 * dhy * (x[j + 1][i] + x[j + 1][i + 1] - x[j - 1][i] - x[j - 1][i + 1]), uy_W = 0.25 * dhy * (x[j + 1][i - 1] + x[j + 1][i] - x[j - 1][i - 1] - x[j - 1][i]), ux_N = 0.25 * dhx * (x[j][i + 1] + x[j + 1][i + 1] - x[j][i - 1] - x[j + 1][i - 1]), ux_S = 0.25 * dhx * (x[j - 1][i + 1] + x[j][i + 1] - x[j - 1][i - 1] - x[j][i - 1]);
782:             u = x[j][i];
783:             for (k = 0; k < its; k++) {
784:               const PetscScalar ux_E = dhx * (u_E - u), ux_W = dhx * (u - u_W), uy_N = dhy * (u_N - u), uy_S = dhy * (u - u_S), e_E = eta(user, xx, yy, ux_E, uy_E), e_W = eta(user, xx, yy, ux_W, uy_W), e_N = eta(user, xx, yy, ux_N, uy_N), e_S = eta(user, xx, yy, ux_S, uy_S), de_E = deta(user, xx, yy, ux_E, uy_E), de_W = deta(user, xx, yy, ux_W, uy_W), de_N = deta(user, xx, yy, ux_N, uy_N), de_S = deta(user, xx, yy, ux_S, uy_S), newt_E = e_E + de_E * PetscSqr(ux_E), newt_W = e_W + de_W * PetscSqr(ux_W), newt_N = e_N + de_N * PetscSqr(uy_N), newt_S = e_S + de_S * PetscSqr(uy_S), uxx = -hy * (e_E * ux_E - e_W * ux_W), uyy = -hx * (e_N * uy_N - e_S * uy_S);

786:               if (sc) eu = PetscExpScalar(u);
787:               else eu = 0;

789:               F = uxx + uyy - sc * eu - bij;
790:               if (k == 0) F0 = F;
791:               J = hxdhy * (newt_N + newt_S) + hydhx * (newt_E + newt_W) - sc * eu;
792:               y = F / J;
793:               u -= y;
794:               tot_its++;
795:               if (atol > PetscAbsReal(PetscRealPart(F)) || rtol * PetscAbsReal(PetscRealPart(F0)) > PetscAbsReal(PetscRealPart(F)) || stol * PetscAbsReal(PetscRealPart(u)) > PetscAbsReal(PetscRealPart(y))) break;
796:             }
797:             x[j][i] = u;
798:           }
799:         }
800:       }
801:     }
802:     /*
803: x     Restore vector
804:      */
805:   }
806:   DMDAVecRestoreArray(da, localX, &x);
807:   DMLocalToGlobalBegin(da, localX, INSERT_VALUES, X);
808:   DMLocalToGlobalEnd(da, localX, INSERT_VALUES, X);
809:   PetscLogFlops(tot_its * (118.0));
810:   DMRestoreLocalVector(da, &localX);
811:   if (B) {
812:     DMDAVecRestoreArrayRead(da, localB, &b);
813:     DMRestoreLocalVector(da, &localB);
814:   }
815:   return 0;
816: }

818: /*TEST

820:    test:
821:       nsize: 2
822:       args: -snes_monitor_short -da_grid_x 20 -da_grid_y 20 -p 1.3 -lambda 1 -jtype NEWTON
823:       requires: !single

825:    test:
826:       suffix: 2
827:       nsize: 2
828:       args: -snes_monitor_short -da_grid_x 20 -da_grid_y 20 -p 1.3 -lambda 1 -jtype PICARD -precheck 1
829:       requires: !single

831:    test:
832:       suffix: 3
833:       nsize: 2
834:       args: -snes_monitor_short -da_grid_x 20 -da_grid_y 20 -p 1.3 -lambda 1 -jtype PICARD -picard -precheck 1 -p 3
835:       requires: !single

837:    test:
838:       suffix: 3_star
839:       nsize: 2
840:       args: -snes_monitor_short -da_grid_x 20 -da_grid_y 20 -p 1.3 -lambda 1 -jtype PICARD -picard -precheck 1 -p 3 -alloc_star
841:       output_file: output/ex15_3.out
842:       requires: !single

844:    test:
845:       suffix: 4
846:       args: -snes_monitor_short -snes_type newtonls -npc_snes_type ngs -snes_npc_side left -da_grid_x 20 -da_grid_y 20 -p 1.3 -lambda 1 -ksp_monitor_short -pc_type none
847:       requires: !single

849:    test:
850:       suffix: lag_jac
851:       nsize: 4
852:       args: -snes_monitor_short -da_grid_x 20 -da_grid_y 20 -p 6.0 -lambda 0 -jtype NEWTON -snes_type ngmres -npc_snes_type newtonls -npc_snes_lag_jacobian 5 -npc_pc_type asm -npc_ksp_converged_reason -npc_snes_lag_jacobian_persists
853:       requires: !single

855:    test:
856:       suffix: lag_pc
857:       nsize: 4
858:       args: -snes_monitor_short -da_grid_x 20 -da_grid_y 20 -p 6.0 -lambda 0 -jtype NEWTON -snes_type ngmres -npc_snes_type newtonls -npc_snes_lag_preconditioner 5 -npc_pc_type asm -npc_ksp_converged_reason -npc_snes_lag_preconditioner_persists
859:       requires: !single

861:    test:
862:       suffix: nleqerr
863:       args: -snes_monitor_short -snes_type newtonls -da_grid_x 20 -da_grid_y 20 -p 1.3 -lambda 1 -snes_linesearch_monitor -pc_type lu -snes_linesearch_type nleqerr
864:       requires: !single

866:    test:
867:       suffix: mf
868:       args: -snes_monitor_short -pc_type lu -da_refine 4  -p 3 -ksp_rtol 1.e-12  -snes_mf_operator
869:       requires: !single

871:    test:
872:       suffix: mf_picard
873:       args: -snes_monitor_short -pc_type lu -da_refine 4  -p 3 -ksp_rtol 1.e-12  -snes_mf_operator -picard
874:       requires: !single
875:       output_file: output/ex15_mf.out

877:    test:
878:       suffix: fd_picard
879:       args: -snes_monitor_short -pc_type lu -da_refine 2  -p 3 -ksp_rtol 1.e-12  -snes_fd -picard
880:       requires: !single

882:    test:
883:       suffix: fd_color_picard
884:       args: -snes_monitor_short -pc_type lu -da_refine 4  -p 3 -ksp_rtol 1.e-12  -snes_fd_color -picard
885:       requires: !single
886:       output_file: output/ex15_mf.out

888: TEST*/