Actual source code: ex3k.kokkos.cxx

  1: static char help[] = "Newton methods to solve u'' + u^{2} = f in parallel. Uses Kokkos\n\\n";

  3: #include <petscdmda_kokkos.hpp>
  4: #include <petscsnes.h>

  6: /*
  7:    User-defined application context
  8: */
  9: typedef struct {
 10:   DM        da; /* distributed array */
 11:   Vec       F;  /* right-hand-side of PDE */
 12:   PetscReal h;  /* mesh spacing */
 13: } ApplicationCtx;

 15: /* ------------------------------------------------------------------- */
 16: /*
 17:    FormInitialGuess - Computes initial guess.

 19:    Input/Output Parameter:
 20: .  x - the solution vector
 21: */
 22: PetscErrorCode FormInitialGuess(Vec x)
 23: {
 24:   PetscScalar pfive = .50;

 27:   VecSet(x, pfive);
 28:   return 0;
 29: }

 31: /* ------------------------------------------------------------------- */
 32: /*
 33:    CpuFunction - Evaluates nonlinear function, F(x) on CPU

 35:    Input Parameters:
 36: .  snes - the SNES context
 37: .  x - input vector
 38: .  ctx - optional user-defined context, as set by SNESSetFunction()

 40:    Output Parameter:
 41: .  r - function vector

 43:    Note:
 44:    The user-defined context can contain any application-specific
 45:    data needed for the function evaluation.
 46: */
 47: PetscErrorCode CpuFunction(SNES snes, Vec x, Vec r, void *ctx)
 48: {
 49:   ApplicationCtx *user = (ApplicationCtx *)ctx;
 50:   DM              da   = user->da;
 51:   PetscScalar    *X, *R, *F, d;
 52:   PetscInt        i, M, xs, xm;
 53:   Vec             xl;

 56:   DMGetLocalVector(da, &xl);
 57:   DMGlobalToLocal(da, x, INSERT_VALUES, xl);
 58:   DMDAVecGetArray(da, xl, &X);
 59:   DMDAVecGetArray(da, r, &R);
 60:   DMDAVecGetArray(da, user->F, &F);

 62:   DMDAGetCorners(da, &xs, NULL, NULL, &xm, NULL, NULL);
 63:   DMDAGetInfo(da, NULL, &M, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL);

 65:   if (xs == 0) { /* left boundary */
 66:     R[0] = X[0];
 67:     xs++;
 68:     xm--;
 69:   }
 70:   if (xs + xm == M) { /* right boundary */
 71:     R[xs + xm - 1] = X[xs + xm - 1] - 1.0;
 72:     xm--;
 73:   }
 74:   d = 1.0 / (user->h * user->h);
 75:   for (i = xs; i < xs + xm; i++) R[i] = d * (X[i - 1] - 2.0 * X[i] + X[i + 1]) + X[i] * X[i] - F[i];

 77:   DMDAVecRestoreArray(da, xl, &X);
 78:   DMDAVecRestoreArray(da, r, &R);
 79:   DMDAVecRestoreArray(da, user->F, &F);
 80:   DMRestoreLocalVector(da, &xl);
 81:   return 0;
 82: }

 84: using DefaultExecutionSpace            = Kokkos::DefaultExecutionSpace;
 85: using DefaultMemorySpace               = Kokkos::DefaultExecutionSpace::memory_space;
 86: using PetscScalarKokkosOffsetView      = Kokkos::Experimental::OffsetView<PetscScalar *, DefaultMemorySpace>;
 87: using ConstPetscScalarKokkosOffsetView = Kokkos::Experimental::OffsetView<const PetscScalar *, DefaultMemorySpace>;

 89: PetscErrorCode KokkosFunction(SNES snes, Vec x, Vec r, void *ctx)
 90: {
 91:   ApplicationCtx                  *user = (ApplicationCtx *)ctx;
 92:   DM                               da   = user->da;
 93:   PetscScalar                      d;
 94:   PetscInt                         M;
 95:   Vec                              xl;
 96:   PetscScalarKokkosOffsetView      R;
 97:   ConstPetscScalarKokkosOffsetView X, F;

100:   DMGetLocalVector(da, &xl);
101:   DMGlobalToLocal(da, x, INSERT_VALUES, xl);
102:   d = 1.0 / (user->h * user->h);
103:   DMDAGetInfo(da, NULL, &M, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL);
104:   DMDAVecGetKokkosOffsetView(da, xl, &X);      /* read only */
105:   DMDAVecGetKokkosOffsetViewWrite(da, r, &R);  /* write only */
106:   DMDAVecGetKokkosOffsetView(da, user->F, &F); /* read only */
107:   Kokkos::parallel_for(
108:     Kokkos::RangePolicy<>(R.begin(0), R.end(0)), KOKKOS_LAMBDA(int i) {
109:       if (i == 0) R(0) = X(0);                                                 /* left boundary */
110:       else if (i == M - 1) R(i) = X(i) - 1.0;                                  /* right boundary */
111:       else R(i) = d * (X(i - 1) - 2.0 * X(i) + X(i + 1)) + X(i) * X(i) - F(i); /* interior */
112:     });
113:   DMDAVecRestoreKokkosOffsetView(da, xl, &X);
114:   DMDAVecRestoreKokkosOffsetViewWrite(da, r, &R);
115:   DMDAVecRestoreKokkosOffsetView(da, user->F, &F);
116:   DMRestoreLocalVector(da, &xl);
117:   return 0;
118: }

120: PetscErrorCode StubFunction(SNES snes, Vec x, Vec r, void *ctx)
121: {
122:   ApplicationCtx *user = (ApplicationCtx *)ctx;
123:   DM              da   = user->da;
124:   Vec             rk;
125:   PetscReal       norm = 0;

128:   DMGetGlobalVector(da, &rk);
129:   CpuFunction(snes, x, r, ctx);
130:   KokkosFunction(snes, x, rk, ctx);
131:   VecAXPY(rk, -1.0, r);
132:   VecNorm(rk, NORM_2, &norm);
133:   DMRestoreGlobalVector(da, &rk);
135:   return 0;
136: }
137: /* ------------------------------------------------------------------- */
138: /*
139:    FormJacobian - Evaluates Jacobian matrix.

141:    Input Parameters:
142: .  snes - the SNES context
143: .  x - input vector
144: .  dummy - optional user-defined context (not used here)

146:    Output Parameters:
147: .  jac - Jacobian matrix
148: .  B - optionally different preconditioning matrix
149: .  flag - flag indicating matrix structure
150: */
151: PetscErrorCode FormJacobian(SNES snes, Vec x, Mat jac, Mat B, void *ctx)
152: {
153:   ApplicationCtx *user = (ApplicationCtx *)ctx;
154:   PetscScalar    *xx, d, A[3];
155:   PetscInt        i, j[3], M, xs, xm;
156:   DM              da = user->da;

159:   /*
160:      Get pointer to vector data
161:   */
162:   DMDAVecGetArrayRead(da, x, &xx);
163:   DMDAGetCorners(da, &xs, NULL, NULL, &xm, NULL, NULL);

165:   /*
166:     Get range of locally owned matrix
167:   */
168:   DMDAGetInfo(da, NULL, &M, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL);

170:   /*
171:      Determine starting and ending local indices for interior grid points.
172:      Set Jacobian entries for boundary points.
173:   */

175:   if (xs == 0) { /* left boundary */
176:     i    = 0;
177:     A[0] = 1.0;

179:     MatSetValues(jac, 1, &i, 1, &i, A, INSERT_VALUES);
180:     xs++;
181:     xm--;
182:   }
183:   if (xs + xm == M) { /* right boundary */
184:     i    = M - 1;
185:     A[0] = 1.0;
186:     MatSetValues(jac, 1, &i, 1, &i, A, INSERT_VALUES);
187:     xm--;
188:   }

190:   /*
191:      Interior grid points
192:       - Note that in this case we set all elements for a particular
193:         row at once.
194:   */
195:   d = 1.0 / (user->h * user->h);
196:   for (i = xs; i < xs + xm; i++) {
197:     j[0] = i - 1;
198:     j[1] = i;
199:     j[2] = i + 1;
200:     A[0] = A[2] = d;
201:     A[1]        = -2.0 * d + 2.0 * xx[i];
202:     MatSetValues(jac, 1, &i, 3, j, A, INSERT_VALUES);
203:   }

205:   /*
206:      Assemble matrix, using the 2-step process:
207:        MatAssemblyBegin(), MatAssemblyEnd().
208:      By placing code between these two statements, computations can be
209:      done while messages are in transition.

211:      Also, restore vector.
212:   */

214:   MatAssemblyBegin(jac, MAT_FINAL_ASSEMBLY);
215:   DMDAVecRestoreArrayRead(da, x, &xx);
216:   MatAssemblyEnd(jac, MAT_FINAL_ASSEMBLY);

218:   return 0;
219: }

221: int main(int argc, char **argv)
222: {
223:   SNES           snes;       /* SNES context */
224:   Mat            J;          /* Jacobian matrix */
225:   ApplicationCtx ctx;        /* user-defined context */
226:   Vec            x, r, U, F; /* vectors */
227:   PetscScalar    none = -1.0;
228:   PetscInt       its, N = 5, maxit, maxf;
229:   PetscReal      abstol, rtol, stol, norm;
230:   PetscBool      viewinitial = PETSC_FALSE;

233:   PetscInitialize(&argc, &argv, (char *)0, help);
234:   PetscOptionsGetInt(NULL, NULL, "-n", &N, NULL);
235:   ctx.h = 1.0 / (N - 1);
236:   PetscOptionsGetBool(NULL, NULL, "-view_initial", &viewinitial, NULL);

238:   /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
239:      Create nonlinear solver context
240:      - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
241:   SNESCreate(PETSC_COMM_WORLD, &snes);

243:   /*
244:      Create distributed array (DMDA) to manage parallel grid and vectors
245:   */
246:   DMDACreate1d(PETSC_COMM_WORLD, DM_BOUNDARY_NONE, N, 1, 1, NULL, &ctx.da);
247:   DMSetFromOptions(ctx.da);
248:   DMSetUp(ctx.da);

250:   /*
251:      Extract global and local vectors from DMDA; then duplicate for remaining
252:      vectors that are the same types
253:   */
254:   DMCreateGlobalVector(ctx.da, &x);
255:   PetscObjectSetName((PetscObject)x, "Approximate Solution");
256:   VecDuplicate(x, &r);
257:   VecDuplicate(x, &F);
258:   ctx.F = F;
259:   PetscObjectSetName((PetscObject)F, "Forcing function");
260:   VecDuplicate(x, &U);
261:   PetscObjectSetName((PetscObject)U, "Exact Solution");

263:   /*
264:      Set function evaluation routine and vector.  Whenever the nonlinear
265:      solver needs to compute the nonlinear function, it will call this
266:      routine.
267:       - Note that the final routine argument is the user-defined
268:         context that provides application-specific data for the
269:         function evaluation routine.

271:      At the beginning, one can use a stub function that checks the Kokkos version
272:      against the CPU version to quickly expose errors.
273:      SNESSetFunction(snes,r,StubFunction,&ctx);
274:   */
275:   SNESSetFunction(snes, r, KokkosFunction, &ctx);

277:   /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
278:      Create matrix data structure; set Jacobian evaluation routine
279:      - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
280:   DMCreateMatrix(ctx.da, &J);

282:   /*
283:      Set Jacobian matrix data structure and default Jacobian evaluation
284:      routine.  Whenever the nonlinear solver needs to compute the
285:      Jacobian matrix, it will call this routine.
286:       - Note that the final routine argument is the user-defined
287:         context that provides application-specific data for the
288:         Jacobian evaluation routine.
289:   */
290:   SNESSetJacobian(snes, J, J, FormJacobian, &ctx);
291:   SNESSetFromOptions(snes);
292:   SNESGetTolerances(snes, &abstol, &rtol, &stol, &maxit, &maxf);
293:   PetscPrintf(PETSC_COMM_WORLD, "atol=%g, rtol=%g, stol=%g, maxit=%" PetscInt_FMT ", maxf=%" PetscInt_FMT "\n", (double)abstol, (double)rtol, (double)stol, maxit, maxf);

295:   /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
296:      Initialize application:
297:      Store forcing function of PDE and exact solution
298:    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
299:   {
300:     PetscScalarKokkosOffsetView FF, UU;
301:     DMDAVecGetKokkosOffsetViewWrite(ctx.da, F, &FF);
302:     DMDAVecGetKokkosOffsetViewWrite(ctx.da, U, &UU);
303:     Kokkos::parallel_for(
304:       Kokkos::RangePolicy<>(FF.begin(0), FF.end(0)), KOKKOS_LAMBDA(int i) {
305:         PetscReal xp = i * ctx.h;
306:         FF(i)        = 6.0 * xp + pow(xp + 1.e-12, 6.0); /* +1.e-12 is to prevent 0^6 */
307:         UU(i)        = xp * xp * xp;
308:       });
309:     DMDAVecRestoreKokkosOffsetViewWrite(ctx.da, F, &FF);
310:     DMDAVecRestoreKokkosOffsetViewWrite(ctx.da, U, &UU);
311:   }

313:   if (viewinitial) {
314:     VecView(U, NULL);
315:     VecView(F, NULL);
316:   }

318:   /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
319:      Evaluate initial guess; then solve nonlinear system
320:    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */

322:   /*
323:      Note: The user should initialize the vector, x, with the initial guess
324:      for the nonlinear solver prior to calling SNESSolve().  In particular,
325:      to employ an initial guess of zero, the user should explicitly set
326:      this vector to zero by calling VecSet().
327:   */
328:   FormInitialGuess(x);
329:   SNESSolve(snes, NULL, x);
330:   SNESGetIterationNumber(snes, &its);
331:   PetscPrintf(PETSC_COMM_WORLD, "Number of SNES iterations = %" PetscInt_FMT "\n", its);

333:   /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
334:      Check solution and clean up
335:    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
336:   /*
337:      Check the error
338:   */
339:   VecAXPY(x, none, U);
340:   VecNorm(x, NORM_2, &norm);
341:   PetscPrintf(PETSC_COMM_WORLD, "Norm of error %g Iterations %" PetscInt_FMT "\n", (double)norm, its);

343:   /*
344:      Free work space.  All PETSc objects should be destroyed when they
345:      are no longer needed.
346:   */
347:   VecDestroy(&x);
348:   VecDestroy(&r);
349:   VecDestroy(&U);
350:   VecDestroy(&F);
351:   MatDestroy(&J);
352:   SNESDestroy(&snes);
353:   DMDestroy(&ctx.da);
354:   PetscFinalize();
355:   return 0;
356: }

358: /*TEST

360:    build:
361:      requires: kokkos_kernels

363:    test:
364:      requires: kokkos_kernels !complex !single
365:      nsize: 2
366:      args: -dm_vec_type kokkos -dm_mat_type aijkokkos -view_initial -snes_monitor
367:      output_file: output/ex3k_1.out

369: TEST*/