Actual source code: ex3k.kokkos.cxx
1: static char help[] = "Newton methods to solve u'' + u^{2} = f in parallel. Uses Kokkos\n\\n";
3: #include <petscdmda_kokkos.hpp>
4: #include <petscsnes.h>
6: /*
7: User-defined application context
8: */
9: typedef struct {
10: DM da; /* distributed array */
11: Vec F; /* right-hand-side of PDE */
12: PetscReal h; /* mesh spacing */
13: } ApplicationCtx;
15: /* ------------------------------------------------------------------- */
16: /*
17: FormInitialGuess - Computes initial guess.
19: Input/Output Parameter:
20: . x - the solution vector
21: */
22: PetscErrorCode FormInitialGuess(Vec x)
23: {
24: PetscScalar pfive = .50;
27: VecSet(x, pfive);
28: return 0;
29: }
31: /* ------------------------------------------------------------------- */
32: /*
33: CpuFunction - Evaluates nonlinear function, F(x) on CPU
35: Input Parameters:
36: . snes - the SNES context
37: . x - input vector
38: . ctx - optional user-defined context, as set by SNESSetFunction()
40: Output Parameter:
41: . r - function vector
43: Note:
44: The user-defined context can contain any application-specific
45: data needed for the function evaluation.
46: */
47: PetscErrorCode CpuFunction(SNES snes, Vec x, Vec r, void *ctx)
48: {
49: ApplicationCtx *user = (ApplicationCtx *)ctx;
50: DM da = user->da;
51: PetscScalar *X, *R, *F, d;
52: PetscInt i, M, xs, xm;
53: Vec xl;
56: DMGetLocalVector(da, &xl);
57: DMGlobalToLocal(da, x, INSERT_VALUES, xl);
58: DMDAVecGetArray(da, xl, &X);
59: DMDAVecGetArray(da, r, &R);
60: DMDAVecGetArray(da, user->F, &F);
62: DMDAGetCorners(da, &xs, NULL, NULL, &xm, NULL, NULL);
63: DMDAGetInfo(da, NULL, &M, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL);
65: if (xs == 0) { /* left boundary */
66: R[0] = X[0];
67: xs++;
68: xm--;
69: }
70: if (xs + xm == M) { /* right boundary */
71: R[xs + xm - 1] = X[xs + xm - 1] - 1.0;
72: xm--;
73: }
74: d = 1.0 / (user->h * user->h);
75: for (i = xs; i < xs + xm; i++) R[i] = d * (X[i - 1] - 2.0 * X[i] + X[i + 1]) + X[i] * X[i] - F[i];
77: DMDAVecRestoreArray(da, xl, &X);
78: DMDAVecRestoreArray(da, r, &R);
79: DMDAVecRestoreArray(da, user->F, &F);
80: DMRestoreLocalVector(da, &xl);
81: return 0;
82: }
84: using DefaultExecutionSpace = Kokkos::DefaultExecutionSpace;
85: using DefaultMemorySpace = Kokkos::DefaultExecutionSpace::memory_space;
86: using PetscScalarKokkosOffsetView = Kokkos::Experimental::OffsetView<PetscScalar *, DefaultMemorySpace>;
87: using ConstPetscScalarKokkosOffsetView = Kokkos::Experimental::OffsetView<const PetscScalar *, DefaultMemorySpace>;
89: PetscErrorCode KokkosFunction(SNES snes, Vec x, Vec r, void *ctx)
90: {
91: ApplicationCtx *user = (ApplicationCtx *)ctx;
92: DM da = user->da;
93: PetscScalar d;
94: PetscInt M;
95: Vec xl;
96: PetscScalarKokkosOffsetView R;
97: ConstPetscScalarKokkosOffsetView X, F;
100: DMGetLocalVector(da, &xl);
101: DMGlobalToLocal(da, x, INSERT_VALUES, xl);
102: d = 1.0 / (user->h * user->h);
103: DMDAGetInfo(da, NULL, &M, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL);
104: DMDAVecGetKokkosOffsetView(da, xl, &X); /* read only */
105: DMDAVecGetKokkosOffsetViewWrite(da, r, &R); /* write only */
106: DMDAVecGetKokkosOffsetView(da, user->F, &F); /* read only */
107: Kokkos::parallel_for(
108: Kokkos::RangePolicy<>(R.begin(0), R.end(0)), KOKKOS_LAMBDA(int i) {
109: if (i == 0) R(0) = X(0); /* left boundary */
110: else if (i == M - 1) R(i) = X(i) - 1.0; /* right boundary */
111: else R(i) = d * (X(i - 1) - 2.0 * X(i) + X(i + 1)) + X(i) * X(i) - F(i); /* interior */
112: });
113: DMDAVecRestoreKokkosOffsetView(da, xl, &X);
114: DMDAVecRestoreKokkosOffsetViewWrite(da, r, &R);
115: DMDAVecRestoreKokkosOffsetView(da, user->F, &F);
116: DMRestoreLocalVector(da, &xl);
117: return 0;
118: }
120: PetscErrorCode StubFunction(SNES snes, Vec x, Vec r, void *ctx)
121: {
122: ApplicationCtx *user = (ApplicationCtx *)ctx;
123: DM da = user->da;
124: Vec rk;
125: PetscReal norm = 0;
128: DMGetGlobalVector(da, &rk);
129: CpuFunction(snes, x, r, ctx);
130: KokkosFunction(snes, x, rk, ctx);
131: VecAXPY(rk, -1.0, r);
132: VecNorm(rk, NORM_2, &norm);
133: DMRestoreGlobalVector(da, &rk);
135: return 0;
136: }
137: /* ------------------------------------------------------------------- */
138: /*
139: FormJacobian - Evaluates Jacobian matrix.
141: Input Parameters:
142: . snes - the SNES context
143: . x - input vector
144: . dummy - optional user-defined context (not used here)
146: Output Parameters:
147: . jac - Jacobian matrix
148: . B - optionally different preconditioning matrix
149: . flag - flag indicating matrix structure
150: */
151: PetscErrorCode FormJacobian(SNES snes, Vec x, Mat jac, Mat B, void *ctx)
152: {
153: ApplicationCtx *user = (ApplicationCtx *)ctx;
154: PetscScalar *xx, d, A[3];
155: PetscInt i, j[3], M, xs, xm;
156: DM da = user->da;
159: /*
160: Get pointer to vector data
161: */
162: DMDAVecGetArrayRead(da, x, &xx);
163: DMDAGetCorners(da, &xs, NULL, NULL, &xm, NULL, NULL);
165: /*
166: Get range of locally owned matrix
167: */
168: DMDAGetInfo(da, NULL, &M, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL);
170: /*
171: Determine starting and ending local indices for interior grid points.
172: Set Jacobian entries for boundary points.
173: */
175: if (xs == 0) { /* left boundary */
176: i = 0;
177: A[0] = 1.0;
179: MatSetValues(jac, 1, &i, 1, &i, A, INSERT_VALUES);
180: xs++;
181: xm--;
182: }
183: if (xs + xm == M) { /* right boundary */
184: i = M - 1;
185: A[0] = 1.0;
186: MatSetValues(jac, 1, &i, 1, &i, A, INSERT_VALUES);
187: xm--;
188: }
190: /*
191: Interior grid points
192: - Note that in this case we set all elements for a particular
193: row at once.
194: */
195: d = 1.0 / (user->h * user->h);
196: for (i = xs; i < xs + xm; i++) {
197: j[0] = i - 1;
198: j[1] = i;
199: j[2] = i + 1;
200: A[0] = A[2] = d;
201: A[1] = -2.0 * d + 2.0 * xx[i];
202: MatSetValues(jac, 1, &i, 3, j, A, INSERT_VALUES);
203: }
205: /*
206: Assemble matrix, using the 2-step process:
207: MatAssemblyBegin(), MatAssemblyEnd().
208: By placing code between these two statements, computations can be
209: done while messages are in transition.
211: Also, restore vector.
212: */
214: MatAssemblyBegin(jac, MAT_FINAL_ASSEMBLY);
215: DMDAVecRestoreArrayRead(da, x, &xx);
216: MatAssemblyEnd(jac, MAT_FINAL_ASSEMBLY);
218: return 0;
219: }
221: int main(int argc, char **argv)
222: {
223: SNES snes; /* SNES context */
224: Mat J; /* Jacobian matrix */
225: ApplicationCtx ctx; /* user-defined context */
226: Vec x, r, U, F; /* vectors */
227: PetscScalar none = -1.0;
228: PetscInt its, N = 5, maxit, maxf;
229: PetscReal abstol, rtol, stol, norm;
230: PetscBool viewinitial = PETSC_FALSE;
233: PetscInitialize(&argc, &argv, (char *)0, help);
234: PetscOptionsGetInt(NULL, NULL, "-n", &N, NULL);
235: ctx.h = 1.0 / (N - 1);
236: PetscOptionsGetBool(NULL, NULL, "-view_initial", &viewinitial, NULL);
238: /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
239: Create nonlinear solver context
240: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
241: SNESCreate(PETSC_COMM_WORLD, &snes);
243: /*
244: Create distributed array (DMDA) to manage parallel grid and vectors
245: */
246: DMDACreate1d(PETSC_COMM_WORLD, DM_BOUNDARY_NONE, N, 1, 1, NULL, &ctx.da);
247: DMSetFromOptions(ctx.da);
248: DMSetUp(ctx.da);
250: /*
251: Extract global and local vectors from DMDA; then duplicate for remaining
252: vectors that are the same types
253: */
254: DMCreateGlobalVector(ctx.da, &x);
255: PetscObjectSetName((PetscObject)x, "Approximate Solution");
256: VecDuplicate(x, &r);
257: VecDuplicate(x, &F);
258: ctx.F = F;
259: PetscObjectSetName((PetscObject)F, "Forcing function");
260: VecDuplicate(x, &U);
261: PetscObjectSetName((PetscObject)U, "Exact Solution");
263: /*
264: Set function evaluation routine and vector. Whenever the nonlinear
265: solver needs to compute the nonlinear function, it will call this
266: routine.
267: - Note that the final routine argument is the user-defined
268: context that provides application-specific data for the
269: function evaluation routine.
271: At the beginning, one can use a stub function that checks the Kokkos version
272: against the CPU version to quickly expose errors.
273: SNESSetFunction(snes,r,StubFunction,&ctx);
274: */
275: SNESSetFunction(snes, r, KokkosFunction, &ctx);
277: /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
278: Create matrix data structure; set Jacobian evaluation routine
279: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
280: DMCreateMatrix(ctx.da, &J);
282: /*
283: Set Jacobian matrix data structure and default Jacobian evaluation
284: routine. Whenever the nonlinear solver needs to compute the
285: Jacobian matrix, it will call this routine.
286: - Note that the final routine argument is the user-defined
287: context that provides application-specific data for the
288: Jacobian evaluation routine.
289: */
290: SNESSetJacobian(snes, J, J, FormJacobian, &ctx);
291: SNESSetFromOptions(snes);
292: SNESGetTolerances(snes, &abstol, &rtol, &stol, &maxit, &maxf);
293: PetscPrintf(PETSC_COMM_WORLD, "atol=%g, rtol=%g, stol=%g, maxit=%" PetscInt_FMT ", maxf=%" PetscInt_FMT "\n", (double)abstol, (double)rtol, (double)stol, maxit, maxf);
295: /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
296: Initialize application:
297: Store forcing function of PDE and exact solution
298: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
299: {
300: PetscScalarKokkosOffsetView FF, UU;
301: DMDAVecGetKokkosOffsetViewWrite(ctx.da, F, &FF);
302: DMDAVecGetKokkosOffsetViewWrite(ctx.da, U, &UU);
303: Kokkos::parallel_for(
304: Kokkos::RangePolicy<>(FF.begin(0), FF.end(0)), KOKKOS_LAMBDA(int i) {
305: PetscReal xp = i * ctx.h;
306: FF(i) = 6.0 * xp + pow(xp + 1.e-12, 6.0); /* +1.e-12 is to prevent 0^6 */
307: UU(i) = xp * xp * xp;
308: });
309: DMDAVecRestoreKokkosOffsetViewWrite(ctx.da, F, &FF);
310: DMDAVecRestoreKokkosOffsetViewWrite(ctx.da, U, &UU);
311: }
313: if (viewinitial) {
314: VecView(U, NULL);
315: VecView(F, NULL);
316: }
318: /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
319: Evaluate initial guess; then solve nonlinear system
320: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
322: /*
323: Note: The user should initialize the vector, x, with the initial guess
324: for the nonlinear solver prior to calling SNESSolve(). In particular,
325: to employ an initial guess of zero, the user should explicitly set
326: this vector to zero by calling VecSet().
327: */
328: FormInitialGuess(x);
329: SNESSolve(snes, NULL, x);
330: SNESGetIterationNumber(snes, &its);
331: PetscPrintf(PETSC_COMM_WORLD, "Number of SNES iterations = %" PetscInt_FMT "\n", its);
333: /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
334: Check solution and clean up
335: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
336: /*
337: Check the error
338: */
339: VecAXPY(x, none, U);
340: VecNorm(x, NORM_2, &norm);
341: PetscPrintf(PETSC_COMM_WORLD, "Norm of error %g Iterations %" PetscInt_FMT "\n", (double)norm, its);
343: /*
344: Free work space. All PETSc objects should be destroyed when they
345: are no longer needed.
346: */
347: VecDestroy(&x);
348: VecDestroy(&r);
349: VecDestroy(&U);
350: VecDestroy(&F);
351: MatDestroy(&J);
352: SNESDestroy(&snes);
353: DMDestroy(&ctx.da);
354: PetscFinalize();
355: return 0;
356: }
358: /*TEST
360: build:
361: requires: kokkos_kernels
363: test:
364: requires: kokkos_kernels !complex !single
365: nsize: 2
366: args: -dm_vec_type kokkos -dm_mat_type aijkokkos -view_initial -snes_monitor
367: output_file: output/ex3k_1.out
369: TEST*/