Actual source code: ex3k.kokkos.cxx
1: static char help[] = "Newton methods to solve u'' + u^{2} = f in parallel. Uses Kokkos\n\\n";
3: #include <petscdmda_kokkos.hpp>
4: #include <petscsnes.h>
6: /*
7: User-defined application context
8: */
9: typedef struct {
10: DM da; /* distributed array */
11: Vec F; /* right-hand-side of PDE */
12: PetscReal h; /* mesh spacing */
13: } ApplicationCtx;
15: /* ------------------------------------------------------------------- */
16: /*
17: FormInitialGuess - Computes initial guess.
19: Input/Output Parameter:
20: . x - the solution vector
21: */
22: PetscErrorCode FormInitialGuess(Vec x)
23: {
24: PetscScalar pfive = .50;
26: PetscFunctionBeginUser;
27: PetscCall(VecSet(x, pfive));
28: PetscFunctionReturn(PETSC_SUCCESS);
29: }
31: /* ------------------------------------------------------------------- */
32: /*
33: CpuFunction - Evaluates nonlinear function, F(x) on CPU
35: Input Parameters:
36: . snes - the SNES context
37: . x - input vector
38: . ctx - optional user-defined context, as set by SNESSetFunction()
40: Output Parameter:
41: . r - function vector
43: Note:
44: The user-defined context can contain any application-specific
45: data needed for the function evaluation.
46: */
47: PetscErrorCode CpuFunction(SNES snes, Vec x, Vec r, void *ctx)
48: {
49: ApplicationCtx *user = (ApplicationCtx *)ctx;
50: DM da = user->da;
51: PetscScalar *X, *R, *F, d;
52: PetscInt i, M, xs, xm;
53: Vec xl;
55: PetscFunctionBeginUser;
56: PetscCall(DMGetLocalVector(da, &xl));
57: PetscCall(DMGlobalToLocal(da, x, INSERT_VALUES, xl));
58: PetscCall(DMDAVecGetArray(da, xl, &X));
59: PetscCall(DMDAVecGetArray(da, r, &R));
60: PetscCall(DMDAVecGetArray(da, user->F, &F));
62: PetscCall(DMDAGetCorners(da, &xs, NULL, NULL, &xm, NULL, NULL));
63: PetscCall(DMDAGetInfo(da, NULL, &M, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL));
65: if (xs == 0) { /* left boundary */
66: R[0] = X[0];
67: xs++;
68: xm--;
69: }
70: if (xs + xm == M) { /* right boundary */
71: R[xs + xm - 1] = X[xs + xm - 1] - 1.0;
72: xm--;
73: }
74: d = 1.0 / (user->h * user->h);
75: for (i = xs; i < xs + xm; i++) R[i] = d * (X[i - 1] - 2.0 * X[i] + X[i + 1]) + X[i] * X[i] - F[i];
77: PetscCall(DMDAVecRestoreArray(da, xl, &X));
78: PetscCall(DMDAVecRestoreArray(da, r, &R));
79: PetscCall(DMDAVecRestoreArray(da, user->F, &F));
80: PetscCall(DMRestoreLocalVector(da, &xl));
81: PetscFunctionReturn(PETSC_SUCCESS);
82: }
84: using DefaultExecutionSpace = Kokkos::DefaultExecutionSpace;
85: using DefaultMemorySpace = Kokkos::DefaultExecutionSpace::memory_space;
86: using PetscScalarKokkosOffsetView = Kokkos::Experimental::OffsetView<PetscScalar *, DefaultMemorySpace>;
87: using ConstPetscScalarKokkosOffsetView = Kokkos::Experimental::OffsetView<const PetscScalar *, DefaultMemorySpace>;
89: PetscErrorCode KokkosFunction(SNES snes, Vec x, Vec r, void *ctx)
90: {
91: ApplicationCtx *user = (ApplicationCtx *)ctx;
92: DM da = user->da;
93: PetscScalar d;
94: PetscInt M;
95: Vec xl;
96: PetscScalarKokkosOffsetView R;
97: ConstPetscScalarKokkosOffsetView X, F;
99: PetscFunctionBeginUser;
100: PetscCall(DMGetLocalVector(da, &xl));
101: PetscCall(DMGlobalToLocal(da, x, INSERT_VALUES, xl));
102: d = 1.0 / (user->h * user->h);
103: PetscCall(DMDAGetInfo(da, NULL, &M, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL));
104: PetscCall(DMDAVecGetKokkosOffsetView(da, xl, &X)); /* read only */
105: PetscCall(DMDAVecGetKokkosOffsetViewWrite(da, r, &R)); /* write only */
106: PetscCall(DMDAVecGetKokkosOffsetView(da, user->F, &F)); /* read only */
107: Kokkos::parallel_for(
108: Kokkos::RangePolicy<>(R.begin(0), R.end(0)), KOKKOS_LAMBDA(int i) {
109: if (i == 0) R(0) = X(0); /* left boundary */
110: else if (i == M - 1) R(i) = X(i) - 1.0; /* right boundary */
111: else R(i) = d * (X(i - 1) - 2.0 * X(i) + X(i + 1)) + X(i) * X(i) - F(i); /* interior */
112: });
113: PetscCall(DMDAVecRestoreKokkosOffsetView(da, xl, &X));
114: PetscCall(DMDAVecRestoreKokkosOffsetViewWrite(da, r, &R));
115: PetscCall(DMDAVecRestoreKokkosOffsetView(da, user->F, &F));
116: PetscCall(DMRestoreLocalVector(da, &xl));
117: PetscFunctionReturn(PETSC_SUCCESS);
118: }
120: PetscErrorCode StubFunction(SNES snes, Vec x, Vec r, void *ctx)
121: {
122: ApplicationCtx *user = (ApplicationCtx *)ctx;
123: DM da = user->da;
124: Vec rk;
125: PetscReal norm = 0;
127: PetscFunctionBeginUser;
128: PetscCall(DMGetGlobalVector(da, &rk));
129: PetscCall(CpuFunction(snes, x, r, ctx));
130: PetscCall(KokkosFunction(snes, x, rk, ctx));
131: PetscCall(VecAXPY(rk, -1.0, r));
132: PetscCall(VecNorm(rk, NORM_2, &norm));
133: PetscCall(DMRestoreGlobalVector(da, &rk));
134: PetscCheck(norm <= 1e-6, PETSC_COMM_SELF, PETSC_ERR_PLIB, "KokkosFunction() different from CpuFunction() with a diff norm = %g", (double)norm);
135: PetscFunctionReturn(PETSC_SUCCESS);
136: }
137: /* ------------------------------------------------------------------- */
138: /*
139: FormJacobian - Evaluates Jacobian matrix.
141: Input Parameters:
142: . snes - the SNES context
143: . x - input vector
144: . dummy - optional user-defined context (not used here)
146: Output Parameters:
147: . jac - Jacobian matrix
148: . B - optionally different preconditioning matrix
149: . flag - flag indicating matrix structure
150: */
151: PetscErrorCode FormJacobian(SNES snes, Vec x, Mat jac, Mat B, void *ctx)
152: {
153: ApplicationCtx *user = (ApplicationCtx *)ctx;
154: PetscScalar *xx, d, A[3];
155: PetscInt i, j[3], M, xs, xm;
156: DM da = user->da;
158: PetscFunctionBeginUser;
159: /*
160: Get pointer to vector data
161: */
162: PetscCall(DMDAVecGetArrayRead(da, x, &xx));
163: PetscCall(DMDAGetCorners(da, &xs, NULL, NULL, &xm, NULL, NULL));
165: /*
166: Get range of locally owned matrix
167: */
168: PetscCall(DMDAGetInfo(da, NULL, &M, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL));
170: /*
171: Determine starting and ending local indices for interior grid points.
172: Set Jacobian entries for boundary points.
173: */
175: if (xs == 0) { /* left boundary */
176: i = 0;
177: A[0] = 1.0;
179: PetscCall(MatSetValues(jac, 1, &i, 1, &i, A, INSERT_VALUES));
180: xs++;
181: xm--;
182: }
183: if (xs + xm == M) { /* right boundary */
184: i = M - 1;
185: A[0] = 1.0;
186: PetscCall(MatSetValues(jac, 1, &i, 1, &i, A, INSERT_VALUES));
187: xm--;
188: }
190: /*
191: Interior grid points
192: - Note that in this case we set all elements for a particular
193: row at once.
194: */
195: d = 1.0 / (user->h * user->h);
196: for (i = xs; i < xs + xm; i++) {
197: j[0] = i - 1;
198: j[1] = i;
199: j[2] = i + 1;
200: A[0] = A[2] = d;
201: A[1] = -2.0 * d + 2.0 * xx[i];
202: PetscCall(MatSetValues(jac, 1, &i, 3, j, A, INSERT_VALUES));
203: }
205: /*
206: Assemble matrix, using the 2-step process:
207: MatAssemblyBegin(), MatAssemblyEnd().
208: By placing code between these two statements, computations can be
209: done while messages are in transition.
211: Also, restore vector.
212: */
214: PetscCall(MatAssemblyBegin(jac, MAT_FINAL_ASSEMBLY));
215: PetscCall(DMDAVecRestoreArrayRead(da, x, &xx));
216: PetscCall(MatAssemblyEnd(jac, MAT_FINAL_ASSEMBLY));
218: PetscFunctionReturn(PETSC_SUCCESS);
219: }
221: int main(int argc, char **argv)
222: {
223: SNES snes; /* SNES context */
224: Mat J; /* Jacobian matrix */
225: ApplicationCtx ctx; /* user-defined context */
226: Vec x, r, U, F; /* vectors */
227: PetscScalar none = -1.0;
228: PetscInt its, N = 5, maxit, maxf;
229: PetscReal abstol, rtol, stol, norm;
230: PetscBool viewinitial = PETSC_FALSE;
232: PetscFunctionBeginUser;
233: PetscCall(PetscInitialize(&argc, &argv, (char *)0, help));
234: PetscCall(PetscOptionsGetInt(NULL, NULL, "-n", &N, NULL));
235: ctx.h = 1.0 / (N - 1);
236: PetscCall(PetscOptionsGetBool(NULL, NULL, "-view_initial", &viewinitial, NULL));
238: /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
239: Create nonlinear solver context
240: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
241: PetscCall(SNESCreate(PETSC_COMM_WORLD, &snes));
243: /*
244: Create distributed array (DMDA) to manage parallel grid and vectors
245: */
246: PetscCall(DMDACreate1d(PETSC_COMM_WORLD, DM_BOUNDARY_NONE, N, 1, 1, NULL, &ctx.da));
247: PetscCall(DMSetFromOptions(ctx.da));
248: PetscCall(DMSetUp(ctx.da));
250: /*
251: Extract global and local vectors from DMDA; then duplicate for remaining
252: vectors that are the same types
253: */
254: PetscCall(DMCreateGlobalVector(ctx.da, &x));
255: PetscCall(PetscObjectSetName((PetscObject)x, "Approximate Solution"));
256: PetscCall(VecDuplicate(x, &r));
257: PetscCall(VecDuplicate(x, &F));
258: ctx.F = F;
259: PetscCall(PetscObjectSetName((PetscObject)F, "Forcing function"));
260: PetscCall(VecDuplicate(x, &U));
261: PetscCall(PetscObjectSetName((PetscObject)U, "Exact Solution"));
263: /*
264: Set function evaluation routine and vector. Whenever the nonlinear
265: solver needs to compute the nonlinear function, it will call this
266: routine.
267: - Note that the final routine argument is the user-defined
268: context that provides application-specific data for the
269: function evaluation routine.
271: At the beginning, one can use a stub function that checks the Kokkos version
272: against the CPU version to quickly expose errors.
273: PetscCall(SNESSetFunction(snes,r,StubFunction,&ctx));
274: */
275: PetscCall(SNESSetFunction(snes, r, KokkosFunction, &ctx));
277: /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
278: Create matrix data structure; set Jacobian evaluation routine
279: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
280: PetscCall(DMCreateMatrix(ctx.da, &J));
282: /*
283: Set Jacobian matrix data structure and default Jacobian evaluation
284: routine. Whenever the nonlinear solver needs to compute the
285: Jacobian matrix, it will call this routine.
286: - Note that the final routine argument is the user-defined
287: context that provides application-specific data for the
288: Jacobian evaluation routine.
289: */
290: PetscCall(SNESSetJacobian(snes, J, J, FormJacobian, &ctx));
291: PetscCall(SNESSetFromOptions(snes));
292: PetscCall(SNESGetTolerances(snes, &abstol, &rtol, &stol, &maxit, &maxf));
293: PetscCall(PetscPrintf(PETSC_COMM_WORLD, "atol=%g, rtol=%g, stol=%g, maxit=%" PetscInt_FMT ", maxf=%" PetscInt_FMT "\n", (double)abstol, (double)rtol, (double)stol, maxit, maxf));
295: /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
296: Initialize application:
297: Store forcing function of PDE and exact solution
298: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
299: {
300: PetscScalarKokkosOffsetView FF, UU;
301: PetscCall(DMDAVecGetKokkosOffsetViewWrite(ctx.da, F, &FF));
302: PetscCall(DMDAVecGetKokkosOffsetViewWrite(ctx.da, U, &UU));
303: Kokkos::parallel_for(
304: Kokkos::RangePolicy<>(FF.begin(0), FF.end(0)), KOKKOS_LAMBDA(int i) {
305: PetscReal xp = i * ctx.h;
306: FF(i) = 6.0 * xp + pow(xp + 1.e-12, 6.0); /* +1.e-12 is to prevent 0^6 */
307: UU(i) = xp * xp * xp;
308: });
309: PetscCall(DMDAVecRestoreKokkosOffsetViewWrite(ctx.da, F, &FF));
310: PetscCall(DMDAVecRestoreKokkosOffsetViewWrite(ctx.da, U, &UU));
311: }
313: if (viewinitial) {
314: PetscCall(VecView(U, NULL));
315: PetscCall(VecView(F, NULL));
316: }
318: /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
319: Evaluate initial guess; then solve nonlinear system
320: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
322: /*
323: Note: The user should initialize the vector, x, with the initial guess
324: for the nonlinear solver prior to calling SNESSolve(). In particular,
325: to employ an initial guess of zero, the user should explicitly set
326: this vector to zero by calling VecSet().
327: */
328: PetscCall(FormInitialGuess(x));
329: PetscCall(SNESSolve(snes, NULL, x));
330: PetscCall(SNESGetIterationNumber(snes, &its));
331: PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Number of SNES iterations = %" PetscInt_FMT "\n", its));
333: /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
334: Check solution and clean up
335: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
336: /*
337: Check the error
338: */
339: PetscCall(VecAXPY(x, none, U));
340: PetscCall(VecNorm(x, NORM_2, &norm));
341: PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Norm of error %g Iterations %" PetscInt_FMT "\n", (double)norm, its));
343: /*
344: Free work space. All PETSc objects should be destroyed when they
345: are no longer needed.
346: */
347: PetscCall(VecDestroy(&x));
348: PetscCall(VecDestroy(&r));
349: PetscCall(VecDestroy(&U));
350: PetscCall(VecDestroy(&F));
351: PetscCall(MatDestroy(&J));
352: PetscCall(SNESDestroy(&snes));
353: PetscCall(DMDestroy(&ctx.da));
354: PetscCall(PetscFinalize());
355: return 0;
356: }
358: /*TEST
360: build:
361: requires: kokkos_kernels
363: test:
364: requires: kokkos_kernels !complex !single
365: nsize: 2
366: args: -dm_vec_type kokkos -dm_mat_type aijkokkos -view_initial -snes_monitor
367: output_file: output/ex3k_1.out
369: TEST*/