Actual source code: petsclandau.h
1: #pragma once
3: #include <petscdmplex.h>
4: #include <petscts.h>
6: /* MANSEC = TS */
7: /* SUBMANSEC = LANDAU */
9: PETSC_EXTERN PetscErrorCode DMPlexLandauPrintNorms(Vec, PetscInt);
10: PETSC_EXTERN PetscErrorCode DMPlexLandauCreateVelocitySpace(MPI_Comm, PetscInt, const char[], Vec *, Mat *, DM *);
11: PETSC_EXTERN PetscErrorCode DMPlexLandauDestroyVelocitySpace(DM *);
12: PETSC_EXTERN PetscErrorCode DMPlexLandauAccess(DM, Vec, PetscErrorCode (*)(DM, Vec, PetscInt, PetscInt, PetscInt, void *), void *);
13: PETSC_EXTERN PetscErrorCode DMPlexLandauAddMaxwellians(DM, Vec, PetscReal, PetscReal[], PetscReal[], PetscInt, PetscInt, PetscInt, void *);
14: PETSC_EXTERN PetscErrorCode DMPlexLandauCreateMassMatrix(DM dm, Mat *Amat);
15: PETSC_EXTERN PetscErrorCode DMPlexLandauIFunction(TS, PetscReal, Vec, Vec, Vec, void *);
16: PETSC_EXTERN PetscErrorCode DMPlexLandauIJacobian(TS, PetscReal, Vec, Vec, PetscReal, Mat, Mat, void *);
18: /*MC
19: LandauIdx - Integer type used to index entries in the `DMPLEX` Landau collision-operator data structures, such as the COO matrix workspaces and the `P4estVertexMaps` reduced-quadrature maps
21: Level: developer
23: Note:
24: `LandauIdx` is a `PetscInt`; it is named separately so the device-side data structures used by the Landau collision operator can be sized independently from the rest of PETSc if needed.
26: .seealso: `DMPlexLandauCreateVelocitySpace()`, `LandauStaticData`, `LandauCtx`, `P4estVertexMaps`
27: M*/
28: typedef PetscInt LandauIdx;
30: /* the Fokker-Planck-Landau context */
31: #if !defined(LANDAU_MAX_SPECIES)
32: #if defined(PETSC_USE_DMLANDAU_2D)
33: #define LANDAU_MAX_SPECIES 10
34: #define LANDAU_MAX_GRIDS 3
35: #else
36: #define LANDAU_MAX_SPECIES 10
37: #define LANDAU_MAX_GRIDS 3
38: #endif
39: #else
40: #define LANDAU_MAX_GRIDS 3
41: #endif
43: #if !defined(LANDAU_MAX_Q)
44: #if defined(LANDAU_MAX_NQND)
45: #error "LANDAU_MAX_NQND but not LANDAU_MAX_Q. Use -DLANDAU_MAX_Q=4 for Q3 elements"
46: #endif
47: #if defined(PETSC_USE_DMLANDAU_2D)
48: #define LANDAU_MAX_Q 6
49: #else
50: #define LANDAU_MAX_Q 6
51: #endif
52: #else
53: #undef LANDAU_MAX_NQND
54: #endif
56: #if defined(PETSC_USE_DMLANDAU_2D)
57: #define LANDAU_MAX_Q_FACE LANDAU_MAX_Q
58: #define LANDAU_MAX_NQND (LANDAU_MAX_Q * LANDAU_MAX_Q)
59: #define LANDAU_MAX_BATCH_SZ 1024
60: #define LANDAU_DIM 2
61: #else
62: #define LANDAU_MAX_Q_FACE (LANDAU_MAX_Q * LANDAU_MAX_Q)
63: #define LANDAU_MAX_NQND (LANDAU_MAX_Q * LANDAU_MAX_Q * LANDAU_MAX_Q)
64: #define LANDAU_MAX_BATCH_SZ 64
65: #define LANDAU_DIM 3
66: #endif
68: /*E
69: LandauDeviceType - Selects the backend used to evaluate the Landau collision-operator Jacobian and to hold its workspace data
71: Values:
72: + `LANDAU_KOKKOS` - run on the device with the Kokkos backend (requires PETSc configured `--with-kokkos`)
73: - `LANDAU_CPU` - run on the host (default when Kokkos is not enabled)
75: Level: intermediate
77: .seealso: `DMPlexLandauCreateVelocitySpace()`, `LandauCtx`, `LandauStaticData`
78: E*/
79: typedef enum {
80: LANDAU_KOKKOS,
81: LANDAU_CPU
82: } LandauDeviceType;
84: /*S
85: LandauStaticData - Workspace of pre-computed quadrature, geometry, and physics data that is shared by every Jacobian assembly of the `DMPLEX` Landau collision operator
87: Level: developer
89: Note:
90: The fields are typed as `void *` so that the same struct can hold either host arrays or device (Kokkos/CUDA/HIP) arrays depending on `LandauDeviceType`.
91: The contents are managed by `DMPlexLandauCreateVelocitySpace()` and friends and are not intended to be inspected by user code.
93: .seealso: `DMPlexLandauCreateVelocitySpace()`, `LandauCtx`, `LandauDeviceType`, `LandauIdx`
94: S*/
95: typedef struct {
96: void *invJ; // nip*dim*dim
97: void *D; // nq*nb*dim
98: void *B; // nq*nb
99: void *alpha; // ns
100: void *beta; // ns
101: void *invMass; // ns
102: void *w; // nip
103: void *x; // nip
104: void *y; // nip
105: void *z; // nip
106: void *Eq_m; // ns - dynamic
107: void *f; // nip*Nf - dynamic (IP)
108: void *dfdx; // nip*Nf - dynamic (IP)
109: void *dfdy; // nip*Nf - dynamic (IP)
110: void *dfdz; // nip*Nf - dynamic (IP)
111: int dim_, ns_, nip_, nq_, nb_;
112: void *NCells; // remove and use elem_offset - TODO
113: void *species_offset; // for each grid, but same for all batched vertices
114: void *mat_offset; // for each grid, but same for all batched vertices
115: void *elem_offset; // for each grid, but same for all batched vertices
116: void *ip_offset; // for each grid, but same for all batched vertices
117: void *ipf_offset; // for each grid, but same for all batched vertices
118: void *ipfdf_data; // for each grid, but same for all batched vertices
119: void *maps; // for each grid, but same for all batched vertices
120: // COO
121: void *coo_elem_offsets;
122: void *coo_elem_point_offsets;
123: void *coo_elem_fullNb;
124: void *coo_vals;
125: void *lambdas;
126: LandauIdx coo_n_cellsTot;
127: LandauIdx coo_size;
128: LandauIdx coo_max_fullnb;
129: } LandauStaticData;
131: /*E
132: LandauOMPTimers - Identifiers for the timing slots kept in `LandauCtx.times[]` for the `DMPLEX` Landau collision operator
134: Values:
135: + `LANDAU_EX2_TSSOLVE` - total `TSSolve()` time of the Landau example
136: . `LANDAU_MATRIX_TOTAL` - total time spent constructing the Jacobian and mass matrices
137: . `LANDAU_OPERATOR` - time inside the Landau operator evaluation
138: . `LANDAU_JACOBIAN_COUNT` - number of Jacobian evaluations (stored as a count, reused as a timer slot)
139: . `LANDAU_JACOBIAN` - time inside Jacobian construction
140: . `LANDAU_MASS` - time inside mass-matrix construction
141: . `LANDAU_F_DF` - time evaluating the distribution function and its derivatives
142: . `LANDAU_KERNEL` - time inside the Landau collision kernel
143: . `KSP_FACTOR` - time inside the KSP factor stage when using a direct solver
144: . `KSP_SOLVE` - time inside `KSPSolve()`
145: - `LANDAU_NUM_TIMERS` - sentinel; equals the number of timer slots allocated in `LandauCtx`
147: Level: developer
149: .seealso: `LandauCtx`, `DMPlexLandauCreateVelocitySpace()`
150: E*/
151: typedef enum {
152: LANDAU_EX2_TSSOLVE,
153: LANDAU_MATRIX_TOTAL,
154: LANDAU_OPERATOR,
155: LANDAU_JACOBIAN_COUNT,
156: LANDAU_JACOBIAN,
157: LANDAU_MASS,
158: LANDAU_F_DF,
159: LANDAU_KERNEL,
160: KSP_FACTOR,
161: KSP_SOLVE,
162: LANDAU_NUM_TIMERS
163: } LandauOMPTimers;
165: /*S
166: LandauCtx - Application context for the `DMPLEX` Landau collision operator that records the species data, mesh configuration, AMR settings, batching parameters, and pre-computed static data needed to evaluate the operator
168: Level: intermediate
170: Note:
171: The context is created and managed by `DMPlexLandauCreateVelocitySpace()` and is attached to the returned `DM` as its application context. User code normally obtains it with `DMGetApplicationContext()` rather than constructing it directly.
173: .seealso: `DMPlexLandauCreateVelocitySpace()`, `DMPlexLandauDestroyVelocitySpace()`, `DMPlexLandauIFunction()`, `DMPlexLandauIJacobian()`,
174: `LandauStaticData`, `LandauDeviceType`, `LandauOMPTimers`
175: S*/
176: typedef struct {
177: PetscBool interpolate; /* Generate intermediate mesh elements */
178: PetscBool gpu_assembly;
179: MPI_Comm comm; /* global communicator to use for errors and diagnostics */
180: double times[LANDAU_NUM_TIMERS];
181: PetscBool use_matrix_mass;
182: /* FE */
183: PetscFE fe[LANDAU_MAX_SPECIES];
184: /* geometry */
185: PetscReal radius[LANDAU_MAX_GRIDS];
186: PetscReal radius_par[LANDAU_MAX_GRIDS];
187: PetscReal radius_perp[LANDAU_MAX_GRIDS];
188: PetscReal re_radius; /* RE: radius of refinement along v_perp=0, z>0 */
189: PetscReal vperp0_radius1; /* RE: radius of refinement along v_perp=0 */
190: PetscReal vperp0_radius2; /* RE: radius of refinement along v_perp=0 after origin AMR refinement */
191: PetscBool sphere;
192: PetscBool map_sphere;
193: PetscReal sphere_inner_radius_90degree[LANDAU_MAX_GRIDS];
194: PetscReal sphere_inner_radius_45degree[LANDAU_MAX_GRIDS];
195: PetscInt cells0[3];
196: /* AMR */
197: PetscBool use_p4est;
198: PetscInt numRERefine; /* RE: refinement along v_perp=0, z > 0 */
199: PetscInt nZRefine1; /* RE: origin refinement after v_perp=0 refinement */
200: PetscInt nZRefine2; /* RE: origin refinement after origin AMR refinement */
201: PetscInt numAMRRefine[LANDAU_MAX_GRIDS]; /* normal AMR - refine from origin */
202: PetscInt postAMRRefine[LANDAU_MAX_GRIDS]; /* uniform refinement of AMR */
203: PetscBool simplex;
204: char filename[PETSC_MAX_PATH_LEN];
205: PetscReal thermal_speed[LANDAU_MAX_GRIDS];
206: PetscBool sphere_uniform_normal;
207: /* relativistic */
208: PetscBool use_energy_tensor_trick;
209: PetscBool use_relativistic_corrections;
210: /* physics */
211: PetscReal thermal_temps[LANDAU_MAX_SPECIES];
212: PetscReal masses[LANDAU_MAX_SPECIES]; /* mass of each species */
213: PetscReal charges[LANDAU_MAX_SPECIES]; /* charge of each species */
214: PetscReal n[LANDAU_MAX_SPECIES]; /* number density of each species */
215: PetscReal m_0; /* reference mass */
216: PetscReal v_0; /* reference velocity */
217: PetscReal n_0; /* reference number density */
218: PetscReal t_0; /* reference time */
219: PetscReal Ez;
220: PetscReal epsilon0;
221: PetscReal k;
222: PetscReal lambdas[LANDAU_MAX_GRIDS][LANDAU_MAX_GRIDS];
223: PetscReal electronShift;
224: PetscInt num_species;
225: PetscInt num_grids;
226: PetscInt species_offset[LANDAU_MAX_GRIDS + 1]; // for each grid, but same for all batched vertices
227: PetscInt mat_offset[LANDAU_MAX_GRIDS + 1]; // for each grid, but same for all batched vertices
228: // batching
229: PetscBool jacobian_field_major_order; // this could be a type but lets not get pedantic
230: VecScatter plex_batch;
231: Vec work_vec;
232: IS batch_is;
233: PetscErrorCode (*seqaij_mult)(Mat, Vec, Vec);
234: PetscErrorCode (*seqaij_multtranspose)(Mat, Vec, Vec);
235: PetscErrorCode (*seqaij_solve)(Mat, Vec, Vec);
236: PetscErrorCode (*seqaij_getdiagonal)(Mat, Vec);
237: /* COO */
238: Mat J;
239: Mat M;
240: Vec X;
241: /* derived type */
242: void *data;
243: /* computing */
244: LandauDeviceType deviceType;
245: DM pack;
246: DM plex[LANDAU_MAX_GRIDS];
247: LandauStaticData SData_d; /* static geometric data on device */
248: /* diagnostics */
249: PetscInt verbose;
250: PetscLogEvent events[20];
251: PetscLogStage stage;
252: PetscObjectState norm_state;
253: PetscInt batch_sz;
254: PetscInt batch_view_idx;
255: } LandauCtx;
257: #define LANDAU_SPECIES_MAJOR
258: #if !defined(LANDAU_SPECIES_MAJOR)
259: #define LAND_PACK_IDX(_b, _g) (_b * ctx->num_grids + _g)
260: #define LAND_MOFFSET(_b, _g, _nbch, _ngrid, _mat_off) (_b * _mat_off[_ngrid] + _mat_off[_g])
261: #else
262: #define LAND_PACK_IDX(_b, _g) (_g * ctx->batch_sz + _b)
263: #define LAND_MOFFSET(_b, _g, _nbch, _ngrid, _mat_off) (_nbch * _mat_off[_g] + _b * (_mat_off[_g + 1] - _mat_off[_g]))
264: #endif
266: /*S
267: pointInterpolationP4est - One entry in the reduced quadrature-point map used by the `DMPLEX` Landau collision operator; pairs a global Landau matrix index with the weight that scales the contribution of the corresponding quadrature point
269: Level: developer
271: Note:
272: These records are arrays inside `P4estVertexMaps` and describe how multiple coincident quadrature points produced by the p4est-based AMR mesh are combined into a single entry of the Landau Jacobian.
274: .seealso: `LandauIdx`, `LandauCtx`, `LandauStaticData`, `DMPlexLandauCreateVelocitySpace()`
275: S*/
276: typedef struct {
277: PetscReal scale;
278: LandauIdx gid; // Landau matrix index (<10,000)
279: } pointInterpolationP4est;
280: typedef struct _lP4estVertexMaps {
281: LandauIdx (*gIdx)[LANDAU_MAX_SPECIES][LANDAU_MAX_NQND]; // #elems * LANDAU_MAX_NQND
282: LandauIdx num_elements;
283: LandauIdx num_reduced;
284: LandauIdx num_face; // (Q or Q^2 for 3D)
285: LandauDeviceType deviceType;
286: PetscInt Nf;
287: pointInterpolationP4est (*c_maps)[LANDAU_MAX_Q_FACE];
288: struct _lP4estVertexMaps *d_self;
289: void *vp1, *vp2, *vp3;
290: PetscInt numgrids;
291: } P4estVertexMaps;
293: #if defined(PETSC_HAVE_KOKKOS)
294: PETSC_EXTERN PetscErrorCode LandauKokkosJacobian(DM[], const PetscInt, const PetscInt, const PetscInt, const PetscInt, const PetscInt[], PetscReal[], PetscScalar[], const PetscScalar[], const LandauStaticData *, const PetscReal, const PetscLogEvent[], const PetscInt[], const PetscInt[], Mat[], Mat);
295: PETSC_EXTERN PetscErrorCode LandauKokkosCreateMatMaps(P4estVertexMaps *, pointInterpolationP4est (*)[LANDAU_MAX_Q_FACE], PetscInt[], PetscInt);
296: PETSC_EXTERN PetscErrorCode LandauKokkosDestroyMatMaps(P4estVertexMaps *, PetscInt);
297: PETSC_EXTERN PetscErrorCode LandauKokkosStaticDataSet(DM, const PetscInt, const PetscInt, const PetscInt, const PetscInt, PetscInt[], PetscInt[], PetscInt[], PetscReal[], PetscReal[], PetscReal[], PetscReal[], PetscReal[], PetscReal[], PetscReal[], PetscReal[], PetscReal[], LandauStaticData *);
298: PETSC_EXTERN PetscErrorCode LandauKokkosStaticDataClear(LandauStaticData *);
299: #endif