Actual source code: land_kernel.h
petsc-3.14.6 2021-03-30
1: #include "./land_tensors.h"
3: /* landau_inner_integral() */
4: /* Compute g2 and g3 for element, assemble into eleme matrix */
5: PETSC_DEVICE_FUNC_DECL void
6: landau_inner_integral(const PetscInt myQi, const PetscInt qi_inc, const PetscInt mySubBlk, const PetscInt nSubBlks, const PetscInt ip_start, const PetscInt ip_end, const PetscInt ip_stride, /* decomposition args, not discretization */
7: const PetscInt jpidx, const PetscInt Nf, const PetscInt dim, const PetscReal * const IPDataGlobal, const PetscReal wiGlobal[], const PetscReal invJj[],
8: const PetscReal nu_alpha[], const PetscReal nu_beta[], const PetscReal invMass[], const PetscReal Eq_m[], PetscBool quarter3DDomain,
9: const PetscInt Nq, const PetscInt Nb, const PetscInt qj_start, const PetscInt qj_end, const PetscReal * const BB, const PetscReal * const DD, PetscScalar *elemMat, /* discretization args; local output */
10: PetscReal g2[/* LANDAU_MAX_NQ */][LANDAU_MAX_SUB_THREAD_BLOCKS][LANDAU_MAX_SPECIES][LANDAU_DIM], PetscReal g3[/* LANDAU_MAX_NQ */][LANDAU_MAX_SUB_THREAD_BLOCKS][LANDAU_MAX_SPECIES][LANDAU_DIM][LANDAU_DIM] /* shared memory buffers */
11: , PetscInt myelem)
12: {
13: PetscReal gg2[LANDAU_MAX_SPECIES][LANDAU_DIM],gg3[LANDAU_MAX_SPECIES][LANDAU_DIM][LANDAU_DIM];
14: const PetscInt ipdata_sz = (dim + Nf*(1+dim));
15: PetscInt d,f,d2,dp,d3,fieldB,ipidx,fieldA;
16: const LandauPointData * const fplpt_j = (LandauPointData*)(IPDataGlobal + jpidx*ipdata_sz);
17: const PetscReal * const vj = fplpt_j->crd, wj = wiGlobal[jpidx];
18: // create g2 & g3
19: for (d=0;d<dim;d++) { // clear accumulation data D & K
20: for (f=0;f<Nf;f++) {
21: gg2[f][d] = 0;
22: for (d2=0;d2<dim;d2++) gg3[f][d][d2] = 0;
23: }
24: }
25: for (ipidx = ip_start; ipidx < ip_end; ipidx += ip_stride) {
26: const LandauPointData * const fplpt = (LandauPointData*)(IPDataGlobal + ipidx*ipdata_sz);
27: const LandauFDF * const fdf = &fplpt->fdf[0];
28: const PetscReal wi = wiGlobal[ipidx];
29: #if LANDAU_DIM==2
30: PetscReal Ud[2][2], Uk[2][2];
31: LandauTensor2D(vj, fplpt->crd[0], fplpt->crd[1], Ud, Uk, (ipidx==jpidx) ? 0. : 1.);
32: for (fieldA = 0; fieldA < Nf; ++fieldA) {
33: for (fieldB = 0; fieldB < Nf; ++fieldB) {
34: for (d2 = 0; d2 < 2; ++d2) {
35: for (d3 = 0; d3 < 2; ++d3) {
36: /* K = U * grad(f): g2=e: i,A */
37: gg2[fieldA][d2] += nu_alpha[fieldA]*nu_beta[fieldB] * invMass[fieldB] * Uk[d2][d3] * fdf[fieldB].df[d3] * wi;
38: /* D = -U * (I \kron (fx)): g3=f: i,j,A */
39: gg3[fieldA][d2][d3] -= nu_alpha[fieldA]*nu_beta[fieldB] * invMass[fieldA] * Ud[d2][d3] * fdf[fieldB].f * wi;
40: }
41: }
42: }
43: }
44: #else
45: PetscReal U[3][3];
46: if (!quarter3DDomain) {
47: LandauTensor3D(vj, fplpt->crd[0], fplpt->crd[1], fplpt->crd[2], U, (ipidx==jpidx) ? 0. : 1.);
48: for (fieldA = 0; fieldA < Nf; ++fieldA) {
49: for (fieldB = 0; fieldB < Nf; ++fieldB) {
50: for (d2 = 0; d2 < 3; ++d2) {
51: for (d3 = 0; d3 < 3; ++d3) {
52: /* K = U * grad(f): g2 = e: i,A */
53: gg2[fieldA][d2] += nu_alpha[fieldA]*nu_beta[fieldB] * invMass[fieldB] * U[d2][d3] * fplpt->fdf[fieldB].df[d3] * wi;
54: /* D = -U * (I \kron (fx)): g3 = f: i,j,A */
55: gg3[fieldA][d2][d3] -= nu_alpha[fieldA]*nu_beta[fieldB] * invMass[fieldA] * U[d2][d3] * fplpt->fdf[fieldB].f * wi;
56: }
57: }
58: }
59: }
60: } else {
61: PetscReal lxx[] = {fplpt->crd[0], fplpt->crd[1]}, R[2][2] = {{-1,1},{1,-1}};
62: PetscReal ldf[3*LANDAU_MAX_SPECIES];
63: for (fieldB = 0; fieldB < Nf; ++fieldB) for (d3 = 0; d3 < 3; ++d3) ldf[d3 + fieldB*3] = fplpt->fdf[fieldB].df[d3] * wi * invMass[fieldB];
64: for (dp=0;dp<4;dp++) {
65: LandauTensor3D(vj, lxx[0], lxx[1], fplpt->z, U, (ipidx==jpidx) ? 0. : 1.);
66: for (fieldA = 0; fieldA < Nf; ++fieldA) {
67: for (fieldB = 0; fieldB < Nf; ++fieldB) {
68: for (d2 = 0; d2 < 3; ++d2) {
69: for (d3 = 0; d3 < 3; ++d3) {
70: /* K = U * grad(f): g2 = e: i,A */
71: gg2[fieldA][d2] += nu_alpha[fieldA]*nu_beta[fieldB] * U[d2][d3] * ldf[d3 + fieldB*3];
72: /* D = -U * (I \kron (fx)): g3 = f: i,j,A */
73: gg3[fieldA][d2][d3] -= nu_alpha[fieldA]*nu_beta[fieldB] * invMass[fieldA] * U[d2][d3] * f[fieldB] * wi;
74: }
75: }
76: }
77: }
78: for (d3 = 0; d3 < 2; ++d3) {
79: lxx[d3] *= R[d3][dp%2];
80: for (fieldB = 0; fieldB < Nf; ++fieldB) {
81: ldf[d3 + fieldB*3] *= R[d3][dp%2];
82: }
83: }
84: }
85: }
86: #endif
87: } /* IPs */
88: /* add electric field term once per IP */
89: if (mySubBlk==0) {
90: for (fieldA = 0; fieldA < Nf; ++fieldA) {
91: gg2[fieldA][dim-1] += Eq_m[fieldA];
92: }
93: }
94: //intf("%d %d gg2[1][1]=%g\n",myelem,qj_start,gg2[1][dim-1]);
95: /* Jacobian transform - g2 */
96: for (fieldA = 0; fieldA < Nf; ++fieldA) {
97: for (d = 0; d < dim; ++d) {
98: g2[myQi][mySubBlk][fieldA][d] = 0.0;
99: for (d2 = 0; d2 < dim; ++d2) {
100: g2[myQi][mySubBlk][fieldA][d] += invJj[d*dim+d2]*gg2[fieldA][d2];
101: //printf("\t:g2[%d][%d][%d]=%g\n",fieldA,qj_start,d,g2[myQi][mySubBlk][fieldA][d]);
102: g3[myQi][mySubBlk][fieldA][d][d2] = 0.0;
103: for (d3 = 0; d3 < dim; ++d3) {
104: for (dp = 0; dp < dim; ++dp) {
105: g3[myQi][mySubBlk][fieldA][d][d2] += invJj[d*dim + d3]*gg3[fieldA][d3][dp]*invJj[d2*dim + dp];
106: //printf("\t\t\t:%d %d %d %d g3=%g\n",qj_start,fieldA,d,d2,g3[myQi][mySubBlk][fieldA][d][d2]);
107: }
108: }
109: g3[myQi][mySubBlk][fieldA][d][d2] *= wj;
110: }
111: g2[myQi][mySubBlk][fieldA][d] *= wj;
112: }
113: }
114: // Synchronize (ensure all the data is available) and sum g2 & g3
115: PETSC_THREAD_SYNC;
116: if (mySubBlk==0) { /* on one thread, sum up g2 & g3 (noop with one subblock) -- could parallelize! */
117: for (fieldA = 0; fieldA < Nf; ++fieldA) {
118: for (d = 0; d < dim; ++d) {
119: for (d3 = 1; d3 < nSubBlks; ++d3) {
120: g2[myQi][0][fieldA][d] += g2[myQi][d3][fieldA][d];
121: for (dp = 0; dp < dim; ++dp) {
122: g3[myQi][0][fieldA][d][dp] += g3[myQi][d3][fieldA][d][dp];
123: }
124: }
125: }
126: }
127: }
129: /* FE matrix construction */
130: PETSC_THREAD_SYNC; // Synchronize (ensure all the data is available) and sum IP matrices
131: {
132: PetscInt fieldA,d,f,qj,qj_0,d2,g,totDim=Nb*Nf;
133: /* assemble - on the diagonal (I,I) */
134: for (fieldA = mySubBlk; fieldA < Nf ; fieldA += nSubBlks) {
135: for (f = myQi; f < Nb ; f += qi_inc) { /* vectorizing here, maybe */
136: const PetscInt i = fieldA*Nb + f; /* Element matrix row */
137: for (g = 0; g < Nb; ++g) {
138: const PetscInt j = fieldA*Nb + g; /* Element matrix column */
139: const PetscInt fOff = i*totDim + j;
140: for (qj = qj_start, qj_0 = 0 ; qj < qj_end ; qj++, qj_0++) {
141: const PetscReal *BJq = &BB[qj*Nb], *DIq = &DD[qj*Nb*dim];
142: for (d = 0; d < dim; ++d) {
143: elemMat[fOff] += DIq[f*dim+d]*g2[qj_0][0][fieldA][d]*BJq[g];
144: //intf("\tmat[%d %d %d %d %d]=%g D[%d]=%g g2[%d][%d][%d]=%g B=%g\n", print, fOff,fieldA,qj,d, elemMat[fOff],f*dim+d,DIq[f*dim+d],fieldA,qj,d,g2[qj_0][0][fieldA][d],BJq[g]);
145: for (d2 = 0; d2 < dim; ++d2) {
146: elemMat[fOff] += DIq[f*dim + d]*g3[qj_0][0][fieldA][d][d2]*DIq[g*dim + d2];
147: }
148: }
149: }
150: }
151: }
152: }
153: }
154: }