Actual source code: ex62.c
petsc-3.3-p7 2013-05-11
1: static char help[] = "Stokes Problem in 2d and 3d with simplicial finite elements.\n\
2: We solve the Stokes problem in a rectangular\n\
3: domain, using a parallel unstructured mesh (DMCOMPLEX) to discretize it.\n\n\n";
5: /*
6: The isoviscous Stokes problem, which we discretize using the finite
7: element method on an unstructured mesh. The weak form equations are
9: < \nabla v, \nabla u + {\nabla u}^T > - < \nabla\cdot v, p > + < v, f > = 0
10: < q, \nabla\cdot v > = 0
12: We start with homogeneous Dirichlet conditions. We will expand this as the set
13: of test problems is developed.
15: Discretization:
17: We use a Python script to generate a tabulation of the finite element basis
18: functions at quadrature points, which we put in a C header file. The generic
19: command would be:
21: bin/pythonscripts/PetscGenerateFEMQuadrature.py dim order dim 1 laplacian dim order 1 1 gradient src/snes/examples/tutorials/ex62.h
23: We can currently generate an arbitrary order Lagrange element. The underlying
24: FIAT code is capable of handling more exotic elements, but these have not been
25: tested with this code.
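
     For instance, substituting concrete values into the generic pattern above for the
     P2-P1 velocity-pressure pair used below in 2D would give (an illustrative
     substitution only, not a command taken from this listing):

       bin/pythonscripts/PetscGenerateFEMQuadrature.py 2 2 2 1 laplacian 2 1 1 1 gradient src/snes/examples/tutorials/ex62.h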
27: Field Data:
29: Sieve data is organized by point, and the closure operation just stacks up the
30: data from each sieve point in the closure. Thus, for a P_2-P_1 Stokes element, we
31: have
33: cl{e} = {f e_0 e_1 e_2 v_0 v_1 v_2}
34: x = [u_{e_0} v_{e_0} u_{e_1} v_{e_1} u_{e_2} v_{e_2} u_{v_0} v_{v_0} p_{v_0} u_{v_1} v_{v_1} p_{v_1} u_{v_2} v_{v_2} p_{v_2}]
36: The problem here is that we would like to loop over each field separately for
37: integration. Therefore, the closure visitor in DMComplexVecGetClosure() reorders
38: the data so that each field is contiguous
40: x' = [u_{e_0} v_{e_0} u_{e_1} v_{e_1} u_{e_2} v_{e_2} u_{v_0} v_{v_0} u_{v_1} v_{v_1} u_{v_2} v_{v_2} p_{v_0} p_{v_1} p_{v_2}]
42: Likewise, DMComplexVecSetClosure() takes data partitioned by field, and correctly
43: puts it into the Sieve ordering.
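
     A minimal sketch of consuming the field-contiguous ordering x' on one cell, using
     only calls that appear in this file (the offsets are computed exactly as in
     ComputeError() below):

       const PetscScalar *x;
       PetscInt           off = 0, field;

       DMComplexVecGetClosure(dm, PETSC_NULL, localX, c, &x);
       for (field = 0; field < numFields; ++field) {
         PetscInt nd = user->q[field].numBasisFuncs*user->q[field].numComponents;
         /* x[off] .. x[off+nd-1] are the dofs of this field over cl{c}, in x' order */
         off += nd;
       }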
45: Next Steps:
47: - Refine and show convergence of correct order automatically (use femTest.py)
48: - Fix InitialGuess for arbitrary disc (means making dual application work again)
49: - Redo slides from GUCASTutorial for this new example
50: - Sparsify Jacobian
51: - How do we get sparsity? I think by chopping up elemMat into blocks, and setting individual blocks
52: - Maybe we just have MatSetClosure() handle this by ignoring blocks which do not interact
54: Possible new examples:
56: - Hexahedral Stokes example
57: - Improve DMDA conversion to get edges
58: - A Finite Volume problem
59: - Make new SNES F90 example that solves two-domain Laplace with different coefficient, reads from Exodus file
60: */
62: #include <petscdmcomplex.h>
63: #include <petscsnes.h>
65: /*------------------------------------------------------------------------------
66: This code can be generated using 'bin/pythonscripts/PetscGenerateFEMQuadrature.py dim order dim 1 laplacian dim order 1 1 gradient src/snes/examples/tutorials/ex62.h'
67: -----------------------------------------------------------------------------*/
68: #include "ex62.h"
70: const PetscInt numFields = 2;
71: const PetscInt numComponents = NUM_BASIS_COMPONENTS_0+NUM_BASIS_COMPONENTS_1;
73: typedef enum {NEUMANN, DIRICHLET} BCType;
74: typedef enum {RUN_FULL, RUN_TEST} RunType;
76: typedef struct {
77: DM dm; /* REQUIRED in order to use SNES evaluation functions */
78: PetscInt debug; /* The debugging level */
79: PetscMPIInt rank; /* The process rank */
80: PetscMPIInt numProcs; /* The number of processes */
81: RunType runType; /* Whether to run tests, or solve the full problem */
82: PetscBool jacobianMF; /* Whether to calculate the Jacobian action on the fly */
83: PetscLogEvent createMeshEvent, residualEvent, jacobianEvent, integrateResCPUEvent, integrateJacCPUEvent, integrateJacActionCPUEvent;
84: PetscBool showInitial, showResidual, showJacobian, showSolution;
85: /* Domain and mesh definition */
86: PetscInt dim; /* The topological mesh dimension */
87: PetscBool interpolate; /* Generate intermediate mesh elements */
88: PetscReal refinementLimit; /* The largest allowable cell volume */
89: char partitioner[2048]; /* The graph partitioner */
90: /* Element quadrature */
91: PetscQuadrature q[/*numFields*/2]; /* C89 Sucks Sucks Sucks Sucks */
92: /* GPU partitioning */
93: PetscInt numBatches; /* The number of cell batches per kernel */
94: PetscInt numBlocks; /* The number of concurrent blocks per kernel */
95: /* Problem definition */
96: /* C89 Sucks Sucks Sucks Sucks: numFields = 2, numComponents = NUM_BASIS_COMPONENTS_0+NUM_BASIS_COMPONENTS_1 */
97: void (*f0Funcs[2])(PetscScalar u[], const PetscScalar gradU[], PetscScalar f0[]); /* The f_0 functions f0_u(x,y,z), and f0_p(x,y,z) */
98: void (*f1Funcs[2])(PetscScalar u[], const PetscScalar gradU[], PetscScalar f1[]); /* The f_1 functions f1_u(x,y,z), and f1_p(x,y,z) */
99: void (*g0Funcs[2*2])(PetscScalar u[], const PetscScalar gradU[], PetscScalar g0[]); /* The g_0 functions g0_uu(x,y,z), g0_up(x,y,z), g0_pu(x,y,z), and g0_pp(x,y,z) */
100: void (*g1Funcs[2*2])(PetscScalar u[], const PetscScalar gradU[], PetscScalar g1[]); /* The g_1 functions g1_uu(x,y,z), g1_up(x,y,z), g1_pu(x,y,z), and g1_pp(x,y,z) */
101: void (*g2Funcs[2*2])(PetscScalar u[], const PetscScalar gradU[], PetscScalar g2[]); /* The g_2 functions g2_uu(x,y,z), g2_up(x,y,z), g2_pu(x,y,z), and g2_pp(x,y,z) */
102: void (*g3Funcs[2*2])(PetscScalar u[], const PetscScalar gradU[], PetscScalar g3[]); /* The g_3 functions g3_uu(x,y,z), g3_up(x,y,z), g3_pu(x,y,z), and g3_pp(x,y,z) */
103: PetscScalar (*exactFuncs[NUM_BASIS_COMPONENTS_0+NUM_BASIS_COMPONENTS_1])(const PetscReal x[]); /* The exact solution function u(x,y,z), v(x,y,z), and p(x,y,z) */
104: BCType bcType; /* The type of boundary conditions */
105: } AppCtx;
107: typedef struct {
108: DM dm;
109: Vec u; /* The base vector for the Jacobian action J(u) x */
110: Mat J; /* Preconditioner for testing */
111: AppCtx *user;
112: } JacActionCtx;
114: PetscScalar zero(const PetscReal coords[]) {
115: return 0.0;
116: }
118: /*
119: In 2D we use exact solution:
121: u = x^2 + y^2
122: v = 2 x^2 - 2xy
123: p = x + y - 1
124: f_x = f_y = 3
126: so that
128: -\Delta u + \nabla p + f = <-4, -4> + <1, 1> + <3, 3> = 0
129: \nabla \cdot u = 2x - 2x = 0
130: */
131: PetscScalar quadratic_u_2d(const PetscReal x[]) {
132: return x[0]*x[0] + x[1]*x[1];
133: };
135: PetscScalar quadratic_v_2d(const PetscReal x[]) {
136: return 2.0*x[0]*x[0] - 2.0*x[0]*x[1];
137: };
139: PetscScalar linear_p_2d(const PetscReal x[]) {
140: return x[0] + x[1] - 1.0;
141: };
143: void f0_u(PetscScalar u[], const PetscScalar gradU[], PetscScalar f0[]) {
144: const PetscInt Ncomp = NUM_BASIS_COMPONENTS_0;
145: PetscInt comp;
147: for(comp = 0; comp < Ncomp; ++comp) {
148: f0[comp] = 3.0;
149: }
150: }
152: /* gradU[comp*dim+d] = {u_x, u_y, v_x, v_y} or {u_x, u_y, u_z, v_x, v_y, v_z, w_x, w_y, w_z}
153: u[Ncomp] = {p} */
154: void f1_u(PetscScalar u[], const PetscScalar gradU[], PetscScalar f1[]) {
155: const PetscInt dim = SPATIAL_DIM_0;
156: const PetscInt Ncomp = NUM_BASIS_COMPONENTS_0;
157: PetscInt comp, d;
159: for(comp = 0; comp < Ncomp; ++comp) {
160: for(d = 0; d < dim; ++d) {
161: /* f1[comp*dim+d] = 0.5*(gradU[comp*dim+d] + gradU[d*dim+comp]); */
162: f1[comp*dim+d] = gradU[comp*dim+d];
163: }
164: f1[comp*dim+comp] -= u[Ncomp];
165: }
166: }
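/* For orientation: in 2D (dim = Ncomp = 2) the loops above fill f1 row-major as

     f1 = [ u_x - p   u_y
            v_x       v_y - p ]

   i.e. the unsymmetrized gradient (the symmetric variant is shown commented out above)
   with -p subtracted on the diagonal, which yields the <\nabla v, \nabla u> and
   -<\nabla\cdot v, p> terms of the weak form after contraction with \nabla v. */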
168: /* gradU[comp*dim+d] = {u_x, u_y, v_x, v_y} or {u_x, u_y, u_z, v_x, v_y, v_z, w_x, w_y, w_z} */
169: void f0_p(PetscScalar u[], const PetscScalar gradU[], PetscScalar f0[]) {
170: const PetscInt dim = SPATIAL_DIM_0;
171: PetscInt d;
173: f0[0] = 0.0;
174: for(d = 0; d < dim; ++d) {
175: f0[0] += gradU[d*dim+d];
176: }
177: }
179: void f1_p(PetscScalar u[], const PetscScalar gradU[], PetscScalar f1[]) {
180: const PetscInt dim = SPATIAL_DIM_0;
181: PetscInt d;
183: for(d = 0; d < dim; ++d) {
184: f1[d] = 0.0;
185: }
186: }
188: /* < q, \nabla\cdot v >
189: NcompI = 1, NcompJ = dim */
190: void g1_pu(PetscScalar u[], const PetscScalar gradU[], PetscScalar g1[]) {
191: const PetscInt dim = SPATIAL_DIM_0;
192: PetscInt d;
194: for(d = 0; d < dim; ++d) {
195: g1[d*dim+d] = 1.0; /* \frac{\partial\phi^{u_d}}{\partial x_d} */
196: }
197: }
199: /* -< \nabla\cdot v, p >
200: NcompI = dim, NcompJ = 1 */
201: void g2_up(PetscScalar u[], const PetscScalar gradU[], PetscScalar g2[]) {
202: const PetscInt dim = SPATIAL_DIM_0;
203: PetscInt d;
205: for(d = 0; d < dim; ++d) {
206: g2[d*dim+d] = -1.0; /* \frac{\partial\psi^{u_d}}{\partial x_d} */
207: }
208: }
210: /* < \nabla v, \nabla u + {\nabla u}^T >
211: This just gives \nabla u; the off-diagonal entries needed for the transpose {\nabla u}^T are omitted here */
212: void g3_uu(PetscScalar u[], const PetscScalar gradU[], PetscScalar g3[]) {
213: const PetscInt dim = SPATIAL_DIM_0;
214: const PetscInt Ncomp = NUM_BASIS_COMPONENTS_0;
215: PetscInt compI, d;
217: for(compI = 0; compI < Ncomp; ++compI) {
218: for(d = 0; d < dim; ++d) {
219: g3[((compI*Ncomp+compI)*dim+d)*dim+d] = 1.0;
220: }
221: }
222: }
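/* Unrolling the g3_uu loops for 2D (Ncomp = dim = 2) sets exactly four entries:

     g3[((0*2+0)*2+0)*2+0] = 1.0;  g3[((0*2+0)*2+1)*2+1] = 1.0;
     g3[((1*2+1)*2+0)*2+0] = 1.0;  g3[((1*2+1)*2+1)*2+1] = 1.0;

   so the uu block of the Jacobian reduces to a component-wise Laplacian; the transpose
   part of the residual term is not represented here (see the comment above g3_uu). */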
224: /*
225: In 3D we use exact solution:
227: u = x^2 + y^2
228: v = y^2 + z^2
229: w = x^2 + y^2 - 2(x+y)z
230: p = x + y + z - 3/2
231: f_x = f_y = f_z = 3
233: so that
235: -\Delta u + \nabla p + f = <-4, -4, -4> + <1, 1, 1> + <3, 3, 3> = 0
236: \nabla \cdot u = 2x + 2y - 2(x + y) = 0
237: */
238: PetscScalar quadratic_u_3d(const PetscReal x[]) {
239: return x[0]*x[0] + x[1]*x[1];
240: };
242: PetscScalar quadratic_v_3d(const PetscReal x[]) {
243: return x[1]*x[1] + x[2]*x[2];
244: };
246: PetscScalar quadratic_w_3d(const PetscReal x[]) {
247: return x[0]*x[0] + x[1]*x[1] - 2.0*(x[0] + x[1])*x[2];
248: };
250: PetscScalar linear_p_3d(const PetscReal x[]) {
251: return x[0] + x[1] + x[2] - 1.5;
252: };
256: PetscErrorCode ProcessOptions(MPI_Comm comm, AppCtx *options) {
257: const char *bcTypes[2] = {"neumann", "dirichlet"};
258: const char *runTypes[2] = {"full", "test"};
259: PetscInt bc, run;
263: options->debug = 0;
264: options->runType = RUN_FULL;
265: options->dim = 2;
266: options->interpolate = PETSC_FALSE;
267: options->refinementLimit = 0.0;
268: options->bcType = DIRICHLET;
269: options->numBatches = 1;
270: options->numBlocks = 1;
271: options->jacobianMF = PETSC_FALSE;
272: options->showInitial = PETSC_FALSE;
273: options->showResidual = PETSC_FALSE;
274: options->showJacobian = PETSC_FALSE;
275: options->showSolution = PETSC_TRUE;
277: MPI_Comm_size(comm, &options->numProcs);
278: MPI_Comm_rank(comm, &options->rank);
279: PetscOptionsBegin(comm, "", "Stokes Problem Options", "DMCOMPLEX");
280: PetscOptionsInt("-debug", "The debugging level", "ex62.c", options->debug, &options->debug, PETSC_NULL);
281: run = options->runType;
282: PetscOptionsEList("-run_type", "The run type", "ex62.c", runTypes, 2, runTypes[options->runType], &run, PETSC_NULL);
283: options->runType = (RunType) run;
284: PetscOptionsInt("-dim", "The topological mesh dimension", "ex62.c", options->dim, &options->dim, PETSC_NULL);
285: PetscOptionsBool("-interpolate", "Generate intermediate mesh elements", "ex62.c", options->interpolate, &options->interpolate, PETSC_NULL);
286: PetscOptionsReal("-refinement_limit", "The largest allowable cell volume", "ex62.c", options->refinementLimit, &options->refinementLimit, PETSC_NULL);
287: PetscStrcpy(options->partitioner, "chaco");
288: PetscOptionsString("-partitioner", "The graph partitioner", "ex62.c", options->partitioner, options->partitioner, 2048, PETSC_NULL);
289: bc = options->bcType;
290: PetscOptionsEList("-bc_type","Type of boundary condition","ex62.c",bcTypes,2,bcTypes[options->bcType],&bc,PETSC_NULL);
291: options->bcType = (BCType) bc;
292: PetscOptionsInt("-gpu_batches", "The number of cell batches per kernel", "ex62.c", options->numBatches, &options->numBatches, PETSC_NULL);
293: PetscOptionsInt("-gpu_blocks", "The number of concurrent blocks per kernel", "ex62.c", options->numBlocks, &options->numBlocks, PETSC_NULL);
294: PetscOptionsBool("-jacobian_mf", "Calculate the action of the Jacobian on the fly", "ex62.c", options->jacobianMF, &options->jacobianMF, PETSC_NULL);
295: PetscOptionsBool("-show_initial", "Output the initial guess for verification", "ex62.c", options->showInitial, &options->showInitial, PETSC_NULL);
296: PetscOptionsBool("-show_residual", "Output the residual for verification", "ex62.c", options->showResidual, &options->showResidual, PETSC_NULL);
297: PetscOptionsBool("-show_jacobian", "Output the Jacobian for verification", "ex62.c", options->showJacobian, &options->showJacobian, PETSC_NULL);
298: PetscOptionsBool("-show_solution", "Output the solution for verification", "ex62.c", options->showSolution, &options->showSolution, PETSC_NULL);
299: PetscOptionsEnd();
301: PetscLogEventRegister("CreateMesh", DM_CLASSID, &options->createMeshEvent);
302: PetscLogEventRegister("Residual", SNES_CLASSID, &options->residualEvent);
303: PetscLogEventRegister("IntegResBatchCPU", SNES_CLASSID, &options->integrateResCPUEvent);
304: PetscLogEventRegister("IntegJacBatchCPU", SNES_CLASSID, &options->integrateJacCPUEvent);
305: PetscLogEventRegister("IntegJacActBatchCPU", SNES_CLASSID, &options->integrateJacActionCPUEvent);
306: PetscLogEventRegister("Jacobian", SNES_CLASSID, &options->jacobianEvent);
307: return(0);
308: };
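/* An illustrative invocation exercising the options registered above (the binary name
   ex62 and the particular values are assumptions, not taken from this listing):

     ./ex62 -dim 2 -interpolate -refinement_limit 0.01 -bc_type dirichlet \
            -gpu_batches 2 -show_solution -show_residual
*/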
312: PetscErrorCode VecChop(Vec v, PetscReal tol)
313: {
314: PetscScalar *a;
315: PetscInt n, i;
319: VecGetLocalSize(v, &n);
320: VecGetArray(v, &a);
321: for(i = 0; i < n; ++i) {
322: if (PetscAbsScalar(a[i]) < tol) a[i] = 0.0;
323: }
324: VecRestoreArray(v, &a);
325: return(0);
326: }
330: PetscErrorCode MatChop(Mat A, PetscReal tol)
331: {
332: PetscScalar *newVals;
333: PetscInt *newCols;
334: PetscInt rStart, rEnd, numRows, maxRows, r, colMax = 0;
338: MatGetOwnershipRange(A, &rStart, &rEnd);
339: for(r = rStart; r < rEnd; ++r) {
340: PetscInt ncols;
342: MatGetRow(A, r, &ncols, PETSC_NULL, PETSC_NULL);
343: colMax = PetscMax(colMax, ncols);
344: MatRestoreRow(A, r, &ncols, PETSC_NULL, PETSC_NULL);
345: }
346: numRows = rEnd - rStart;
347: MPI_Allreduce(&numRows, &maxRows, 1, MPIU_INT, MPI_MAX, PETSC_COMM_WORLD);
348: PetscMalloc2(colMax,PetscInt,&newCols,colMax,PetscScalar,&newVals);
349: for(r = rStart; r < rStart+maxRows; ++r) {
350: const PetscScalar *vals;
351: const PetscInt *cols;
352: PetscInt ncols, c;
354: if (r < rEnd) {
355: MatGetRow(A, r, &ncols, &cols, &vals);
356: for(c = 0; c < ncols; ++c) {
357: newCols[c] = cols[c];
358: newVals[c] = PetscAbsScalar(vals[c]) < tol ? 0.0 : vals[c];
359: }
360: MatRestoreRow(A, r, &ncols, &cols, &vals);
361: MatSetValues(A, 1, &r, ncols, newCols, newVals, INSERT_VALUES);
362: }
363: MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);
364: MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);
365: }
366: PetscFree2(newCols,newVals);
367: return(0);
368: }
372: PetscErrorCode CreateMesh(MPI_Comm comm, AppCtx *user, DM *dm)
373: {
374: PetscInt dim = user->dim;
375: PetscBool interpolate = user->interpolate;
376: PetscReal refinementLimit = user->refinementLimit;
377: const char *partitioner = user->partitioner;
381: PetscLogEventBegin(user->createMeshEvent,0,0,0,0);
382: DMComplexCreateBoxMesh(comm, dim, interpolate, dm);
383: {
384: DM refinedMesh = PETSC_NULL;
385: DM distributedMesh = PETSC_NULL;
387: /* Refine mesh using a volume constraint */
388: DMComplexSetRefinementLimit(*dm, refinementLimit);
389: DMRefine(*dm, comm, &refinedMesh);
390: if (refinedMesh) {
391: DMDestroy(dm);
392: *dm = refinedMesh;
393: }
394: /* Distribute mesh over processes */
395: DMComplexDistribute(*dm, partitioner, &distributedMesh);
396: if (distributedMesh) {
397: DMDestroy(dm);
398: *dm = distributedMesh;
399: }
400: }
401: DMSetFromOptions(*dm);
402: PetscLogEventEnd(user->createMeshEvent,0,0,0,0);
403: user->dm = *dm;
404: return(0);
405: }
409: PetscErrorCode SetupQuadrature(AppCtx *user) {
411: user->q[0].numQuadPoints = NUM_QUADRATURE_POINTS_0;
412: user->q[0].quadPoints = points_0;
413: user->q[0].quadWeights = weights_0;
414: user->q[0].numBasisFuncs = NUM_BASIS_FUNCTIONS_0;
415: user->q[0].numComponents = NUM_BASIS_COMPONENTS_0;
416: user->q[0].basis = Basis_0;
417: user->q[0].basisDer = BasisDerivatives_0;
418: user->q[1].numQuadPoints = NUM_QUADRATURE_POINTS_1;
419: user->q[1].quadPoints = points_1;
420: user->q[1].quadWeights = weights_1;
421: user->q[1].numBasisFuncs = NUM_BASIS_FUNCTIONS_1;
422: user->q[1].numComponents = NUM_BASIS_COMPONENTS_1;
423: user->q[1].basis = Basis_1;
424: user->q[1].basisDer = BasisDerivatives_1;
425: return(0);
426: }
430: /*
431: There is a problem here with uninterpolated meshes. The index in numDof[] is not dimension in this case,
432: but sieve depth.
433: */
434: PetscErrorCode SetupSection(DM dm, AppCtx *user) {
435: PetscSection section;
436: PetscInt dim = user->dim;
437: PetscInt numBC = 0;
438: PetscInt numComp[/*numFields*/2] = {NUM_BASIS_COMPONENTS_0, NUM_BASIS_COMPONENTS_1}; /* C89 Sucks Sucks Sucks Sucks */
439: PetscInt bcFields[1] = {0};
440: IS bcPoints[1] = {PETSC_NULL};
441: PetscInt numDof[/*numFields*/2*(SPATIAL_DIM_0+1)]; /* C89 Sucks Sucks Sucks Sucks */
442: PetscInt f, d;
446: if (dim != SPATIAL_DIM_0) SETERRQ2(((PetscObject) dm)->comm, PETSC_ERR_ARG_SIZ, "Spatial dimension %d should be %d", dim, SPATIAL_DIM_0);
447: if (dim != SPATIAL_DIM_1) SETERRQ2(((PetscObject) dm)->comm, PETSC_ERR_ARG_SIZ, "Spatial dimension %d should be %d", dim, SPATIAL_DIM_1);
448: for(d = 0; d <= dim; ++d) {
449: numDof[0*(dim+1)+d] = numDof_0[d];
450: numDof[1*(dim+1)+d] = numDof_1[d];
451: }
452: for(f = 0; f < numFields; ++f) {
453: for(d = 1; d < dim; ++d) {
454: if ((numDof[f*(dim+1)+d] > 0) && !user->interpolate) SETERRQ(((PetscObject) dm)->comm, PETSC_ERR_ARG_WRONG, "Mesh must be interpolated when unknowns are specified on edges or faces.");
455: }
456: }
457: if (user->bcType == DIRICHLET) {
458: numBC = 1;
459: DMComplexGetStratumIS(dm, "marker", 1, &bcPoints[0]);
460: }
461: DMComplexCreateSection(dm, dim, numFields, numComp, numDof, numBC, bcFields, bcPoints, &section);
462: PetscSectionSetFieldName(section, 0, "velocity");
463: PetscSectionSetFieldName(section, 1, "pressure");
464: DMSetDefaultSection(dm, section);
465: if (user->bcType == DIRICHLET) {
466: ISDestroy(&bcPoints[0]);
467: }
468: return(0);
469: }
473: PetscErrorCode SetupExactSolution(AppCtx *user) {
475: user->f0Funcs[0] = f0_u;
476: user->f0Funcs[1] = f0_p;
477: user->f1Funcs[0] = f1_u;
478: user->f1Funcs[1] = f1_p;
479: user->g0Funcs[0] = PETSC_NULL;
480: user->g0Funcs[1] = PETSC_NULL;
481: user->g0Funcs[2] = PETSC_NULL;
482: user->g0Funcs[3] = PETSC_NULL;
483: user->g1Funcs[0] = PETSC_NULL;
484: user->g1Funcs[1] = PETSC_NULL;
485: user->g1Funcs[2] = g1_pu; /* < q, \nabla\cdot v > */
486: user->g1Funcs[3] = PETSC_NULL;
487: user->g2Funcs[0] = PETSC_NULL;
488: user->g2Funcs[1] = g2_up; /* -< \nabla\cdot v, p > */
489: user->g2Funcs[2] = PETSC_NULL;
490: user->g2Funcs[3] = PETSC_NULL;
491: user->g3Funcs[0] = g3_uu; /* < \nabla v, \nabla u + {\nabla u}^T > */
492: user->g3Funcs[1] = PETSC_NULL;
493: user->g3Funcs[2] = PETSC_NULL;
494: user->g3Funcs[3] = PETSC_NULL;
495: switch(user->dim) {
496: case 2:
497: user->exactFuncs[0] = quadratic_u_2d;
498: user->exactFuncs[1] = quadratic_v_2d;
499: user->exactFuncs[2] = linear_p_2d;
500: break;
501: case 3:
502: user->exactFuncs[0] = quadratic_u_3d;
503: user->exactFuncs[1] = quadratic_v_3d;
504: user->exactFuncs[2] = quadratic_w_3d;
505: user->exactFuncs[3] = linear_p_3d;
506: break;
507: default:
508: SETERRQ1(PETSC_COMM_WORLD, PETSC_ERR_ARG_OUTOFRANGE, "Invalid dimension %d", user->dim);
509: }
510: return(0);
511: }
515: PetscErrorCode ComputeError(Vec X, PetscReal *error, AppCtx *user) {
516: PetscScalar (**exactFuncs)(const PetscReal []) = user->exactFuncs;
517: const PetscInt debug = user->debug;
518: const PetscInt dim = user->dim;
519: Vec localX;
520: PetscReal *coords, *v0, *J, *invJ, detJ;
521: PetscReal localError = 0.0;
522: PetscInt cStart, cEnd, c, field, fieldOffset, comp;
523: PetscErrorCode ierr;
526: DMGetLocalVector(user->dm, &localX);
527: DMGlobalToLocalBegin(user->dm, X, INSERT_VALUES, localX);
528: DMGlobalToLocalEnd(user->dm, X, INSERT_VALUES, localX);
529: PetscMalloc4(dim,PetscReal,&coords,dim,PetscReal,&v0,dim*dim,PetscReal,&J,dim*dim,PetscReal,&invJ);
530: DMComplexGetHeightStratum(user->dm, 0, &cStart, &cEnd);
531: for(c = cStart; c < cEnd; ++c) {
532: const PetscScalar *x;
533: PetscReal elemError = 0.0;
535: DMComplexComputeCellGeometry(user->dm, c, v0, J, invJ, &detJ);
536: if (detJ <= 0.0) SETERRQ2(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Invalid determinant %g for element %d", detJ, c);
537: DMComplexVecGetClosure(user->dm, PETSC_NULL, localX, c, &x);
539: for(field = 0, comp = 0, fieldOffset = 0; field < numFields; ++field) {
540: const PetscInt numQuadPoints = user->q[field].numQuadPoints;
541: const PetscReal *quadPoints = user->q[field].quadPoints;
542: const PetscReal *quadWeights = user->q[field].quadWeights;
543: const PetscInt numBasisFuncs = user->q[field].numBasisFuncs;
544: const PetscInt numBasisComps = user->q[field].numComponents;
545: const PetscReal *basis = user->q[field].basis;
546: PetscInt q, d, e, fc, f;
548: if (debug) {
549: char title[1024];
550: PetscSNPrintf(title, 1023, "Solution for Field %d", field);
551: DMPrintCellVector(c, title, numBasisFuncs*numBasisComps, &x[fieldOffset]);
552: }
553: for(q = 0; q < numQuadPoints; ++q) {
554: for(d = 0; d < dim; d++) {
555: coords[d] = v0[d];
556: for(e = 0; e < dim; e++) {
557: coords[d] += J[d*dim+e]*(quadPoints[q*dim+e] + 1.0);
558: }
559: }
560: for(fc = 0; fc < numBasisComps; ++fc) {
561: const PetscScalar funcVal = (*exactFuncs[comp+fc])(coords);
562: PetscReal interpolant = 0.0;
563: for(f = 0; f < numBasisFuncs; ++f) {
564: const PetscInt fidx = f*numBasisComps+fc;
565: interpolant += x[fieldOffset+fidx]*basis[q*numBasisFuncs*numBasisComps+fidx];
566: }
567: if (debug) {PetscPrintf(PETSC_COMM_SELF, " elem %d field %d error %g\n", c, field, PetscSqr(interpolant - funcVal)*quadWeights[q]*detJ);}
568: elemError += PetscSqr(interpolant - funcVal)*quadWeights[q]*detJ;
569: }
570: }
571: comp += numBasisComps;
572: fieldOffset += numBasisFuncs*numBasisComps;
573: }
574: if (debug) {PetscPrintf(PETSC_COMM_SELF, " elem %d error %g\n", c, elemError);}
575: localError += elemError;
576: }
577: PetscFree4(coords,v0,J,invJ);
578: DMRestoreLocalVector(user->dm, &localX);
579: MPI_Allreduce(&localError, error, 1, MPIU_REAL, MPI_SUM, PETSC_COMM_WORLD);
580: *error = PetscSqrtReal(*error);
581: return(0);
582: }
586: /*
587: DMComputeVertexFunction - This calls a function with the coordinates of each vertex, and stores the result in a vector.
589: Input Parameters:
590: + dm - The DM
591: . mode - The insertion mode for values
592: . numComp - The number of components (functions)
593: - func - The coordinate functions to evaluate
595: Output Parameter:
596: . X - vector
597: */
598: PetscErrorCode DMComputeVertexFunction(DM dm, InsertMode mode, Vec X, PetscInt numComp, PetscScalar (**funcs)(const PetscReal []), AppCtx *user)
599: {
600: Vec localX, coordinates;
601: PetscSection section, cSection;
602: PetscInt vStart, vEnd, v, c;
603: PetscScalar *values;
607: DMGetLocalVector(dm, &localX);
608: DMComplexGetDepthStratum(dm, 0, &vStart, &vEnd);
609: DMGetDefaultSection(dm, &section);
610: DMComplexGetCoordinateSection(dm, &cSection);
611: DMComplexGetCoordinateVec(dm, &coordinates);
612: PetscMalloc(numComp * sizeof(PetscScalar), &values);
613: for(v = vStart; v < vEnd; ++v) {
614: PetscScalar *coords;
616: VecGetValuesSection(coordinates, cSection, v, &coords);
617: for(c = 0; c < numComp; ++c) {
618: values[c] = (*funcs[c])(coords);
619: }
620: VecSetValuesSection(localX, section, v, values, mode);
621: }
622: /* Temporary, must be replaced by a projection on the finite element basis */
623: {
624: PetscScalar *coordsE;
625: PetscInt eStart = 0, eEnd = 0, e, depth, dim;
627: PetscSectionGetDof(cSection, vStart, &dim);
628: DMComplexGetLabelSize(dm, "depth", &depth);
629: --depth;
630: if (depth > 1) {DMComplexGetDepthStratum(dm, 1, &eStart, &eEnd);}
631: PetscMalloc(dim * sizeof(PetscScalar),&coordsE);
632: for(e = eStart; e < eEnd; ++e) {
633: const PetscInt *cone;
634: PetscInt coneSize, d;
635: PetscScalar *coordsA, *coordsB;
637: DMComplexGetConeSize(dm, e, &coneSize);
638: DMComplexGetCone(dm, e, &cone);
639: if (coneSize != 2) SETERRQ2(((PetscObject) dm)->comm, PETSC_ERR_ARG_SIZ, "Cone size %d for point %d should be 2", coneSize, e);
640: VecGetValuesSection(coordinates, cSection, cone[0], &coordsA);
641: VecGetValuesSection(coordinates, cSection, cone[1], &coordsB);
642: for(d = 0; d < dim; ++d) {
643: coordsE[d] = 0.5*(coordsA[d] + coordsB[d]);
644: }
645: for(c = 0; c < numComp; ++c) {
646: values[c] = (*funcs[c])(coordsE);
647: }
648: VecSetValuesSection(localX, section, e, values, mode);
649: }
650: PetscFree(coordsE);
651: }
653: PetscFree(values);
654: if (user->showInitial) {
655: PetscInt p;
657: PetscPrintf(PETSC_COMM_WORLD, "Local function\n");
658: for(p = 0; p < user->numProcs; ++p) {
659: if (p == user->rank) {VecView(localX, PETSC_VIEWER_STDOUT_SELF);}
660: PetscBarrier((PetscObject) dm);
661: }
662: }
663: DMLocalToGlobalBegin(dm, localX, mode, X);
664: DMLocalToGlobalEnd(dm, localX, mode, X);
665: DMRestoreLocalVector(dm, &localX);
666: #if 0
667: const PetscInt localDof = this->_mesh->sizeWithBC(s, *cells->begin());
668: PetscReal detJ;
670: PetscMalloc(localDof * sizeof(PetscScalar), &values);
671: PetscMalloc2(dim,PetscReal,&v0,dim*dim,PetscReal,&J);
672: ALE::ISieveVisitor::PointRetriever<PETSC_MESH_TYPE::sieve_type> pV((int) pow(this->_mesh->getSieve()->getMaxConeSize(), dim+1)+1, true);
674: for(PetscInt c = cStart; c < cEnd; ++c) {
675: ALE::ISieveTraversal<PETSC_MESH_TYPE::sieve_type>::orientedClosure(*this->_mesh->getSieve(), c, pV);
676: const PETSC_MESH_TYPE::point_type *oPoints = pV.getPoints();
677: const int oSize = pV.getSize();
678: int v = 0;
680: DMComplexComputeCellGeometry(dm, c, v0, J, PETSC_NULL, &detJ);
681: for(PetscInt cl = 0; cl < oSize; ++cl) {
682: const PetscInt fDim;
684: PetscSectionGetDof(oPoints[cl], &fDim);
685: if (pointDim) {
686: for(PetscInt d = 0; d < fDim; ++d, ++v) {
687: values[v] = (*this->_options.integrate)(v0, J, v, initFunc);
688: }
689: }
690: }
691: DMComplexVecSetClosure(dm, PETSC_NULL, localX, c, values);
692: pV.clear();
693: }
694: PetscFree2(v0,J);
695: PetscFree(values);
696: #endif
697: return(0);
698: }
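/* A minimal usage sketch consistent with the signature above; the vector name u is
   illustrative and the actual driver call is not part of this excerpt:

     Vec u;

     DMCreateGlobalVector(user->dm, &u);
     DMComputeVertexFunction(user->dm, INSERT_VALUES, u, numComponents, user->exactFuncs, user);
*/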
702: PetscErrorCode CreatePressureNullSpace(DM dm, AppCtx *user, MatNullSpace *nullSpace) {
703: Vec pressure, localP;
707: DMGetGlobalVector(dm, &pressure);
708: DMGetLocalVector(dm, &localP);
709: VecSet(pressure, 0.0);
710: /* Put a constant in for all pressures
711: Could change this to project the constant function onto the pressure space (when that is finished) */
712: {
713: PetscSection section;
714: PetscInt pStart, pEnd, p;
715: PetscScalar *a;
717: DMGetDefaultSection(dm, &section);
718: PetscSectionGetChart(section, &pStart, &pEnd);
719: VecGetArray(localP, &a);
720: for(p = pStart; p < pEnd; ++p) {
721: PetscInt fDim, off, d;
723: PetscSectionGetFieldDof(section, p, 1, &fDim);
724: PetscSectionGetFieldOffset(section, p, 1, &off);
725: for(d = 0; d < fDim; ++d) {
726: a[off+d] = 1.0;
727: }
728: }
729: VecRestoreArray(localP, &a);
730: }
731: DMLocalToGlobalBegin(dm, localP, INSERT_VALUES, pressure);
732: DMLocalToGlobalEnd(dm, localP, INSERT_VALUES, pressure);
733: DMRestoreLocalVector(dm, &localP);
734: VecNormalize(pressure, PETSC_NULL);
735: if (user->debug) {
736: PetscPrintf(((PetscObject) dm)->comm, "Pressure Null Space\n");
737: VecView(pressure, PETSC_VIEWER_STDOUT_WORLD);
738: }
739: MatNullSpaceCreate(((PetscObject) dm)->comm, PETSC_FALSE, 1, &pressure, nullSpace);
740: DMRestoreGlobalVector(dm, &pressure);
741: return(0);
742: }
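/* Sketch of attaching the null space built above to the linear solver. KSPSetNullSpace()
   is the petsc-3.3 interface; the SNES object snes is assumed, and whether the
   (truncated) driver does exactly this is not shown in this excerpt:

     MatNullSpace nullSpace;
     KSP          ksp;

     CreatePressureNullSpace(dm, user, &nullSpace);
     SNESGetKSP(snes, &ksp);
     KSPSetNullSpace(ksp, nullSpace);
     MatNullSpaceDestroy(&nullSpace);
*/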
746: PetscErrorCode IntegrateResidualBatchCPU(PetscInt Ne, PetscInt numFields, PetscInt field, const PetscScalar coefficients[], const PetscReal jacobianInverses[], const PetscReal jacobianDeterminants[], PetscQuadrature quad[], void (*f0_func)(PetscScalar u[], const PetscScalar gradU[], PetscScalar f0[]), void (*f1_func)(PetscScalar u[], const PetscScalar gradU[], PetscScalar f1[]), PetscScalar elemVec[], AppCtx *user) {
747: const PetscInt debug = user->debug;
748: const PetscInt dim = SPATIAL_DIM_0;
749: PetscInt cOffset = 0;
750: PetscInt eOffset = 0, e;
754: PetscLogEventBegin(user->integrateResCPUEvent,0,0,0,0);
755: for(e = 0; e < Ne; ++e) {
756: const PetscReal detJ = jacobianDeterminants[e];
757: const PetscReal *invJ = &jacobianInverses[e*dim*dim];
758: const PetscInt Nq = quad[field].numQuadPoints;
759: PetscScalar f0[NUM_QUADRATURE_POINTS_0*dim];
760: PetscScalar f1[NUM_QUADRATURE_POINTS_0*dim*dim];
761: PetscInt q, f;
763: if (Nq > NUM_QUADRATURE_POINTS_0) SETERRQ2(PETSC_COMM_WORLD, PETSC_ERR_LIB, "Number of quadrature points %d should be <= %d", Nq, NUM_QUADRATURE_POINTS_0);
764: if (debug > 1) {
765: PetscPrintf(PETSC_COMM_SELF, " detJ: %g\n", detJ);
766: DMPrintCellMatrix(e, "invJ", dim, dim, invJ);
767: }
768: for(q = 0; q < Nq; ++q) {
769: if (debug) {PetscPrintf(PETSC_COMM_SELF, " quad point %d\n", q);}
770: PetscScalar u[dim+1];
771: PetscScalar gradU[dim*(dim+1)];
772: PetscInt fOffset = 0;
773: PetscInt dOffset = cOffset;
774: const PetscInt Ncomp = quad[field].numComponents;
775: const PetscReal *quadWeights = quad[field].quadWeights;
776: PetscInt d, f, i;
778: for(d = 0; d <= dim; ++d) {u[d] = 0.0;}
779: for(d = 0; d < dim*(dim+1); ++d) {gradU[d] = 0.0;}
780: for(f = 0; f < numFields; ++f) {
781: const PetscInt Nb = quad[f].numBasisFuncs;
782: const PetscInt Ncomp = quad[f].numComponents;
783: const PetscReal *basis = quad[f].basis;
784: const PetscReal *basisDer = quad[f].basisDer;
785: PetscInt b, comp;
787: for(b = 0; b < Nb; ++b) {
788: for(comp = 0; comp < Ncomp; ++comp) {
789: const PetscInt cidx = b*Ncomp+comp;
790: PetscScalar realSpaceDer[dim];
791: PetscInt d, g;
793: u[fOffset+comp] += coefficients[dOffset+cidx]*basis[q*Nb*Ncomp+cidx];
794: for(d = 0; d < dim; ++d) {
795: realSpaceDer[d] = 0.0;
796: for(g = 0; g < dim; ++g) {
797: realSpaceDer[d] += invJ[g*dim+d]*basisDer[(q*Nb*Ncomp+cidx)*dim+g];
798: }
799: gradU[(fOffset+comp)*dim+d] += coefficients[dOffset+cidx]*realSpaceDer[d];
800: }
801: }
802: }
803: if (debug > 1) {
804: PetscInt d;
805: for(comp = 0; comp < Ncomp; ++comp) {
806: PetscPrintf(PETSC_COMM_SELF, " u[%d,%d]: %g\n", f, comp, u[fOffset+comp]);
807: for(d = 0; d < dim; ++d) {
808: PetscPrintf(PETSC_COMM_SELF, " gradU[%d,%d]_%c: %g\n", f, comp, 'x'+d, gradU[(fOffset+comp)*dim+d]);
809: }
810: }
811: }
812: fOffset += Ncomp;
813: dOffset += Nb*Ncomp;
814: }
816: f0_func(u, gradU, &f0[q*Ncomp]);
817: for(i = 0; i < Ncomp; ++i) {
818: f0[q*Ncomp+i] *= detJ*quadWeights[q];
819: }
820: f1_func(u, gradU, &f1[q*Ncomp*dim]);
821: for(i = 0; i < Ncomp*dim; ++i) {
822: f1[q*Ncomp*dim+i] *= detJ*quadWeights[q];
823: }
824: if (debug > 1) {
825: PetscInt c,d;
826: for(c = 0; c < Ncomp; ++c) {
827: PetscPrintf(PETSC_COMM_SELF, " f0[%d]: %g\n", c, f0[q*Ncomp+c]);
828: for(d = 0; d < dim; ++d) {
829: PetscPrintf(PETSC_COMM_SELF, " f1[%d]_%c: %g\n", c, 'x'+d, f1[(q*Ncomp + c)*dim+d]);
830: }
831: }
832: }
833: if (q == Nq-1) {cOffset = dOffset;}
834: }
835: for(f = 0; f < numFields; ++f) {
836: const PetscInt Nq = quad[f].numQuadPoints;
837: const PetscInt Nb = quad[f].numBasisFuncs;
838: const PetscInt Ncomp = quad[f].numComponents;
839: const PetscReal *basis = quad[f].basis;
840: const PetscReal *basisDer = quad[f].basisDer;
841: PetscInt b, comp;
843: if (f == field) {
844: for(b = 0; b < Nb; ++b) {
845: for(comp = 0; comp < Ncomp; ++comp) {
846: const PetscInt cidx = b*Ncomp+comp;
847: PetscInt q;
849: elemVec[eOffset+cidx] = 0.0;
850: for(q = 0; q < Nq; ++q) {
851: PetscScalar realSpaceDer[dim];
852: PetscInt d, g;
854: elemVec[eOffset+cidx] += basis[q*Nb*Ncomp+cidx]*f0[q*Ncomp+comp];
855: for(d = 0; d < dim; ++d) {
856: realSpaceDer[d] = 0.0;
857: for(g = 0; g < dim; ++g) {
858: realSpaceDer[d] += invJ[g*dim+d]*basisDer[(q*Nb*Ncomp+cidx)*dim+g];
859: }
860: elemVec[eOffset+cidx] += realSpaceDer[d]*f1[(q*Ncomp+comp)*dim+d];
861: }
862: }
863: }
864: }
865: if (debug > 1) {
866: PetscInt b, comp;
868: for(b = 0; b < Nb; ++b) {
869: for(comp = 0; comp < Ncomp; ++comp) {
870: PetscPrintf(PETSC_COMM_SELF, " elemVec[%d,%d]: %g\n", b, comp, elemVec[eOffset+b*Ncomp+comp]);
871: }
872: }
873: }
874: }
875: eOffset += Nb*Ncomp;
876: }
877: }
878: /* PetscLogFlops((((2+(2+2*dim)*dim)*Ncomp*Nb+(2+2)*dim*Ncomp)*Nq + (2+2*dim)*dim*Nq*Ncomp*Nb)*Ne); */
879: PetscLogEventEnd(user->integrateResCPUEvent,0,0,0,0);
880: return(0);
881: };
885: /*
886: FormFunctionLocal - Form the local residual F from the local input X
888: Input Parameters:
889: + dm - The mesh
890: . X - Local input vector
891: - user - The user context
893: Output Parameter:
894: . F - Local output vector
896: Note:
897: We form the residual one batch of elements at a time. This allows us to offload work onto an accelerator,
898: like a GPU, or vectorize on a multicore machine.
900: .seealso: FormJacobianLocal()
901: */
902: PetscErrorCode FormFunctionLocal(DM dm, Vec X, Vec F, AppCtx *user)
903: {
904: const PetscInt debug = user->debug;
905: const PetscInt dim = user->dim;
906: PetscReal *coords, *v0, *J, *invJ, *detJ;
907: PetscScalar *elemVec;
908: PetscInt cStart, cEnd, c, field;
909: PetscErrorCode ierr;
912: PetscLogEventBegin(user->residualEvent,0,0,0,0);
913: VecSet(F, 0.0);
914: PetscMalloc3(dim,PetscReal,&coords,dim,PetscReal,&v0,dim*dim,PetscReal,&J);
915: DMComplexGetHeightStratum(dm, 0, &cStart, &cEnd);
916: const PetscInt numCells = cEnd - cStart;
917: PetscInt cellDof = 0;
918: PetscScalar *u;
920: for(field = 0; field < numFields; ++field) {
921: cellDof += user->q[field].numBasisFuncs*user->q[field].numComponents;
922: }
923: PetscMalloc4(numCells*cellDof,PetscScalar,&u,numCells*dim*dim,PetscReal,&invJ,numCells,PetscReal,&detJ,numCells*cellDof,PetscScalar,&elemVec);
924: for(c = cStart; c < cEnd; ++c) {
925: const PetscScalar *x;
926: PetscInt i;
928: DMComplexComputeCellGeometry(dm, c, v0, J, &invJ[c*dim*dim], &detJ[c]);
929: if (detJ[c] <= 0.0) SETERRQ2(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Invalid determinant %g for element %d", detJ[c], c);
930: DMComplexVecGetClosure(dm, PETSC_NULL, X, c, &x);
932: for(i = 0; i < cellDof; ++i) {
933: u[c*cellDof+i] = x[i];
934: }
935: }
936: for(field = 0; field < numFields; ++field) {
937: const PetscInt numQuadPoints = user->q[field].numQuadPoints;
938: const PetscInt numBasisFuncs = user->q[field].numBasisFuncs;
939: void (*f0)(PetscScalar u[], const PetscScalar gradU[], PetscScalar f0[]) = user->f0Funcs[field];
940: void (*f1)(PetscScalar u[], const PetscScalar gradU[], PetscScalar f1[]) = user->f1Funcs[field];
941: /* Conforming batches */
942: PetscInt blockSize = numBasisFuncs*numQuadPoints;
943: PetscInt numBlocks = 1;
944: PetscInt batchSize = numBlocks * blockSize;
945: PetscInt numBatches = user->numBatches;
946: PetscInt numChunks = numCells / (numBatches*batchSize);
947: IntegrateResidualBatchCPU(numChunks*numBatches*batchSize, numFields, field, u, invJ, detJ, user->q, f0, f1, elemVec, user);
948: /* Remainder */
949: PetscInt numRemainder = numCells % (numBatches * batchSize);
950: PetscInt offset = numCells - numRemainder;
951: IntegrateResidualBatchCPU(numRemainder, numFields, field, &u[offset*cellDof], &invJ[offset*dim*dim], &detJ[offset],
952: user->q, f0, f1, &elemVec[offset*cellDof], user);
953: }
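  /* Worked example of the chunk/remainder split above, with hypothetical sizes:
     numCells = 1000, numBasisFuncs*numQuadPoints = 36, numBlocks = 1, -gpu_batches 4
     => batchSize = 36, numChunks = 1000/(4*36) = 6, so 6*4*36 = 864 cells take the
     batched path and the remaining 1000 - 864 = 136 go through the remainder call. */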
954: for(c = cStart; c < cEnd; ++c) {
955: if (debug) {DMPrintCellVector(c, "Residual", cellDof, &elemVec[c*cellDof]);}
956: DMComplexVecSetClosure(dm, PETSC_NULL, F, c, &elemVec[c*cellDof], ADD_VALUES);
957: }
958: PetscFree4(u,invJ,detJ,elemVec);
959: PetscFree3(coords,v0,J);
960: if (user->showResidual) {
961: PetscInt p;
963: PetscPrintf(PETSC_COMM_WORLD, "Residual:\n");
964: for(p = 0; p < user->numProcs; ++p) {
965: if (p == user->rank) {
966: Vec f;
968: VecDuplicate(F, &f);
969: VecCopy(F, f);
970: VecChop(f, 1.0e-10);
971: VecView(f, PETSC_VIEWER_STDOUT_SELF);
972: VecDestroy(&f);
973: }
974: PetscBarrier((PetscObject) dm);
975: }
976: }
977: PetscLogEventEnd(user->residualEvent,0,0,0,0);
978: return(0);
979: }
983: /*
984: Loop over batch of elements (e):
985: Loop over element vector entries (f,fc --> i):
986: Sum over element matrix columns entries (g,gc --> j):
987: Loop over quadrature points (q):
988: Make u_q and gradU_q (loops over fields,Nb,Ncomp)
989: elemVec[i] += \psi^{fc}_f(q) g0_{fc,gc}(u, \nabla u) \phi^{gc}_g(q)
990: + \psi^{fc}_f(q) \cdot g1_{fc,gc,dg}(u, \nabla u) \nabla\phi^{gc}_g(q)
991: + \nabla\psi^{fc}_f(q) \cdot g2_{fc,gc,df}(u, \nabla u) \phi^{gc}_g(q)
992: + \nabla\psi^{fc}_f(q) \cdot g3_{fc,gc,df,dg}(u, \nabla u) \nabla\phi^{gc}_g(q)
993: */
994: PetscErrorCode IntegrateJacobianActionBatchCPU(PetscInt Ne, PetscInt numFields, PetscInt fieldI, const PetscScalar coefficients[], const PetscScalar argCoefficients[], const PetscReal jacobianInverses[], const PetscReal jacobianDeterminants[], PetscQuadrature quad[], void (**g0_func)(PetscScalar u[], const PetscScalar gradU[], PetscScalar g0[]), void (**g1_func)(PetscScalar u[], const PetscScalar gradU[], PetscScalar g1[]), void (**g2_func)(PetscScalar u[], const PetscScalar gradU[], PetscScalar g0[]), void (**g3_func)(PetscScalar u[], const PetscScalar gradU[], PetscScalar g1[]), PetscScalar elemVec[], AppCtx *user) {
995: const PetscReal *basisI = quad[fieldI].basis;
996: const PetscReal *basisDerI = quad[fieldI].basisDer;
997: const PetscInt debug = user->debug;
998: const PetscInt dim = SPATIAL_DIM_0;
999: PetscInt cellDof = 0; /* Total number of dof on a cell */
1000: PetscInt cOffset = 0; /* Offset into coefficients[], argCoefficients[], elemVec[] for element e */
1001: PetscInt offsetI = 0; /* Offset into an element vector for fieldI */
1002: PetscInt fieldJ, offsetJ, field, e;
1003: PetscErrorCode ierr;
1006: for(field = 0; field < numFields; ++field) {
1007: if (field == fieldI) {offsetI = cellDof;}
1008: cellDof += quad[field].numBasisFuncs*quad[field].numComponents;
1009: }
1010: PetscLogEventBegin(user->integrateJacActionCPUEvent,0,0,0,0);
1011: for(e = 0; e < Ne; ++e) {
1012: const PetscReal detJ = jacobianDeterminants[e];
1013: const PetscReal *invJ = &jacobianInverses[e*dim*dim];
1014: const PetscInt Nb_i = quad[fieldI].numBasisFuncs;
1015: const PetscInt Ncomp_i = quad[fieldI].numComponents;
1016: PetscInt f, fc, g, gc;
1018: for(f = 0; f < Nb_i; ++f) {
1019: const PetscInt Nq = quad[fieldI].numQuadPoints;
1020: const PetscReal *quadWeights = quad[fieldI].quadWeights;
1021: PetscInt q;
1023: for(fc = 0; fc < Ncomp_i; ++fc) {
1024: const PetscInt fidx = f*Ncomp_i+fc; /* Test function basis index */
1025: const PetscInt i = offsetI+fidx; /* Element vector row */
1026: elemVec[cOffset+i] = 0.0;
1027: }
1028: for(q = 0; q < Nq; ++q) {
1029: PetscScalar u[dim+1];
1030: PetscScalar gradU[dim*(dim+1)];
1031: PetscInt fOffset = 0; /* Offset into u[] for field_q (like offsetI) */
1032: PetscInt dOffset = cOffset; /* Offset into coefficients[] for field_q */
1033: PetscInt field_q, d;
1034: PetscScalar g0[dim*dim]; /* Ncomp_i*Ncomp_j */
1035: PetscScalar g1[dim*dim*dim]; /* Ncomp_i*Ncomp_j*dim */
1036: PetscScalar g2[dim*dim*dim]; /* Ncomp_i*Ncomp_j*dim */
1037: PetscScalar g3[dim*dim*dim*dim]; /* Ncomp_i*Ncomp_j*dim*dim */
1038: PetscInt c;
1040: if (debug) {PetscPrintf(PETSC_COMM_SELF, " quad point %d\n", q);}
1041: for(d = 0; d <= dim; ++d) {u[d] = 0.0;}
1042: for(d = 0; d < dim*(dim+1); ++d) {gradU[d] = 0.0;}
1043: for(field_q = 0; field_q < numFields; ++field_q) {
1044: const PetscInt Nb = quad[field_q].numBasisFuncs;
1045: const PetscInt Ncomp = quad[field_q].numComponents;
1046: const PetscReal *basis = quad[field_q].basis;
1047: const PetscReal *basisDer = quad[field_q].basisDer;
1048: PetscInt b, comp;
1050: for(b = 0; b < Nb; ++b) {
1051: for(comp = 0; comp < Ncomp; ++comp) {
1052: const PetscInt cidx = b*Ncomp+comp;
1053: PetscScalar realSpaceDer[dim];
1054: PetscInt d1, d2;
1056: u[fOffset+comp] += coefficients[dOffset+cidx]*basis[q*Nb*Ncomp+cidx];
1057: for(d1 = 0; d1 < dim; ++d1) {
1058: realSpaceDer[d1] = 0.0;
1059: for(d2 = 0; d2 < dim; ++d2) {
1060: realSpaceDer[d1] += invJ[d2*dim+d1]*basisDer[(q*Nb*Ncomp+cidx)*dim+d2];
1061: }
1062: gradU[(fOffset+comp)*dim+d1] += coefficients[dOffset+cidx]*realSpaceDer[d1];
1063: }
1064: }
1065: }
1066: if (debug > 1) {
1067: for(comp = 0; comp < Ncomp; ++comp) {
1068: PetscPrintf(PETSC_COMM_SELF, " u[%d,%d]: %g\n", f, comp, u[fOffset+comp]);
1069: for(d = 0; d < dim; ++d) {
1070: PetscPrintf(PETSC_COMM_SELF, " gradU[%d,%d]_%c: %g\n", f, comp, 'x'+d, gradU[(fOffset+comp)*dim+d]);
1071: }
1072: }
1073: }
1074: fOffset += Ncomp;
1075: dOffset += Nb*Ncomp;
1076: }
1078: for(fieldJ = 0, offsetJ = 0; fieldJ < numFields; offsetJ += quad[fieldJ].numBasisFuncs*quad[fieldJ].numComponents, ++fieldJ) {
1079: const PetscReal *basisJ = quad[fieldJ].basis;
1080: const PetscReal *basisDerJ = quad[fieldJ].basisDer;
1081: const PetscInt Nb_j = quad[fieldJ].numBasisFuncs;
1082: const PetscInt Ncomp_j = quad[fieldJ].numComponents;
1084: for(g = 0; g < Nb_j; ++g) {
1085: if ((Ncomp_i > dim) || (Ncomp_j > dim)) SETERRQ3(PETSC_COMM_WORLD, PETSC_ERR_LIB, "Number of components %d and %d should be <= %d", Ncomp_i, Ncomp_j, dim);
1086: PetscMemzero(g0, Ncomp_i*Ncomp_j * sizeof(PetscScalar));
1087: PetscMemzero(g1, Ncomp_i*Ncomp_j*dim * sizeof(PetscScalar));
1088: PetscMemzero(g2, Ncomp_i*Ncomp_j*dim * sizeof(PetscScalar));
1089: PetscMemzero(g3, Ncomp_i*Ncomp_j*dim*dim * sizeof(PetscScalar));
1090: if (g0_func[fieldI*numFields+fieldJ]) {
1091: g0_func[fieldI*numFields+fieldJ](u, gradU, g0);
1092: for(c = 0; c < Ncomp_i*Ncomp_j; ++c) {
1093: g0[c] *= detJ*quadWeights[q];
1094: }
1095: }
1096: if (g1_func[fieldI*numFields+fieldJ]) {
1097: g1_func[fieldI*numFields+fieldJ](u, gradU, g1);
1098: for(c = 0; c < Ncomp_i*Ncomp_j*dim; ++c) {
1099: g1[c] *= detJ*quadWeights[q];
1100: }
1101: }
1102: if (g2_func[fieldI*numFields+fieldJ]) {
1103: g2_func[fieldI*numFields+fieldJ](u, gradU, g2);
1104: for(c = 0; c < Ncomp_i*Ncomp_j*dim; ++c) {
1105: g2[c] *= detJ*quadWeights[q];
1106: }
1107: }
1108: if (g3_func[fieldI*numFields+fieldJ]) {
1109: g3_func[fieldI*numFields+fieldJ](u, gradU, g3);
1110: for(c = 0; c < Ncomp_i*Ncomp_j*dim*dim; ++c) {
1111: g3[c] *= detJ*quadWeights[q];
1112: }
1113: }
1115: for(fc = 0; fc < Ncomp_i; ++fc) {
1116: const PetscInt fidx = f*Ncomp_i+fc; /* Test function basis index */
1117: const PetscInt i = offsetI+fidx; /* Element matrix row */
1118: for(gc = 0; gc < Ncomp_j; ++gc) {
1119: const PetscInt gidx = g*Ncomp_j+gc; /* Trial function basis index */
1120: const PetscInt j = offsetJ+gidx; /* Element matrix column */
1121: PetscScalar entry = 0.0; /* The (i,j) entry in the element matrix */
1122: PetscScalar realSpaceDerI[dim];
1123: PetscScalar realSpaceDerJ[dim];
1124: PetscInt d, d2;
1126: for(d = 0; d < dim; ++d) {
1127: realSpaceDerI[d] = 0.0;
1128: realSpaceDerJ[d] = 0.0;
1129: for(d2 = 0; d2 < dim; ++d2) {
1130: realSpaceDerI[d] += invJ[d2*dim+d]*basisDerI[(q*Nb_i*Ncomp_i+fidx)*dim+d2];
1131: realSpaceDerJ[d] += invJ[d2*dim+d]*basisDerJ[(q*Nb_j*Ncomp_j+gidx)*dim+d2];
1132: }
1133: }
1134: entry += basisI[q*Nb_i*Ncomp_i+fidx]*g0[fc*Ncomp_j+gc]*basisJ[q*Nb_j*Ncomp_j+gidx];
1135: for(d = 0; d < dim; ++d) {
1136: entry += basisI[q*Nb_i*Ncomp_i+fidx]*g1[(fc*Ncomp_j+gc)*dim+d]*realSpaceDerJ[d];
1137: entry += realSpaceDerI[d]*g2[(fc*Ncomp_j+gc)*dim+d]*basisJ[q*Nb_j*Ncomp_j+gidx];
1138: for(d2 = 0; d2 < dim; ++d2) {
1139: entry += realSpaceDerI[d]*g3[((fc*Ncomp_j+gc)*dim+d)*dim+d2]*realSpaceDerJ[d2];
1140: }
1141: }
1142: elemVec[cOffset+i] += entry*argCoefficients[cOffset+j];
1143: }
1144: }
1145: }
1146: }
1147: }
1148: }
1149: if (debug > 1) {
1150: PetscInt fc, f;
1152: PetscPrintf(PETSC_COMM_SELF, "Element %d action vector for field %d\n", e, fieldI);
1153: for(fc = 0; fc < Ncomp_i; ++fc) {
1154: for(f = 0; f < Nb_i; ++f) {
1155: const PetscInt i = offsetI + f*Ncomp_i+fc;
1156: PetscPrintf(PETSC_COMM_SELF, " argCoef[%d,%d]: %g\n", f, fc, argCoefficients[cOffset+i]);
1157: }
1158: }
1159: for(fc = 0; fc < Ncomp_i; ++fc) {
1160: for(f = 0; f < Nb_i; ++f) {
1161: const PetscInt i = offsetI + f*Ncomp_i+fc;
1162: PetscPrintf(PETSC_COMM_SELF, " elemVec[%d,%d]: %g\n", f, fc, elemVec[cOffset+i]);
1163: }
1164: }
1165: }
1166: cOffset += cellDof;
1167: }
1168: PetscLogEventEnd(user->integrateJacActionCPUEvent,0,0,0,0);
1169: return(0);
1170: }
1174: /*
1175: FormJacobianActionLocal - Form the local portion of the action of Jacobian matrix J on the local input X.
1177: Input Parameters:
1178: + dm - The mesh
1179: . J - The Jacobian shell matrix
1180: . X - Local input vector
1181: - user - The user context
1183: Output Parameter:
1184: . F - Local output vector
1186: Note:
1187: We form the residual one batch of elements at a time. This allows us to offload work onto an accelerator,
1188: like a GPU, or vectorize on a multicore machine.
1190: .seealso: FormJacobianLocal()
1191: */
1192: PetscErrorCode FormJacobianActionLocal(DM dm, Mat Jac, Vec X, Vec F, AppCtx *user)
1193: {
1194: const PetscInt debug = user->debug;
1195: const PetscInt dim = user->dim;
1196: JacActionCtx *jctx;
1197: PetscReal *coords, *v0, *J, *invJ, *detJ;
1198: PetscScalar *elemVec;
1199: PetscInt cStart, cEnd, c, field;
1200: PetscErrorCode ierr;
1203: PetscLogEventBegin(user->jacobianEvent,0,0,0,0);
1204: MatShellGetContext(Jac, &jctx);
1205: VecSet(F, 0.0);
1206: PetscMalloc3(dim,PetscReal,&coords,dim,PetscReal,&v0,dim*dim,PetscReal,&J);
1207: DMComplexGetHeightStratum(dm, 0, &cStart, &cEnd);
1208: const PetscInt numCells = cEnd - cStart;
1209: PetscInt cellDof = 0;
1210: PetscScalar *u, *a;
1212: for(field = 0; field < numFields; ++field) {
1213: cellDof += user->q[field].numBasisFuncs*user->q[field].numComponents;
1214: }
1215: PetscMalloc5(numCells*cellDof,PetscScalar,&u,numCells*cellDof,PetscScalar,&a,numCells*dim*dim,PetscReal,&invJ,numCells,PetscReal,&detJ,numCells*cellDof,PetscScalar,&elemVec);
1216: for(c = cStart; c < cEnd; ++c) {
1217: const PetscScalar *x;
1218: PetscInt i;
1220: DMComplexComputeCellGeometry(dm, c, v0, J, &invJ[c*dim*dim], &detJ[c]);
1221: if (detJ[c] <= 0.0) SETERRQ2(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Invalid determinant %g for element %d", detJ[c], c);
1222: DMComplexVecGetClosure(dm, PETSC_NULL, jctx->u, c, &x);
1223: for(i = 0; i < cellDof; ++i) {
1224: u[c*cellDof+i] = x[i];
1225: }
1226: DMComplexVecGetClosure(dm, PETSC_NULL, X, c, &x);
1227: for(i = 0; i < cellDof; ++i) {
1228: a[c*cellDof+i] = x[i];
1229: }
1230: }
1231: for(field = 0; field < numFields; ++field) {
1232: const PetscInt numQuadPoints = user->q[field].numQuadPoints;
1233: const PetscInt numBasisFuncs = user->q[field].numBasisFuncs;
1234: /* Conforming batches */
1235: PetscInt blockSize = numBasisFuncs*numQuadPoints;
1236: PetscInt numBlocks = 1;
1237: PetscInt batchSize = numBlocks * blockSize;
1238: PetscInt numBatches = user->numBatches;
1239: PetscInt numChunks = numCells / (numBatches*batchSize);
1240: IntegrateJacobianActionBatchCPU(numChunks*numBatches*batchSize, numFields, field, u, a, invJ, detJ, user->q, user->g0Funcs, user->g1Funcs, user->g2Funcs, user->g3Funcs, elemVec, user);
1241: /* Remainder */
1242: PetscInt numRemainder = numCells % (numBatches * batchSize);
1243: PetscInt offset = numCells - numRemainder;
1244: IntegrateJacobianActionBatchCPU(numRemainder, numFields, field, &u[offset*cellDof], &a[offset*cellDof], &invJ[offset*dim*dim], &detJ[offset],
1245: user->q, user->g0Funcs, user->g1Funcs, user->g2Funcs, user->g3Funcs, &elemVec[offset*cellDof], user);
1246: }
1247: for(c = cStart; c < cEnd; ++c) {
1248: if (debug) {DMPrintCellVector(c, "Residual", cellDof, &elemVec[c*cellDof]);}
1249: DMComplexVecSetClosure(dm, PETSC_NULL, F, c, &elemVec[c*cellDof], ADD_VALUES);
1250: }
1251: PetscFree5(u,a,invJ,detJ,elemVec);
1252: PetscFree3(coords,v0,J);
1253: if (0) {
1254: PetscInt p;
1256: PetscPrintf(PETSC_COMM_WORLD, "Jacobian Action:\n");
1257: for(p = 0; p < user->numProcs; ++p) {
1258: if (p == user->rank) {VecView(F, PETSC_VIEWER_STDOUT_SELF);}
1259: PetscBarrier((PetscObject) dm);
1260: }
1261: }
1262: PetscLogEventEnd(user->jacobianEvent,0,0,0,0);
1263: return(0);
1264: }
1268: PetscErrorCode FormJacobianAction(Mat J, Vec X, Vec Y)
1269: {
1270: JacActionCtx *ctx;
1271: DM dm;
1272: Vec dummy, localX, localY;
1273: PetscInt N, n;
1274: PetscErrorCode ierr;
1280: MatShellGetContext(J, &ctx);
1281: dm = ctx->dm;
1283: /* determine whether X = localX */
1284: DMGetLocalVector(dm, &dummy);
1285: DMGetLocalVector(dm, &localX);
1286: DMGetLocalVector(dm, &localY);
1287: /* TODO: THIS dummy restore is necessary here so that the first available local vector has boundary conditions in it
1288: I think the right thing to do is have the user put BC into a local vector and give it to us
1289: */
1290: DMRestoreLocalVector(dm, &dummy);
1291: VecGetSize(X, &N);
1292: VecGetSize(localX, &n);
1294: if (n != N){ /* X != localX */
1295: VecSet(localX, 0.0);
1296: DMGlobalToLocalBegin(dm, X, INSERT_VALUES, localX);
1297: DMGlobalToLocalEnd(dm, X, INSERT_VALUES, localX);
1298: } else {
1299: DMRestoreLocalVector(dm, &localX);
1300: localX = X;
1301: }
1302: FormJacobianActionLocal(dm, J, localX, localY, ctx->user);
1303: if (n != N){
1304: DMRestoreLocalVector(dm, &localX);
1305: }
1306: VecSet(Y, 0.0);
1307: DMLocalToGlobalBegin(dm, localY, ADD_VALUES, Y);
1308: DMLocalToGlobalEnd(dm, localY, ADD_VALUES, Y);
1309: DMRestoreLocalVector(dm, &localY);
1310: if (0) {
1311: Vec r;
1312: PetscReal norm;
1314: VecDuplicate(X, &r);
1315: MatMult(ctx->J, X, r);
1316: VecAXPY(r, -1.0, Y);
1317: VecNorm(r, NORM_2, &norm);
1318: if (norm > 1.0e-8) {
1319: PetscPrintf(PETSC_COMM_WORLD, "Jacobian Action Input:\n");
1320: VecView(X, PETSC_VIEWER_STDOUT_WORLD);
1321: PetscPrintf(PETSC_COMM_WORLD, "Jacobian Action Result:\n");
1322: VecView(Y, PETSC_VIEWER_STDOUT_WORLD);
1323: PetscPrintf(PETSC_COMM_WORLD, "Difference:\n");
1324: VecView(r, PETSC_VIEWER_STDOUT_WORLD);
1325: SETERRQ1(((PetscObject) J)->comm, PETSC_ERR_ARG_WRONG, "The difference with assembled multiply is too large %g", norm);
1326: }
1327: VecDestroy(&r);
1328: }
1329: return(0);
1330: }
1334: /*
1335: Loop over batch of elements (e):
1336: Loop over element matrix entries (f,fc,g,gc --> i,j):
1337: Loop over quadrature points (q):
1338: Make u_q and gradU_q (loops over fields,Nb,Ncomp)
1339: elemMat[i,j] += \psi^{fc}_f(q) g0_{fc,gc}(u, \nabla u) \phi^{gc}_g(q)
1340: + \psi^{fc}_f(q) \cdot g1_{fc,gc,dg}(u, \nabla u) \nabla\phi^{gc}_g(q)
1341: + \nabla\psi^{fc}_f(q) \cdot g2_{fc,gc,df}(u, \nabla u) \phi^{gc}_g(q)
1342: + \nabla\psi^{fc}_f(q) \cdot g3_{fc,gc,df,dg}(u, \nabla u) \nabla\phi^{gc}_g(q)
1343: */
1344: PetscErrorCode IntegrateJacobianBatchCPU(PetscInt Ne, PetscInt numFields, PetscInt fieldI, PetscInt fieldJ, const PetscScalar coefficients[], const PetscReal jacobianInverses[], const PetscReal jacobianDeterminants[], PetscQuadrature quad[], void (*g0_func)(PetscScalar u[], const PetscScalar gradU[], PetscScalar g0[]), void (*g1_func)(PetscScalar u[], const PetscScalar gradU[], PetscScalar g1[]), void (*g2_func)(PetscScalar u[], const PetscScalar gradU[], PetscScalar g0[]), void (*g3_func)(PetscScalar u[], const PetscScalar gradU[], PetscScalar g1[]), PetscScalar elemMat[], AppCtx *user) {
1345: const PetscReal *basisI = quad[fieldI].basis;
1346: const PetscReal *basisDerI = quad[fieldI].basisDer;
1347: const PetscReal *basisJ = quad[fieldJ].basis;
1348: const PetscReal *basisDerJ = quad[fieldJ].basisDer;
1349: const PetscInt debug = user->debug;
1350: const PetscInt dim = SPATIAL_DIM_0;
1351: PetscInt cellDof = 0; /* Total number of dof on a cell */
1352: PetscInt cOffset = 0; /* Offset into coefficients[] for element e */
1353: PetscInt eOffset = 0; /* Offset into elemMat[] for element e */
1354: PetscInt offsetI = 0; /* Offset into an element vector for fieldI */
1355: PetscInt offsetJ = 0; /* Offset into an element vector for fieldJ */
1356: PetscInt field, e;
1357: PetscErrorCode ierr;
1360: for(field = 0; field < numFields; ++field) {
1361: if (field == fieldI) {offsetI = cellDof;}
1362: if (field == fieldJ) {offsetJ = cellDof;}
1363: cellDof += quad[field].numBasisFuncs*quad[field].numComponents;
1364: }
1365: PetscLogEventBegin(user->integrateJacCPUEvent,0,0,0,0);
1366: for(e = 0; e < Ne; ++e) {
1367: const PetscReal detJ = jacobianDeterminants[e];
1368: const PetscReal *invJ = &jacobianInverses[e*dim*dim];
1369: const PetscInt Nb_i = quad[fieldI].numBasisFuncs;
1370: const PetscInt Ncomp_i = quad[fieldI].numComponents;
1371: const PetscInt Nb_j = quad[fieldJ].numBasisFuncs;
1372: const PetscInt Ncomp_j = quad[fieldJ].numComponents;
1373: PetscInt f, g;
1375: for(f = 0; f < Nb_i; ++f) {
1376: for(g = 0; g < Nb_j; ++g) {
1377: const PetscInt Nq = quad[fieldI].numQuadPoints;
1378: const PetscReal *quadWeights = quad[fieldI].quadWeights;
1379: PetscInt q;
1381: for(q = 0; q < Nq; ++q) {
1382: PetscScalar u[dim+1];
1383: PetscScalar gradU[dim*(dim+1)];
1384: PetscInt fOffset = 0; /* Offset into u[] for field_q (like offsetI) */
1385: PetscInt dOffset = cOffset; /* Offset into coefficients[] for field_q */
1386: PetscInt field_q, d;
1387: PetscScalar g0[dim*dim]; /* Ncomp_i*Ncomp_j */
1388: PetscScalar g1[dim*dim*dim]; /* Ncomp_i*Ncomp_j*dim */
1389: PetscScalar g2[dim*dim*dim]; /* Ncomp_i*Ncomp_j*dim */
1390: PetscScalar g3[dim*dim*dim*dim]; /* Ncomp_i*Ncomp_j*dim*dim */
1391: PetscInt fc, gc, c;
1393: if (debug) {PetscPrintf(PETSC_COMM_SELF, " quad point %d\n", q);}
1394: for(d = 0; d <= dim; ++d) {u[d] = 0.0;}
1395: for(d = 0; d < dim*(dim+1); ++d) {gradU[d] = 0.0;}
1396: for(field_q = 0; field_q < numFields; ++field_q) {
1397: const PetscInt Nb = quad[field_q].numBasisFuncs;
1398: const PetscInt Ncomp = quad[field_q].numComponents;
1399: const PetscReal *basis = quad[field_q].basis;
1400: const PetscReal *basisDer = quad[field_q].basisDer;
1401: PetscInt b, comp;
1403: for(b = 0; b < Nb; ++b) {
1404: for(comp = 0; comp < Ncomp; ++comp) {
1405: const PetscInt cidx = b*Ncomp+comp;
1406: PetscScalar realSpaceDer[dim];
1407: PetscInt d1, d2;
1409: u[fOffset+comp] += coefficients[dOffset+cidx]*basis[q*Nb*Ncomp+cidx];
1410: for(d1 = 0; d1 < dim; ++d1) {
1411: realSpaceDer[d1] = 0.0;
1412: for(d2 = 0; d2 < dim; ++d2) {
1413: realSpaceDer[d1] += invJ[d2*dim+d1]*basisDer[(q*Nb*Ncomp+cidx)*dim+d2];
1414: }
1415: gradU[(fOffset+comp)*dim+d1] += coefficients[dOffset+cidx]*realSpaceDer[d1];
1416: }
1417: }
1418: }
1419: if (debug > 1) {
1420: for(comp = 0; comp < Ncomp; ++comp) {
1421: PetscPrintf(PETSC_COMM_SELF, " u[%d,%d]: %g\n", f, comp, u[fOffset+comp]);
1422: for(d = 0; d < dim; ++d) {
1423: PetscPrintf(PETSC_COMM_SELF, " gradU[%d,%d]_%c: %g\n", f, comp, 'x'+d, gradU[(fOffset+comp)*dim+d]);
1424: }
1425: }
1426: }
1427: fOffset += Ncomp;
1428: dOffset += Nb*Ncomp;
1429: }
1431: if ((Ncomp_i > dim) || (Ncomp_j > dim)) SETERRQ3(PETSC_COMM_WORLD, PETSC_ERR_LIB, "Number of components %d and %d should be <= %d", Ncomp_i, Ncomp_j, dim);
1432: PetscMemzero(g0, Ncomp_i*Ncomp_j * sizeof(PetscScalar));
1433: PetscMemzero(g1, Ncomp_i*Ncomp_j*dim * sizeof(PetscScalar));
1434: PetscMemzero(g2, Ncomp_i*Ncomp_j*dim * sizeof(PetscScalar));
1435: PetscMemzero(g3, Ncomp_i*Ncomp_j*dim*dim * sizeof(PetscScalar));
1436: if (g0_func) {
1437: g0_func(u, gradU, g0);
1438: for(c = 0; c < Ncomp_i*Ncomp_j; ++c) {
1439: g0[c] *= detJ*quadWeights[q];
1440: }
1441: }
1442: if (g1_func) {
1443: g1_func(u, gradU, g1);
1444: for(c = 0; c < Ncomp_i*Ncomp_j*dim; ++c) {
1445: g1[c] *= detJ*quadWeights[q];
1446: }
1447: }
1448: if (g2_func) {
1449: g2_func(u, gradU, g2);
1450: for(c = 0; c < Ncomp_i*Ncomp_j*dim; ++c) {
1451: g2[c] *= detJ*quadWeights[q];
1452: }
1453: }
1454: if (g3_func) {
1455: g3_func(u, gradU, g3);
1456: for(c = 0; c < Ncomp_i*Ncomp_j*dim*dim; ++c) {
1457: g3[c] *= detJ*quadWeights[q];
1458: }
1459: }
1461: for(fc = 0; fc < Ncomp_i; ++fc) {
1462: const PetscInt fidx = f*Ncomp_i+fc; /* Test function basis index */
1463: const PetscInt i = offsetI+fidx; /* Element matrix row */
1464: for(gc = 0; gc < Ncomp_j; ++gc) {
1465: const PetscInt gidx = g*Ncomp_j+gc; /* Trial function basis index */
1466: const PetscInt j = offsetJ+gidx; /* Element matrix column */
1467: PetscScalar realSpaceDerI[dim];
1468: PetscScalar realSpaceDerJ[dim];
1469: PetscInt d, d2;
1471: for(d = 0; d < dim; ++d) {
1472: realSpaceDerI[d] = 0.0;
1473: realSpaceDerJ[d] = 0.0;
1474: for(d2 = 0; d2 < dim; ++d2) {
1475: realSpaceDerI[d] += invJ[d2*dim+d]*basisDerI[(q*Nb_i*Ncomp_i+fidx)*dim+d2];
1476: realSpaceDerJ[d] += invJ[d2*dim+d]*basisDerJ[(q*Nb_j*Ncomp_j+gidx)*dim+d2];
1477: }
1478: }
1479: elemMat[eOffset+i*cellDof+j] += basisI[q*Nb_i*Ncomp_i+fidx]*g0[fc*Ncomp_j+gc]*basisJ[q*Nb_j*Ncomp_j+gidx];
1480: for(d = 0; d < dim; ++d) {
1481: elemMat[eOffset+i*cellDof+j] += basisI[q*Nb_i*Ncomp_i+fidx]*g1[(fc*Ncomp_j+gc)*dim+d]*realSpaceDerJ[d];
1482: elemMat[eOffset+i*cellDof+j] += realSpaceDerI[d]*g2[(fc*Ncomp_j+gc)*dim+d]*basisJ[q*Nb_j*Ncomp_j+gidx];
1483: for(d2 = 0; d2 < dim; ++d2) {
1484: elemMat[eOffset+i*cellDof+j] += realSpaceDerI[d]*g3[((fc*Ncomp_j+gc)*dim+d)*dim+d2]*realSpaceDerJ[d2];
1485: }
1486: }
1487: }
1488: }
1489: }
1490: }
1491: }
1492: if (debug > 1) {
1493: PetscInt fc, f, gc, g;
1495: PetscPrintf(PETSC_COMM_SELF, "Element matrix for fields %d and %d\n", fieldI, fieldJ);
1496: for(fc = 0; fc < Ncomp_i; ++fc) {
1497: for(f = 0; f < Nb_i; ++f) {
1498: const PetscInt i = offsetI + f*Ncomp_i+fc;
1499: for(gc = 0; gc < Ncomp_j; ++gc) {
1500: for(g = 0; g < Nb_j; ++g) {
1501: const PetscInt j = offsetJ + g*Ncomp_j+gc;
1502: PetscPrintf(PETSC_COMM_SELF, " elemMat[%d,%d,%d,%d]: %g\n", f, fc, g, gc, elemMat[eOffset+i*cellDof+j]);
1503: }
1504: }
1505: PetscPrintf(PETSC_COMM_SELF, "\n");
1506: }
1507: }
1508: }
1509: cOffset += cellDof;
1510: eOffset += cellDof*cellDof;
1511: }
1512: PetscLogEventEnd(user->integrateJacCPUEvent,0,0,0,0);
1513: return(0);
1514: }
1518: /*
1519:   FormJacobianLocal - Form the local portion of the Jacobian matrix Jac from the local input X.
1521: Input Parameters:
1522: + dm - The mesh
1523: . X - Local input vector
1524: - user - The user context
1526: Output Parameter:
1527: . Jac - Jacobian matrix
1529: Note:
1530:   We form the Jacobian one batch of elements at a time. This allows us to offload work onto an accelerator,
1531: like a GPU, or vectorize on a multicore machine.
1533: .seealso: FormFunctionLocal()
1534: */
1535: PetscErrorCode FormJacobianLocal(DM dm, Vec X, Mat Jac, Mat JacP, AppCtx *user)
1536: {
1537: const PetscInt debug = user->debug;
1538: const PetscInt dim = user->dim;
1539: PetscReal *v0, *J, *invJ, *detJ;
1540: PetscScalar *elemMat, *u;
1541: PetscInt numCells, cStart, cEnd, c, field, fieldI;
1542: PetscInt cellDof = 0;
1546: PetscLogEventBegin(user->jacobianEvent,0,0,0,0);
1547: MatZeroEntries(JacP);
1548: PetscMalloc2(dim,PetscReal,&v0,dim*dim,PetscReal,&J);
1549: DMComplexGetHeightStratum(dm, 0, &cStart, &cEnd);
1550: numCells = cEnd - cStart;
1551: for(field = 0; field < numFields; ++field) {
1552: cellDof += user->q[field].numBasisFuncs*user->q[field].numComponents;
1553: }
1554: PetscMalloc4(numCells*cellDof,PetscScalar,&u,numCells*dim*dim,PetscReal,&invJ,numCells,PetscReal,&detJ,numCells*cellDof*cellDof,PetscScalar,&elemMat);
1555: for(c = cStart; c < cEnd; ++c) {
1556: const PetscScalar *x;
1557: PetscInt i;
1559: DMComplexComputeCellGeometry(dm, c, v0, J, &invJ[c*dim*dim], &detJ[c]);
1560: if (detJ[c] <= 0.0) SETERRQ2(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Invalid determinant %g for element %d", detJ[c], c);
1561: DMComplexVecGetClosure(dm, PETSC_NULL, X, c, &x);
1563: for(i = 0; i < cellDof; ++i) {
1564: u[c*cellDof+i] = x[i];
1565: }
1566: }
1567: PetscMemzero(elemMat, numCells*cellDof*cellDof * sizeof(PetscScalar));
1568: for(fieldI = 0; fieldI < numFields; ++fieldI) {
1569: const PetscInt numQuadPoints = user->q[fieldI].numQuadPoints;
1570: const PetscInt numBasisFuncs = user->q[fieldI].numBasisFuncs;
1571: PetscInt fieldJ;
1573: for(fieldJ = 0; fieldJ < numFields; ++fieldJ) {
1574: void (*g0)(PetscScalar u[], const PetscScalar gradU[], PetscScalar g0[]) = user->g0Funcs[fieldI*numFields+fieldJ];
1575: void (*g1)(PetscScalar u[], const PetscScalar gradU[], PetscScalar g1[]) = user->g1Funcs[fieldI*numFields+fieldJ];
1576: void (*g2)(PetscScalar u[], const PetscScalar gradU[], PetscScalar g2[]) = user->g2Funcs[fieldI*numFields+fieldJ];
1577: void (*g3)(PetscScalar u[], const PetscScalar gradU[], PetscScalar g3[]) = user->g3Funcs[fieldI*numFields+fieldJ];
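      /* Editor's note: a purely illustrative example of the chunking arithmetic that follows.
         With, say, numCells = 1000, numBasisFuncs*numQuadPoints = 6, numBlocks = 1 and
         numBatches = 1, we get batchSize = 6 and numChunks = 1000/6 = 166, so the batched call
         integrates 166*6 = 996 cells and the remainder call handles the last 1000 % 6 = 4 cells. */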
1578: /* Conforming batches */
1579: PetscInt blockSize = numBasisFuncs*numQuadPoints;
1580: PetscInt numBlocks = 1;
1581: PetscInt batchSize = numBlocks * blockSize;
1582: PetscInt numBatches = user->numBatches;
1583: PetscInt numChunks = numCells / (numBatches*batchSize);
1584: IntegrateJacobianBatchCPU(numChunks*numBatches*batchSize, numFields, fieldI, fieldJ, u, invJ, detJ, user->q, g0, g1, g2, g3, elemMat, user);
1585: /* Remainder */
1586: PetscInt numRemainder = numCells % (numBatches * batchSize);
1587: PetscInt offset = numCells - numRemainder;
1588: IntegrateJacobianBatchCPU(numRemainder, numFields, fieldI, fieldJ, &u[offset*cellDof], &invJ[offset*dim*dim], &detJ[offset],
1589: user->q, g0, g1, g2, g3, &elemMat[offset*cellDof*cellDof], user);
1590: }
1591: }
1592: for(c = cStart; c < cEnd; ++c) {
1593: if (debug) {DMPrintCellMatrix(c, "Jacobian", cellDof, cellDof, &elemMat[c*cellDof*cellDof]);}
1594: DMComplexMatSetClosure(dm, PETSC_NULL, PETSC_NULL, JacP, c, &elemMat[c*cellDof*cellDof], ADD_VALUES);
1595: }
1596: PetscFree4(u,invJ,detJ,elemMat);
1597: PetscFree2(v0,J);
1599: /* Assemble matrix, using the 2-step process:
1600: MatAssemblyBegin(), MatAssemblyEnd(). */
1601: MatAssemblyBegin(JacP, MAT_FINAL_ASSEMBLY);
1602: MatAssemblyEnd(JacP, MAT_FINAL_ASSEMBLY);
1604: if (user->showJacobian) {
1605: PetscPrintf(PETSC_COMM_WORLD, "Jacobian:\n");
1606: MatChop(JacP, 1.0e-10);
1607: MatView(JacP, PETSC_VIEWER_STDOUT_WORLD);
1608: }
1609: PetscLogEventEnd(user->jacobianEvent,0,0,0,0);
1610: if (user->jacobianMF) {
1611: JacActionCtx *jctx;
1613: MatShellGetContext(Jac, &jctx);
1614: VecCopy(X, jctx->u);
1615: }
1616: return(0);
1617: }
1621: int main(int argc, char **argv)
1622: {
1623: SNES snes; /* nonlinear solver */
1624: Vec u,r; /* solution, residual vectors */
1625: Mat A,J; /* Jacobian matrix */
1626: MatNullSpace nullSpace; /* May be necessary for pressure */
1627: AppCtx user; /* user-defined work context */
1628: JacActionCtx userJ; /* context for Jacobian MF action */
1629: PetscInt its; /* iterations for convergence */
1630: PetscReal error = 0.0; /* L_2 error in the solution */
1633: PetscInitialize(&argc, &argv, PETSC_NULL, help);
1634: ProcessOptions(PETSC_COMM_WORLD, &user);
1635: SNESCreate(PETSC_COMM_WORLD, &snes);
1636: CreateMesh(PETSC_COMM_WORLD, &user, &user.dm);
1637: SNESSetDM(snes, user.dm);
1639: SetupExactSolution(&user);
1640: SetupQuadrature(&user);
1641: SetupSection(user.dm, &user);
1643: DMCreateGlobalVector(user.dm, &u);
1644: VecDuplicate(u, &r);
1646: DMCreateMatrix(user.dm, MATAIJ, &J);
1647: if (user.jacobianMF) {
1648: PetscInt M, m, N, n;
1650: MatGetSize(J, &M, &N);
1651: MatGetLocalSize(J, &m, &n);
1652: MatCreate(PETSC_COMM_WORLD, &A);
1653: MatSetSizes(A, m, n, M, N);
1654: MatSetType(A, MATSHELL);
1655: MatSetUp(A);
1656: MatShellSetOperation(A, MATOP_MULT, (void (*)(void)) FormJacobianAction);
1657: userJ.dm = user.dm;
1658: userJ.J = J;
1659: userJ.user = &user;
1660: DMCreateLocalVector(user.dm, &userJ.u);
1661: MatShellSetContext(A, &userJ);
1662: } else {
1663: A = J;
1664: }
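  /* Editor's note: when user.jacobianMF is set, the Jacobian is applied matrix-free: A is a
     MATSHELL whose MatMult() is FormJacobianAction() acting on the state saved in userJ, while the
     assembled AIJ matrix J is still built and passed to SNES below as the preconditioning matrix. */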
1665: SNESSetJacobian(snes, A, J, SNESDMComputeJacobian, &user);
1666: CreatePressureNullSpace(user.dm, &user, &nullSpace);
1667: MatSetNullSpace(J, nullSpace);
1668: if (A != J) {
1669: MatSetNullSpace(A, nullSpace);
1670: }
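  /* Editor's note: with Dirichlet conditions on the velocity over the whole boundary, the pressure
     is only determined up to an additive constant, so the constant-pressure null space from
     CreatePressureNullSpace() is attached to the operators and removed by the Krylov solver. */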
1672: DMSetLocalFunction(user.dm, (DMLocalFunction1) FormFunctionLocal);
1673: DMSetLocalJacobian(user.dm, (DMLocalJacobian1) FormJacobianLocal);
1674: SNESSetFunction(snes, r, SNESDMComputeFunction, &user);
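  /* Editor's note: SNESDMComputeFunction() and SNESDMComputeJacobian() use the DM attached to the
     SNES: they scatter the global vector to a local one and then invoke the local callbacks
     registered just above, i.e. FormFunctionLocal() and FormJacobianLocal(). */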
1675: SNESSetFromOptions(snes);
1677: {
1678: KSP ksp; PC pc; Vec crd_vec;
1679: const PetscScalar *v;
1680: PetscInt i,k,j,mlocal;
1681: PetscReal *coords;
1683: SNESGetKSP( snes, &ksp );
1684: KSPGetPC( ksp, &pc );
1685: DMComplexGetCoordinateVec( user.dm, &crd_vec );
1686: VecGetLocalSize(crd_vec,&mlocal);
1687: PetscMalloc(SPATIAL_DIM_0*mlocal*sizeof *coords,&coords);
1688: VecGetArrayRead(crd_vec,&v);
1689: for(k=j=0; j<mlocal; j++)
1690: for(i=0; i<SPATIAL_DIM_0; i++,k++)
1691: coords[k] = PetscRealPart(v[k]);
1692: VecRestoreArrayRead(crd_vec,&v);
1693: PCSetCoordinates( pc, SPATIAL_DIM_0, mlocal, coords );
1694: PetscFree( coords );
1695: }
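  /* Editor's note: the block above copies the vertex coordinates out of the DM and hands them to
     the preconditioner via PCSetCoordinates(); geometry-aware preconditioners such as PCGAMG or ML
     can use them, e.g. to construct rigid-body near-null-space modes for the velocity block. */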
1697: DMComputeVertexFunction(user.dm, INSERT_ALL_VALUES, u, numComponents, user.exactFuncs, &user);
1698: if (user.runType == RUN_FULL) {
1699: PetscScalar (*initialGuess[numComponents])(const PetscReal x[]);
1700: PetscInt c;
1702: for(c = 0; c < numComponents; ++c) {initialGuess[c] = zero;}
1703: DMComputeVertexFunction(user.dm, INSERT_VALUES, u, numComponents, initialGuess, &user);
1704: if (user.debug) {
1705: PetscPrintf(PETSC_COMM_WORLD, "Initial guess\n");
1706: VecView(u, PETSC_VIEWER_STDOUT_WORLD);
1707: }
1708: SNESSolve(snes, PETSC_NULL, u);
1709: SNESGetIterationNumber(snes, &its);
1710: PetscPrintf(PETSC_COMM_WORLD, "Number of SNES iterations = %D\n", its);
1711: ComputeError(u, &error, &user);
1712: PetscPrintf(PETSC_COMM_WORLD, "L_2 Error: %.3g\n", error);
1713: if (user.showSolution) {
1714: PetscPrintf(PETSC_COMM_WORLD, "Solution\n");
1715: VecChop(u, 3.0e-9);
1716: VecView(u, PETSC_VIEWER_STDOUT_WORLD);
1717: }
1718: } else {
1719: PetscReal res = 0.0;
1721: /* Check discretization error */
1722: PetscPrintf(PETSC_COMM_WORLD, "Initial guess\n");
1723: VecView(u, PETSC_VIEWER_STDOUT_WORLD);
1724: ComputeError(u, &error, &user);
1725: PetscPrintf(PETSC_COMM_WORLD, "L_2 Error: %g\n", error);
1726: /* Check residual */
1727: SNESDMComputeFunction(snes, u, r, &user);
1728: PetscPrintf(PETSC_COMM_WORLD, "Initial Residual\n");
1729: VecChop(r, 1.0e-10);
1730: VecView(r, PETSC_VIEWER_STDOUT_WORLD);
1731: VecNorm(r, NORM_2, &res);
1732: PetscPrintf(PETSC_COMM_WORLD, "L_2 Residual: %g\n", res);
1733: /* Check Jacobian */
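    /* Editor's note: because this Stokes problem is linear, the residual can be written
       F(u) = A u + F(0).  The block below assembles A at u, verifies the pressure null space
       against J, and then forms r = A u + F(0); its 2-norm should match the nonlinear residual
       reported above when the assembled Jacobian is consistent with the residual evaluation. */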
1734: {
1735: Vec b;
1736: MatStructure flag;
1737: PetscBool isNull;
1739: SNESDMComputeJacobian(snes, u, &A, &A, &flag, &user);
1740: MatNullSpaceTest(nullSpace, J, &isNull);
1741: if (!isNull) SETERRQ(PETSC_COMM_WORLD, PETSC_ERR_PLIB, "The null space calculated for the system operator is invalid.");
1742: VecDuplicate(u, &b);
1743: VecSet(r, 0.0);
1744: SNESDMComputeFunction(snes, r, b, &user);
1745: MatMult(A, u, r);
1746: VecAXPY(r, 1.0, b);
1747: VecDestroy(&b);
1748: PetscPrintf(PETSC_COMM_WORLD, "Au - b = Au + F(0)\n");
1749: VecChop(r, 1.0e-10);
1750: VecView(r, PETSC_VIEWER_STDOUT_WORLD);
1751: VecNorm(r, NORM_2, &res);
1752: PetscPrintf(PETSC_COMM_WORLD, "Linear L_2 Residual: %g\n", res);
1753: }
1754: }
1756: if (user.runType == RUN_FULL) {
1757: PetscViewer viewer;
1759: PetscViewerCreate(PETSC_COMM_WORLD, &viewer);
1760: /*PetscViewerSetType(viewer, PETSCVIEWERDRAW);
1761: PetscViewerDrawSetInfo(viewer, PETSC_NULL, "Solution", PETSC_DECIDE, PETSC_DECIDE, PETSC_DECIDE, PETSC_DECIDE); */
1762: PetscViewerSetType(viewer, PETSCVIEWERASCII);
1763: PetscViewerFileSetName(viewer, "ex62_sol.vtk");
1764: PetscViewerSetFormat(viewer, PETSC_VIEWER_ASCII_VTK);
1765: DMView(user.dm, viewer);
1766: VecView(u, viewer);
1767: PetscViewerDestroy(&viewer);
1768: }
1770: MatNullSpaceDestroy(&nullSpace);
1771: if (user.jacobianMF) {
1772: VecDestroy(&userJ.u);
1773: }
1774: if (A != J) {
1775: MatDestroy(&A);
1776: }
1777: MatDestroy(&J);
1778: VecDestroy(&u);
1779: VecDestroy(&r);
1780: SNESDestroy(&snes);
1781: DMDestroy(&user.dm);
1782: PetscFinalize();
1783: return 0;
1784: }