Actual source code: ex52.c
petsc-3.4.5 2014-06-29
1: static const char help[] = "Testbed for FEM operations on the GPU.\n\n";
3: /*
4: #if 0
5: - Automate testing of different:
6: - elements, batches, blocks, operators
7: - Process log_summary with Python (use existing tools)
8: - Put results in PyTables
10: PYLITH:
11: - Redo setup/cleanup in PyLith
12: - Test with examples/3d/hex8/step01.cfg (Brad is making a flag)
13: - Code up Jacobian (looks like original code)
14: - Consider taking in coordinates rather than J^{-T}/|J|
16: MUST CHECK WITH:
17: - 3D elements
18: - Different quadrature (1pt and 3pt)
19: - Different basis (linear and quadratic)
20: #endif
21: */
23: #include<petscdmplex.h>
24: #include<petscsnes.h>
26: typedef enum {LAPLACIAN, ELASTICITY} OpType;
28: typedef struct {
29: DM dm; /* REQUIRED in order to use SNES evaluation functions */
30: PetscInt debug; /* The debugging level */
31: PetscMPIInt rank; /* The process rank */
32: PetscMPIInt numProcs; /* The number of processes */
33: PetscInt dim; /* The topological mesh dimension */
34: PetscBool interpolate; /* Generate intermediate mesh elements */
35: PetscReal refinementLimit; /* The largest allowable cell volume */
36: char partitioner[2048]; /* The graph partitioner */
37: PetscBool computeFunction; /* The flag for computing a residual */
38: PetscBool computeJacobian; /* The flag for computing a Jacobian */
39: PetscBool batch; /* The flag for batch assembly */
40: PetscBool gpu; /* The flag for GPU integration */
41: OpType op; /* The type of PDE operator (should use FFC/Ignition here) */
42: PetscBool showResidual, showJacobian;
43: PetscLogEvent createMeshEvent, residualEvent, residualBatchEvent, jacobianEvent, jacobianBatchEvent, integrateBatchCPUEvent, integrateBatchGPUEvent, integrateGPUOnlyEvent;
44: /* GPU partitioning */
45: PetscInt numBatches; /* The number of cell batches per kernel */
46: PetscInt numBlocks; /* The number of concurrent blocks per kernel */
47: /* Element quadrature */
48: PetscQuadrature q;
49: } AppCtx;
51: /*------------------------------------------------------------------------------
52: This code can be generated using 'bin/pythonscripts/PetscGenerateFEMQuadrature.py dim 1 numComp 1 laplacian src/snes/examples/tutorials/ex52.h'
53: -----------------------------------------------------------------------------*/
54: #include "ex52.h"
56: void quadratic_2d(const PetscReal x[], PetscScalar u[])
57: {
58: u[0] = x[0]*x[0] + x[1]*x[1];
59: };
61: void quadratic_2d_elas(const PetscReal x[], PetscScalar u[])
62: {
63: u[0] = x[0]*x[0] + x[1]*x[1];
64: u[1] = x[0]*x[0] + x[1]*x[1];
65: };
69: PetscErrorCode ProcessOptions(MPI_Comm comm, AppCtx *options)
70: {
71: const char *opTypes[2] = {"laplacian", "elasticity"};
72: PetscInt op;
76: options->debug = 0;
77: options->dim = 2;
78: options->interpolate = PETSC_FALSE;
79: options->refinementLimit = 0.0;
80: options->computeFunction = PETSC_FALSE;
81: options->computeJacobian = PETSC_FALSE;
82: options->batch = PETSC_FALSE;
83: options->gpu = PETSC_FALSE;
84: options->numBatches = 1;
85: options->numBlocks = 1;
86: options->op = LAPLACIAN;
87: options->showResidual = PETSC_TRUE;
88: options->showJacobian = PETSC_TRUE;
90: MPI_Comm_size(comm, &options->numProcs);
91: MPI_Comm_rank(comm, &options->rank);
92: PetscOptionsBegin(comm, "", "Bratu Problem Options", "DMPLEX");
93: PetscOptionsInt("-debug", "The debugging level", "ex52.c", options->debug, &options->debug, NULL);
94: PetscOptionsInt("-dim", "The topological mesh dimension", "ex52.c", options->dim, &options->dim, NULL);
95: PetscOptionsBool("-interpolate", "Generate intermediate mesh elements", "ex52.c", options->interpolate, &options->interpolate, NULL);
96: PetscOptionsReal("-refinement_limit", "The largest allowable cell volume", "ex52.c", options->refinementLimit, &options->refinementLimit, NULL);
97: PetscStrcpy(options->partitioner, "chaco");
98: PetscOptionsString("-partitioner", "The graph partitioner", "ex52.c", options->partitioner, options->partitioner, 2048, NULL);
99: PetscOptionsBool("-compute_function", "Compute the residual", "ex52.c", options->computeFunction, &options->computeFunction, NULL);
100: PetscOptionsBool("-compute_jacobian", "Compute the Jacobian", "ex52.c", options->computeJacobian, &options->computeJacobian, NULL);
101: PetscOptionsBool("-batch", "Use the batch assembly method", "ex52.c", options->batch, &options->batch, NULL);
102: PetscOptionsBool("-gpu", "Use the GPU for integration method", "ex52.c", options->gpu, &options->gpu, NULL);
103: PetscOptionsInt("-gpu_batches", "The number of cell batches per kernel", "ex52.c", options->numBatches, &options->numBatches, NULL);
104: PetscOptionsInt("-gpu_blocks", "The number of concurrent blocks per kernel", "ex52.c", options->numBlocks, &options->numBlocks, NULL);
106: op = options->op;
108: PetscOptionsEList("-op_type","Type of PDE operator","ex52.c",opTypes,2,opTypes[options->op],&op,NULL);
110: options->op = (OpType) op;
112: PetscOptionsBool("-show_residual", "Output the residual for verification", "ex52.c", options->showResidual, &options->showResidual, NULL);
113: PetscOptionsBool("-show_jacobian", "Output the Jacobian for verification", "ex52.c", options->showJacobian, &options->showJacobian, NULL);
114: PetscOptionsEnd();
116: PetscLogEventRegister("CreateMesh", DM_CLASSID, &options->createMeshEvent);
117: PetscLogEventRegister("Residual", SNES_CLASSID, &options->residualEvent);
118: PetscLogEventRegister("ResidualBatch", SNES_CLASSID, &options->residualBatchEvent);
119: PetscLogEventRegister("Jacobian", SNES_CLASSID, &options->jacobianEvent);
120: PetscLogEventRegister("JacobianBatch", SNES_CLASSID, &options->jacobianBatchEvent);
121: PetscLogEventRegister("IntegBatchCPU", SNES_CLASSID, &options->integrateBatchCPUEvent);
122: PetscLogEventRegister("IntegBatchGPU", SNES_CLASSID, &options->integrateBatchGPUEvent);
123: PetscLogEventRegister("IntegGPUOnly", SNES_CLASSID, &options->integrateGPUOnlyEvent);
124: return(0);
125: };
129: PetscErrorCode CreateMesh(MPI_Comm comm, AppCtx *user, DM *dm)
130: {
131: PetscInt dim = user->dim;
132: PetscBool interpolate = user->interpolate;
133: PetscReal refinementLimit = user->refinementLimit;
134: const char *partitioner = user->partitioner;
138: PetscLogEventBegin(user->createMeshEvent,0,0,0,0);
139: DMPlexCreateBoxMesh(comm, dim, interpolate, dm);
140: {
141: DM refinedMesh = NULL;
142: DM distributedMesh = NULL;
144: /* Refine mesh using a volume constraint */
145: DMPlexSetRefinementLimit(*dm, refinementLimit);
146: DMRefine(*dm, comm, &refinedMesh);
147: if (refinedMesh) {
148: DMDestroy(dm);
149: *dm = refinedMesh;
150: }
151: /* Distribute mesh over processes */
152: DMPlexDistribute(*dm, partitioner, 0, &distributedMesh);
153: if (distributedMesh) {
154: DMDestroy(dm);
155: *dm = distributedMesh;
156: }
157: }
158: DMSetFromOptions(*dm);
159: PetscLogEventEnd(user->createMeshEvent,0,0,0,0);
161: user->dm = *dm;
162: return(0);
163: }
167: PetscErrorCode SetupQuadrature(AppCtx *user)
168: {
170: switch (user->dim) {
171: case 2:
172: case 3:
173: user->q.numQuadPoints = NUM_QUADRATURE_POINTS_0;
174: user->q.quadPoints = points_0;
175: user->q.quadWeights = weights_0;
176: user->q.numBasisFuncs = NUM_BASIS_FUNCTIONS_0;
177: user->q.numComponents = NUM_BASIS_COMPONENTS_0;
178: user->q.basis = Basis_0;
179: user->q.basisDer = BasisDerivatives_0;
180: break;
181: default:
182: SETERRQ1(PETSC_COMM_WORLD, PETSC_ERR_ARG_OUTOFRANGE, "Invalid dimension %d", user->dim);
183: }
184: return(0);
185: }
189: PetscErrorCode SetupSection(DM dm, AppCtx *user)
190: {
191: PetscSection section;
192: /* These can be generated using config/PETSc/FEM.py */
193: PetscInt dim = user->dim;
194: PetscInt numBC = 0;
195: PetscInt numComp[1] = {NUM_BASIS_COMPONENTS_0};
199: if (dim != SPATIAL_DIM_0) SETERRQ2(PetscObjectComm((PetscObject)dm), PETSC_ERR_ARG_SIZ, "Spatial dimension %d should be %d", dim, SPATIAL_DIM_0);
200: DMPlexCreateSection(dm, dim, 1, numComp, numDof_0, numBC, NULL, NULL, §ion);
201: DMSetDefaultSection(dm, section);
202: return(0);
203: }
207: /*
208: FormInitialGuess - Forms initial approximation.
210: Input Parameters:
211: + user - user-defined application context
212: - guessFunc - The coordinate function to use for the guess
214: Output Parameter:
215: . X - vector
216: */
217: PetscErrorCode FormInitialGuess(Vec X, void (*guessFunc)(const PetscReal [], PetscScalar []), InsertMode mode, AppCtx *user)
218: {
219: Vec localX, coordinates;
220: PetscSection section, cSection;
221: PetscInt vStart, vEnd, v;
225: DMGetLocalVector(user->dm, &localX);
226: DMPlexGetDepthStratum(user->dm, 0, &vStart, &vEnd);
227: DMGetDefaultSection(user->dm, §ion);
228: DMPlexGetCoordinateSection(user->dm, &cSection);
229: DMGetCoordinatesLocal(user->dm, &coordinates);
230: for (v = vStart; v < vEnd; ++v) {
231: PetscScalar values[3];
232: PetscScalar *coords;
234: VecGetValuesSection(coordinates, cSection, v, &coords);
235: (*guessFunc)(coords, values);
236: VecSetValuesSection(localX, section, v, values, mode);
237: }
239: DMLocalToGlobalBegin(user->dm, localX, INSERT_VALUES, X);
240: DMLocalToGlobalEnd(user->dm, localX, INSERT_VALUES, X);
241: DMRestoreLocalVector(user->dm, &localX);
242: return(0);
243: }
247: /*
248: dm - The mesh
249: X - Local intput vector
250: F - Local output vector
251: */
252: PetscErrorCode FormFunctionLocalLaplacian(DM dm, Vec X, Vec F, AppCtx *user)
253: {
254: /*PetscScalar (*rhsFunc)(const PetscReal []) = user->rhsFunc; */
255: const PetscInt debug = user->debug;
256: const PetscInt dim = user->dim;
257: const PetscInt numQuadPoints = user->q.numQuadPoints;
258: const PetscReal *quadPoints = user->q.quadPoints;
259: const PetscReal *quadWeights = user->q.quadWeights;
260: const PetscInt numBasisFuncs = user->q.numBasisFuncs;
261: const PetscReal *basis = user->q.basis;
262: const PetscReal *basisDer = user->q.basisDer;
263: PetscReal *coords, *v0, *J, *invJ, detJ;
264: PetscScalar *realSpaceDer, *fieldGrad, *elemVec;
265: PetscInt cStart, cEnd, c, p;
266: PetscErrorCode ierr;
269: PetscLogEventBegin(user->residualEvent,0,0,0,0);
270: VecSet(F, 0.0);
271: PetscMalloc3(dim,PetscScalar,&realSpaceDer,dim,PetscScalar,&fieldGrad,numBasisFuncs,PetscScalar,&elemVec);
272: PetscMalloc4(dim,PetscReal,&coords,dim,PetscReal,&v0,dim*dim,PetscReal,&J,dim*dim,PetscReal,&invJ);
273: DMPlexGetHeightStratum(dm, 0, &cStart, &cEnd);
274: for (c = cStart; c < cEnd; ++c) {
275: PetscScalar *x;
276: PetscInt q, f, d, e;
278: PetscMemzero(elemVec, numBasisFuncs * sizeof(PetscScalar));
279: DMPlexComputeCellGeometry(dm, c, v0, J, invJ, &detJ);
280: if (detJ <= 0.0) SETERRQ2(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Invalid determinant %g for element %d", detJ, c);
281: DMPlexVecGetClosure(dm, NULL, X, c, NULL, &x);
282: if (debug > 1) {DMPrintCellVector(c, "Solution", numBasisFuncs, x);}
284: for (q = 0; q < numQuadPoints; ++q) {
285: PetscScalar fieldVal = 0.0;
287: if (debug > 1) {PetscPrintf(PETSC_COMM_SELF, " quad point %d\n", q);}
288: for (d = 0; d < dim; ++d) {
289: fieldGrad[d] = 0.0;
290: coords[d] = v0[d];
291: for (e = 0; e < dim; ++e) coords[d] += J[d*dim+e]*(quadPoints[q*dim+e] + 1.0);
292: if (debug > 1) {PetscPrintf(PETSC_COMM_SELF, " coords[%d] %g\n", d, coords[d]);}
293: }
294: for (f = 0; f < numBasisFuncs; ++f) {
295: fieldVal += x[f]*basis[q*numBasisFuncs+f];
297: for (d = 0; d < dim; ++d) {
298: realSpaceDer[d] = 0.0;
299: for (e = 0; e < dim; ++e) realSpaceDer[d] += invJ[e*dim+d]*basisDer[(q*numBasisFuncs+f)*dim+e];
300: fieldGrad[d] += realSpaceDer[d]*x[f];
301: }
302: }
303: if (debug > 1) {
304: for (d = 0; d < dim; ++d) PetscPrintf(PETSC_COMM_SELF, " fieldGrad[%d] %g\n", d, fieldGrad[d]);
305: }
306: const PetscScalar funcVal = 0.0; /* (*rhsFunc)(coords); */
307: for (f = 0; f < numBasisFuncs; ++f) {
308: /* Constant term: -f(x) */
309: elemVec[f] -= basis[q*numBasisFuncs+f]*funcVal*quadWeights[q]*detJ;
310: /* Linear term: -\Delta u */
311: PetscScalar product = 0.0;
312: for (d = 0; d < dim; ++d) {
313: realSpaceDer[d] = 0.0;
314: for (e = 0; e < dim; ++e) realSpaceDer[d] += invJ[e*dim+d]*basisDer[(q*numBasisFuncs+f)*dim+e];
315: product += realSpaceDer[d]*fieldGrad[d];
316: }
317: elemVec[f] += product*quadWeights[q]*detJ;
318: /* Nonlinear term: -\lambda e^{u} */
319: elemVec[f] -= basis[q*numBasisFuncs+f]*0.0*PetscExpScalar(fieldVal)*quadWeights[q]*detJ;
320: }
321: }
322: DMPlexVecRestoreClosure(dm, NULL, X, c, NULL, &x);
323: if (debug) {DMPrintCellVector(c, "Residual", numBasisFuncs, elemVec);}
324: DMPlexVecSetClosure(dm, NULL, F, c, elemVec, ADD_VALUES);
325: }
326: PetscLogFlops((cEnd-cStart)*numQuadPoints*numBasisFuncs*(dim*(dim*5+4)+14));
327: PetscFree3(realSpaceDer,fieldGrad,elemVec);
328: PetscFree4(coords,v0,J,invJ);
330: PetscPrintf(PETSC_COMM_WORLD, "Residual:\n");
331: for (p = 0; p < user->numProcs; ++p) {
332: if (p == user->rank) {VecView(F, PETSC_VIEWER_STDOUT_SELF);}
333: PetscBarrier((PetscObject) dm);
334: }
335: PetscLogEventEnd(user->residualEvent,0,0,0,0);
336: return(0);
337: }
341: /*
342: dm - The mesh
343: X - Local intput vector
344: F - Local output vector
345: */
346: PetscErrorCode FormFunctionLocalElasticity(DM dm, Vec X, Vec F, AppCtx *user)
347: {
348: /*PetscScalar (*rhsFunc)(const PetscReal []) = user->rhsFunc;*/
349: const PetscInt debug = user->debug;
350: const PetscInt dim = user->dim;
351: const PetscInt numQuadPoints = user->q.numQuadPoints;
352: const PetscReal *quadPoints = user->q.quadPoints;
353: const PetscReal *quadWeights = user->q.quadWeights;
354: const PetscInt numBasisFuncs = user->q.numBasisFuncs;
355: const PetscInt numBasisComps = user->q.numComponents;
356: const PetscReal *basis = user->q.basis;
357: const PetscReal *basisDer = user->q.basisDer;
358: PetscReal *coords, *v0, *J, *invJ, detJ;
359: PetscScalar *realSpaceDer, *fieldGrad, *elemVec;
360: PetscInt cStart, cEnd, c, p;
361: PetscErrorCode ierr;
364: PetscLogEventBegin(user->residualEvent,0,0,0,0);
365: VecSet(F, 0.0);
366: PetscMalloc3(dim,PetscScalar,&realSpaceDer,dim*numBasisComps,PetscScalar,&fieldGrad,numBasisFuncs*numBasisComps,PetscScalar,&elemVec);
367: PetscMalloc4(dim,PetscReal,&coords,dim,PetscReal,&v0,dim*dim,PetscReal,&J,dim*dim,PetscReal,&invJ);
368: DMPlexGetHeightStratum(dm, 0, &cStart, &cEnd);
369: for (c = cStart; c < cEnd; ++c) {
370: PetscScalar *x;
371: PetscInt q, d, e, f, comp;
373: PetscMemzero(elemVec, numBasisFuncs*numBasisComps * sizeof(PetscScalar));
374: DMPlexComputeCellGeometry(dm, c, v0, J, invJ, &detJ);
375: if (detJ <= 0.0) SETERRQ2(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Invalid determinant %g for element %d", detJ, c);
376: DMPlexVecGetClosure(dm, NULL, X, c, NULL, &x);
377: if (debug > 1) {DMPrintCellVector(c, "Solution", numBasisFuncs*numBasisComps, x);}
379: for (q = 0; q < numQuadPoints; ++q) {
380: PetscScalar fieldVal[3] = {0.0,0.0,0.0};
382: if (debug > 1) {PetscPrintf(PETSC_COMM_SELF, " quad point %d\n", q);}
383: for (d = 0; d < dim; ++d) {
384: for (comp = 0; comp < numBasisComps; ++comp) fieldGrad[comp*dim+d] = 0.0;
385: coords[d] = v0[d];
386: for (e = 0; e < dim; ++e) coords[d] += J[d*dim+e]*(quadPoints[q*dim+e] + 1.0);
387: if (debug > 1) {PetscPrintf(PETSC_COMM_SELF, " coords[%d] %g\n", d, coords[d]);}
388: }
389: for (f = 0; f < numBasisFuncs; ++f) {
390: for (comp = 0; comp < numBasisComps; ++comp) {
391: const PetscInt cidx = f*numBasisComps+comp;
393: fieldVal[comp] += x[cidx]*basis[q*numBasisFuncs*numBasisComps+cidx];
395: for (d = 0; d < dim; ++d) {
396: realSpaceDer[d] = 0.0;
397: for (e = 0; e < dim; ++e) realSpaceDer[d] += invJ[e*dim+d]*basisDer[(q*numBasisFuncs*numBasisComps+cidx)*dim+e];
398: fieldGrad[comp*dim+d] += realSpaceDer[d]*x[cidx];
399: }
400: }
401: }
402: if (debug > 1) {
403: for (comp = 0; comp < numBasisComps; ++comp) {
404: for (d = 0; d < dim; ++d) {
405: PetscPrintf(PETSC_COMM_SELF, " field %d gradient[%d] %g\n", comp, d, fieldGrad[comp*dim+d]);
406: }
407: }
408: }
409: const PetscScalar funcVal[3] = {0.0, 0.0, 0.0}; /* (*rhsFunc)(coords); */
410: for (f = 0; f < numBasisFuncs; ++f) {
411: for (comp = 0; comp < numBasisComps; ++comp) {
412: const PetscInt cidx = f*numBasisComps+comp;
414: /* Constant term: -f(x) */
415: elemVec[cidx] -= basis[q*numBasisFuncs*numBasisComps+cidx]*funcVal[comp]*quadWeights[q]*detJ;
416: /* Linear term: -\Delta u */
417: PetscScalar product = 0.0;
418: for (d = 0; d < dim; ++d) {
419: realSpaceDer[d] = 0.0;
420: for (e = 0; e < dim; ++e) {
421: realSpaceDer[d] += invJ[e*dim+d]*basisDer[(q*numBasisFuncs*numBasisComps+cidx)*dim+e];
422: }
423: product += realSpaceDer[d]*0.5*(fieldGrad[comp*dim+d] + fieldGrad[d*dim+comp]);
424: }
425: elemVec[cidx] += product*quadWeights[q]*detJ;
426: /* Nonlinear term: -\lambda e^{u} */
427: elemVec[cidx] -= basis[q*numBasisFuncs*numBasisComps+cidx]*0.0*PetscExpScalar(fieldVal[comp])*quadWeights[q]*detJ;
428: }
429: }
430: }
431: DMPlexVecRestoreClosure(dm, NULL, X, c, NULL, &x);
432: if (debug) {DMPrintCellVector(c, "Residual", numBasisFuncs*numBasisComps, elemVec);}
433: DMPlexVecSetClosure(dm, NULL, F, c, elemVec, ADD_VALUES);
434: }
435: PetscLogFlops((cEnd-cStart)*numQuadPoints*numBasisFuncs*(dim*(dim*5+4)+14));
436: PetscFree3(realSpaceDer,fieldGrad,elemVec);
437: PetscFree4(coords,v0,J,invJ);
439: PetscPrintf(PETSC_COMM_WORLD, "Residual:\n");
440: for (p = 0; p < user->numProcs; ++p) {
441: if (p == user->rank) {VecView(F, PETSC_VIEWER_STDOUT_SELF);}
442: PetscBarrier((PetscObject) dm);
443: }
444: PetscLogEventEnd(user->residualEvent,0,0,0,0);
445: return(0);
446: }
448: PETSC_EXTERN PetscErrorCode IntegrateElementBatchGPU(PetscInt Ne, PetscInt Nbatch, PetscInt Nbc, PetscInt Nbl, const PetscScalar coefficients[], const PetscReal jacobianInverses[], const PetscReal jacobianDeterminants[], PetscScalar elemVec[], PetscLogEvent event, PetscInt debug);
450: void f1_laplacian(PetscScalar u, const PetscScalar gradU[], PetscScalar f1[])
451: {
452: const PetscInt dim = SPATIAL_DIM_0;
453: PetscInt d;
455: for (d = 0; d < dim; ++d) f1[d] = gradU[d];
456: return;
457: }
461: PetscErrorCode IntegrateLaplacianBatchCPU(PetscInt Ne, PetscInt Nb, const PetscScalar coefficients[], const PetscReal jacobianInverses[], const PetscReal jacobianDeterminants[], PetscInt Nq, const PetscReal quadPoints[], const PetscReal quadWeights[], const PetscReal basisTabulation[], const PetscReal basisDerTabulation[], PetscScalar elemVec[], AppCtx *user)
462: {
463: const PetscInt debug = user->debug;
464: const PetscInt dim = SPATIAL_DIM_0;
465: PetscInt e;
469: PetscLogEventBegin(user->integrateBatchCPUEvent,0,0,0,0);
470: for (e = 0; e < Ne; ++e) {
471: const PetscReal detJ = jacobianDeterminants[e];
472: const PetscReal *invJ = &jacobianInverses[e*dim*dim];
473: PetscScalar f1[SPATIAL_DIM_0 /*1*/]; /* Nq = 1 */
474: PetscInt q, b, d, f;
476: if (debug > 1) {
477: PetscPrintf(PETSC_COMM_SELF, " detJ: %g\n", detJ);
478: PetscPrintf(PETSC_COMM_SELF, " invJ: %g %g %g %g\n", invJ[0], invJ[1], invJ[2], invJ[3]);
479: }
480: for (q = 0; q < Nq; ++q) {
481: if (debug) {PetscPrintf(PETSC_COMM_SELF, " quad point %d\n", q);}
482: PetscScalar u = 0.0;
483: PetscScalar gradU[SPATIAL_DIM_0];
485: for (d = 0; d < dim; ++d) gradU[d] = 0.0;
486: for (b = 0; b < Nb; ++b) {
487: PetscScalar realSpaceDer[SPATIAL_DIM_0];
489: u += coefficients[e*Nb+b]*basisTabulation[q*Nb+b];
490: for (d = 0; d < dim; ++d) {
491: realSpaceDer[d] = 0.0;
492: for (f = 0; f < dim; ++f) {
493: realSpaceDer[d] += invJ[f*dim+d]*basisDerTabulation[(q*Nb+b)*dim+f];
494: }
495: gradU[d] += coefficients[e*Nb+b]*realSpaceDer[d];
496: }
497: }
498: f1_laplacian(u, gradU, &f1[q*dim]);
499: f1[q*dim+0] *= detJ*quadWeights[q];
500: f1[q*dim+1] *= detJ*quadWeights[q];
501: if (debug > 1) {
502: PetscPrintf(PETSC_COMM_SELF, " f1[%d]: %g\n", 0, f1[q*dim+0]);
503: PetscPrintf(PETSC_COMM_SELF, " f1[%d]: %g\n", 1, f1[q*dim+1]);
504: }
505: }
506: for (b = 0; b < Nb; ++b) {
507: elemVec[e*Nb+b] = 0.0;
508: for (q = 0; q < Nq; ++q) {
509: PetscScalar realSpaceDer[SPATIAL_DIM_0];
511: for (d = 0; d < dim; ++d) {
512: realSpaceDer[d] = 0.0;
513: for (f = 0; f < dim; ++f) {
514: realSpaceDer[d] += invJ[f*dim+d]*basisDerTabulation[(q*Nb+b)*dim+f];
515: }
516: elemVec[e*Nb+b] += realSpaceDer[d]*f1[q*dim+d];
517: }
518: }
519: }
520: }
521: PetscLogFlops((((2+(2+2*dim)*dim)*Nb+2*dim)*Nq + (2+2*dim)*dim*Nq*Nb)*Ne);
522: PetscLogEventEnd(user->integrateBatchCPUEvent,0,0,0,0);
523: return(0);
524: };
526: void f1_elasticity(PetscScalar u[], const PetscScalar gradU[], PetscScalar f1[])
527: {
528: const PetscInt dim = SPATIAL_DIM_0;
529: const PetscInt Ncomp = dim;
530: PetscInt comp, d;
532: for (comp = 0; comp < Ncomp; ++comp) {
533: for (d = 0; d < dim; ++d) {
534: f1[comp*dim+d] = 0.5*(gradU[comp*dim+d] + gradU[d*dim+comp]);
535: }
536: }
537: return;
538: }
542: PetscErrorCode IntegrateElasticityBatchCPU(PetscInt Ne, PetscInt Nb, PetscInt Ncomp, const PetscScalar coefficients[], const PetscReal jacobianInverses[], const PetscReal jacobianDeterminants[], PetscInt Nq, const PetscReal quadPoints[], const PetscReal quadWeights[], const PetscReal basisTabulation[], const PetscReal basisDerTabulation[], PetscScalar elemVec[], AppCtx *user)
543: {
544: const PetscInt debug = user->debug;
545: const PetscInt dim = SPATIAL_DIM_0;
546: PetscInt e;
550: PetscLogEventBegin(user->integrateBatchCPUEvent,0,0,0,0);
551: for (e = 0; e < Ne; ++e) {
552: const PetscReal detJ = jacobianDeterminants[e];
553: const PetscReal *invJ = &jacobianInverses[e*dim*dim];
554: PetscScalar f1[SPATIAL_DIM_0*SPATIAL_DIM_0 /*1*/]; /* Nq = 1 */
555: PetscInt q, b, comp, i;
557: if (debug > 1) {
558: PetscPrintf(PETSC_COMM_SELF, " detJ: %g\n", detJ);
559: PetscPrintf(PETSC_COMM_SELF, " invJ:");
560: for (i = 0; i < dim*dim; ++i) {
561: PetscPrintf(PETSC_COMM_SELF, " %g", invJ[i]);
562: }
563: PetscPrintf(PETSC_COMM_SELF, "\n");
564: }
565: for (q = 0; q < Nq; ++q) {
566: if (debug) {PetscPrintf(PETSC_COMM_SELF, " quad point %d\n", q);}
567: PetscScalar u[SPATIAL_DIM_0];
568: PetscScalar gradU[SPATIAL_DIM_0*SPATIAL_DIM_0];
570: for (i = 0; i < dim; ++i) u[i] = 0.0;
571: for (i = 0; i < dim*dim; ++i) gradU[i] = 0.0;
572: for (b = 0; b < Nb; ++b) {
573: for (comp = 0; comp < Ncomp; ++comp) {
574: const PetscInt cidx = b*Ncomp+comp;
575: PetscScalar realSpaceDer[SPATIAL_DIM_0];
576: PetscInt d, f;
578: u[comp] += coefficients[e*Nb*Ncomp+cidx]*basisTabulation[q*Nb*Ncomp+cidx];
579: for (d = 0; d < dim; ++d) {
580: realSpaceDer[d] = 0.0;
581: for (f = 0; f < dim; ++f) {
582: realSpaceDer[d] += invJ[f*dim+d]*basisDerTabulation[(q*Nb*Ncomp+cidx)*dim+f];
583: }
584: gradU[comp*dim+d] += coefficients[e*Nb*Ncomp+cidx]*realSpaceDer[d];
585: }
586: }
587: }
588: f1_elasticity(u, gradU, &f1[q*Ncomp*dim]);
589: for (i = 0; i < Ncomp*dim; ++i) {
590: f1[q*Ncomp*dim+i] *= detJ*quadWeights[q];
591: }
592: if (debug > 1) {
593: PetscInt c, d;
594: for (c = 0; c < Ncomp; ++c) {
595: for (d = 0; d < dim; ++d) {
596: PetscPrintf(PETSC_COMM_SELF, " gradU[%d]_%c: %g f1[%d]_%c: %g\n", c, 'x'+d, gradU[c*dim+d], c, 'x'+d, f1[(q*Ncomp + c)*dim+d]);
597: }
598: }
599: }
600: }
601: for (b = 0; b < Nb; ++b) {
602: for (comp = 0; comp < Ncomp; ++comp) {
603: const PetscInt cidx = b*Ncomp+comp;
604: PetscInt q, d, f;
606: elemVec[e*Nb*Ncomp+cidx] = 0.0;
607: for (q = 0; q < Nq; ++q) {
608: PetscScalar realSpaceDer[SPATIAL_DIM_0];
610: for (d = 0; d < dim; ++d) {
611: realSpaceDer[d] = 0.0;
612: for (f = 0; f < dim; ++f) {
613: realSpaceDer[d] += invJ[f*dim+d]*basisDerTabulation[(q*Nb*Ncomp+cidx)*dim+f];
614: }
615: elemVec[e*Nb*Ncomp+cidx] += realSpaceDer[d]*f1[(q*Ncomp+comp)*dim+d];
616: }
617: }
618: }
619: }
620: }
621: PetscLogFlops((((2+(2+2*dim)*dim)*Ncomp*Nb+(2+2)*dim*Ncomp)*Nq + (2+2*dim)*dim*Nq*Ncomp*Nb)*Ne);
622: PetscLogEventEnd(user->integrateBatchCPUEvent,0,0,0,0);
623: return(0);
624: };
628: /*
629: dm - The mesh
630: X - Local intput vector
631: F - Local output vector
632: */
633: PetscErrorCode FormFunctionLocalBatch(DM dm, Vec X, Vec F, AppCtx *user)
634: {
635: /* PetscScalar (*rhsFunc)(const PetscReal []) = user->rhsFunc; */
636: const PetscInt debug = user->debug;
637: const PetscInt dim = user->dim;
638: const PetscInt numQuadPoints = user->q.numQuadPoints;
639: const PetscReal *quadPoints = user->q.quadPoints;
640: const PetscReal *quadWeights = user->q.quadWeights;
641: const PetscInt numBasisFuncs = user->q.numBasisFuncs;
642: const PetscInt numBasisComps = user->q.numComponents;
643: const PetscReal *basis = user->q.basis;
644: const PetscReal *basisDer = user->q.basisDer;
645: PetscReal *coords, *v0, *J, *invJ, *detJ;
646: PetscScalar *elemVec;
647: PetscInt cStart, cEnd, c;
648: PetscErrorCode ierr;
651: PetscLogEventBegin(user->residualBatchEvent,0,0,0,0);
652: VecSet(F, 0.0);
653: /* PetscMalloc3(dim,PetscScalar,&realSpaceDer,dim,PetscScalar,&fieldGrad,numBasisFuncs,PetscScalar,&elemVec); */
654: PetscMalloc3(dim,PetscReal,&coords,dim,PetscReal,&v0,dim*dim,PetscReal,&J);
655: DMPlexGetHeightStratum(dm, 0, &cStart, &cEnd);
656: const PetscInt numCells = cEnd - cStart;
657: PetscScalar *u;
659: PetscMalloc4(numCells*numBasisFuncs*numBasisComps,PetscScalar,&u,numCells*dim*dim,PetscReal,&invJ,numCells,PetscReal,&detJ,numCells*numBasisFuncs*numBasisComps,PetscScalar,&elemVec);
660: for (c = cStart; c < cEnd; ++c) {
661: PetscScalar *x;
662: PetscInt f;
664: DMPlexComputeCellGeometry(dm, c, v0, J, &invJ[c*dim*dim], &detJ[c]);
665: DMPlexVecGetClosure(dm, NULL, X, c, NULL, &x);
667: for (f = 0; f < numBasisFuncs*numBasisComps; ++f) {
668: u[c*numBasisFuncs*numBasisComps+f] = x[f];
669: }
670: DMPlexVecRestoreClosure(dm, NULL, X, c, NULL, &x);
671: }
672: /* Conforming batches */
673: PetscInt blockSize = numBasisFuncs*numQuadPoints;
674: PetscInt numBlocks = user->numBlocks;
675: PetscInt batchSize = numBlocks * blockSize;
676: PetscInt numBatches = user->numBatches;
677: PetscInt numChunks = numCells / (numBatches*batchSize);
678: if (user->gpu) {
679: PetscLogEventBegin(user->integrateBatchGPUEvent,0,0,0,0);
680: IntegrateElementBatchGPU(numChunks*numBatches*batchSize, numBatches, batchSize, numBlocks, u, invJ, detJ, elemVec, user->integrateGPUOnlyEvent, user->debug);
681: PetscLogFlops((((2+(2+2*dim)*dim)*numBasisComps*numBasisFuncs+(2+2)*dim*numBasisComps)*numQuadPoints + (2+2*dim)*dim*numQuadPoints*numBasisComps*numBasisFuncs)*numChunks*numBatches*batchSize);
682: PetscLogEventEnd(user->integrateBatchGPUEvent,0,0,0,0);
683: } else {
684: switch (user->op) {
685: case LAPLACIAN:
686: IntegrateLaplacianBatchCPU(numChunks*numBatches*batchSize, numBasisFuncs, u, invJ, detJ, numQuadPoints, quadPoints, quadWeights, basis, basisDer, elemVec, user);break;
687: case ELASTICITY:
688: IntegrateElasticityBatchCPU(numChunks*numBatches*batchSize, numBasisFuncs, numBasisComps, u, invJ, detJ, numQuadPoints, quadPoints, quadWeights, basis, basisDer, elemVec, user);break;
689: default:
690: SETERRQ1(PETSC_COMM_WORLD, PETSC_ERR_ARG_OUTOFRANGE, "Invalid PDE operator %d", user->op);
691: }
692: }
693: /* Remainder */
694: PetscInt numRemainder = numCells % (numBatches * batchSize);
695: PetscInt offset = numCells - numRemainder;
696: switch (user->op) {
697: case LAPLACIAN:
698: IntegrateLaplacianBatchCPU(numRemainder, numBasisFuncs, &u[offset*numBasisFuncs*numBasisComps], &invJ[offset*dim*dim], &detJ[offset],
699: numQuadPoints, quadPoints, quadWeights, basis, basisDer, &elemVec[offset*numBasisFuncs*numBasisComps], user);break;
700: case ELASTICITY:
701: IntegrateElasticityBatchCPU(numRemainder, numBasisFuncs, numBasisComps, &u[offset*numBasisFuncs*numBasisComps], &invJ[offset*dim*dim], &detJ[offset],
702: numQuadPoints, quadPoints, quadWeights, basis, basisDer, &elemVec[offset*numBasisFuncs*numBasisComps], user);break;
703: default:
704: SETERRQ1(PETSC_COMM_WORLD, PETSC_ERR_ARG_OUTOFRANGE, "Invalid PDE operator %d", user->op);
705: }
706: for (c = cStart; c < cEnd; ++c) {
707: if (debug) {DMPrintCellVector(c, "Residual", numBasisFuncs*numBasisComps, &elemVec[c*numBasisFuncs*numBasisComps]);}
708: DMPlexVecSetClosure(dm, NULL, F, c, &elemVec[c*numBasisFuncs*numBasisComps], ADD_VALUES);
709: }
710: PetscFree4(u,invJ,detJ,elemVec);
711: PetscFree3(coords,v0,J);
712: if (user->showResidual) {
713: PetscInt p;
714: PetscPrintf(PETSC_COMM_WORLD, "Residual:\n");
715: for (p = 0; p < user->numProcs; ++p) {
716: if (p == user->rank) {VecView(F, PETSC_VIEWER_STDOUT_SELF);}
717: PetscBarrier((PetscObject) dm);
718: }
719: }
720: PetscLogEventEnd(user->residualBatchEvent,0,0,0,0);
721: return(0);
722: }
726: /*
727: dm - The mesh
728: X - The local input vector
729: Jac - The output matrix
730: */
731: PetscErrorCode FormJacobianLocalLaplacian(DM dm, Vec X, Mat Jac, Mat JacPre, MatStructure *flag, AppCtx *user)
732: {
733: const PetscInt debug = user->debug;
734: const PetscInt dim = user->dim;
735: const PetscInt numQuadPoints = user->q.numQuadPoints;
736: const PetscReal *quadWeights = user->q.quadWeights;
737: const PetscInt numBasisFuncs = user->q.numBasisFuncs;
738: const PetscReal *basis = user->q.basis;
739: const PetscReal *basisDer = user->q.basisDer;
740: PetscReal *v0, *J, *invJ, detJ;
741: PetscScalar *realSpaceTestDer, *realSpaceBasisDer, *elemMat;
742: PetscInt cStart, cEnd, c;
743: PetscErrorCode ierr;
746: PetscLogEventBegin(user->jacobianEvent,0,0,0,0);
747: MatZeroEntries(Jac);
748: PetscMalloc3(dim,PetscScalar,&realSpaceTestDer,dim,PetscScalar,&realSpaceBasisDer,numBasisFuncs*numBasisFuncs,PetscScalar,&elemMat);
749: PetscMalloc3(dim,PetscReal,&v0,dim*dim,PetscReal,&J,dim*dim,PetscReal,&invJ);
750: DMPlexGetHeightStratum(dm, 0, &cStart, &cEnd);
751: for (c = cStart; c < cEnd; ++c) {
752: PetscScalar *x;
753: PetscInt q;
755: PetscMemzero(elemMat, numBasisFuncs*numBasisFuncs * sizeof(PetscScalar));
756: DMPlexComputeCellGeometry(dm, c, v0, J, invJ, &detJ);
757: if (detJ <= 0.0) SETERRQ2(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Invalid determinant %g for element %d", detJ, c);
758: DMPlexVecGetClosure(dm, NULL, X, c, NULL, &x);
760: for (q = 0; q < numQuadPoints; ++q) {
761: PetscScalar fieldVal = 0.0;
762: PetscInt f, g, d, e;
764: for (f = 0; f < numBasisFuncs; ++f) fieldVal += x[f]*basis[q*numBasisFuncs+f];
765: for (f = 0; f < numBasisFuncs; ++f) {
766: for (d = 0; d < dim; ++d) {
767: realSpaceTestDer[d] = 0.0;
768: for (e = 0; e < dim; ++e) {
769: realSpaceTestDer[d] += invJ[e*dim+d]*basisDer[(q*numBasisFuncs+f)*dim+e];
770: }
771: }
772: for (g = 0; g < numBasisFuncs; ++g) {
773: for (d = 0; d < dim; ++d) {
774: realSpaceBasisDer[d] = 0.0;
775: for (e = 0; e < dim; ++e) {
776: realSpaceBasisDer[d] += invJ[e*dim+d]*basisDer[(q*numBasisFuncs+g)*dim+e];
777: }
778: }
779: /* Linear term: -\Delta u */
780: PetscScalar product = 0.0;
781: for (d = 0; d < dim; ++d) product += realSpaceTestDer[d]*realSpaceBasisDer[d];
782: elemMat[f*numBasisFuncs+g] += product*quadWeights[q]*detJ;
783: /* Nonlinear term: -\lambda e^{u} */
784: elemMat[f*numBasisFuncs+g] -= basis[q*numBasisFuncs+f]*basis[q*numBasisFuncs+g]*0.0*PetscExpScalar(fieldVal)*quadWeights[q]*detJ;
785: }
786: }
787: }
788: DMPlexVecRestoreClosure(dm, NULL, X, c, NULL, &x);
789: if (debug) {DMPrintCellMatrix(c, "Jacobian", numBasisFuncs, numBasisFuncs, elemMat);}
790: DMPlexMatSetClosure(dm, NULL, NULL, Jac, c, elemMat, ADD_VALUES);
791: }
792: PetscLogFlops((cEnd-cStart)*numQuadPoints*numBasisFuncs*(dim*(dim*5+4)+14));
793: PetscFree3(realSpaceTestDer,realSpaceBasisDer,elemMat);
794: PetscFree3(v0,J,invJ);
796: /* Assemble matrix, using the 2-step process:
797: MatAssemblyBegin(), MatAssemblyEnd(). */
798: MatAssemblyBegin(Jac, MAT_FINAL_ASSEMBLY);
799: MatAssemblyEnd(Jac, MAT_FINAL_ASSEMBLY);
800: /* Tell the matrix we will never add a new nonzero location to the
801: matrix. If we do, it will generate an error. */
802: MatSetOption(Jac, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE);
803: PetscLogEventEnd(user->jacobianEvent,0,0,0,0);
804: return(0);
805: }
809: /*
810: dm - The mesh
811: X - The local input vector
812: Jac - The output matrix
813: */
814: PetscErrorCode FormJacobianLocalElasticity(DM dm, Vec X, Mat Jac, Mat JacPre, MatStructure *flag, AppCtx *user)
815: {
816: const PetscInt debug = user->debug;
817: const PetscInt dim = user->dim;
818: const PetscInt numQuadPoints = user->q.numQuadPoints;
819: const PetscReal *quadWeights = user->q.quadWeights;
820: const PetscInt numBasisFuncs = user->q.numBasisFuncs;
821: const PetscReal *basis = user->q.basis;
822: const PetscReal *basisDer = user->q.basisDer;
823: PetscReal *v0, *J, *invJ, detJ;
824: PetscScalar *realSpaceTestDer, *realSpaceBasisDer, *elemMat;
825: PetscInt cStart, cEnd, c;
826: PetscErrorCode ierr;
829: PetscLogEventBegin(user->jacobianEvent,0,0,0,0);
830: MatZeroEntries(Jac);
831: PetscMalloc3(dim,PetscScalar,&realSpaceTestDer,dim,PetscScalar,&realSpaceBasisDer,numBasisFuncs*numBasisFuncs,PetscScalar,&elemMat);
832: PetscMalloc3(dim,PetscReal,&v0,dim*dim,PetscReal,&J,dim*dim,PetscReal,&invJ);
833: DMPlexGetHeightStratum(dm, 0, &cStart, &cEnd);
834: for (c = cStart; c < cEnd; ++c) {
835: PetscScalar *x;
836: PetscInt q;
838: PetscMemzero(elemMat, numBasisFuncs*numBasisFuncs * sizeof(PetscScalar));
839: DMPlexComputeCellGeometry(dm, c, v0, J, invJ, &detJ);
840: if (detJ <= 0.0) SETERRQ2(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Invalid determinant %g for element %d", detJ, c);
841: DMPlexVecGetClosure(dm, NULL, X, c, NULL, &x);
843: for (q = 0; q < numQuadPoints; ++q) {
844: PetscScalar fieldVal = 0.0;
845: PetscInt f, g, d, e;
847: for (f = 0; f < numBasisFuncs; ++f) fieldVal += x[f]*basis[q*numBasisFuncs+f];
849: for (f = 0; f < numBasisFuncs; ++f) {
850: for (d = 0; d < dim; ++d) {
851: realSpaceTestDer[d] = 0.0;
852: for (e = 0; e < dim; ++e) {
853: realSpaceTestDer[d] += invJ[e*dim+d]*basisDer[(q*numBasisFuncs+f)*dim+e];
854: }
855: }
856: for (g = 0; g < numBasisFuncs; ++g) {
857: for (d = 0; d < dim; ++d) {
858: realSpaceBasisDer[d] = 0.0;
859: for (e = 0; e < dim; ++e) {
860: realSpaceBasisDer[d] += invJ[e*dim+d]*basisDer[(q*numBasisFuncs+g)*dim+e];
861: }
862: }
863: /* Linear term: -\Delta u */
864: PetscScalar product = 0.0;
865: for (d = 0; d < dim; ++d) product += realSpaceTestDer[d]*realSpaceBasisDer[d];
866: elemMat[f*numBasisFuncs+g] += product*quadWeights[q]*detJ;
867: /* Nonlinear term: -\lambda e^{u} */
868: elemMat[f*numBasisFuncs+g] -= basis[q*numBasisFuncs+f]*basis[q*numBasisFuncs+g]*0.0*PetscExpScalar(fieldVal)*quadWeights[q]*detJ;
869: }
870: }
871: }
872: DMPlexVecRestoreClosure(dm, NULL, X, c, NULL, &x);
873: if (debug) {DMPrintCellMatrix(c, "Jacobian", numBasisFuncs, numBasisFuncs, elemMat);}
874: DMPlexMatSetClosure(dm, NULL, NULL, Jac, c, elemMat, ADD_VALUES);
875: }
876: PetscLogFlops((cEnd-cStart)*numQuadPoints*numBasisFuncs*(dim*(dim*5+4)+14));
877: PetscFree3(realSpaceTestDer,realSpaceBasisDer,elemMat);
878: PetscFree3(v0,J,invJ);
880: /* Assemble matrix, using the 2-step process:
881: MatAssemblyBegin(), MatAssemblyEnd(). */
882: MatAssemblyBegin(Jac, MAT_FINAL_ASSEMBLY);
883: MatAssemblyEnd(Jac, MAT_FINAL_ASSEMBLY);
884: /* Tell the matrix we will never add a new nonzero location to the
885: matrix. If we do, it will generate an error. */
886: MatSetOption(Jac, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE);
887: PetscLogEventEnd(user->jacobianEvent,0,0,0,0);
888: return(0);
889: }
893: /*
894: dm - The mesh
895: X - The local input vector
896: Jac - The output matrix
897: */
898: PetscErrorCode FormJacobianLocalBatch(DM dm, Vec X, Mat Jac, Mat JacPre, MatStructure *flag, AppCtx *user)
899: {
900: const PetscInt debug = user->debug;
901: const PetscInt dim = user->dim;
902: const PetscInt numQuadPoints = user->q.numQuadPoints;
903: const PetscReal *quadWeights = user->q.quadWeights;
904: const PetscInt numBasisFuncs = user->q.numBasisFuncs;
905: const PetscReal *basis = user->q.basis;
906: const PetscReal *basisDer = user->q.basisDer;
907: PetscReal *v0, *J, *invJ, detJ;
908: PetscScalar *realSpaceTestDer, *realSpaceBasisDer, *elemMat;
909: PetscInt cStart, cEnd, c;
910: PetscErrorCode ierr;
913: PetscLogEventBegin(user->jacobianBatchEvent,0,0,0,0);
914: MatZeroEntries(Jac);
915: PetscMalloc3(dim,PetscScalar,&realSpaceTestDer,dim,PetscScalar,&realSpaceBasisDer,numBasisFuncs*numBasisFuncs,PetscScalar,&elemMat);
916: PetscMalloc3(dim,PetscReal,&v0,dim*dim,PetscReal,&J,dim*dim,PetscReal,&invJ);
917: DMPlexGetHeightStratum(dm, 0, &cStart, &cEnd);
918: for (c = cStart; c < cEnd; ++c) {
919: PetscScalar *x;
920: PetscInt q;
922: PetscMemzero(elemMat, numBasisFuncs*numBasisFuncs * sizeof(PetscScalar));
923: DMPlexComputeCellGeometry(dm, c, v0, J, invJ, &detJ);
924: if (detJ <= 0.0) SETERRQ2(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Invalid determinant %g for element %d", detJ, c);
925: DMPlexVecGetClosure(dm, NULL, X, c, NULL, &x);
927: for (q = 0; q < numQuadPoints; ++q) {
928: PetscScalar fieldVal = 0.0;
929: PetscInt f, g, d, e;
931: for (f = 0; f < numBasisFuncs; ++f) fieldVal += x[f]*basis[q*numBasisFuncs+f];
933: for (f = 0; f < numBasisFuncs; ++f) {
934: for (d = 0; d < dim; ++d) {
935: realSpaceTestDer[d] = 0.0;
936: for (e = 0; e < dim; ++e) {
937: realSpaceTestDer[d] += invJ[e*dim+d]*basisDer[(q*numBasisFuncs+f)*dim+e];
938: }
939: }
940: for (g = 0; g < numBasisFuncs; ++g) {
941: for (d = 0; d < dim; ++d) {
942: realSpaceBasisDer[d] = 0.0;
943: for (e = 0; e < dim; ++e) {
944: realSpaceBasisDer[d] += invJ[e*dim+d]*basisDer[(q*numBasisFuncs+g)*dim+e];
945: }
946: }
947: /* Linear term: -\Delta u */
948: PetscScalar product = 0.0;
949: for (d = 0; d < dim; ++d) product += realSpaceTestDer[d]*realSpaceBasisDer[d];
950: elemMat[f*numBasisFuncs+g] += product*quadWeights[q]*detJ;
951: /* Nonlinear term: -\lambda e^{u} */
952: elemMat[f*numBasisFuncs+g] -= basis[q*numBasisFuncs+f]*basis[q*numBasisFuncs+g]*0.0*PetscExpScalar(fieldVal)*quadWeights[q]*detJ;
953: }
954: }
955: }
956: DMPlexVecRestoreClosure(dm, NULL, X, c, NULL, &x);
957: if (debug) {DMPrintCellMatrix(c, "Jacobian", numBasisFuncs, numBasisFuncs, elemMat);}
958: DMPlexMatSetClosure(dm, NULL, NULL, Jac, c, elemMat, ADD_VALUES);
959: }
960: PetscLogFlops((cEnd-cStart)*numQuadPoints*numBasisFuncs*(dim*(dim*5+4)+14));
961: PetscFree3(realSpaceTestDer,realSpaceBasisDer,elemMat);
962: PetscFree3(v0,J,invJ);
964: /* Assemble matrix, using the 2-step process:
965: MatAssemblyBegin(), MatAssemblyEnd(). */
966: MatAssemblyBegin(Jac, MAT_FINAL_ASSEMBLY);
967: MatAssemblyEnd(Jac, MAT_FINAL_ASSEMBLY);
968: /* Tell the matrix we will never add a new nonzero location to the
969: matrix. If we do, it will generate an error. */
970: MatSetOption(Jac, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE);
971: PetscLogEventEnd(user->jacobianBatchEvent,0,0,0,0);
972: return(0);
973: }
977: int main(int argc, char **argv)
978: {
979: DM dm;
980: SNES snes;
981: AppCtx user;
984: PetscInitialize(&argc, &argv, NULL, help);
985: #if !defined(PETSC_HAVE_CUDA)
986: SETERRQ(PETSC_COMM_WORLD, PETSC_ERR_SUP, "This example requires CUDA support.");
987: #endif
988: ProcessOptions(PETSC_COMM_WORLD, &user);
989: CreateMesh(PETSC_COMM_WORLD, &user, &dm);
990: SetupQuadrature(&user);
991: SetupSection(dm, &user);
993: SNESCreate(PETSC_COMM_WORLD, &snes);
994: SNESSetDM(snes, dm);
995: if (user.computeFunction) {
996: Vec X, F;
998: DMGetGlobalVector(dm, &X);
999: DMGetGlobalVector(dm, &F);
1000: if (user.batch) {
1001: switch (user.op) {
1002: case LAPLACIAN:
1003: FormInitialGuess(X, quadratic_2d, INSERT_VALUES, &user);break;
1004: case ELASTICITY:
1005: FormInitialGuess(X, quadratic_2d_elas, INSERT_VALUES, &user);break;
1006: default:
1007: SETERRQ1(PETSC_COMM_WORLD, PETSC_ERR_ARG_OUTOFRANGE, "Invalid PDE operator %d", user.op);
1008: }
1009: DMSNESSetFunctionLocal(dm, (PetscErrorCode (*)(DM, Vec, Vec, void*))FormFunctionLocalBatch, &user);
1010: } else {
1011: switch (user.op) {
1012: case LAPLACIAN:
1013: FormInitialGuess(X, quadratic_2d, INSERT_VALUES, &user);
1014: DMSNESSetFunctionLocal(dm, (PetscErrorCode (*)(DM, Vec, Vec, void*))FormFunctionLocalLaplacian, &user);break;
1015: case ELASTICITY:
1016: FormInitialGuess(X, quadratic_2d_elas, INSERT_VALUES, &user);
1017: DMSNESSetFunctionLocal(dm, (PetscErrorCode (*)(DM, Vec, Vec, void*))FormFunctionLocalElasticity, &user);break;
1018: default:
1019: SETERRQ1(PETSC_COMM_WORLD, PETSC_ERR_ARG_OUTOFRANGE, "Invalid PDE operator %d", user.op);
1020: }
1021: }
1022: SNESComputeFunction(snes, X, F);
1023: DMRestoreGlobalVector(dm, &X);
1024: DMRestoreGlobalVector(dm, &F);
1025: }
1026: if (user.computeJacobian) {
1027: Vec X;
1028: Mat J;
1029: MatStructure flag;
1031: DMGetGlobalVector(dm, &X);
1032: DMCreateMatrix(dm, MATAIJ, &J);
1033: if (user.batch) {
1034: DMSNESSetJacobianLocal(dm, (PetscErrorCode (*)(DM, Vec, Mat, Mat, MatStructure*, void*))FormJacobianLocalBatch, &user);
1035: } else {
1036: switch (user.op) {
1037: case LAPLACIAN:
1038: DMSNESSetJacobianLocal(dm, (PetscErrorCode (*)(DM, Vec, Mat, Mat, MatStructure*, void*))FormJacobianLocalLaplacian, &user);break;
1039: case ELASTICITY:
1040: DMSNESSetJacobianLocal(dm, (PetscErrorCode (*)(DM, Vec, Mat, Mat, MatStructure*, void*))FormJacobianLocalElasticity, &user);break;
1041: default:
1042: SETERRQ1(PETSC_COMM_WORLD, PETSC_ERR_ARG_OUTOFRANGE, "Invalid PDE operator %d", user.op);
1043: }
1044: }
1045: SNESComputeJacobian(snes, X, &J, &J, &flag);
1046: MatDestroy(&J);
1047: DMRestoreGlobalVector(dm, &X);
1048: }
1049: SNESDestroy(&snes);
1050: DMDestroy(&dm);
1051: PetscFinalize();
1052: return 0;
1053: }