Actual source code: ex52.c
petsc-3.3-p7 2013-05-11
1: static const char help[] = "Testbed for FEM operations on the GPU.\n\n";
3: #if 0
4: - Automate testing of different:
5: - elements, batches, blocks, operators
6: - Process log_summary with Python (use existing tools)
7: - Put results in PyTables
9: PYLITH:
10: - Redo setup/cleanup in PyLith
11: - Test with examples/3d/hex8/step01.cfg (Brad is making a flag)
12: - Code up Jacobian (looks like original code)
13: - Consider taking in coordinates rather than J^{-T}/|J|
15: MUST CHECK WITH:
16: - 3D elements
17: - Different quadrature (1pt and 3pt)
18: - Different basis (linear and quadratic)
19: #endif
21: #include<petscdmcomplex.h>
22: #include<petscsnes.h>
24: typedef enum {LAPLACIAN, ELASTICITY} OpType;
26: typedef struct {
27: DM dm; /* REQUIRED in order to use SNES evaluation functions */
28: PetscInt debug; /* The debugging level */
29: PetscMPIInt rank; /* The process rank */
30: PetscMPIInt numProcs; /* The number of processes */
31: PetscInt dim; /* The topological mesh dimension */
32: PetscBool interpolate; /* Generate intermediate mesh elements */
33: PetscReal refinementLimit; /* The largest allowable cell volume */
34: char partitioner[2048]; /* The graph partitioner */
35: PetscBool computeFunction; /* The flag for computing a residual */
36: PetscBool computeJacobian; /* The flag for computing a Jacobian */
37: PetscBool batch; /* The flag for batch assembly */
38: PetscBool gpu; /* The flag for GPU integration */
39: OpType op; /* The type of PDE operator (should use FFC/Ignition here) */
40: PetscBool showResidual, showJacobian;
41: PetscLogEvent createMeshEvent, residualEvent, residualBatchEvent, jacobianEvent, jacobianBatchEvent, integrateBatchCPUEvent, integrateBatchGPUEvent, integrateGPUOnlyEvent;
42: /* GPU partitioning */
43: PetscInt numBatches; /* The number of cell batches per kernel */
44: PetscInt numBlocks; /* The number of concurrent blocks per kernel */
45: /* Element quadrature */
46: PetscQuadrature q;
47: } AppCtx;
49: /*------------------------------------------------------------------------------
50: This code can be generated using 'bin/pythonscripts/PetscGenerateFEMQuadrature.py dim 1 numComp 1 laplacian src/snes/examples/tutorials/ex52.h'
51: -----------------------------------------------------------------------------*/
52: #include "ex52.h"
54: void quadratic_2d(const PetscReal x[], PetscScalar u[]) {
55: u[0] = x[0]*x[0] + x[1]*x[1];
56: };
58: void quadratic_2d_elas(const PetscReal x[], PetscScalar u[]) {
59: u[0] = x[0]*x[0] + x[1]*x[1];
60: u[1] = x[0]*x[0] + x[1]*x[1];
61: };
65: PetscErrorCode ProcessOptions(MPI_Comm comm, AppCtx *options) {
66: const char *opTypes[2] = {"laplacian", "elasticity"};
67: PetscInt op;
71: options->debug = 0;
72: options->dim = 2;
73: options->interpolate = PETSC_FALSE;
74: options->refinementLimit = 0.0;
75: options->computeFunction = PETSC_FALSE;
76: options->computeJacobian = PETSC_FALSE;
77: options->batch = PETSC_FALSE;
78: options->gpu = PETSC_FALSE;
79: options->numBatches = 1;
80: options->numBlocks = 1;
81: options->op = LAPLACIAN;
82: options->showResidual = PETSC_TRUE;
83: options->showJacobian = PETSC_TRUE;
85: MPI_Comm_size(comm, &options->numProcs);
86: MPI_Comm_rank(comm, &options->rank);
87: PetscOptionsBegin(comm, "", "Bratu Problem Options", "DMCOMPLEX");
88: PetscOptionsInt("-debug", "The debugging level", "ex52.c", options->debug, &options->debug, PETSC_NULL);
89: PetscOptionsInt("-dim", "The topological mesh dimension", "ex52.c", options->dim, &options->dim, PETSC_NULL);
90: PetscOptionsBool("-interpolate", "Generate intermediate mesh elements", "ex52.c", options->interpolate, &options->interpolate, PETSC_NULL);
91: PetscOptionsReal("-refinement_limit", "The largest allowable cell volume", "ex52.c", options->refinementLimit, &options->refinementLimit, PETSC_NULL);
92: PetscStrcpy(options->partitioner, "chaco");
93: PetscOptionsString("-partitioner", "The graph partitioner", "ex52.c", options->partitioner, options->partitioner, 2048, PETSC_NULL);
94: PetscOptionsBool("-compute_function", "Compute the residual", "ex52.c", options->computeFunction, &options->computeFunction, PETSC_NULL);
95: PetscOptionsBool("-compute_jacobian", "Compute the Jacobian", "ex52.c", options->computeJacobian, &options->computeJacobian, PETSC_NULL);
96: PetscOptionsBool("-batch", "Use the batch assembly method", "ex52.c", options->batch, &options->batch, PETSC_NULL);
97: PetscOptionsBool("-gpu", "Use the GPU for integration method", "ex52.c", options->gpu, &options->gpu, PETSC_NULL);
98: PetscOptionsInt("-gpu_batches", "The number of cell batches per kernel", "ex52.c", options->numBatches, &options->numBatches, PETSC_NULL);
99: PetscOptionsInt("-gpu_blocks", "The number of concurrent blocks per kernel", "ex52.c", options->numBlocks, &options->numBlocks, PETSC_NULL);
100: op = options->op;
101: PetscOptionsEList("-op_type","Type of PDE operator","ex52.c",opTypes,2,opTypes[options->op],&op,PETSC_NULL);
102: options->op = (OpType) op;
103: PetscOptionsBool("-show_residual", "Output the residual for verification", "ex52.c", options->showResidual, &options->showResidual, PETSC_NULL);
104: PetscOptionsBool("-show_jacobian", "Output the Jacobian for verification", "ex52.c", options->showJacobian, &options->showJacobian, PETSC_NULL);
105: PetscOptionsEnd();
107: PetscLogEventRegister("CreateMesh", DM_CLASSID, &options->createMeshEvent);
108: PetscLogEventRegister("Residual", SNES_CLASSID, &options->residualEvent);
109: PetscLogEventRegister("ResidualBatch", SNES_CLASSID, &options->residualBatchEvent);
110: PetscLogEventRegister("Jacobian", SNES_CLASSID, &options->jacobianEvent);
111: PetscLogEventRegister("JacobianBatch", SNES_CLASSID, &options->jacobianBatchEvent);
112: PetscLogEventRegister("IntegBatchCPU", SNES_CLASSID, &options->integrateBatchCPUEvent);
113: PetscLogEventRegister("IntegBatchGPU", SNES_CLASSID, &options->integrateBatchGPUEvent);
114: PetscLogEventRegister("IntegGPUOnly", SNES_CLASSID, &options->integrateGPUOnlyEvent);
115: return(0);
116: };
120: PetscErrorCode CreateMesh(MPI_Comm comm, AppCtx *user, DM *dm)
121: {
122: PetscInt dim = user->dim;
123: PetscBool interpolate = user->interpolate;
124: PetscReal refinementLimit = user->refinementLimit;
125: const char *partitioner = user->partitioner;
129: PetscLogEventBegin(user->createMeshEvent,0,0,0,0);
130: DMComplexCreateBoxMesh(comm, dim, interpolate, dm);
131: {
132: DM refinedMesh = PETSC_NULL;
133: DM distributedMesh = PETSC_NULL;
135: /* Refine mesh using a volume constraint */
136: DMComplexSetRefinementLimit(*dm, refinementLimit);
137: DMRefine(*dm, comm, &refinedMesh);
138: if (refinedMesh) {
139: DMDestroy(dm);
140: *dm = refinedMesh;
141: }
142: /* Distribute mesh over processes */
143: DMComplexDistribute(*dm, partitioner, &distributedMesh);
144: if (distributedMesh) {
145: DMDestroy(dm);
146: *dm = distributedMesh;
147: }
148: }
149: DMSetFromOptions(*dm);
150: PetscLogEventEnd(user->createMeshEvent,0,0,0,0);
151: user->dm = *dm;
152: return(0);
153: }
157: PetscErrorCode SetupQuadrature(AppCtx *user) {
159: switch(user->dim) {
160: case 2:
161: case 3:
162: user->q.numQuadPoints = NUM_QUADRATURE_POINTS_0;
163: user->q.quadPoints = points_0;
164: user->q.quadWeights = weights_0;
165: user->q.numBasisFuncs = NUM_BASIS_FUNCTIONS_0;
166: user->q.numComponents = NUM_BASIS_COMPONENTS_0;
167: user->q.basis = Basis_0;
168: user->q.basisDer = BasisDerivatives_0;
169: break;
170: default:
171: SETERRQ1(PETSC_COMM_WORLD, PETSC_ERR_ARG_OUTOFRANGE, "Invalid dimension %d", user->dim);
172: }
173: return(0);
174: }
178: PetscErrorCode SetupSection(DM dm, AppCtx *user) {
179: PetscSection section;
180: /* These can be generated using config/PETSc/FEM.py */
181: PetscInt dim = user->dim;
182: PetscInt numBC = 0;
183: PetscInt numComp[1] = {NUM_BASIS_COMPONENTS_0};
187: if (dim != SPATIAL_DIM_0) SETERRQ2(((PetscObject) dm)->comm, PETSC_ERR_ARG_SIZ, "Spatial dimension %d should be %d", dim, SPATIAL_DIM_0);
188: DMComplexCreateSection(dm, dim, 1, numComp, numDof_0, numBC, PETSC_NULL, PETSC_NULL, §ion);
189: DMComplexSetDefaultSection(dm, section);
190: return(0);
191: }
195: /*
196: FormInitialGuess - Forms initial approximation.
198: Input Parameters:
199: + user - user-defined application context
200: - guessFunc - The coordinate function to use for the guess
202: Output Parameter:
203: . X - vector
204: */
205: PetscErrorCode FormInitialGuess(Vec X, void (*guessFunc)(const PetscReal [], PetscScalar []), InsertMode mode, AppCtx *user)
206: {
207: Vec localX, coordinates;
208: PetscSection section, cSection;
209: PetscInt vStart, vEnd, v;
213: DMGetLocalVector(user->dm, &localX);
214: DMComplexGetDepthStratum(user->dm, 0, &vStart, &vEnd);
215: DMComplexGetDefaultSection(user->dm, §ion);
216: DMComplexGetCoordinateSection(user->dm, &cSection);
217: DMComplexGetCoordinateVec(user->dm, &coordinates);
218: for(v = vStart; v < vEnd; ++v) {
219: PetscScalar values[3];
220: PetscScalar *coords;
222: VecGetValuesSection(coordinates, cSection, v, &coords);
223: (*guessFunc)(coords, values);
224: VecSetValuesSection(localX, section, v, values, mode);
225: }
227: DMLocalToGlobalBegin(user->dm, localX, INSERT_VALUES, X);
228: DMLocalToGlobalEnd(user->dm, localX, INSERT_VALUES, X);
229: DMRestoreLocalVector(user->dm, &localX);
230: return(0);
231: }
235: /*
236: dm - The mesh
237: X - Local intput vector
238: F - Local output vector
239: */
240: PetscErrorCode FormFunctionLocalLaplacian(DM dm, Vec X, Vec F, AppCtx *user)
241: {
242: //PetscScalar (*rhsFunc)(const PetscReal []) = user->rhsFunc;
243: const PetscInt debug = user->debug;
244: const PetscInt dim = user->dim;
245: const PetscInt numQuadPoints = user->q.numQuadPoints;
246: const PetscReal *quadPoints = user->q.quadPoints;
247: const PetscReal *quadWeights = user->q.quadWeights;
248: const PetscInt numBasisFuncs = user->q.numBasisFuncs;
249: const PetscReal *basis = user->q.basis;
250: const PetscReal *basisDer = user->q.basisDer;
251: PetscReal *coords, *v0, *J, *invJ, detJ;
252: PetscScalar *realSpaceDer, *fieldGrad, *elemVec;
253: PetscInt cStart, cEnd, c, p;
254: PetscErrorCode ierr;
257: PetscLogEventBegin(user->residualEvent,0,0,0,0);
258: VecSet(F, 0.0);
259: PetscMalloc3(dim,PetscScalar,&realSpaceDer,dim,PetscScalar,&fieldGrad,numBasisFuncs,PetscScalar,&elemVec);
260: PetscMalloc4(dim,PetscReal,&coords,dim,PetscReal,&v0,dim*dim,PetscReal,&J,dim*dim,PetscReal,&invJ);
261: DMComplexGetHeightStratum(dm, 0, &cStart, &cEnd);
262: for(c = cStart; c < cEnd; ++c) {
263: const PetscScalar *x;
264: PetscInt q, f, d, e;
266: PetscMemzero(elemVec, numBasisFuncs * sizeof(PetscScalar));
267: DMComplexComputeCellGeometry(dm, c, v0, J, invJ, &detJ);
268: if (detJ <= 0.0) SETERRQ2(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Invalid determinant %g for element %d", detJ, c);
269: DMComplexVecGetClosure(dm, PETSC_NULL, X, c, &x);
270: if (debug > 1) {DMPrintCellVector(c, "Solution", numBasisFuncs, x);}
272: for(q = 0; q < numQuadPoints; ++q) {
273: PetscScalar fieldVal = 0.0;
275: if (debug > 1) {PetscPrintf(PETSC_COMM_SELF, " quad point %d\n", q);}
276: for(d = 0; d < dim; ++d) {
277: fieldGrad[d] = 0.0;
278: coords[d] = v0[d];
279: for(e = 0; e < dim; ++e) {
280: coords[d] += J[d*dim+e]*(quadPoints[q*dim+e] + 1.0);
281: }
282: if (debug > 1) {PetscPrintf(PETSC_COMM_SELF, " coords[%d] %g\n", d, coords[d]);}
283: }
284: for(f = 0; f < numBasisFuncs; ++f) {
285: fieldVal += x[f]*basis[q*numBasisFuncs+f];
287: for(d = 0; d < dim; ++d) {
288: realSpaceDer[d] = 0.0;
289: for(e = 0; e < dim; ++e) {
290: realSpaceDer[d] += invJ[e*dim+d]*basisDer[(q*numBasisFuncs+f)*dim+e];
291: }
292: fieldGrad[d] += realSpaceDer[d]*x[f];
293: }
294: }
295: if (debug > 1) {
296: for(d = 0; d < dim; ++d) {
297: PetscPrintf(PETSC_COMM_SELF, " fieldGrad[%d] %g\n", d, fieldGrad[d]);
298: }
299: }
300: const PetscScalar funcVal = 0.0; //(*rhsFunc)(coords);
301: for(f = 0; f < numBasisFuncs; ++f) {
302: /* Constant term: -f(x) */
303: elemVec[f] -= basis[q*numBasisFuncs+f]*funcVal*quadWeights[q]*detJ;
304: /* Linear term: -\Delta u */
305: PetscScalar product = 0.0;
306: for(d = 0; d < dim; ++d) {
307: realSpaceDer[d] = 0.0;
308: for(e = 0; e < dim; ++e) {
309: realSpaceDer[d] += invJ[e*dim+d]*basisDer[(q*numBasisFuncs+f)*dim+e];
310: }
311: product += realSpaceDer[d]*fieldGrad[d];
312: }
313: elemVec[f] += product*quadWeights[q]*detJ;
314: /* Nonlinear term: -\lambda e^{u} */
315: elemVec[f] -= basis[q*numBasisFuncs+f]*0.0*PetscExpScalar(fieldVal)*quadWeights[q]*detJ;
316: }
317: }
318: if (debug) {DMPrintCellVector(c, "Residual", numBasisFuncs, elemVec);}
319: DMComplexVecSetClosure(dm, PETSC_NULL, F, c, elemVec, ADD_VALUES);
320: }
321: PetscLogFlops((cEnd-cStart)*numQuadPoints*numBasisFuncs*(dim*(dim*5+4)+14));
322: PetscFree3(realSpaceDer,fieldGrad,elemVec);
323: PetscFree4(coords,v0,J,invJ);
325: PetscPrintf(PETSC_COMM_WORLD, "Residual:\n");
326: for(p = 0; p < user->numProcs; ++p) {
327: if (p == user->rank) {VecView(F, PETSC_VIEWER_STDOUT_SELF);}
328: PetscBarrier((PetscObject) dm);
329: }
330: PetscLogEventEnd(user->residualEvent,0,0,0,0);
331: return(0);
332: }
336: /*
337: dm - The mesh
338: X - Local intput vector
339: F - Local output vector
340: */
341: PetscErrorCode FormFunctionLocalElasticity(DM dm, Vec X, Vec F, AppCtx *user)
342: {
343: //PetscScalar (*rhsFunc)(const PetscReal []) = user->rhsFunc;
344: const PetscInt debug = user->debug;
345: const PetscInt dim = user->dim;
346: const PetscInt numQuadPoints = user->q.numQuadPoints;
347: const PetscReal *quadPoints = user->q.quadPoints;
348: const PetscReal *quadWeights = user->q.quadWeights;
349: const PetscInt numBasisFuncs = user->q.numBasisFuncs;
350: const PetscInt numBasisComps = user->q.numComponents;
351: const PetscReal *basis = user->q.basis;
352: const PetscReal *basisDer = user->q.basisDer;
353: PetscReal *coords, *v0, *J, *invJ, detJ;
354: PetscScalar *realSpaceDer, *fieldGrad, *elemVec;
355: PetscInt cStart, cEnd, c, p;
356: PetscErrorCode ierr;
359: PetscLogEventBegin(user->residualEvent,0,0,0,0);
360: VecSet(F, 0.0);
361: PetscMalloc3(dim,PetscScalar,&realSpaceDer,dim*numBasisComps,PetscScalar,&fieldGrad,numBasisFuncs*numBasisComps,PetscScalar,&elemVec);
362: PetscMalloc4(dim,PetscReal,&coords,dim,PetscReal,&v0,dim*dim,PetscReal,&J,dim*dim,PetscReal,&invJ);
363: DMComplexGetHeightStratum(dm, 0, &cStart, &cEnd);
364: for(c = cStart; c < cEnd; ++c) {
365: const PetscScalar *x;
366: PetscInt q, d, e, f, comp;
368: PetscMemzero(elemVec, numBasisFuncs*numBasisComps * sizeof(PetscScalar));
369: DMComplexComputeCellGeometry(dm, c, v0, J, invJ, &detJ);
370: if (detJ <= 0.0) SETERRQ2(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Invalid determinant %g for element %d", detJ, c);
371: DMComplexVecGetClosure(dm, PETSC_NULL, X, c, &x);
372: if (debug > 1) {DMPrintCellVector(c, "Solution", numBasisFuncs*numBasisComps, x);}
374: for(q = 0; q < numQuadPoints; ++q) {
375: PetscScalar fieldVal[3] = {0.0,0.0,0.0};
377: if (debug > 1) {PetscPrintf(PETSC_COMM_SELF, " quad point %d\n", q);}
378: for(d = 0; d < dim; ++d) {
379: for(comp = 0; comp < numBasisComps; ++comp) {
380: fieldGrad[comp*dim+d] = 0.0;
381: }
382: coords[d] = v0[d];
383: for(e = 0; e < dim; ++e) {
384: coords[d] += J[d*dim+e]*(quadPoints[q*dim+e] + 1.0);
385: }
386: if (debug > 1) {PetscPrintf(PETSC_COMM_SELF, " coords[%d] %g\n", d, coords[d]);}
387: }
388: for(f = 0; f < numBasisFuncs; ++f) {
389: for(comp = 0; comp < numBasisComps; ++comp) {
390: const PetscInt cidx = f*numBasisComps+comp;
392: fieldVal[comp] += x[cidx]*basis[q*numBasisFuncs*numBasisComps+cidx];
394: for(d = 0; d < dim; ++d) {
395: realSpaceDer[d] = 0.0;
396: for(e = 0; e < dim; ++e) {
397: realSpaceDer[d] += invJ[e*dim+d]*basisDer[(q*numBasisFuncs*numBasisComps+cidx)*dim+e];
398: }
399: fieldGrad[comp*dim+d] += realSpaceDer[d]*x[cidx];
400: }
401: }
402: }
403: if (debug > 1) {
404: for(comp = 0; comp < numBasisComps; ++comp) {
405: for(d = 0; d < dim; ++d) {
406: PetscPrintf(PETSC_COMM_SELF, " field %d gradient[%d] %g\n", comp, d, fieldGrad[comp*dim+d]);
407: }
408: }
409: }
410: const PetscScalar funcVal[3] = {0.0, 0.0, 0.0}; //(*rhsFunc)(coords);
411: for(f = 0; f < numBasisFuncs; ++f) {
412: for(comp = 0; comp < numBasisComps; ++comp) {
413: const PetscInt cidx = f*numBasisComps+comp;
415: /* Constant term: -f(x) */
416: elemVec[cidx] -= basis[q*numBasisFuncs*numBasisComps+cidx]*funcVal[comp]*quadWeights[q]*detJ;
417: /* Linear term: -\Delta u */
418: PetscScalar product = 0.0;
419: for(d = 0; d < dim; ++d) {
420: realSpaceDer[d] = 0.0;
421: for(e = 0; e < dim; ++e) {
422: realSpaceDer[d] += invJ[e*dim+d]*basisDer[(q*numBasisFuncs*numBasisComps+cidx)*dim+e];
423: }
424: product += realSpaceDer[d]*0.5*(fieldGrad[comp*dim+d] + fieldGrad[d*dim+comp]);
425: }
426: elemVec[cidx] += product*quadWeights[q]*detJ;
427: /* Nonlinear term: -\lambda e^{u} */
428: elemVec[cidx] -= basis[q*numBasisFuncs*numBasisComps+cidx]*0.0*PetscExpScalar(fieldVal[comp])*quadWeights[q]*detJ;
429: }
430: }
431: }
432: if (debug) {DMPrintCellVector(c, "Residual", numBasisFuncs*numBasisComps, elemVec);}
433: DMComplexVecSetClosure(dm, PETSC_NULL, F, c, elemVec, ADD_VALUES);
434: }
435: PetscLogFlops((cEnd-cStart)*numQuadPoints*numBasisFuncs*(dim*(dim*5+4)+14));
436: PetscFree3(realSpaceDer,fieldGrad,elemVec);
437: PetscFree4(coords,v0,J,invJ);
439: PetscPrintf(PETSC_COMM_WORLD, "Residual:\n");
440: for(p = 0; p < user->numProcs; ++p) {
441: if (p == user->rank) {VecView(F, PETSC_VIEWER_STDOUT_SELF);}
442: PetscBarrier((PetscObject) dm);
443: }
444: PetscLogEventEnd(user->residualEvent,0,0,0,0);
445: return(0);
446: }
448: EXTERN_C_BEGIN
449: PetscErrorCode IntegrateElementBatchGPU(PetscInt Ne, PetscInt Nbatch, PetscInt Nbc, PetscInt Nbl, const PetscScalar coefficients[], const PetscReal jacobianInverses[], const PetscReal jacobianDeterminants[], PetscScalar elemVec[], PetscLogEvent event, PetscInt debug);
450: EXTERN_C_END
452: void f1_laplacian(PetscScalar u, const PetscScalar gradU[], PetscScalar f1[]) {
453: const PetscInt dim = SPATIAL_DIM_0;
454: PetscInt d;
456: for(d = 0; d < dim; ++d) {
457: f1[d] = gradU[d];
458: }
459: return;
460: }
464: PetscErrorCode IntegrateLaplacianBatchCPU(PetscInt Ne, PetscInt Nb, const PetscScalar coefficients[], const PetscReal jacobianInverses[], const PetscReal jacobianDeterminants[], PetscInt Nq, const PetscReal quadPoints[], const PetscReal quadWeights[], const PetscReal basisTabulation[], const PetscReal basisDerTabulation[], PetscScalar elemVec[], AppCtx *user) {
465: const PetscInt debug = user->debug;
466: const PetscInt dim = SPATIAL_DIM_0;
467: PetscInt e;
471: PetscLogEventBegin(user->integrateBatchCPUEvent,0,0,0,0);
472: for(e = 0; e < Ne; ++e) {
473: const PetscReal detJ = jacobianDeterminants[e];
474: const PetscReal *invJ = &jacobianInverses[e*dim*dim];
475: PetscScalar f1[SPATIAL_DIM_0/*1*/]; // Nq = 1
476: PetscInt q, b, d, f;
478: if (debug > 1) {
479: PetscPrintf(PETSC_COMM_SELF, " detJ: %g\n", detJ);
480: PetscPrintf(PETSC_COMM_SELF, " invJ: %g %g %g %g\n", invJ[0], invJ[1], invJ[2], invJ[3]);
481: }
482: for(q = 0; q < Nq; ++q) {
483: if (debug) {PetscPrintf(PETSC_COMM_SELF, " quad point %d\n", q);}
484: PetscScalar u = 0.0;
485: PetscScalar gradU[SPATIAL_DIM_0];
487: for(d = 0; d < dim; ++d) {gradU[d] = 0.0;}
488: for(b = 0; b < Nb; ++b) {
489: PetscScalar realSpaceDer[SPATIAL_DIM_0];
491: u += coefficients[e*Nb+b]*basisTabulation[q*Nb+b];
492: for(d = 0; d < dim; ++d) {
493: realSpaceDer[d] = 0.0;
494: for(f = 0; f < dim; ++f) {
495: realSpaceDer[d] += invJ[f*dim+d]*basisDerTabulation[(q*Nb+b)*dim+f];
496: }
497: gradU[d] += coefficients[e*Nb+b]*realSpaceDer[d];
498: }
499: }
500: f1_laplacian(u, gradU, &f1[q*dim]);
501: f1[q*dim+0] *= detJ*quadWeights[q];
502: f1[q*dim+1] *= detJ*quadWeights[q];
503: if (debug > 1) {
504: PetscPrintf(PETSC_COMM_SELF, " f1[%d]: %g\n", 0, f1[q*dim+0]);
505: PetscPrintf(PETSC_COMM_SELF, " f1[%d]: %g\n", 1, f1[q*dim+1]);
506: }
507: }
508: for(b = 0; b < Nb; ++b) {
509: elemVec[e*Nb+b] = 0.0;
510: for(q = 0; q < Nq; ++q) {
511: PetscScalar realSpaceDer[SPATIAL_DIM_0];
513: for(d = 0; d < dim; ++d) {
514: realSpaceDer[d] = 0.0;
515: for(f = 0; f < dim; ++f) {
516: realSpaceDer[d] += invJ[f*dim+d]*basisDerTabulation[(q*Nb+b)*dim+f];
517: }
518: elemVec[e*Nb+b] += realSpaceDer[d]*f1[q*dim+d];
519: }
520: }
521: }
522: }
523: PetscLogFlops((((2+(2+2*dim)*dim)*Nb+2*dim)*Nq + (2+2*dim)*dim*Nq*Nb)*Ne);
524: PetscLogEventEnd(user->integrateBatchCPUEvent,0,0,0,0);
525: return(0);
526: };
528: void f1_elasticity(PetscScalar u[], const PetscScalar gradU[], PetscScalar f1[]) {
529: const PetscInt dim = SPATIAL_DIM_0;
530: const PetscInt Ncomp = dim;
531: PetscInt comp, d;
533: for(comp = 0; comp < Ncomp; ++comp) {
534: for(d = 0; d < dim; ++d) {
535: f1[comp*dim+d] = 0.5*(gradU[comp*dim+d] + gradU[d*dim+comp]);
536: }
537: }
538: return;
539: }
543: PetscErrorCode IntegrateElasticityBatchCPU(PetscInt Ne, PetscInt Nb, PetscInt Ncomp, const PetscScalar coefficients[], const PetscReal jacobianInverses[], const PetscReal jacobianDeterminants[], PetscInt Nq, const PetscReal quadPoints[], const PetscReal quadWeights[], const PetscReal basisTabulation[], const PetscReal basisDerTabulation[], PetscScalar elemVec[], AppCtx *user) {
544: const PetscInt debug = user->debug;
545: const PetscInt dim = SPATIAL_DIM_0;
546: PetscInt e;
550: PetscLogEventBegin(user->integrateBatchCPUEvent,0,0,0,0);
551: for(e = 0; e < Ne; ++e) {
552: const PetscReal detJ = jacobianDeterminants[e];
553: const PetscReal *invJ = &jacobianInverses[e*dim*dim];
554: PetscScalar f1[SPATIAL_DIM_0*SPATIAL_DIM_0/*1*/]; // Nq = 1
555: PetscInt q, b, comp, i;
557: if (debug > 1) {
558: PetscPrintf(PETSC_COMM_SELF, " detJ: %g\n", detJ);
559: PetscPrintf(PETSC_COMM_SELF, " invJ:");
560: for(i = 0; i < dim*dim; ++i) {
561: PetscPrintf(PETSC_COMM_SELF, " %g", invJ[i]);
562: }
563: PetscPrintf(PETSC_COMM_SELF, "\n");
564: }
565: for(q = 0; q < Nq; ++q) {
566: if (debug) {PetscPrintf(PETSC_COMM_SELF, " quad point %d\n", q);}
567: PetscScalar u[SPATIAL_DIM_0];
568: PetscScalar gradU[SPATIAL_DIM_0*SPATIAL_DIM_0];
570: for(i = 0; i < dim; ++i) {u[i] = 0.0;}
571: for(i = 0; i < dim*dim; ++i) {gradU[i] = 0.0;}
572: for(b = 0; b < Nb; ++b) {
573: for(comp = 0; comp < Ncomp; ++comp) {
574: const PetscInt cidx = b*Ncomp+comp;
575: PetscScalar realSpaceDer[SPATIAL_DIM_0];
576: PetscInt d, f;
578: u[comp] += coefficients[e*Nb*Ncomp+cidx]*basisTabulation[q*Nb*Ncomp+cidx];
579: for(d = 0; d < dim; ++d) {
580: realSpaceDer[d] = 0.0;
581: for(f = 0; f < dim; ++f) {
582: realSpaceDer[d] += invJ[f*dim+d]*basisDerTabulation[(q*Nb*Ncomp+cidx)*dim+f];
583: }
584: gradU[comp*dim+d] += coefficients[e*Nb*Ncomp+cidx]*realSpaceDer[d];
585: }
586: }
587: }
588: f1_elasticity(u, gradU, &f1[q*Ncomp*dim]);
589: for(i = 0; i < Ncomp*dim; ++i) {
590: f1[q*Ncomp*dim+i] *= detJ*quadWeights[q];
591: }
592: if (debug > 1) {
593: PetscInt c, d;
594: for(c = 0; c < Ncomp; ++c) {
595: for(d = 0; d < dim; ++d) {
596: PetscPrintf(PETSC_COMM_SELF, " gradU[%d]_%c: %g f1[%d]_%c: %g\n", c, 'x'+d, gradU[c*dim+d], c, 'x'+d, f1[(q*Ncomp + c)*dim+d]);
597: }
598: }
599: }
600: }
601: for(b = 0; b < Nb; ++b) {
602: for(comp = 0; comp < Ncomp; ++comp) {
603: const PetscInt cidx = b*Ncomp+comp;
604: PetscInt q, d, f;
606: elemVec[e*Nb*Ncomp+cidx] = 0.0;
607: for(q = 0; q < Nq; ++q) {
608: PetscScalar realSpaceDer[SPATIAL_DIM_0];
610: for(d = 0; d < dim; ++d) {
611: realSpaceDer[d] = 0.0;
612: for(f = 0; f < dim; ++f) {
613: realSpaceDer[d] += invJ[f*dim+d]*basisDerTabulation[(q*Nb*Ncomp+cidx)*dim+f];
614: }
615: elemVec[e*Nb*Ncomp+cidx] += realSpaceDer[d]*f1[(q*Ncomp+comp)*dim+d];
616: }
617: }
618: }
619: }
620: }
621: PetscLogFlops((((2+(2+2*dim)*dim)*Ncomp*Nb+(2+2)*dim*Ncomp)*Nq + (2+2*dim)*dim*Nq*Ncomp*Nb)*Ne);
622: PetscLogEventEnd(user->integrateBatchCPUEvent,0,0,0,0);
623: return(0);
624: };
628: /*
629: dm - The mesh
630: X - Local intput vector
631: F - Local output vector
632: */
633: PetscErrorCode FormFunctionLocalBatch(DM dm, Vec X, Vec F, AppCtx *user)
634: {
635: //PetscScalar (*rhsFunc)(const PetscReal []) = user->rhsFunc;
636: const PetscInt debug = user->debug;
637: const PetscInt dim = user->dim;
638: const PetscInt numQuadPoints = user->q.numQuadPoints;
639: const PetscReal *quadPoints = user->q.quadPoints;
640: const PetscReal *quadWeights = user->q.quadWeights;
641: const PetscInt numBasisFuncs = user->q.numBasisFuncs;
642: const PetscInt numBasisComps = user->q.numComponents;
643: const PetscReal *basis = user->q.basis;
644: const PetscReal *basisDer = user->q.basisDer;
645: PetscReal *coords, *v0, *J, *invJ, *detJ;
646: PetscScalar *elemVec;
647: PetscInt cStart, cEnd, c;
648: PetscErrorCode ierr;
651: PetscLogEventBegin(user->residualBatchEvent,0,0,0,0);
652: VecSet(F, 0.0);
653: //PetscMalloc3(dim,PetscScalar,&realSpaceDer,dim,PetscScalar,&fieldGrad,numBasisFuncs,PetscScalar,&elemVec);
654: PetscMalloc3(dim,PetscReal,&coords,dim,PetscReal,&v0,dim*dim,PetscReal,&J);
655: DMComplexGetHeightStratum(dm, 0, &cStart, &cEnd);
656: const PetscInt numCells = cEnd - cStart;
657: PetscScalar *u;
659: PetscMalloc4(numCells*numBasisFuncs*numBasisComps,PetscScalar,&u,numCells*dim*dim,PetscReal,&invJ,numCells,PetscReal,&detJ,numCells*numBasisFuncs*numBasisComps,PetscScalar,&elemVec);
660: for(c = cStart; c < cEnd; ++c) {
661: const PetscScalar *x;
662: PetscInt f;
664: DMComplexComputeCellGeometry(dm, c, v0, J, &invJ[c*dim*dim], &detJ[c]);
665: DMComplexVecGetClosure(dm, PETSC_NULL, X, c, &x);
667: for(f = 0; f < numBasisFuncs*numBasisComps; ++f) {
668: u[c*numBasisFuncs*numBasisComps+f] = x[f];
669: }
670: }
671: // Conforming batches
672: PetscInt blockSize = numBasisFuncs*numQuadPoints;
673: PetscInt numBlocks = user->numBlocks;
674: PetscInt batchSize = numBlocks * blockSize;
675: PetscInt numBatches = user->numBatches;
676: PetscInt numChunks = numCells / (numBatches*batchSize);
677: if (user->gpu) {
678: PetscLogEventBegin(user->integrateBatchGPUEvent,0,0,0,0);
679: IntegrateElementBatchGPU(numChunks*numBatches*batchSize, numBatches, batchSize, numBlocks, u, invJ, detJ, elemVec, user->integrateGPUOnlyEvent, user->debug);
680: PetscLogFlops((((2+(2+2*dim)*dim)*numBasisComps*numBasisFuncs+(2+2)*dim*numBasisComps)*numQuadPoints + (2+2*dim)*dim*numQuadPoints*numBasisComps*numBasisFuncs)*numChunks*numBatches*batchSize);
681: PetscLogEventEnd(user->integrateBatchGPUEvent,0,0,0,0);
682: } else {
683: switch(user->op) {
684: case LAPLACIAN:
685: IntegrateLaplacianBatchCPU(numChunks*numBatches*batchSize, numBasisFuncs, u, invJ, detJ, numQuadPoints, quadPoints, quadWeights, basis, basisDer, elemVec, user);break;
686: case ELASTICITY:
687: IntegrateElasticityBatchCPU(numChunks*numBatches*batchSize, numBasisFuncs, numBasisComps, u, invJ, detJ, numQuadPoints, quadPoints, quadWeights, basis, basisDer, elemVec, user);break;
688: default:
689: SETERRQ1(PETSC_COMM_WORLD, PETSC_ERR_ARG_OUTOFRANGE, "Invalid PDE operator %d", user->op);
690: }
691: }
692: // Remainder
693: PetscInt numRemainder = numCells % (numBatches * batchSize);
694: PetscInt offset = numCells - numRemainder;
695: switch(user->op) {
696: case LAPLACIAN:
697: IntegrateLaplacianBatchCPU(numRemainder, numBasisFuncs, &u[offset*numBasisFuncs*numBasisComps], &invJ[offset*dim*dim], &detJ[offset],
698: numQuadPoints, quadPoints, quadWeights, basis, basisDer, &elemVec[offset*numBasisFuncs*numBasisComps], user);break;
699: case ELASTICITY:
700: IntegrateElasticityBatchCPU(numRemainder, numBasisFuncs, numBasisComps, &u[offset*numBasisFuncs*numBasisComps], &invJ[offset*dim*dim], &detJ[offset],
701: numQuadPoints, quadPoints, quadWeights, basis, basisDer, &elemVec[offset*numBasisFuncs*numBasisComps], user);break;
702: default:
703: SETERRQ1(PETSC_COMM_WORLD, PETSC_ERR_ARG_OUTOFRANGE, "Invalid PDE operator %d", user->op);
704: }
705: for(c = cStart; c < cEnd; ++c) {
706: if (debug) {DMPrintCellVector(c, "Residual", numBasisFuncs*numBasisComps, &elemVec[c*numBasisFuncs*numBasisComps]);}
707: DMComplexVecSetClosure(dm, PETSC_NULL, F, c, &elemVec[c*numBasisFuncs*numBasisComps], ADD_VALUES);
708: }
709: PetscFree4(u,invJ,detJ,elemVec);
710: PetscFree3(coords,v0,J);
711: if (user->showResidual) {
712: PetscInt p;
713: PetscPrintf(PETSC_COMM_WORLD, "Residual:\n");
714: for(p = 0; p < user->numProcs; ++p) {
715: if (p == user->rank) {VecView(F, PETSC_VIEWER_STDOUT_SELF);}
716: PetscBarrier((PetscObject) dm);
717: }
718: }
719: PetscLogEventEnd(user->residualBatchEvent,0,0,0,0);
720: return(0);
721: }
725: /*
726: dm - The mesh
727: X - The local input vector
728: Jac - The output matrix
729: */
730: PetscErrorCode FormJacobianLocalLaplacian(DM dm, Vec X, Mat Jac, Mat JacPre, AppCtx *user)
731: {
732: const PetscInt debug = user->debug;
733: const PetscInt dim = user->dim;
734: const PetscInt numQuadPoints = user->q.numQuadPoints;
735: const PetscReal *quadWeights = user->q.quadWeights;
736: const PetscInt numBasisFuncs = user->q.numBasisFuncs;
737: const PetscReal *basis = user->q.basis;
738: const PetscReal *basisDer = user->q.basisDer;
739: PetscReal *v0, *J, *invJ, detJ;
740: PetscScalar *realSpaceTestDer, *realSpaceBasisDer, *elemMat;
741: PetscInt cStart, cEnd, c;
742: PetscErrorCode ierr;
745: PetscLogEventBegin(user->jacobianEvent,0,0,0,0);
746: MatZeroEntries(Jac);
747: PetscMalloc3(dim,PetscScalar,&realSpaceTestDer,dim,PetscScalar,&realSpaceBasisDer,numBasisFuncs*numBasisFuncs,PetscScalar,&elemMat);
748: PetscMalloc3(dim,PetscReal,&v0,dim*dim,PetscReal,&J,dim*dim,PetscReal,&invJ);
749: DMComplexGetHeightStratum(dm, 0, &cStart, &cEnd);
750: for(c = cStart; c < cEnd; ++c) {
751: const PetscScalar *x;
752: PetscInt q;
754: PetscMemzero(elemMat, numBasisFuncs*numBasisFuncs * sizeof(PetscScalar));
755: DMComplexComputeCellGeometry(dm, c, v0, J, invJ, &detJ);
756: if (detJ <= 0.0) SETERRQ2(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Invalid determinant %g for element %d", detJ, c);
757: DMComplexVecGetClosure(dm, PETSC_NULL, X, c, &x);
759: for(q = 0; q < numQuadPoints; ++q) {
760: PetscScalar fieldVal = 0.0;
761: PetscInt f, g, d, e;
763: for(f = 0; f < numBasisFuncs; ++f) {
764: fieldVal += x[f]*basis[q*numBasisFuncs+f];
765: }
766: for(f = 0; f < numBasisFuncs; ++f) {
767: for(d = 0; d < dim; ++d) {
768: realSpaceTestDer[d] = 0.0;
769: for(e = 0; e < dim; ++e) {
770: realSpaceTestDer[d] += invJ[e*dim+d]*basisDer[(q*numBasisFuncs+f)*dim+e];
771: }
772: }
773: for(g = 0; g < numBasisFuncs; ++g) {
774: for(d = 0; d < dim; ++d) {
775: realSpaceBasisDer[d] = 0.0;
776: for(e = 0; e < dim; ++e) {
777: realSpaceBasisDer[d] += invJ[e*dim+d]*basisDer[(q*numBasisFuncs+g)*dim+e];
778: }
779: }
780: /* Linear term: -\Delta u */
781: PetscScalar product = 0.0;
782: for(d = 0; d < dim; ++d) product += realSpaceTestDer[d]*realSpaceBasisDer[d];
783: elemMat[f*numBasisFuncs+g] += product*quadWeights[q]*detJ;
784: /* Nonlinear term: -\lambda e^{u} */
785: elemMat[f*numBasisFuncs+g] -= basis[q*numBasisFuncs+f]*basis[q*numBasisFuncs+g]*0.0*PetscExpScalar(fieldVal)*quadWeights[q]*detJ;
786: }
787: }
788: }
789: if (debug) {DMPrintCellMatrix(c, "Jacobian", numBasisFuncs, numBasisFuncs, elemMat);}
790: DMComplexMatSetClosure(dm, PETSC_NULL, PETSC_NULL, Jac, c, elemMat, ADD_VALUES);
791: }
792: PetscLogFlops((cEnd-cStart)*numQuadPoints*numBasisFuncs*(dim*(dim*5+4)+14));
793: PetscFree3(realSpaceTestDer,realSpaceBasisDer,elemMat);
794: PetscFree3(v0,J,invJ);
796: /* Assemble matrix, using the 2-step process:
797: MatAssemblyBegin(), MatAssemblyEnd(). */
798: MatAssemblyBegin(Jac, MAT_FINAL_ASSEMBLY);
799: MatAssemblyEnd(Jac, MAT_FINAL_ASSEMBLY);
800: /* Tell the matrix we will never add a new nonzero location to the
801: matrix. If we do, it will generate an error. */
802: MatSetOption(Jac, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE);
803: PetscLogEventEnd(user->jacobianEvent,0,0,0,0);
804: return(0);
805: }
809: /*
810: dm - The mesh
811: X - The local input vector
812: Jac - The output matrix
813: */
814: PetscErrorCode FormJacobianLocalElasticity(DM dm, Vec X, Mat Jac, Mat JacPre, AppCtx *user)
815: {
816: const PetscInt debug = user->debug;
817: const PetscInt dim = user->dim;
818: const PetscInt numQuadPoints = user->q.numQuadPoints;
819: const PetscReal *quadWeights = user->q.quadWeights;
820: const PetscInt numBasisFuncs = user->q.numBasisFuncs;
821: const PetscReal *basis = user->q.basis;
822: const PetscReal *basisDer = user->q.basisDer;
823: PetscReal *v0, *J, *invJ, detJ;
824: PetscScalar *realSpaceTestDer, *realSpaceBasisDer, *elemMat;
825: PetscInt cStart, cEnd, c;
826: PetscErrorCode ierr;
829: PetscLogEventBegin(user->jacobianEvent,0,0,0,0);
830: MatZeroEntries(Jac);
831: PetscMalloc3(dim,PetscScalar,&realSpaceTestDer,dim,PetscScalar,&realSpaceBasisDer,numBasisFuncs*numBasisFuncs,PetscScalar,&elemMat);
832: PetscMalloc3(dim,PetscReal,&v0,dim*dim,PetscReal,&J,dim*dim,PetscReal,&invJ);
833: DMComplexGetHeightStratum(dm, 0, &cStart, &cEnd);
834: for(c = cStart; c < cEnd; ++c) {
835: const PetscScalar *x;
836: PetscInt q;
838: PetscMemzero(elemMat, numBasisFuncs*numBasisFuncs * sizeof(PetscScalar));
839: DMComplexComputeCellGeometry(dm, c, v0, J, invJ, &detJ);
840: if (detJ <= 0.0) SETERRQ2(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Invalid determinant %g for element %d", detJ, c);
841: DMComplexVecGetClosure(dm, PETSC_NULL, X, c, &x);
843: for(q = 0; q < numQuadPoints; ++q) {
844: PetscScalar fieldVal = 0.0;
845: PetscInt f, g, d, e;
847: for(f = 0; f < numBasisFuncs; ++f) {
848: fieldVal += x[f]*basis[q*numBasisFuncs+f];
849: }
850: for(f = 0; f < numBasisFuncs; ++f) {
851: for(d = 0; d < dim; ++d) {
852: realSpaceTestDer[d] = 0.0;
853: for(e = 0; e < dim; ++e) {
854: realSpaceTestDer[d] += invJ[e*dim+d]*basisDer[(q*numBasisFuncs+f)*dim+e];
855: }
856: }
857: for(g = 0; g < numBasisFuncs; ++g) {
858: for(d = 0; d < dim; ++d) {
859: realSpaceBasisDer[d] = 0.0;
860: for(e = 0; e < dim; ++e) {
861: realSpaceBasisDer[d] += invJ[e*dim+d]*basisDer[(q*numBasisFuncs+g)*dim+e];
862: }
863: }
864: /* Linear term: -\Delta u */
865: PetscScalar product = 0.0;
866: for(d = 0; d < dim; ++d) product += realSpaceTestDer[d]*realSpaceBasisDer[d];
867: elemMat[f*numBasisFuncs+g] += product*quadWeights[q]*detJ;
868: /* Nonlinear term: -\lambda e^{u} */
869: elemMat[f*numBasisFuncs+g] -= basis[q*numBasisFuncs+f]*basis[q*numBasisFuncs+g]*0.0*PetscExpScalar(fieldVal)*quadWeights[q]*detJ;
870: }
871: }
872: }
873: if (debug) {DMPrintCellMatrix(c, "Jacobian", numBasisFuncs, numBasisFuncs, elemMat);}
874: DMComplexMatSetClosure(dm, PETSC_NULL, PETSC_NULL, Jac, c, elemMat, ADD_VALUES);
875: }
876: PetscLogFlops((cEnd-cStart)*numQuadPoints*numBasisFuncs*(dim*(dim*5+4)+14));
877: PetscFree3(realSpaceTestDer,realSpaceBasisDer,elemMat);
878: PetscFree3(v0,J,invJ);
880: /* Assemble matrix, using the 2-step process:
881: MatAssemblyBegin(), MatAssemblyEnd(). */
882: MatAssemblyBegin(Jac, MAT_FINAL_ASSEMBLY);
883: MatAssemblyEnd(Jac, MAT_FINAL_ASSEMBLY);
884: /* Tell the matrix we will never add a new nonzero location to the
885: matrix. If we do, it will generate an error. */
886: MatSetOption(Jac, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE);
887: PetscLogEventEnd(user->jacobianEvent,0,0,0,0);
888: return(0);
889: }
893: /*
894: dm - The mesh
895: X - The local input vector
896: Jac - The output matrix
897: */
898: PetscErrorCode FormJacobianLocalBatch(DM dm, Vec X, Mat Jac, Mat JacPre, AppCtx *user)
899: {
900: const PetscInt debug = user->debug;
901: const PetscInt dim = user->dim;
902: const PetscInt numQuadPoints = user->q.numQuadPoints;
903: const PetscReal *quadWeights = user->q.quadWeights;
904: const PetscInt numBasisFuncs = user->q.numBasisFuncs;
905: const PetscReal *basis = user->q.basis;
906: const PetscReal *basisDer = user->q.basisDer;
907: PetscReal *v0, *J, *invJ, detJ;
908: PetscScalar *realSpaceTestDer, *realSpaceBasisDer, *elemMat;
909: PetscInt cStart, cEnd, c;
910: PetscErrorCode ierr;
913: PetscLogEventBegin(user->jacobianBatchEvent,0,0,0,0);
914: MatZeroEntries(Jac);
915: PetscMalloc3(dim,PetscScalar,&realSpaceTestDer,dim,PetscScalar,&realSpaceBasisDer,numBasisFuncs*numBasisFuncs,PetscScalar,&elemMat);
916: PetscMalloc3(dim,PetscReal,&v0,dim*dim,PetscReal,&J,dim*dim,PetscReal,&invJ);
917: DMComplexGetHeightStratum(dm, 0, &cStart, &cEnd);
918: for(c = cStart; c < cEnd; ++c) {
919: const PetscScalar *x;
920: PetscInt q;
922: PetscMemzero(elemMat, numBasisFuncs*numBasisFuncs * sizeof(PetscScalar));
923: DMComplexComputeCellGeometry(dm, c, v0, J, invJ, &detJ);
924: if (detJ <= 0.0) SETERRQ2(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Invalid determinant %g for element %d", detJ, c);
925: DMComplexVecGetClosure(dm, PETSC_NULL, X, c, &x);
927: for(q = 0; q < numQuadPoints; ++q) {
928: PetscScalar fieldVal = 0.0;
929: PetscInt f, g, d, e;
931: for(f = 0; f < numBasisFuncs; ++f) {
932: fieldVal += x[f]*basis[q*numBasisFuncs+f];
933: }
934: for(f = 0; f < numBasisFuncs; ++f) {
935: for(d = 0; d < dim; ++d) {
936: realSpaceTestDer[d] = 0.0;
937: for(e = 0; e < dim; ++e) {
938: realSpaceTestDer[d] += invJ[e*dim+d]*basisDer[(q*numBasisFuncs+f)*dim+e];
939: }
940: }
941: for(g = 0; g < numBasisFuncs; ++g) {
942: for(d = 0; d < dim; ++d) {
943: realSpaceBasisDer[d] = 0.0;
944: for(e = 0; e < dim; ++e) {
945: realSpaceBasisDer[d] += invJ[e*dim+d]*basisDer[(q*numBasisFuncs+g)*dim+e];
946: }
947: }
948: /* Linear term: -\Delta u */
949: PetscScalar product = 0.0;
950: for(d = 0; d < dim; ++d) product += realSpaceTestDer[d]*realSpaceBasisDer[d];
951: elemMat[f*numBasisFuncs+g] += product*quadWeights[q]*detJ;
952: /* Nonlinear term: -\lambda e^{u} */
953: elemMat[f*numBasisFuncs+g] -= basis[q*numBasisFuncs+f]*basis[q*numBasisFuncs+g]*0.0*PetscExpScalar(fieldVal)*quadWeights[q]*detJ;
954: }
955: }
956: }
957: if (debug) {DMPrintCellMatrix(c, "Jacobian", numBasisFuncs, numBasisFuncs, elemMat);}
958: DMComplexMatSetClosure(dm, PETSC_NULL, PETSC_NULL, Jac, c, elemMat, ADD_VALUES);
959: }
960: PetscLogFlops((cEnd-cStart)*numQuadPoints*numBasisFuncs*(dim*(dim*5+4)+14));
961: PetscFree3(realSpaceTestDer,realSpaceBasisDer,elemMat);
962: PetscFree3(v0,J,invJ);
964: /* Assemble matrix, using the 2-step process:
965: MatAssemblyBegin(), MatAssemblyEnd(). */
966: MatAssemblyBegin(Jac, MAT_FINAL_ASSEMBLY);
967: MatAssemblyEnd(Jac, MAT_FINAL_ASSEMBLY);
968: /* Tell the matrix we will never add a new nonzero location to the
969: matrix. If we do, it will generate an error. */
970: MatSetOption(Jac, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE);
971: PetscLogEventEnd(user->jacobianBatchEvent,0,0,0,0);
972: return(0);
973: }
977: int main(int argc, char **argv)
978: {
979: DM dm;
980: SNES snes;
981: AppCtx user;
984: PetscInitialize(&argc, &argv, PETSC_NULL, help);
985: #ifndef PETSC_HAVE_CUDA
986: SETERRQ(PETSC_COMM_WORLD, PETSC_ERR_SUP, "This example requires CUDA support.");
987: #endif
988: ProcessOptions(PETSC_COMM_WORLD, &user);
989: CreateMesh(PETSC_COMM_WORLD, &user, &dm);
990: SetupQuadrature(&user);
991: SetupSection(dm, &user);
993: SNESCreate(PETSC_COMM_WORLD, &snes);
994: if (user.computeFunction) {
995: Vec X, F;
997: DMGetGlobalVector(dm, &X);
998: DMGetGlobalVector(dm, &F);
999: if (user.batch) {
1000: switch(user.op) {
1001: case LAPLACIAN:
1002: FormInitialGuess(X, quadratic_2d, INSERT_VALUES, &user);break;
1003: case ELASTICITY:
1004: FormInitialGuess(X, quadratic_2d_elas, INSERT_VALUES, &user);break;
1005: default:
1006: SETERRQ1(PETSC_COMM_WORLD, PETSC_ERR_ARG_OUTOFRANGE, "Invalid PDE operator %d", user.op);
1007: }
1008: DMComplexSetLocalFunction(dm, (PetscErrorCode (*)(DM, Vec, Vec, void*)) FormFunctionLocalBatch);
1009: } else {
1010: switch(user.op) {
1011: case LAPLACIAN:
1012: FormInitialGuess(X, quadratic_2d, INSERT_VALUES, &user);
1013: DMComplexSetLocalFunction(dm, (PetscErrorCode (*)(DM, Vec, Vec, void*)) FormFunctionLocalLaplacian);break;
1014: case ELASTICITY:
1015: FormInitialGuess(X, quadratic_2d_elas, INSERT_VALUES, &user);
1016: DMComplexSetLocalFunction(dm, (PetscErrorCode (*)(DM, Vec, Vec, void*)) FormFunctionLocalElasticity);break;
1017: default:
1018: SETERRQ1(PETSC_COMM_WORLD, PETSC_ERR_ARG_OUTOFRANGE, "Invalid PDE operator %d", user.op);
1019: }
1020: }
1021: SNESDMComplexComputeFunction(snes, X, F, &user);
1022: DMRestoreGlobalVector(dm, &X);
1023: DMRestoreGlobalVector(dm, &F);
1024: }
1025: if (user.computeJacobian) {
1026: Vec X;
1027: Mat J;
1028: MatStructure flag;
1030: DMGetGlobalVector(dm, &X);
1031: DMCreateMatrix(dm, MATAIJ, &J);
1032: if (user.batch) {
1033: DMComplexSetLocalJacobian(dm, (PetscErrorCode (*)(DM, Vec, Mat, Mat, void*)) FormJacobianLocalBatch);
1034: } else {
1035: switch(user.op) {
1036: case LAPLACIAN:
1037: DMComplexSetLocalJacobian(dm, (PetscErrorCode (*)(DM, Vec, Mat, Mat, void*)) FormJacobianLocalLaplacian);break;
1038: case ELASTICITY:
1039: DMComplexSetLocalJacobian(dm, (PetscErrorCode (*)(DM, Vec, Mat, Mat, void*)) FormJacobianLocalElasticity);break;
1040: default:
1041: SETERRQ1(PETSC_COMM_WORLD, PETSC_ERR_ARG_OUTOFRANGE, "Invalid PDE operator %d", user.op);
1042: }
1043: }
1044: SNESDMComplexComputeJacobian(snes, X, &J, &J, &flag, &user);
1045: MatDestroy(&J);
1046: DMRestoreGlobalVector(dm, &X);
1047: }
1048: SNESDestroy(&snes);
1049: DMDestroy(&dm);
1050: PetscFinalize();
1051: return 0;
1052: }