Actual source code: ex52.c

petsc-3.3-p7 2013-05-11
  1: static const char help[] = "Testbed for FEM operations on the GPU.\n\n";

  3: #if 0
  4:  - Automate testing of different:
  5:    - elements, batches, blocks, operators
  6:  - Process log_summary with Python (use existing tools)
  7:  - Put results in PyTables

  9: PYLITH:
 10:  - Redo setup/cleanup in PyLith
 11:  - Test with examples/3d/hex8/step01.cfg (Brad is making a flag)
 12:  - Code up Jacobian (looks like original code)
 13:  - Consider taking in coordinates rather than J^{-T}/|J|

 15: MUST CHECK WITH:
 16:  - 3D elements
 17:  - Different quadrature (1pt and 3pt)
 18:  - Different basis (linear and quadratic)
 19: #endif

 21: #include<petscdmcomplex.h>
 22: #include<petscsnes.h>

 24: typedef enum {LAPLACIAN, ELASTICITY} OpType;

 26: typedef struct {
 27:   DM            dm;                /* REQUIRED in order to use SNES evaluation functions */
 28:   PetscInt      debug;             /* The debugging level */
 29:   PetscMPIInt   rank;              /* The process rank */
 30:   PetscMPIInt   numProcs;          /* The number of processes */
 31:   PetscInt      dim;               /* The topological mesh dimension */
 32:   PetscBool     interpolate;       /* Generate intermediate mesh elements */
 33:   PetscReal     refinementLimit;   /* The largest allowable cell volume */
 34:   char          partitioner[2048]; /* The graph partitioner */
 35:   PetscBool     computeFunction;   /* The flag for computing a residual */
 36:   PetscBool     computeJacobian;   /* The flag for computing a Jacobian */
 37:   PetscBool     batch;             /* The flag for batch assembly */
 38:   PetscBool     gpu;               /* The flag for GPU integration */
 39:   OpType        op;                /* The type of PDE operator (should use FFC/Ignition here) */
 40:   PetscBool     showResidual, showJacobian;
 41:   PetscLogEvent createMeshEvent, residualEvent, residualBatchEvent, jacobianEvent, jacobianBatchEvent, integrateBatchCPUEvent, integrateBatchGPUEvent, integrateGPUOnlyEvent;
 42:   /* GPU partitioning */
 43:   PetscInt      numBatches;        /* The number of cell batches per kernel */
 44:   PetscInt      numBlocks;         /* The number of concurrent blocks per kernel */
 45:   /* Element quadrature */
 46:   PetscQuadrature q;
 47: } AppCtx;

 49: /*------------------------------------------------------------------------------
 50:   This code can be generated using 'bin/pythonscripts/PetscGenerateFEMQuadrature.py dim 1 numComp 1 laplacian src/snes/examples/tutorials/ex52.h'
 51:  -----------------------------------------------------------------------------*/
 52: #include "ex52.h"

 54: void quadratic_2d(const PetscReal x[], PetscScalar u[]) {
 55:   u[0] = x[0]*x[0] + x[1]*x[1];
 56: };

 58: void quadratic_2d_elas(const PetscReal x[], PetscScalar u[]) {
 59:   u[0] = x[0]*x[0] + x[1]*x[1];
 60:   u[1] = x[0]*x[0] + x[1]*x[1];
 61: };

 65: PetscErrorCode ProcessOptions(MPI_Comm comm, AppCtx *options) {
 66:   const char    *opTypes[2]  = {"laplacian", "elasticity"};
 67:   PetscInt       op;

 71:   options->debug           = 0;
 72:   options->dim             = 2;
 73:   options->interpolate     = PETSC_FALSE;
 74:   options->refinementLimit = 0.0;
 75:   options->computeFunction = PETSC_FALSE;
 76:   options->computeJacobian = PETSC_FALSE;
 77:   options->batch           = PETSC_FALSE;
 78:   options->gpu             = PETSC_FALSE;
 79:   options->numBatches      = 1;
 80:   options->numBlocks       = 1;
 81:   options->op              = LAPLACIAN;
 82:   options->showResidual    = PETSC_TRUE;
 83:   options->showJacobian    = PETSC_TRUE;

 85:   MPI_Comm_size(comm, &options->numProcs);
 86:   MPI_Comm_rank(comm, &options->rank);
 87:   PetscOptionsBegin(comm, "", "Bratu Problem Options", "DMCOMPLEX");
 88:   PetscOptionsInt("-debug", "The debugging level", "ex52.c", options->debug, &options->debug, PETSC_NULL);
 89:   PetscOptionsInt("-dim", "The topological mesh dimension", "ex52.c", options->dim, &options->dim, PETSC_NULL);
 90:   PetscOptionsBool("-interpolate", "Generate intermediate mesh elements", "ex52.c", options->interpolate, &options->interpolate, PETSC_NULL);
 91:   PetscOptionsReal("-refinement_limit", "The largest allowable cell volume", "ex52.c", options->refinementLimit, &options->refinementLimit, PETSC_NULL);
 92:   PetscStrcpy(options->partitioner, "chaco");
 93:   PetscOptionsString("-partitioner", "The graph partitioner", "ex52.c", options->partitioner, options->partitioner, 2048, PETSC_NULL);
 94:   PetscOptionsBool("-compute_function", "Compute the residual", "ex52.c", options->computeFunction, &options->computeFunction, PETSC_NULL);
 95:   PetscOptionsBool("-compute_jacobian", "Compute the Jacobian", "ex52.c", options->computeJacobian, &options->computeJacobian, PETSC_NULL);
 96:   PetscOptionsBool("-batch", "Use the batch assembly method", "ex52.c", options->batch, &options->batch, PETSC_NULL);
 97:   PetscOptionsBool("-gpu", "Use the GPU for integration method", "ex52.c", options->gpu, &options->gpu, PETSC_NULL);
 98:   PetscOptionsInt("-gpu_batches", "The number of cell batches per kernel", "ex52.c", options->numBatches, &options->numBatches, PETSC_NULL);
 99:   PetscOptionsInt("-gpu_blocks", "The number of concurrent blocks per kernel", "ex52.c", options->numBlocks, &options->numBlocks, PETSC_NULL);
100:   op = options->op;
101:   PetscOptionsEList("-op_type","Type of PDE operator","ex52.c",opTypes,2,opTypes[options->op],&op,PETSC_NULL);
102:   options->op = (OpType) op;
103:   PetscOptionsBool("-show_residual", "Output the residual for verification", "ex52.c", options->showResidual, &options->showResidual, PETSC_NULL);
104:   PetscOptionsBool("-show_jacobian", "Output the Jacobian for verification", "ex52.c", options->showJacobian, &options->showJacobian, PETSC_NULL);
105:   PetscOptionsEnd();

107:   PetscLogEventRegister("CreateMesh",    DM_CLASSID,   &options->createMeshEvent);
108:   PetscLogEventRegister("Residual",      SNES_CLASSID, &options->residualEvent);
109:   PetscLogEventRegister("ResidualBatch", SNES_CLASSID, &options->residualBatchEvent);
110:   PetscLogEventRegister("Jacobian",      SNES_CLASSID, &options->jacobianEvent);
111:   PetscLogEventRegister("JacobianBatch", SNES_CLASSID, &options->jacobianBatchEvent);
112:   PetscLogEventRegister("IntegBatchCPU", SNES_CLASSID, &options->integrateBatchCPUEvent);
113:   PetscLogEventRegister("IntegBatchGPU", SNES_CLASSID, &options->integrateBatchGPUEvent);
114:   PetscLogEventRegister("IntegGPUOnly",  SNES_CLASSID, &options->integrateGPUOnlyEvent);
115:   return(0);
116: };

120: PetscErrorCode CreateMesh(MPI_Comm comm, AppCtx *user, DM *dm)
121: {
122:   PetscInt       dim             = user->dim;
123:   PetscBool      interpolate     = user->interpolate;
124:   PetscReal      refinementLimit = user->refinementLimit;
125:   const char    *partitioner     = user->partitioner;

129:   PetscLogEventBegin(user->createMeshEvent,0,0,0,0);
130:   DMComplexCreateBoxMesh(comm, dim, interpolate, dm);
131:   {
132:     DM refinedMesh     = PETSC_NULL;
133:     DM distributedMesh = PETSC_NULL;

135:     /* Refine mesh using a volume constraint */
136:     DMComplexSetRefinementLimit(*dm, refinementLimit);
137:     DMRefine(*dm, comm, &refinedMesh);
138:     if (refinedMesh) {
139:       DMDestroy(dm);
140:       *dm  = refinedMesh;
141:     }
142:     /* Distribute mesh over processes */
143:     DMComplexDistribute(*dm, partitioner, &distributedMesh);
144:     if (distributedMesh) {
145:       DMDestroy(dm);
146:       *dm  = distributedMesh;
147:     }
148:   }
149:   DMSetFromOptions(*dm);
150:   PetscLogEventEnd(user->createMeshEvent,0,0,0,0);
151:   user->dm = *dm;
152:   return(0);
153: }

157: PetscErrorCode SetupQuadrature(AppCtx *user) {
159:   switch(user->dim) {
160:   case 2:
161:   case 3:
162:     user->q.numQuadPoints = NUM_QUADRATURE_POINTS_0;
163:     user->q.quadPoints    = points_0;
164:     user->q.quadWeights   = weights_0;
165:     user->q.numBasisFuncs = NUM_BASIS_FUNCTIONS_0;
166:     user->q.numComponents = NUM_BASIS_COMPONENTS_0;
167:     user->q.basis         = Basis_0;
168:     user->q.basisDer      = BasisDerivatives_0;
169:     break;
170:   default:
171:     SETERRQ1(PETSC_COMM_WORLD, PETSC_ERR_ARG_OUTOFRANGE, "Invalid dimension %d", user->dim);
172:   }
173:   return(0);
174: }

178: PetscErrorCode SetupSection(DM dm, AppCtx *user) {
179:   PetscSection   section;
180:   /* These can be generated using config/PETSc/FEM.py */
181:   PetscInt       dim        = user->dim;
182:   PetscInt       numBC      = 0;
183:   PetscInt       numComp[1] = {NUM_BASIS_COMPONENTS_0};

187:   if (dim != SPATIAL_DIM_0) SETERRQ2(((PetscObject) dm)->comm, PETSC_ERR_ARG_SIZ, "Spatial dimension %d should be %d", dim, SPATIAL_DIM_0);
188:   DMComplexCreateSection(dm, dim, 1, numComp, numDof_0, numBC, PETSC_NULL, PETSC_NULL, &section);
189:   DMComplexSetDefaultSection(dm, section);
190:   return(0);
191: }

195: /*
196:   FormInitialGuess - Forms initial approximation.

198:   Input Parameters:
199: + user - user-defined application context
200: - guessFunc - The coordinate function to use for the guess

202:   Output Parameter:
203: . X - vector
204: */
205: PetscErrorCode FormInitialGuess(Vec X, void (*guessFunc)(const PetscReal [], PetscScalar []), InsertMode mode, AppCtx *user)
206: {
207:   Vec            localX, coordinates;
208:   PetscSection   section, cSection;
209:   PetscInt       vStart, vEnd, v;

213:   DMGetLocalVector(user->dm, &localX);
214:   DMComplexGetDepthStratum(user->dm, 0, &vStart, &vEnd);
215:   DMComplexGetDefaultSection(user->dm, &section);
216:   DMComplexGetCoordinateSection(user->dm, &cSection);
217:   DMComplexGetCoordinateVec(user->dm, &coordinates);
218:   for(v = vStart; v < vEnd; ++v) {
219:     PetscScalar  values[3];
220:     PetscScalar *coords;

222:     VecGetValuesSection(coordinates, cSection, v, &coords);
223:     (*guessFunc)(coords, values);
224:     VecSetValuesSection(localX, section, v, values, mode);
225:   }

227:   DMLocalToGlobalBegin(user->dm, localX, INSERT_VALUES, X);
228:   DMLocalToGlobalEnd(user->dm, localX, INSERT_VALUES, X);
229:   DMRestoreLocalVector(user->dm, &localX);
230:   return(0);
231: }

235: /*
236:  dm - The mesh
237:  X  - Local intput vector
238:  F  - Local output vector
239:  */
240: PetscErrorCode FormFunctionLocalLaplacian(DM dm, Vec X, Vec F, AppCtx *user)
241: {
242:   //PetscScalar    (*rhsFunc)(const PetscReal []) = user->rhsFunc;
243:   const PetscInt   debug         = user->debug;
244:   const PetscInt   dim           = user->dim;
245:   const PetscInt   numQuadPoints = user->q.numQuadPoints;
246:   const PetscReal *quadPoints    = user->q.quadPoints;
247:   const PetscReal *quadWeights   = user->q.quadWeights;
248:   const PetscInt   numBasisFuncs = user->q.numBasisFuncs;
249:   const PetscReal *basis         = user->q.basis;
250:   const PetscReal *basisDer      = user->q.basisDer;
251:   PetscReal       *coords, *v0, *J, *invJ, detJ;
252:   PetscScalar     *realSpaceDer, *fieldGrad, *elemVec;
253:   PetscInt         cStart, cEnd, c, p;
254:   PetscErrorCode   ierr;

257:   PetscLogEventBegin(user->residualEvent,0,0,0,0);
258:   VecSet(F, 0.0);
259:   PetscMalloc3(dim,PetscScalar,&realSpaceDer,dim,PetscScalar,&fieldGrad,numBasisFuncs,PetscScalar,&elemVec);
260:   PetscMalloc4(dim,PetscReal,&coords,dim,PetscReal,&v0,dim*dim,PetscReal,&J,dim*dim,PetscReal,&invJ);
261:   DMComplexGetHeightStratum(dm, 0, &cStart, &cEnd);
262:   for(c = cStart; c < cEnd; ++c) {
263:     const PetscScalar *x;
264:     PetscInt           q, f, d, e;

266:     PetscMemzero(elemVec, numBasisFuncs * sizeof(PetscScalar));
267:     DMComplexComputeCellGeometry(dm, c, v0, J, invJ, &detJ);
268:     if (detJ <= 0.0) SETERRQ2(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Invalid determinant %g for element %d", detJ, c);
269:     DMComplexVecGetClosure(dm, PETSC_NULL, X, c, &x);
270:     if (debug > 1) {DMPrintCellVector(c, "Solution", numBasisFuncs, x);}

272:     for(q = 0; q < numQuadPoints; ++q) {
273:       PetscScalar fieldVal = 0.0;

275:       if (debug > 1) {PetscPrintf(PETSC_COMM_SELF, "  quad point %d\n", q);}
276:       for(d = 0; d < dim; ++d) {
277:         fieldGrad[d] = 0.0;
278:         coords[d] = v0[d];
279:         for(e = 0; e < dim; ++e) {
280:           coords[d] += J[d*dim+e]*(quadPoints[q*dim+e] + 1.0);
281:         }
282:         if (debug > 1) {PetscPrintf(PETSC_COMM_SELF, "    coords[%d] %g\n", d, coords[d]);}
283:       }
284:       for(f = 0; f < numBasisFuncs; ++f) {
285:         fieldVal += x[f]*basis[q*numBasisFuncs+f];

287:         for(d = 0; d < dim; ++d) {
288:           realSpaceDer[d] = 0.0;
289:           for(e = 0; e < dim; ++e) {
290:             realSpaceDer[d] += invJ[e*dim+d]*basisDer[(q*numBasisFuncs+f)*dim+e];
291:           }
292:           fieldGrad[d] += realSpaceDer[d]*x[f];
293:         }
294:       }
295:       if (debug > 1) {
296:         for(d = 0; d < dim; ++d) {
297:           PetscPrintf(PETSC_COMM_SELF, "    fieldGrad[%d] %g\n", d, fieldGrad[d]);
298:         }
299:       }
300:       const PetscScalar funcVal = 0.0; //(*rhsFunc)(coords);
301:       for(f = 0; f < numBasisFuncs; ++f) {
302:         /* Constant term: -f(x) */
303:         elemVec[f] -= basis[q*numBasisFuncs+f]*funcVal*quadWeights[q]*detJ;
304:         /* Linear term: -\Delta u */
305:         PetscScalar product = 0.0;
306:         for(d = 0; d < dim; ++d) {
307:           realSpaceDer[d] = 0.0;
308:           for(e = 0; e < dim; ++e) {
309:             realSpaceDer[d] += invJ[e*dim+d]*basisDer[(q*numBasisFuncs+f)*dim+e];
310:           }
311:           product += realSpaceDer[d]*fieldGrad[d];
312:         }
313:         elemVec[f] += product*quadWeights[q]*detJ;
314:         /* Nonlinear term: -\lambda e^{u} */
315:         elemVec[f] -= basis[q*numBasisFuncs+f]*0.0*PetscExpScalar(fieldVal)*quadWeights[q]*detJ;
316:       }
317:     }
318:     if (debug) {DMPrintCellVector(c, "Residual", numBasisFuncs, elemVec);}
319:     DMComplexVecSetClosure(dm, PETSC_NULL, F, c, elemVec, ADD_VALUES);
320:   }
321:   PetscLogFlops((cEnd-cStart)*numQuadPoints*numBasisFuncs*(dim*(dim*5+4)+14));
322:   PetscFree3(realSpaceDer,fieldGrad,elemVec);
323:   PetscFree4(coords,v0,J,invJ);

325:   PetscPrintf(PETSC_COMM_WORLD, "Residual:\n");
326:   for(p = 0; p < user->numProcs; ++p) {
327:     if (p == user->rank) {VecView(F, PETSC_VIEWER_STDOUT_SELF);}
328:     PetscBarrier((PetscObject) dm);
329:   }
330:   PetscLogEventEnd(user->residualEvent,0,0,0,0);
331:   return(0);
332: }

336: /*
337:  dm - The mesh
338:  X  - Local intput vector
339:  F  - Local output vector
340:  */
341: PetscErrorCode FormFunctionLocalElasticity(DM dm, Vec X, Vec F, AppCtx *user)
342: {
343:   //PetscScalar    (*rhsFunc)(const PetscReal []) = user->rhsFunc;
344:   const PetscInt   debug         = user->debug;
345:   const PetscInt   dim           = user->dim;
346:   const PetscInt   numQuadPoints = user->q.numQuadPoints;
347:   const PetscReal *quadPoints    = user->q.quadPoints;
348:   const PetscReal *quadWeights   = user->q.quadWeights;
349:   const PetscInt   numBasisFuncs = user->q.numBasisFuncs;
350:   const PetscInt   numBasisComps = user->q.numComponents;
351:   const PetscReal *basis         = user->q.basis;
352:   const PetscReal *basisDer      = user->q.basisDer;
353:   PetscReal       *coords, *v0, *J, *invJ, detJ;
354:   PetscScalar     *realSpaceDer, *fieldGrad, *elemVec;
355:   PetscInt         cStart, cEnd, c, p;
356:   PetscErrorCode   ierr;

359:   PetscLogEventBegin(user->residualEvent,0,0,0,0);
360:   VecSet(F, 0.0);
361:   PetscMalloc3(dim,PetscScalar,&realSpaceDer,dim*numBasisComps,PetscScalar,&fieldGrad,numBasisFuncs*numBasisComps,PetscScalar,&elemVec);
362:   PetscMalloc4(dim,PetscReal,&coords,dim,PetscReal,&v0,dim*dim,PetscReal,&J,dim*dim,PetscReal,&invJ);
363:   DMComplexGetHeightStratum(dm, 0, &cStart, &cEnd);
364:   for(c = cStart; c < cEnd; ++c) {
365:     const PetscScalar *x;
366:     PetscInt           q, d, e, f, comp;

368:     PetscMemzero(elemVec, numBasisFuncs*numBasisComps * sizeof(PetscScalar));
369:     DMComplexComputeCellGeometry(dm, c, v0, J, invJ, &detJ);
370:     if (detJ <= 0.0) SETERRQ2(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Invalid determinant %g for element %d", detJ, c);
371:     DMComplexVecGetClosure(dm, PETSC_NULL, X, c, &x);
372:     if (debug > 1) {DMPrintCellVector(c, "Solution", numBasisFuncs*numBasisComps, x);}

374:     for(q = 0; q < numQuadPoints; ++q) {
375:       PetscScalar fieldVal[3] = {0.0,0.0,0.0};

377:       if (debug > 1) {PetscPrintf(PETSC_COMM_SELF, "  quad point %d\n", q);}
378:       for(d = 0; d < dim; ++d) {
379:         for(comp = 0; comp < numBasisComps; ++comp) {
380:           fieldGrad[comp*dim+d] = 0.0;
381:         }
382:         coords[d] = v0[d];
383:         for(e = 0; e < dim; ++e) {
384:           coords[d] += J[d*dim+e]*(quadPoints[q*dim+e] + 1.0);
385:         }
386:         if (debug > 1) {PetscPrintf(PETSC_COMM_SELF, "    coords[%d] %g\n", d, coords[d]);}
387:       }
388:       for(f = 0; f < numBasisFuncs; ++f) {
389:         for(comp = 0; comp < numBasisComps; ++comp) {
390:           const PetscInt cidx = f*numBasisComps+comp;

392:           fieldVal[comp] += x[cidx]*basis[q*numBasisFuncs*numBasisComps+cidx];

394:           for(d = 0; d < dim; ++d) {
395:             realSpaceDer[d] = 0.0;
396:             for(e = 0; e < dim; ++e) {
397:               realSpaceDer[d] += invJ[e*dim+d]*basisDer[(q*numBasisFuncs*numBasisComps+cidx)*dim+e];
398:             }
399:             fieldGrad[comp*dim+d] += realSpaceDer[d]*x[cidx];
400:           }
401:         }
402:       }
403:       if (debug > 1) {
404:         for(comp = 0; comp < numBasisComps; ++comp) {
405:           for(d = 0; d < dim; ++d) {
406:             PetscPrintf(PETSC_COMM_SELF, "    field %d gradient[%d] %g\n", comp, d, fieldGrad[comp*dim+d]);
407:           }
408:         }
409:       }
410:       const PetscScalar funcVal[3] = {0.0, 0.0, 0.0}; //(*rhsFunc)(coords);
411:       for(f = 0; f < numBasisFuncs; ++f) {
412:         for(comp = 0; comp < numBasisComps; ++comp) {
413:           const PetscInt cidx = f*numBasisComps+comp;

415:           /* Constant term: -f(x) */
416:           elemVec[cidx] -= basis[q*numBasisFuncs*numBasisComps+cidx]*funcVal[comp]*quadWeights[q]*detJ;
417:           /* Linear term: -\Delta u */
418:           PetscScalar product = 0.0;
419:           for(d = 0; d < dim; ++d) {
420:             realSpaceDer[d] = 0.0;
421:             for(e = 0; e < dim; ++e) {
422:               realSpaceDer[d] += invJ[e*dim+d]*basisDer[(q*numBasisFuncs*numBasisComps+cidx)*dim+e];
423:             }
424:             product += realSpaceDer[d]*0.5*(fieldGrad[comp*dim+d] + fieldGrad[d*dim+comp]);
425:           }
426:           elemVec[cidx] += product*quadWeights[q]*detJ;
427:           /* Nonlinear term: -\lambda e^{u} */
428:           elemVec[cidx] -= basis[q*numBasisFuncs*numBasisComps+cidx]*0.0*PetscExpScalar(fieldVal[comp])*quadWeights[q]*detJ;
429:         }
430:       }
431:     }
432:     if (debug) {DMPrintCellVector(c, "Residual", numBasisFuncs*numBasisComps, elemVec);}
433:     DMComplexVecSetClosure(dm, PETSC_NULL, F, c, elemVec, ADD_VALUES);
434:   }
435:   PetscLogFlops((cEnd-cStart)*numQuadPoints*numBasisFuncs*(dim*(dim*5+4)+14));
436:   PetscFree3(realSpaceDer,fieldGrad,elemVec);
437:   PetscFree4(coords,v0,J,invJ);

439:   PetscPrintf(PETSC_COMM_WORLD, "Residual:\n");
440:   for(p = 0; p < user->numProcs; ++p) {
441:     if (p == user->rank) {VecView(F, PETSC_VIEWER_STDOUT_SELF);}
442:     PetscBarrier((PetscObject) dm);
443:   }
444:   PetscLogEventEnd(user->residualEvent,0,0,0,0);
445:   return(0);
446: }

448: EXTERN_C_BEGIN
449: PetscErrorCode IntegrateElementBatchGPU(PetscInt Ne, PetscInt Nbatch, PetscInt Nbc, PetscInt Nbl, const PetscScalar coefficients[], const PetscReal jacobianInverses[], const PetscReal jacobianDeterminants[], PetscScalar elemVec[], PetscLogEvent event, PetscInt debug);
450: EXTERN_C_END

452: void f1_laplacian(PetscScalar u, const PetscScalar gradU[], PetscScalar f1[]) {
453:   const PetscInt dim = SPATIAL_DIM_0;
454:   PetscInt       d;

456:   for(d = 0; d < dim; ++d) {
457:     f1[d] = gradU[d];
458:   }
459:   return;
460: }

464: PetscErrorCode IntegrateLaplacianBatchCPU(PetscInt Ne, PetscInt Nb, const PetscScalar coefficients[], const PetscReal jacobianInverses[], const PetscReal jacobianDeterminants[], PetscInt Nq, const PetscReal quadPoints[], const PetscReal quadWeights[], const PetscReal basisTabulation[], const PetscReal basisDerTabulation[], PetscScalar elemVec[], AppCtx *user) {
465:   const PetscInt debug = user->debug;
466:   const PetscInt dim   = SPATIAL_DIM_0;
467:   PetscInt       e;

471:   PetscLogEventBegin(user->integrateBatchCPUEvent,0,0,0,0);
472:   for(e = 0; e < Ne; ++e) {
473:     const PetscReal  detJ = jacobianDeterminants[e];
474:     const PetscReal *invJ = &jacobianInverses[e*dim*dim];
475:     PetscScalar      f1[SPATIAL_DIM_0/*1*/]; // Nq = 1
476:     PetscInt         q, b, d, f;

478:     if (debug > 1) {
479:       PetscPrintf(PETSC_COMM_SELF, "  detJ: %g\n", detJ);
480:       PetscPrintf(PETSC_COMM_SELF, "  invJ: %g %g %g %g\n", invJ[0], invJ[1], invJ[2], invJ[3]);
481:     }
482:     for(q = 0; q < Nq; ++q) {
483:       if (debug) {PetscPrintf(PETSC_COMM_SELF, "  quad point %d\n", q);}
484:       PetscScalar u = 0.0;
485:       PetscScalar gradU[SPATIAL_DIM_0];

487:       for(d = 0; d < dim; ++d) {gradU[d] = 0.0;}
488:       for(b = 0; b < Nb; ++b) {
489:         PetscScalar realSpaceDer[SPATIAL_DIM_0];

491:         u += coefficients[e*Nb+b]*basisTabulation[q*Nb+b];
492:         for(d = 0; d < dim; ++d) {
493:           realSpaceDer[d] = 0.0;
494:           for(f = 0; f < dim; ++f) {
495:             realSpaceDer[d] += invJ[f*dim+d]*basisDerTabulation[(q*Nb+b)*dim+f];
496:           }
497:           gradU[d] += coefficients[e*Nb+b]*realSpaceDer[d];
498:         }
499:       }
500:       f1_laplacian(u, gradU, &f1[q*dim]);
501:       f1[q*dim+0] *= detJ*quadWeights[q];
502:       f1[q*dim+1] *= detJ*quadWeights[q];
503:       if (debug > 1) {
504:         PetscPrintf(PETSC_COMM_SELF, "    f1[%d]: %g\n", 0, f1[q*dim+0]);
505:         PetscPrintf(PETSC_COMM_SELF, "    f1[%d]: %g\n", 1, f1[q*dim+1]);
506:       }
507:     }
508:     for(b = 0; b < Nb; ++b) {
509:       elemVec[e*Nb+b] = 0.0;
510:       for(q = 0; q < Nq; ++q) {
511:         PetscScalar realSpaceDer[SPATIAL_DIM_0];

513:         for(d = 0; d < dim; ++d) {
514:           realSpaceDer[d] = 0.0;
515:           for(f = 0; f < dim; ++f) {
516:             realSpaceDer[d] += invJ[f*dim+d]*basisDerTabulation[(q*Nb+b)*dim+f];
517:           }
518:           elemVec[e*Nb+b] += realSpaceDer[d]*f1[q*dim+d];
519:         }
520:       }
521:     }
522:   }
523:   PetscLogFlops((((2+(2+2*dim)*dim)*Nb+2*dim)*Nq + (2+2*dim)*dim*Nq*Nb)*Ne);
524:   PetscLogEventEnd(user->integrateBatchCPUEvent,0,0,0,0);
525:   return(0);
526: };

528: void f1_elasticity(PetscScalar u[], const PetscScalar gradU[], PetscScalar f1[]) {
529:   const PetscInt dim   = SPATIAL_DIM_0;
530:   const PetscInt Ncomp = dim;
531:   PetscInt       comp, d;

533:   for(comp = 0; comp < Ncomp; ++comp) {
534:     for(d = 0; d < dim; ++d) {
535:       f1[comp*dim+d] = 0.5*(gradU[comp*dim+d] + gradU[d*dim+comp]);
536:     }
537:   }
538:   return;
539: }

543: PetscErrorCode IntegrateElasticityBatchCPU(PetscInt Ne, PetscInt Nb, PetscInt Ncomp, const PetscScalar coefficients[], const PetscReal jacobianInverses[], const PetscReal jacobianDeterminants[], PetscInt Nq, const PetscReal quadPoints[], const PetscReal quadWeights[], const PetscReal basisTabulation[], const PetscReal basisDerTabulation[], PetscScalar elemVec[], AppCtx *user) {
544:   const PetscInt debug = user->debug;
545:   const PetscInt dim   = SPATIAL_DIM_0;
546:   PetscInt       e;

550:   PetscLogEventBegin(user->integrateBatchCPUEvent,0,0,0,0);
551:   for(e = 0; e < Ne; ++e) {
552:     const PetscReal  detJ = jacobianDeterminants[e];
553:     const PetscReal *invJ = &jacobianInverses[e*dim*dim];
554:     PetscScalar      f1[SPATIAL_DIM_0*SPATIAL_DIM_0/*1*/]; // Nq = 1
555:     PetscInt         q, b, comp, i;

557:     if (debug > 1) {
558:       PetscPrintf(PETSC_COMM_SELF, "  detJ: %g\n", detJ);
559:       PetscPrintf(PETSC_COMM_SELF, "  invJ:");
560:       for(i = 0; i < dim*dim; ++i) {
561:         PetscPrintf(PETSC_COMM_SELF, " %g", invJ[i]);
562:       }
563:       PetscPrintf(PETSC_COMM_SELF, "\n");
564:     }
565:     for(q = 0; q < Nq; ++q) {
566:       if (debug) {PetscPrintf(PETSC_COMM_SELF, "  quad point %d\n", q);}
567:       PetscScalar u[SPATIAL_DIM_0];
568:       PetscScalar gradU[SPATIAL_DIM_0*SPATIAL_DIM_0];

570:       for(i = 0; i < dim; ++i) {u[i] = 0.0;}
571:       for(i = 0; i < dim*dim; ++i) {gradU[i] = 0.0;}
572:       for(b = 0; b < Nb; ++b) {
573:         for(comp = 0; comp < Ncomp; ++comp) {
574:           const PetscInt cidx = b*Ncomp+comp;
575:           PetscScalar    realSpaceDer[SPATIAL_DIM_0];
576:           PetscInt       d, f;

578:           u[comp] += coefficients[e*Nb*Ncomp+cidx]*basisTabulation[q*Nb*Ncomp+cidx];
579:           for(d = 0; d < dim; ++d) {
580:             realSpaceDer[d] = 0.0;
581:             for(f = 0; f < dim; ++f) {
582:               realSpaceDer[d] += invJ[f*dim+d]*basisDerTabulation[(q*Nb*Ncomp+cidx)*dim+f];
583:             }
584:             gradU[comp*dim+d] += coefficients[e*Nb*Ncomp+cidx]*realSpaceDer[d];
585:           }
586:         }
587:       }
588:       f1_elasticity(u, gradU, &f1[q*Ncomp*dim]);
589:       for(i = 0; i < Ncomp*dim; ++i) {
590:         f1[q*Ncomp*dim+i] *= detJ*quadWeights[q];
591:       }
592:       if (debug > 1) {
593:         PetscInt c, d;
594:         for(c = 0; c < Ncomp; ++c) {
595:           for(d = 0; d < dim; ++d) {
596:             PetscPrintf(PETSC_COMM_SELF, "    gradU[%d]_%c: %g f1[%d]_%c: %g\n", c, 'x'+d, gradU[c*dim+d], c, 'x'+d, f1[(q*Ncomp + c)*dim+d]);
597:           }
598:         }
599:       }
600:     }
601:     for(b = 0; b < Nb; ++b) {
602:       for(comp = 0; comp < Ncomp; ++comp) {
603:         const PetscInt cidx = b*Ncomp+comp;
604:         PetscInt       q, d, f;

606:         elemVec[e*Nb*Ncomp+cidx] = 0.0;
607:         for(q = 0; q < Nq; ++q) {
608:           PetscScalar realSpaceDer[SPATIAL_DIM_0];

610:           for(d = 0; d < dim; ++d) {
611:             realSpaceDer[d] = 0.0;
612:             for(f = 0; f < dim; ++f) {
613:               realSpaceDer[d] += invJ[f*dim+d]*basisDerTabulation[(q*Nb*Ncomp+cidx)*dim+f];
614:             }
615:             elemVec[e*Nb*Ncomp+cidx] += realSpaceDer[d]*f1[(q*Ncomp+comp)*dim+d];
616:           }
617:         }
618:       }
619:     }
620:   }
621:   PetscLogFlops((((2+(2+2*dim)*dim)*Ncomp*Nb+(2+2)*dim*Ncomp)*Nq + (2+2*dim)*dim*Nq*Ncomp*Nb)*Ne);
622:   PetscLogEventEnd(user->integrateBatchCPUEvent,0,0,0,0);
623:   return(0);
624: };

628: /*
629:  dm - The mesh
630:  X  - Local intput vector
631:  F  - Local output vector
632:  */
633: PetscErrorCode FormFunctionLocalBatch(DM dm, Vec X, Vec F, AppCtx *user)
634: {
635:   //PetscScalar    (*rhsFunc)(const PetscReal []) = user->rhsFunc;
636:   const PetscInt   debug         = user->debug;
637:   const PetscInt   dim           = user->dim;
638:   const PetscInt   numQuadPoints = user->q.numQuadPoints;
639:   const PetscReal *quadPoints    = user->q.quadPoints;
640:   const PetscReal *quadWeights   = user->q.quadWeights;
641:   const PetscInt   numBasisFuncs = user->q.numBasisFuncs;
642:   const PetscInt   numBasisComps = user->q.numComponents;
643:   const PetscReal *basis         = user->q.basis;
644:   const PetscReal *basisDer      = user->q.basisDer;
645:   PetscReal       *coords, *v0, *J, *invJ, *detJ;
646:   PetscScalar     *elemVec;
647:   PetscInt         cStart, cEnd, c;
648:   PetscErrorCode   ierr;

651:   PetscLogEventBegin(user->residualBatchEvent,0,0,0,0);
652:   VecSet(F, 0.0);
653:   //PetscMalloc3(dim,PetscScalar,&realSpaceDer,dim,PetscScalar,&fieldGrad,numBasisFuncs,PetscScalar,&elemVec);
654:   PetscMalloc3(dim,PetscReal,&coords,dim,PetscReal,&v0,dim*dim,PetscReal,&J);
655:   DMComplexGetHeightStratum(dm, 0, &cStart, &cEnd);
656:   const PetscInt numCells = cEnd - cStart;
657:   PetscScalar   *u;

659:   PetscMalloc4(numCells*numBasisFuncs*numBasisComps,PetscScalar,&u,numCells*dim*dim,PetscReal,&invJ,numCells,PetscReal,&detJ,numCells*numBasisFuncs*numBasisComps,PetscScalar,&elemVec);
660:   for(c = cStart; c < cEnd; ++c) {
661:     const PetscScalar *x;
662:     PetscInt           f;

664:     DMComplexComputeCellGeometry(dm, c, v0, J, &invJ[c*dim*dim], &detJ[c]);
665:     DMComplexVecGetClosure(dm, PETSC_NULL, X, c, &x);

667:     for(f = 0; f < numBasisFuncs*numBasisComps; ++f) {
668:       u[c*numBasisFuncs*numBasisComps+f] = x[f];
669:     }
670:   }
671:   // Conforming batches
672:   PetscInt blockSize  = numBasisFuncs*numQuadPoints;
673:   PetscInt numBlocks  = user->numBlocks;
674:   PetscInt batchSize  = numBlocks * blockSize;
675:   PetscInt numBatches = user->numBatches;
676:   PetscInt numChunks  = numCells / (numBatches*batchSize);
677:   if (user->gpu) {
678:     PetscLogEventBegin(user->integrateBatchGPUEvent,0,0,0,0);
679:     IntegrateElementBatchGPU(numChunks*numBatches*batchSize, numBatches, batchSize, numBlocks, u, invJ, detJ, elemVec, user->integrateGPUOnlyEvent, user->debug);
680:     PetscLogFlops((((2+(2+2*dim)*dim)*numBasisComps*numBasisFuncs+(2+2)*dim*numBasisComps)*numQuadPoints + (2+2*dim)*dim*numQuadPoints*numBasisComps*numBasisFuncs)*numChunks*numBatches*batchSize);
681:     PetscLogEventEnd(user->integrateBatchGPUEvent,0,0,0,0);
682:   } else {
683:     switch(user->op) {
684:     case LAPLACIAN:
685:       IntegrateLaplacianBatchCPU(numChunks*numBatches*batchSize, numBasisFuncs, u, invJ, detJ, numQuadPoints, quadPoints, quadWeights, basis, basisDer, elemVec, user);break;
686:     case ELASTICITY:
687:       IntegrateElasticityBatchCPU(numChunks*numBatches*batchSize, numBasisFuncs, numBasisComps, u, invJ, detJ, numQuadPoints, quadPoints, quadWeights, basis, basisDer, elemVec, user);break;
688:     default:
689:       SETERRQ1(PETSC_COMM_WORLD, PETSC_ERR_ARG_OUTOFRANGE, "Invalid PDE operator %d", user->op);
690:     }
691:   }
692:   // Remainder
693:   PetscInt numRemainder = numCells % (numBatches * batchSize);
694:   PetscInt offset       = numCells - numRemainder;
695:   switch(user->op) {
696:   case LAPLACIAN:
697:     IntegrateLaplacianBatchCPU(numRemainder, numBasisFuncs, &u[offset*numBasisFuncs*numBasisComps], &invJ[offset*dim*dim], &detJ[offset],
698:                                       numQuadPoints, quadPoints, quadWeights, basis, basisDer, &elemVec[offset*numBasisFuncs*numBasisComps], user);break;
699:   case ELASTICITY:
700:     IntegrateElasticityBatchCPU(numRemainder, numBasisFuncs, numBasisComps, &u[offset*numBasisFuncs*numBasisComps], &invJ[offset*dim*dim], &detJ[offset],
701:                                        numQuadPoints, quadPoints, quadWeights, basis, basisDer, &elemVec[offset*numBasisFuncs*numBasisComps], user);break;
702:   default:
703:     SETERRQ1(PETSC_COMM_WORLD, PETSC_ERR_ARG_OUTOFRANGE, "Invalid PDE operator %d", user->op);
704:   }
705:   for(c = cStart; c < cEnd; ++c) {
706:     if (debug) {DMPrintCellVector(c, "Residual", numBasisFuncs*numBasisComps, &elemVec[c*numBasisFuncs*numBasisComps]);}
707:     DMComplexVecSetClosure(dm, PETSC_NULL, F, c, &elemVec[c*numBasisFuncs*numBasisComps], ADD_VALUES);
708:   }
709:   PetscFree4(u,invJ,detJ,elemVec);
710:   PetscFree3(coords,v0,J);
711:   if (user->showResidual) {
712:     PetscInt p;
713:     PetscPrintf(PETSC_COMM_WORLD, "Residual:\n");
714:     for(p = 0; p < user->numProcs; ++p) {
715:       if (p == user->rank) {VecView(F, PETSC_VIEWER_STDOUT_SELF);}
716:       PetscBarrier((PetscObject) dm);
717:     }
718:   }
719:   PetscLogEventEnd(user->residualBatchEvent,0,0,0,0);
720:   return(0);
721: }

725: /*
726:   dm  - The mesh
727:   X   - The local input vector
728:   Jac - The output matrix
729: */
730: PetscErrorCode FormJacobianLocalLaplacian(DM dm, Vec X, Mat Jac, Mat JacPre, AppCtx *user)
731: {
732:   const PetscInt   debug         = user->debug;
733:   const PetscInt   dim           = user->dim;
734:   const PetscInt   numQuadPoints = user->q.numQuadPoints;
735:   const PetscReal *quadWeights   = user->q.quadWeights;
736:   const PetscInt   numBasisFuncs = user->q.numBasisFuncs;
737:   const PetscReal *basis         = user->q.basis;
738:   const PetscReal *basisDer      = user->q.basisDer;
739:   PetscReal       *v0, *J, *invJ, detJ;
740:   PetscScalar     *realSpaceTestDer, *realSpaceBasisDer, *elemMat;
741:   PetscInt         cStart, cEnd, c;
742:   PetscErrorCode   ierr;

745:   PetscLogEventBegin(user->jacobianEvent,0,0,0,0);
746:   MatZeroEntries(Jac);
747:   PetscMalloc3(dim,PetscScalar,&realSpaceTestDer,dim,PetscScalar,&realSpaceBasisDer,numBasisFuncs*numBasisFuncs,PetscScalar,&elemMat);
748:   PetscMalloc3(dim,PetscReal,&v0,dim*dim,PetscReal,&J,dim*dim,PetscReal,&invJ);
749:   DMComplexGetHeightStratum(dm, 0, &cStart, &cEnd);
750:   for(c = cStart; c < cEnd; ++c) {
751:     const PetscScalar *x;
752:     PetscInt           q;

754:     PetscMemzero(elemMat, numBasisFuncs*numBasisFuncs * sizeof(PetscScalar));
755:     DMComplexComputeCellGeometry(dm, c, v0, J, invJ, &detJ);
756:     if (detJ <= 0.0) SETERRQ2(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Invalid determinant %g for element %d", detJ, c);
757:     DMComplexVecGetClosure(dm, PETSC_NULL, X, c, &x);

759:     for(q = 0; q < numQuadPoints; ++q) {
760:       PetscScalar fieldVal = 0.0;
761:       PetscInt    f, g, d, e;

763:       for(f = 0; f < numBasisFuncs; ++f) {
764:         fieldVal += x[f]*basis[q*numBasisFuncs+f];
765:       }
766:       for(f = 0; f < numBasisFuncs; ++f) {
767:         for(d = 0; d < dim; ++d) {
768:           realSpaceTestDer[d] = 0.0;
769:           for(e = 0; e < dim; ++e) {
770:             realSpaceTestDer[d] += invJ[e*dim+d]*basisDer[(q*numBasisFuncs+f)*dim+e];
771:           }
772:         }
773:         for(g = 0; g < numBasisFuncs; ++g) {
774:           for(d = 0; d < dim; ++d) {
775:             realSpaceBasisDer[d] = 0.0;
776:             for(e = 0; e < dim; ++e) {
777:               realSpaceBasisDer[d] += invJ[e*dim+d]*basisDer[(q*numBasisFuncs+g)*dim+e];
778:             }
779:           }
780:           /* Linear term: -\Delta u */
781:           PetscScalar product = 0.0;
782:           for(d = 0; d < dim; ++d) product += realSpaceTestDer[d]*realSpaceBasisDer[d];
783:           elemMat[f*numBasisFuncs+g] += product*quadWeights[q]*detJ;
784:           /* Nonlinear term: -\lambda e^{u} */
785:           elemMat[f*numBasisFuncs+g] -= basis[q*numBasisFuncs+f]*basis[q*numBasisFuncs+g]*0.0*PetscExpScalar(fieldVal)*quadWeights[q]*detJ;
786:         }
787:       }
788:     }
789:     if (debug) {DMPrintCellMatrix(c, "Jacobian", numBasisFuncs, numBasisFuncs, elemMat);}
790:     DMComplexMatSetClosure(dm, PETSC_NULL, PETSC_NULL, Jac, c, elemMat, ADD_VALUES);
791:   }
792:   PetscLogFlops((cEnd-cStart)*numQuadPoints*numBasisFuncs*(dim*(dim*5+4)+14));
793:   PetscFree3(realSpaceTestDer,realSpaceBasisDer,elemMat);
794:   PetscFree3(v0,J,invJ);

796:   /* Assemble matrix, using the 2-step process:
797:        MatAssemblyBegin(), MatAssemblyEnd(). */
798:   MatAssemblyBegin(Jac, MAT_FINAL_ASSEMBLY);
799:   MatAssemblyEnd(Jac, MAT_FINAL_ASSEMBLY);
800:   /* Tell the matrix we will never add a new nonzero location to the
801:      matrix. If we do, it will generate an error. */
802:   MatSetOption(Jac, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE);
803:   PetscLogEventEnd(user->jacobianEvent,0,0,0,0);
804:   return(0);
805: }

809: /*
810:   dm  - The mesh
811:   X   - The local input vector
812:   Jac - The output matrix
813: */
814: PetscErrorCode FormJacobianLocalElasticity(DM dm, Vec X, Mat Jac, Mat JacPre, AppCtx *user)
815: {
816:   const PetscInt   debug         = user->debug;
817:   const PetscInt   dim           = user->dim;
818:   const PetscInt   numQuadPoints = user->q.numQuadPoints;
819:   const PetscReal *quadWeights   = user->q.quadWeights;
820:   const PetscInt   numBasisFuncs = user->q.numBasisFuncs;
821:   const PetscReal *basis         = user->q.basis;
822:   const PetscReal *basisDer      = user->q.basisDer;
823:   PetscReal       *v0, *J, *invJ, detJ;
824:   PetscScalar     *realSpaceTestDer, *realSpaceBasisDer, *elemMat;
825:   PetscInt         cStart, cEnd, c;
826:   PetscErrorCode   ierr;

829:   PetscLogEventBegin(user->jacobianEvent,0,0,0,0);
830:   MatZeroEntries(Jac);
831:   PetscMalloc3(dim,PetscScalar,&realSpaceTestDer,dim,PetscScalar,&realSpaceBasisDer,numBasisFuncs*numBasisFuncs,PetscScalar,&elemMat);
832:   PetscMalloc3(dim,PetscReal,&v0,dim*dim,PetscReal,&J,dim*dim,PetscReal,&invJ);
833:   DMComplexGetHeightStratum(dm, 0, &cStart, &cEnd);
834:   for(c = cStart; c < cEnd; ++c) {
835:     const PetscScalar *x;
836:     PetscInt           q;

838:     PetscMemzero(elemMat, numBasisFuncs*numBasisFuncs * sizeof(PetscScalar));
839:     DMComplexComputeCellGeometry(dm, c, v0, J, invJ, &detJ);
840:     if (detJ <= 0.0) SETERRQ2(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Invalid determinant %g for element %d", detJ, c);
841:     DMComplexVecGetClosure(dm, PETSC_NULL, X, c, &x);

843:     for(q = 0; q < numQuadPoints; ++q) {
844:       PetscScalar fieldVal = 0.0;
845:       PetscInt    f, g, d, e;

847:       for(f = 0; f < numBasisFuncs; ++f) {
848:         fieldVal += x[f]*basis[q*numBasisFuncs+f];
849:       }
850:       for(f = 0; f < numBasisFuncs; ++f) {
851:         for(d = 0; d < dim; ++d) {
852:           realSpaceTestDer[d] = 0.0;
853:           for(e = 0; e < dim; ++e) {
854:             realSpaceTestDer[d] += invJ[e*dim+d]*basisDer[(q*numBasisFuncs+f)*dim+e];
855:           }
856:         }
857:         for(g = 0; g < numBasisFuncs; ++g) {
858:           for(d = 0; d < dim; ++d) {
859:             realSpaceBasisDer[d] = 0.0;
860:             for(e = 0; e < dim; ++e) {
861:               realSpaceBasisDer[d] += invJ[e*dim+d]*basisDer[(q*numBasisFuncs+g)*dim+e];
862:             }
863:           }
864:           /* Linear term: -\Delta u */
865:           PetscScalar product = 0.0;
866:           for(d = 0; d < dim; ++d) product += realSpaceTestDer[d]*realSpaceBasisDer[d];
867:           elemMat[f*numBasisFuncs+g] += product*quadWeights[q]*detJ;
868:           /* Nonlinear term: -\lambda e^{u} */
869:           elemMat[f*numBasisFuncs+g] -= basis[q*numBasisFuncs+f]*basis[q*numBasisFuncs+g]*0.0*PetscExpScalar(fieldVal)*quadWeights[q]*detJ;
870:         }
871:       }
872:     }
873:     if (debug) {DMPrintCellMatrix(c, "Jacobian", numBasisFuncs, numBasisFuncs, elemMat);}
874:     DMComplexMatSetClosure(dm, PETSC_NULL, PETSC_NULL, Jac, c, elemMat, ADD_VALUES);
875:   }
876:   PetscLogFlops((cEnd-cStart)*numQuadPoints*numBasisFuncs*(dim*(dim*5+4)+14));
877:   PetscFree3(realSpaceTestDer,realSpaceBasisDer,elemMat);
878:   PetscFree3(v0,J,invJ);

880:   /* Assemble matrix, using the 2-step process:
881:        MatAssemblyBegin(), MatAssemblyEnd(). */
882:   MatAssemblyBegin(Jac, MAT_FINAL_ASSEMBLY);
883:   MatAssemblyEnd(Jac, MAT_FINAL_ASSEMBLY);
884:   /* Tell the matrix we will never add a new nonzero location to the
885:      matrix. If we do, it will generate an error. */
886:   MatSetOption(Jac, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE);
887:   PetscLogEventEnd(user->jacobianEvent,0,0,0,0);
888:   return(0);
889: }

893: /*
894:   dm  - The mesh
895:   X   - The local input vector
896:   Jac - The output matrix
897: */
898: PetscErrorCode FormJacobianLocalBatch(DM dm, Vec X, Mat Jac, Mat JacPre, AppCtx *user)
899: {
900:   const PetscInt   debug         = user->debug;
901:   const PetscInt   dim           = user->dim;
902:   const PetscInt   numQuadPoints = user->q.numQuadPoints;
903:   const PetscReal *quadWeights   = user->q.quadWeights;
904:   const PetscInt   numBasisFuncs = user->q.numBasisFuncs;
905:   const PetscReal *basis         = user->q.basis;
906:   const PetscReal *basisDer      = user->q.basisDer;
907:   PetscReal       *v0, *J, *invJ, detJ;
908:   PetscScalar     *realSpaceTestDer, *realSpaceBasisDer, *elemMat;
909:   PetscInt         cStart, cEnd, c;
910:   PetscErrorCode   ierr;

913:   PetscLogEventBegin(user->jacobianBatchEvent,0,0,0,0);
914:   MatZeroEntries(Jac);
915:   PetscMalloc3(dim,PetscScalar,&realSpaceTestDer,dim,PetscScalar,&realSpaceBasisDer,numBasisFuncs*numBasisFuncs,PetscScalar,&elemMat);
916:   PetscMalloc3(dim,PetscReal,&v0,dim*dim,PetscReal,&J,dim*dim,PetscReal,&invJ);
917:   DMComplexGetHeightStratum(dm, 0, &cStart, &cEnd);
918:   for(c = cStart; c < cEnd; ++c) {
919:     const PetscScalar *x;
920:     PetscInt           q;

922:     PetscMemzero(elemMat, numBasisFuncs*numBasisFuncs * sizeof(PetscScalar));
923:     DMComplexComputeCellGeometry(dm, c, v0, J, invJ, &detJ);
924:     if (detJ <= 0.0) SETERRQ2(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Invalid determinant %g for element %d", detJ, c);
925:     DMComplexVecGetClosure(dm, PETSC_NULL, X, c, &x);

927:     for(q = 0; q < numQuadPoints; ++q) {
928:       PetscScalar fieldVal = 0.0;
929:       PetscInt    f, g, d, e;

931:       for(f = 0; f < numBasisFuncs; ++f) {
932:         fieldVal += x[f]*basis[q*numBasisFuncs+f];
933:       }
934:       for(f = 0; f < numBasisFuncs; ++f) {
935:         for(d = 0; d < dim; ++d) {
936:           realSpaceTestDer[d] = 0.0;
937:           for(e = 0; e < dim; ++e) {
938:             realSpaceTestDer[d] += invJ[e*dim+d]*basisDer[(q*numBasisFuncs+f)*dim+e];
939:           }
940:         }
941:         for(g = 0; g < numBasisFuncs; ++g) {
942:           for(d = 0; d < dim; ++d) {
943:             realSpaceBasisDer[d] = 0.0;
944:             for(e = 0; e < dim; ++e) {
945:               realSpaceBasisDer[d] += invJ[e*dim+d]*basisDer[(q*numBasisFuncs+g)*dim+e];
946:             }
947:           }
948:           /* Linear term: -\Delta u */
949:           PetscScalar product = 0.0;
950:           for(d = 0; d < dim; ++d) product += realSpaceTestDer[d]*realSpaceBasisDer[d];
951:           elemMat[f*numBasisFuncs+g] += product*quadWeights[q]*detJ;
952:           /* Nonlinear term: -\lambda e^{u} */
953:           elemMat[f*numBasisFuncs+g] -= basis[q*numBasisFuncs+f]*basis[q*numBasisFuncs+g]*0.0*PetscExpScalar(fieldVal)*quadWeights[q]*detJ;
954:         }
955:       }
956:     }
957:     if (debug) {DMPrintCellMatrix(c, "Jacobian", numBasisFuncs, numBasisFuncs, elemMat);}
958:     DMComplexMatSetClosure(dm, PETSC_NULL, PETSC_NULL, Jac, c, elemMat, ADD_VALUES);
959:   }
960:   PetscLogFlops((cEnd-cStart)*numQuadPoints*numBasisFuncs*(dim*(dim*5+4)+14));
961:   PetscFree3(realSpaceTestDer,realSpaceBasisDer,elemMat);
962:   PetscFree3(v0,J,invJ);

964:   /* Assemble matrix, using the 2-step process:
965:        MatAssemblyBegin(), MatAssemblyEnd(). */
966:   MatAssemblyBegin(Jac, MAT_FINAL_ASSEMBLY);
967:   MatAssemblyEnd(Jac, MAT_FINAL_ASSEMBLY);
968:   /* Tell the matrix we will never add a new nonzero location to the
969:      matrix. If we do, it will generate an error. */
970:   MatSetOption(Jac, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE);
971:   PetscLogEventEnd(user->jacobianBatchEvent,0,0,0,0);
972:   return(0);
973: }

977: int main(int argc, char **argv)
978: {
979:   DM             dm;
980:   SNES           snes;
981:   AppCtx         user;

984:   PetscInitialize(&argc, &argv, PETSC_NULL, help);
985: #ifndef PETSC_HAVE_CUDA
986:   SETERRQ(PETSC_COMM_WORLD, PETSC_ERR_SUP, "This example requires CUDA support.");
987: #endif
988:   ProcessOptions(PETSC_COMM_WORLD, &user);
989:   CreateMesh(PETSC_COMM_WORLD, &user, &dm);
990:   SetupQuadrature(&user);
991:   SetupSection(dm, &user);

993:   SNESCreate(PETSC_COMM_WORLD, &snes);
994:   if (user.computeFunction) {
995:     Vec            X, F;

997:     DMGetGlobalVector(dm, &X);
998:     DMGetGlobalVector(dm, &F);
999:     if (user.batch) {
1000:       switch(user.op) {
1001:       case LAPLACIAN:
1002:         FormInitialGuess(X, quadratic_2d, INSERT_VALUES, &user);break;
1003:       case ELASTICITY:
1004:         FormInitialGuess(X, quadratic_2d_elas, INSERT_VALUES, &user);break;
1005:       default:
1006:         SETERRQ1(PETSC_COMM_WORLD, PETSC_ERR_ARG_OUTOFRANGE, "Invalid PDE operator %d", user.op);
1007:       }
1008:       DMComplexSetLocalFunction(dm, (PetscErrorCode (*)(DM, Vec, Vec, void*)) FormFunctionLocalBatch);
1009:     } else {
1010:       switch(user.op) {
1011:       case LAPLACIAN:
1012:         FormInitialGuess(X, quadratic_2d, INSERT_VALUES, &user);
1013:         DMComplexSetLocalFunction(dm, (PetscErrorCode (*)(DM, Vec, Vec, void*)) FormFunctionLocalLaplacian);break;
1014:       case ELASTICITY:
1015:         FormInitialGuess(X, quadratic_2d_elas, INSERT_VALUES, &user);
1016:         DMComplexSetLocalFunction(dm, (PetscErrorCode (*)(DM, Vec, Vec, void*)) FormFunctionLocalElasticity);break;
1017:       default:
1018:         SETERRQ1(PETSC_COMM_WORLD, PETSC_ERR_ARG_OUTOFRANGE, "Invalid PDE operator %d", user.op);
1019:       }
1020:     }
1021:     SNESDMComplexComputeFunction(snes, X, F, &user);
1022:     DMRestoreGlobalVector(dm, &X);
1023:     DMRestoreGlobalVector(dm, &F);
1024:   }
1025:   if (user.computeJacobian) {
1026:     Vec            X;
1027:     Mat            J;
1028:     MatStructure   flag;

1030:     DMGetGlobalVector(dm, &X);
1031:     DMCreateMatrix(dm, MATAIJ, &J);
1032:     if (user.batch) {
1033:       DMComplexSetLocalJacobian(dm, (PetscErrorCode (*)(DM, Vec, Mat, Mat, void*)) FormJacobianLocalBatch);
1034:     } else {
1035:       switch(user.op) {
1036:       case LAPLACIAN:
1037:         DMComplexSetLocalJacobian(dm, (PetscErrorCode (*)(DM, Vec, Mat, Mat, void*)) FormJacobianLocalLaplacian);break;
1038:       case ELASTICITY:
1039:         DMComplexSetLocalJacobian(dm, (PetscErrorCode (*)(DM, Vec, Mat, Mat, void*)) FormJacobianLocalElasticity);break;
1040:       default:
1041:         SETERRQ1(PETSC_COMM_WORLD, PETSC_ERR_ARG_OUTOFRANGE, "Invalid PDE operator %d", user.op);
1042:       }
1043:     }
1044:     SNESDMComplexComputeJacobian(snes, X, &J, &J, &flag, &user);
1045:     MatDestroy(&J);
1046:     DMRestoreGlobalVector(dm, &X);
1047:   }
1048:   SNESDestroy(&snes);
1049:   DMDestroy(&dm);
1050:   PetscFinalize();
1051:   return 0;
1052: }