Actual source code: ex56.c
  1: /* Portions of this code are under:
  2:    Copyright (C) 2022 Advanced Micro Devices, Inc. All rights reserved.
  3: */
  4: static char help[] = "3D, tensor hexahedra (Q1-K), displacement finite element formulation\n\
  5: of linear elasticity.  E=1.0, nu=1/3.\n\
  6: Unit cube domain with Dirichlet boundary\n\n";

  8: #include <petscdmplex.h>
  9: #include <petscsnes.h>
 10: #include <petscds.h>
 11: #include <petscdmforest.h>

 13: static PetscReal s_soft_alpha = 1.e-3;
 14: static PetscReal s_mu         = 0.4;
 15: static PetscReal s_lambda     = 0.4;

 17: static void f0_bd_u_3d(PetscInt dim, PetscInt Nf, PetscInt NfAux, const PetscInt uOff[], const PetscInt uOff_x[], const PetscScalar u[], const PetscScalar u_t[], const PetscScalar u_x[], const PetscInt aOff[], const PetscInt aOff_x[], const PetscScalar a[], const PetscScalar a_t[], const PetscScalar a_x[], PetscReal t, const PetscReal x[], const PetscReal n[], PetscInt numConstants, const PetscScalar constants[], PetscScalar f0[])
 18: {
 19:   f0[0] = 1;     /* x direction pull */
 20:   f0[1] = -x[2]; /* add a twist around x-axis */
 21:   f0[2] = x[1];
 22: }

 24: static void f1_bd_u(PetscInt dim, PetscInt Nf, PetscInt NfAux, const PetscInt uOff[], const PetscInt uOff_x[], const PetscScalar u[], const PetscScalar u_t[], const PetscScalar u_x[], const PetscInt aOff[], const PetscInt aOff_x[], const PetscScalar a[], const PetscScalar a_t[], const PetscScalar a_x[], PetscReal t, const PetscReal x[], const PetscReal n[], PetscInt numConstants, const PetscScalar constants[], PetscScalar f1[])
 25: {
 26:   const PetscInt Ncomp = dim;
 27:   PetscInt       comp, d;
 28:   for (comp = 0; comp < Ncomp; ++comp) {
 29:     for (d = 0; d < dim; ++d) f1[comp * dim + d] = 0.0;
 30:   }
 31: }

 33: /* gradU[comp*dim+d] = {u_x, u_y} or {u_x, u_y, u_z} */
 34: static void f1_u_3d_alpha(PetscInt dim, PetscInt Nf, PetscInt NfAux, const PetscInt uOff[], const PetscInt uOff_x[], const PetscScalar u[], const PetscScalar u_t[], const PetscScalar u_x[], const PetscInt aOff[], const PetscInt aOff_x[], const PetscScalar a[], const PetscScalar a_t[], const PetscScalar a_x[], PetscReal t, const PetscReal x[], PetscInt numConstants, const PetscScalar constants[], PetscScalar f1[])
 35: {
 36:   PetscReal trace, mu = s_mu, lambda = s_lambda, rad;
 37:   PetscInt  i, j;
 38:   for (i = 0, rad = 0.; i < dim; i++) {
 39:     PetscReal t = x[i];
 40:     rad += t * t;
 41:   }
 42:   rad = PetscSqrtReal(rad);
 43:   if (rad > 0.25) {
 44:     mu *= s_soft_alpha;
 45:     lambda *= s_soft_alpha; /* we could keep the bulk the same like rubberish */
 46:   }
 47:   for (i = 0, trace = 0; i < dim; ++i) trace += PetscRealPart(u_x[i * dim + i]);
 48:   for (i = 0; i < dim; ++i) {
 49:     for (j = 0; j < dim; ++j) f1[i * dim + j] = mu * (u_x[i * dim + j] + u_x[j * dim + i]);
 50:     f1[i * dim + i] += lambda * trace;
 51:   }
 52: }

 54: /* gradU[comp*dim+d] = {u_x, u_y} or {u_x, u_y, u_z} */
 55: static void f1_u_3d(PetscInt dim, PetscInt Nf, PetscInt NfAux, const PetscInt uOff[], const PetscInt uOff_x[], const PetscScalar u[], const PetscScalar u_t[], const PetscScalar u_x[], const PetscInt aOff[], const PetscInt aOff_x[], const PetscScalar a[], const PetscScalar a_t[], const PetscScalar a_x[], PetscReal t, const PetscReal x[], PetscInt numConstants, const PetscScalar constants[], PetscScalar f1[])
 56: {
 57:   PetscReal trace, mu = s_mu, lambda = s_lambda;
 58:   PetscInt  i, j;
 59:   for (i = 0, trace = 0; i < dim; ++i) trace += PetscRealPart(u_x[i * dim + i]);
 60:   for (i = 0; i < dim; ++i) {
 61:     for (j = 0; j < dim; ++j) f1[i * dim + j] = mu * (u_x[i * dim + j] + u_x[j * dim + i]);
 62:     f1[i * dim + i] += lambda * trace;
 63:   }
 64: }

 66: /* 3D elasticity */
 67: #define IDX(ii, jj, kk, ll) (27 * ii + 9 * jj + 3 * kk + ll)

 69: void g3_uu_3d_private(PetscScalar g3[], const PetscReal mu, const PetscReal lambda)
 70: {
 71:   if (1) {
 72:     g3[0] += lambda;
 73:     g3[0] += mu;
 74:     g3[0] += mu;
 75:     g3[4] += lambda;
 76:     g3[8] += lambda;
 77:     g3[10] += mu;
 78:     g3[12] += mu;
 79:     g3[20] += mu;
 80:     g3[24] += mu;
 81:     g3[28] += mu;
 82:     g3[30] += mu;
 83:     g3[36] += lambda;
 84:     g3[40] += lambda;
 85:     g3[40] += mu;
 86:     g3[40] += mu;
 87:     g3[44] += lambda;
 88:     g3[50] += mu;
 89:     g3[52] += mu;
 90:     g3[56] += mu;
 91:     g3[60] += mu;
 92:     g3[68] += mu;
 93:     g3[70] += mu;
 94:     g3[72] += lambda;
 95:     g3[76] += lambda;
 96:     g3[80] += lambda;
 97:     g3[80] += mu;
 98:     g3[80] += mu;
 99:   } else {
100:     int        i, j, k, l;
101:     static int cc = -1;
102:     cc++;
103:     for (i = 0; i < 3; ++i) {
104:       for (j = 0; j < 3; ++j) {
105:         for (k = 0; k < 3; ++k) {
106:           for (l = 0; l < 3; ++l) {
107:             if (k == l && i == j) g3[IDX(i, j, k, l)] += lambda;
108:             if (i == k && j == l) g3[IDX(i, j, k, l)] += mu;
109:             if (i == l && j == k) g3[IDX(i, j, k, l)] += mu;
110:             if (k == l && i == j && !cc) (void)PetscPrintf(PETSC_COMM_WORLD, "g3[%d] += lambda;\n", IDX(i, j, k, l));
111:             if (i == k && j == l && !cc) (void)PetscPrintf(PETSC_COMM_WORLD, "g3[%d] += mu;\n", IDX(i, j, k, l));
112:             if (i == l && j == k && !cc) (void)PetscPrintf(PETSC_COMM_WORLD, "g3[%d] += mu;\n", IDX(i, j, k, l));
113:           }
114:         }
115:       }
116:     }
117:   }
118: }

120: static void g3_uu_3d_alpha(PetscInt dim, PetscInt Nf, PetscInt NfAux, const PetscInt uOff[], const PetscInt uOff_x[], const PetscScalar u[], const PetscScalar u_t[], const PetscScalar u_x[], const PetscInt aOff[], const PetscInt aOff_x[], const PetscScalar a[], const PetscScalar a_t[], const PetscScalar a_x[], PetscReal t, PetscReal u_tShift, const PetscReal x[], PetscInt numConstants, const PetscScalar constants[], PetscScalar g3[])
121: {
122:   PetscReal mu = s_mu, lambda = s_lambda, rad;
123:   PetscInt  i;
124:   for (i = 0, rad = 0.; i < dim; i++) {
125:     PetscReal t = x[i];
126:     rad += t * t;
127:   }
128:   rad = PetscSqrtReal(rad);
129:   if (rad > 0.25) {
130:     mu *= s_soft_alpha;
131:     lambda *= s_soft_alpha; /* we could keep the bulk the same like rubberish */
132:   }
133:   g3_uu_3d_private(g3, mu, lambda);
134: }

136: static void g3_uu_3d(PetscInt dim, PetscInt Nf, PetscInt NfAux, const PetscInt uOff[], const PetscInt uOff_x[], const PetscScalar u[], const PetscScalar u_t[], const PetscScalar u_x[], const PetscInt aOff[], const PetscInt aOff_x[], const PetscScalar a[], const PetscScalar a_t[], const PetscScalar a_x[], PetscReal t, PetscReal u_tShift, const PetscReal x[], PetscInt numConstants, const PetscScalar constants[], PetscScalar g3[])
137: {
138:   g3_uu_3d_private(g3, s_mu, s_lambda);
139: }

141: static void f0_u(PetscInt dim, PetscInt Nf, PetscInt NfAux, const PetscInt uOff[], const PetscInt uOff_x[], const PetscScalar u[], const PetscScalar u_t[], const PetscScalar u_x[], const PetscInt aOff[], const PetscInt aOff_x[], const PetscScalar a[], const PetscScalar a_t[], const PetscScalar a_x[], PetscReal t, const PetscReal x[], PetscInt numConstants, const PetscScalar constants[], PetscScalar f0[])
142: {
143:   const PetscInt Ncomp = dim;
144:   PetscInt       comp;

146:   for (comp = 0; comp < Ncomp; ++comp) f0[comp] = 0.0;
147: }

149: /* PI_i (x_i^4 - x_i^2) */
150: static void f0_u_x4(PetscInt dim, PetscInt Nf, PetscInt NfAux, const PetscInt uOff[], const PetscInt uOff_x[], const PetscScalar u[], const PetscScalar u_t[], const PetscScalar u_x[], const PetscInt aOff[], const PetscInt aOff_x[], const PetscScalar a[], const PetscScalar a_t[], const PetscScalar a_x[], PetscReal t, const PetscReal x[], PetscInt numConstants, const PetscScalar constants[], PetscScalar f0[])
151: {
152:   const PetscInt Ncomp = dim;
153:   PetscInt       comp, i;

155:   for (comp = 0; comp < Ncomp; ++comp) {
156:     f0[comp] = 1e5;
157:     for (i = 0; i < Ncomp; ++i) { f0[comp] *= /* (comp+1)* */ (x[i] * x[i] * x[i] * x[i] - x[i] * x[i]); /* assumes (0,1]^D domain */ }
158:   }
159: }

161: PetscErrorCode zero(PetscInt dim, PetscReal time, const PetscReal x[], PetscInt Nf, PetscScalar *u, void *ctx)
162: {
163:   const PetscInt Ncomp = dim;
164:   PetscInt       comp;

166:   for (comp = 0; comp < Ncomp; ++comp) u[comp] = 0;
167:   return 0;
168: }

170: int main(int argc, char **args)
171: {
172:   Mat         Amat;
173:   SNES        snes;
174:   KSP         ksp;
175:   MPI_Comm    comm;
176:   PetscMPIInt rank;
177: #if defined(PETSC_USE_LOG)
178:   PetscLogStage stage[17];
179: #endif
180:   PetscBool         test_nonzero_cols = PETSC_FALSE, use_nearnullspace = PETSC_TRUE, attach_nearnullspace = PETSC_FALSE;
181:   Vec               xx, bb;
182:   PetscInt          iter, i, N, dim = 3, cells[3] = {1, 1, 1}, max_conv_its, local_sizes[7], run_type = 1;
183:   DM                dm, distdm, basedm;
184:   PetscBool         flg;
185:   char              convType[256];
186:   PetscReal         Lx, mdisp[10], err[10];
187:   const char *const options[10] = {"-ex56_dm_refine 0", "-ex56_dm_refine 1", "-ex56_dm_refine 2", "-ex56_dm_refine 3", "-ex56_dm_refine 4", "-ex56_dm_refine 5", "-ex56_dm_refine 6", "-ex56_dm_refine 7", "-ex56_dm_refine 8", "-ex56_dm_refine 9"};
190:   PetscInitialize(&argc, &args, (char *)0, help);
191:   comm = PETSC_COMM_WORLD;
192:   MPI_Comm_rank(comm, &rank);
193:   /* options */
194:   PetscOptionsBegin(comm, NULL, "3D bilinear Q1 elasticity options", "");
195:   {
196:     i = 3;
197:     PetscOptionsIntArray("-cells", "Number of (flux tube) processor in each dimension", "ex56.c", cells, &i, NULL);

199:     Lx           = 1.; /* or ne for rod */
200:     max_conv_its = 3;
201:     PetscOptionsInt("-max_conv_its", "Number of iterations in convergence study", "", max_conv_its, &max_conv_its, NULL);
203:     PetscOptionsReal("-lx", "Length of domain", "", Lx, &Lx, NULL);
204:     PetscOptionsReal("-alpha", "material coefficient inside circle", "", s_soft_alpha, &s_soft_alpha, NULL);
205:     PetscOptionsBool("-test_nonzero_cols", "nonzero test", "", test_nonzero_cols, &test_nonzero_cols, NULL);
206:     PetscOptionsBool("-use_mat_nearnullspace", "MatNearNullSpace API test", "", use_nearnullspace, &use_nearnullspace, NULL);
207:     PetscOptionsBool("-attach_mat_nearnullspace", "MatNearNullSpace API test (via MatSetNearNullSpace)", "", attach_nearnullspace, &attach_nearnullspace, NULL);
208:     PetscOptionsInt("-run_type", "0: twisting load on cantalever, 1: 3rd order accurate convergence test", "", run_type, &run_type, NULL);
209:   }
210:   PetscOptionsEnd();
211:   PetscLogStageRegister("Mesh Setup", &stage[16]);
212:   for (iter = 0; iter < max_conv_its; iter++) {
213:     char str[] = "Solve 0";
214:     str[6] += iter;
215:     PetscLogStageRegister(str, &stage[iter]);
216:   }
217:   /* create DM, Plex calls DMSetup */
218:   PetscLogStagePush(stage[16]);
219:   DMPlexCreateBoxMesh(comm, dim, PETSC_FALSE, cells, NULL, NULL, NULL, PETSC_TRUE, &dm);
220:   {
221:     DMLabel label;
222:     IS      is;
223:     DMCreateLabel(dm, "boundary");
224:     DMGetLabel(dm, "boundary", &label);
225:     DMPlexMarkBoundaryFaces(dm, 1, label);
226:     if (run_type == 0) {
227:       DMGetStratumIS(dm, "boundary", 1, &is);
228:       DMCreateLabel(dm, "Faces");
229:       if (is) {
230:         PetscInt        d, f, Nf;
231:         const PetscInt *faces;
232:         PetscInt        csize;
233:         PetscSection    cs;
234:         Vec             coordinates;
235:         DM              cdm;
236:         ISGetLocalSize(is, &Nf);
237:         ISGetIndices(is, &faces);
238:         DMGetCoordinatesLocal(dm, &coordinates);
239:         DMGetCoordinateDM(dm, &cdm);
240:         DMGetLocalSection(cdm, &cs);
241:         /* Check for each boundary face if any component of its centroid is either 0.0 or 1.0 */
242:         for (f = 0; f < Nf; ++f) {
243:           PetscReal    faceCoord;
244:           PetscInt     b, v;
245:           PetscScalar *coords = NULL;
246:           PetscInt     Nv;
247:           DMPlexVecGetClosure(cdm, cs, coordinates, faces[f], &csize, &coords);
248:           Nv = csize / dim; /* Calculate mean coordinate vector */
249:           for (d = 0; d < dim; ++d) {
250:             faceCoord = 0.0;
251:             for (v = 0; v < Nv; ++v) faceCoord += PetscRealPart(coords[v * dim + d]);
252:             faceCoord /= Nv;
253:             for (b = 0; b < 2; ++b) {
254:               if (PetscAbs(faceCoord - b) < PETSC_SMALL) { /* domain have not been set yet, still [0,1]^3 */
255:                 DMSetLabelValue(dm, "Faces", faces[f], d * 2 + b + 1);
256:               }
257:             }
258:           }
259:           DMPlexVecRestoreClosure(cdm, cs, coordinates, faces[f], &csize, &coords);
260:         }
261:         ISRestoreIndices(is, &faces);
262:       }
263:       ISDestroy(&is);
264:       DMGetLabel(dm, "Faces", &label);
265:       DMPlexLabelComplete(dm, label);
266:     }
267:   }
268:   {
269:     PetscInt     dimEmbed, i;
270:     PetscInt     nCoords;
271:     PetscScalar *coords, bounds[] = {
272:                            0, 1, -.5, .5, -.5, .5,
273:                          }; /* x_min,x_max,y_min,y_max */
274:     Vec coordinates;
275:     bounds[1] = Lx;
276:     if (run_type == 1) {
277:       for (i = 0; i < 2 * dim; i++) bounds[i] = (i % 2) ? 1 : 0;
278:     }
279:     DMGetCoordinatesLocal(dm, &coordinates);
280:     DMGetCoordinateDim(dm, &dimEmbed);
282:     VecGetLocalSize(coordinates, &nCoords);
284:     VecGetArray(coordinates, &coords);
285:     for (i = 0; i < nCoords; i += dimEmbed) {
286:       PetscInt     j;
287:       PetscScalar *coord = &coords[i];
288:       for (j = 0; j < dimEmbed; j++) coord[j] = bounds[2 * j] + coord[j] * (bounds[2 * j + 1] - bounds[2 * j]);
289:     }
290:     VecRestoreArray(coordinates, &coords);
291:     DMSetCoordinatesLocal(dm, coordinates);
292:   }

294:   /* convert to p4est, and distribute */
295:   PetscOptionsBegin(comm, "", "Mesh conversion options", "DMPLEX");
296:   PetscOptionsFList("-dm_type", "Convert DMPlex to another format (should not be Plex!)", "ex56.c", DMList, DMPLEX, convType, 256, &flg);
297:   PetscOptionsEnd();
298:   if (flg) {
299:     DM newdm;
300:     DMConvert(dm, convType, &newdm);
301:     if (newdm) {
302:       const char *prefix;
303:       PetscBool   isForest;
304:       PetscObjectGetOptionsPrefix((PetscObject)dm, &prefix);
305:       PetscObjectSetOptionsPrefix((PetscObject)newdm, prefix);
306:       DMIsForest(newdm, &isForest);
308:       DMDestroy(&dm);
309:       dm = newdm;
310:     } else SETERRQ(PETSC_COMM_WORLD, PETSC_ERR_USER, "Convert failed?");
311:   } else {
312:     PetscPartitioner part;
313:     /* Plex Distribute mesh over processes */
314:     DMPlexGetPartitioner(dm, &part);
315:     PetscPartitionerSetFromOptions(part);
316:     DMPlexDistribute(dm, 0, NULL, &distdm);
317:     if (distdm) {
318:       const char *prefix;
319:       PetscObjectGetOptionsPrefix((PetscObject)dm, &prefix);
320:       PetscObjectSetOptionsPrefix((PetscObject)distdm, prefix);
321:       DMDestroy(&dm);
322:       dm = distdm;
323:     }
324:   }
325:   PetscLogStagePop();
326:   basedm = dm;
327:   dm     = NULL;

329:   for (iter = 0; iter < max_conv_its; iter++) {
330:     PetscLogStagePush(stage[16]);
331:     /* make new DM */
332:     DMClone(basedm, &dm);
333:     PetscObjectSetOptionsPrefix((PetscObject)dm, "ex56_");
334:     PetscObjectSetName((PetscObject)dm, "Mesh");
335:     if (max_conv_its > 1) {
336:       /* If max_conv_its == 1, then we are not doing a convergence study. */
337:       PetscOptionsInsertString(NULL, options[iter]);
338:     }
339:     DMSetFromOptions(dm); /* refinement done here in Plex, p4est */
340:     /* snes */
341:     SNESCreate(comm, &snes);
342:     SNESSetDM(snes, dm);
343:     /* fem */
344:     {
345:       const PetscInt Ncomp        = dim;
346:       const PetscInt components[] = {0, 1, 2};
347:       const PetscInt Nfid = 1, Npid = 1;
348:       const PetscInt fid[] = {1}; /* The fixed faces (x=0) */
349:       const PetscInt pid[] = {2}; /* The faces with loading (x=L_x) */
350:       PetscFE        fe;
351:       PetscDS        prob;
352:       DMLabel        label;
353:       DM             cdm = dm;

355:       PetscFECreateDefault(PetscObjectComm((PetscObject)dm), dim, dim, PETSC_FALSE, NULL, PETSC_DECIDE, &fe); /* elasticity */
356:       PetscObjectSetName((PetscObject)fe, "deformation");
357:       /* FEM prob */
358:       DMSetField(dm, 0, NULL, (PetscObject)fe);
359:       DMCreateDS(dm);
360:       DMGetDS(dm, &prob);
361:       /* setup problem */
362:       if (run_type == 1) {
363:         PetscDSSetJacobian(prob, 0, 0, NULL, NULL, NULL, g3_uu_3d);
364:         PetscDSSetResidual(prob, 0, f0_u_x4, f1_u_3d);
365:       } else {
366:         PetscWeakForm wf;
367:         PetscInt      bd, i;

369:         PetscDSSetJacobian(prob, 0, 0, NULL, NULL, NULL, g3_uu_3d_alpha);
370:         PetscDSSetResidual(prob, 0, f0_u, f1_u_3d_alpha);

372:         DMGetLabel(dm, "Faces", &label);
373:         DMAddBoundary(dm, DM_BC_NATURAL, "traction", label, Npid, pid, 0, Ncomp, components, NULL, NULL, NULL, &bd);
374:         PetscDSGetBoundary(prob, bd, &wf, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL);
375:         for (i = 0; i < Npid; ++i) PetscWeakFormSetIndexBdResidual(wf, label, pid[i], 0, 0, 0, f0_bd_u_3d, 0, f1_bd_u);
376:       }
377:       /* bcs */
378:       if (run_type == 1) {
379:         PetscInt id = 1;
380:         DMGetLabel(dm, "boundary", &label);
381:         DMAddBoundary(dm, DM_BC_ESSENTIAL, "wall", label, 1, &id, 0, 0, NULL, (void (*)(void))zero, NULL, NULL, NULL);
382:       } else {
383:         DMGetLabel(dm, "Faces", &label);
384:         DMAddBoundary(dm, DM_BC_ESSENTIAL, "fixed", label, Nfid, fid, 0, Ncomp, components, (void (*)(void))zero, NULL, NULL, NULL);
385:       }
386:       while (cdm) {
387:         DMCopyDisc(dm, cdm);
388:         DMGetCoarseDM(cdm, &cdm);
389:       }
390:       PetscFEDestroy(&fe);
391:     }
392:     /* vecs & mat */
393:     DMCreateGlobalVector(dm, &xx);
394:     VecDuplicate(xx, &bb);
395:     PetscObjectSetName((PetscObject)bb, "b");
396:     PetscObjectSetName((PetscObject)xx, "u");
397:     DMCreateMatrix(dm, &Amat);
398:     MatSetOption(Amat, MAT_SYMMETRIC, PETSC_TRUE);        /* Some matrix kernels can take advantage of symmetry if we set this. */
399:     MatSetOption(Amat, MAT_SYMMETRY_ETERNAL, PETSC_TRUE); /* Inform PETSc that Amat is always symmetric, so info set above isn't lost. */
400:     MatSetBlockSize(Amat, 3);
401:     MatSetOption(Amat, MAT_SPD, PETSC_TRUE);
402:     MatSetOption(Amat, MAT_SPD_ETERNAL, PETSC_TRUE);
403:     VecGetSize(bb, &N);
404:     local_sizes[iter] = N;
405:     PetscInfo(snes, "%" PetscInt_FMT " global equations, %" PetscInt_FMT " vertices\n", N, N / dim);
406:     if ((use_nearnullspace || attach_nearnullspace) && N / dim > 1) {
407:       /* Set up the near null space (a.k.a. rigid body modes) that will be used by the multigrid preconditioner */
408:       DM           subdm;
409:       MatNullSpace nearNullSpace;
410:       PetscInt     fields = 0;
411:       PetscObject  deformation;
412:       DMCreateSubDM(dm, 1, &fields, NULL, &subdm);
413:       DMPlexCreateRigidBody(subdm, 0, &nearNullSpace);
414:       DMGetField(dm, 0, NULL, &deformation);
415:       PetscObjectCompose(deformation, "nearnullspace", (PetscObject)nearNullSpace);
416:       DMDestroy(&subdm);
417:       if (attach_nearnullspace) MatSetNearNullSpace(Amat, nearNullSpace);
418:       MatNullSpaceDestroy(&nearNullSpace); /* created by DM and destroyed by Mat */
419:     }
420:     DMPlexSetSNESLocalFEM(dm, NULL, NULL, NULL);
421:     SNESSetJacobian(snes, Amat, Amat, NULL, NULL);
422:     SNESSetFromOptions(snes);
423:     DMSetUp(dm);
424:     PetscLogStagePop();
425:     PetscLogStagePush(stage[16]);
426:     /* ksp */
427:     SNESGetKSP(snes, &ksp);
428:     KSPSetComputeSingularValues(ksp, PETSC_TRUE);
429:     /* test BCs */
430:     VecZeroEntries(xx);
431:     if (test_nonzero_cols) {
432:       if (rank == 0) VecSetValue(xx, 0, 1.0, INSERT_VALUES);
433:       VecAssemblyBegin(xx);
434:       VecAssemblyEnd(xx);
435:     }
436:     VecZeroEntries(bb);
437:     VecGetSize(bb, &i);
438:     local_sizes[iter] = i;
439:     PetscInfo(snes, "%" PetscInt_FMT " equations in vector, %" PetscInt_FMT " vertices\n", i, i / dim);
440:     PetscLogStagePop();
441:     /* solve */
442:     PetscLogStagePush(stage[iter]);
443:     SNESSolve(snes, bb, xx);
444:     PetscLogStagePop();
445:     VecNorm(xx, NORM_INFINITY, &mdisp[iter]);
446:     DMViewFromOptions(dm, NULL, "-dm_view");
447:     {
448:       PetscViewer       viewer = NULL;
449:       PetscViewerFormat fmt;
450:       PetscOptionsGetViewer(comm, NULL, "ex56_", "-vec_view", &viewer, &fmt, &flg);
451:       if (flg) {
452:         PetscViewerPushFormat(viewer, fmt);
453:         VecView(xx, viewer);
454:         VecView(bb, viewer);
455:         PetscViewerPopFormat(viewer);
456:       }
457:       PetscViewerDestroy(&viewer);
458:     }
459:     /* Free work space */
460:     DMDestroy(&dm);
461:     SNESDestroy(&snes);
462:     VecDestroy(&xx);
463:     VecDestroy(&bb);
464:     MatDestroy(&Amat);
465:   }
466:   DMDestroy(&basedm);
467:   if (run_type == 1) err[0] = 59.975208 - mdisp[0]; /* error with what I think is the exact solution */
468:   else err[0] = 171.038 - mdisp[0];
469:   for (iter = 1; iter < max_conv_its; iter++) {
470:     if (run_type == 1) err[iter] = 59.975208 - mdisp[iter];
471:     else err[iter] = 171.038 - mdisp[iter];
472:     PetscPrintf(PETSC_COMM_WORLD, "[%d] %" PetscInt_FMT ") N=%12" PetscInt_FMT ", max displ=%9.7e, disp diff=%9.2e, error=%4.3e, rate=%3.2g\n", rank, iter, local_sizes[iter], (double)mdisp[iter], (double)(mdisp[iter] - mdisp[iter - 1]), (double)err[iter], (double)(PetscLogReal(err[iter - 1] / err[iter]) / PetscLogReal(2.)));
473:   }

475:   PetscFinalize();
476:   return 0;
477: }

479: /*TEST

481:   test:
482:     suffix: 0
483:     nsize: 4
484:     requires: !single
485:     args: -cells 2,2,1 -max_conv_its 2 -petscspace_degree 3 -snes_max_it 1 -ksp_max_it 100 -ksp_type cg -ksp_rtol 1.e-10 -ksp_norm_type unpreconditioned -pc_type gamg -pc_gamg_coarse_eq_limit 10 -pc_gamg_reuse_interpolation true -pc_gamg_aggressive_coarsening 0 -pc_gamg_threshold 0.001 -ksp_converged_reason -snes_converged_reason -use_mat_nearnullspace true -mg_levels_ksp_max_it 2 -mg_levels_ksp_type chebyshev -mg_levels_ksp_chebyshev_esteig 0,0.2,0,1.1 -mg_levels_pc_type jacobi -petscpartitioner_type simple -ex56_dm_view -snes_lag_jacobian -2 -snes_type ksponly -use_gpu_aware_mpi true
486:     timeoutfactor: 2

488:   # HYPRE PtAP broken with complex numbers
489:   test:
490:     suffix: hypre
491:     requires: hypre !single !complex !defined(PETSC_HAVE_HYPRE_DEVICE)
492:     nsize: 4
493:     args: -cells 2,2,1 -max_conv_its 2 -lx 1. -alpha .01 -petscspace_degree 2 -ksp_type cg -ksp_monitor_short -ksp_rtol 1.e-8 -pc_type hypre -pc_hypre_type boomeramg -pc_hypre_boomeramg_no_CF true -pc_hypre_boomeramg_agg_nl 1 -pc_hypre_boomeramg_coarsen_type HMIS -pc_hypre_boomeramg_interp_type ext+i -ksp_converged_reason -use_mat_nearnullspace true -petscpartitioner_type simple

495:   test:
496:     suffix: ml
497:     requires: ml !single
498:     nsize: 4
499:     args: -cells 2,2,1 -max_conv_its 2 -lx 1. -alpha .01 -petscspace_degree 2 -ksp_type cg -ksp_monitor_short -ksp_converged_reason -ksp_rtol 1.e-8 -pc_type ml -mg_levels_ksp_type chebyshev -mg_levels_ksp_max_it 3 -mg_levels_ksp_chebyshev_esteig 0,0.05,0,1.05 -mg_levels_pc_type sor -petscpartitioner_type simple -use_mat_nearnullspace

501:   test:
502:     suffix: hpddm
503:     requires: hpddm slepc !single defined(PETSC_HAVE_DYNAMIC_LIBRARIES) defined(PETSC_USE_SHARED_LIBRARIES)
504:     nsize: 4
505:     args: -cells 2,2,1 -max_conv_its 2 -lx 1. -alpha .01 -petscspace_degree 2 -ksp_type fgmres -ksp_monitor_short -ksp_converged_reason -ksp_rtol 1.e-8 -pc_type hpddm -petscpartitioner_type simple -pc_hpddm_levels_1_sub_pc_type lu -pc_hpddm_levels_1_eps_nev 6 -pc_hpddm_coarse_p 1 -pc_hpddm_coarse_pc_type svd

507:   test:
508:     suffix: repart
509:     nsize: 4
510:     requires: parmetis !single
511:     args: -cells 8,2,2 -max_conv_its 1 -petscspace_degree 2 -snes_max_it 4 -ksp_max_it 100 -ksp_type cg -ksp_rtol 1.e-2 -ksp_norm_type unpreconditioned -snes_rtol 1.e-3 -pc_type gamg -pc_gamg_esteig_ksp_max_it 10 -pc_gamg_type agg -pc_gamg_agg_nsmooths 1 -pc_gamg_aggressive_coarsening 1 -pc_gamg_threshold 0.05 -pc_gamg_threshold_scale .0 -use_mat_nearnullspace true -mg_levels_ksp_max_it 2 -mg_levels_ksp_type chebyshev -mg_levels_ksp_chebyshev_esteig 0,0.05,0,1.05 -mg_levels_pc_type jacobi -pc_gamg_mat_partitioning_type parmetis -pc_gamg_repartition true -snes_converged_reason -pc_gamg_process_eq_limit 20 -pc_gamg_coarse_eq_limit 10 -ksp_converged_reason -snes_converged_reason -pc_gamg_reuse_interpolation true

513:   test:
514:     suffix: bddc
515:     nsize: 4
516:     requires: !single
517:     args: -cells 2,2,1 -max_conv_its 2 -lx 1. -alpha .01 -petscspace_degree 2 -ksp_type cg -ksp_monitor_short -ksp_rtol 1.e-8 -ksp_converged_reason -petscpartitioner_type simple -ex56_dm_mat_type is -matis_localmat_type {{sbaij baij aij}} -pc_type bddc

519:   testset:
520:     nsize: 4
521:     requires: !single
522:     args: -cells 2,2,1 -max_conv_its 2 -lx 1. -alpha .01 -petscspace_degree 2 -ksp_type cg -ksp_monitor_short -ksp_rtol 1.e-10 -ksp_converged_reason -petscpartitioner_type simple -ex56_dm_mat_type is -matis_localmat_type aij -pc_type bddc -attach_mat_nearnullspace {{0 1}separate output}
523:     test:
524:       suffix: bddc_approx_gamg
525:       args: -pc_bddc_switch_static -prefix_push pc_bddc_dirichlet_ -approximate -pc_type gamg -pc_gamg_esteig_ksp_max_it 10 -pc_gamg_type agg -pc_gamg_agg_nsmooths 1 -pc_gamg_reuse_interpolation true -pc_gamg_aggressive_coarsening 1 -pc_gamg_threshold 0.05 -pc_gamg_threshold_scale .0 -mg_levels_ksp_max_it 1 -mg_levels_ksp_type chebyshev -prefix_pop -prefix_push pc_bddc_neumann_ -approximate -pc_type gamg -pc_gamg_esteig_ksp_max_it 10 -pc_gamg_type agg -pc_gamg_agg_nsmooths 1 -pc_gamg_coarse_eq_limit 10 -pc_gamg_reuse_interpolation true -pc_gamg_aggressive_coarsening 1 -pc_gamg_threshold 0.05 -pc_gamg_threshold_scale .0 -mg_levels_ksp_max_it 1 -mg_levels_ksp_type chebyshev -prefix_pop
526:     # HYPRE PtAP broken with complex numbers
527:     test:
528:       requires: hypre !complex !defined(PETSC_HAVE_HYPRE_DEVICE)
529:       suffix: bddc_approx_hypre
530:       args: -pc_bddc_switch_static -prefix_push pc_bddc_dirichlet_ -pc_type hypre -pc_hypre_boomeramg_no_CF true -pc_hypre_boomeramg_strong_threshold 0.75 -pc_hypre_boomeramg_agg_nl 1 -pc_hypre_boomeramg_coarsen_type HMIS -pc_hypre_boomeramg_interp_type ext+i -prefix_pop -prefix_push pc_bddc_neumann_ -pc_type hypre -pc_hypre_boomeramg_no_CF true -pc_hypre_boomeramg_strong_threshold 0.75 -pc_hypre_boomeramg_agg_nl 1 -pc_hypre_boomeramg_coarsen_type HMIS -pc_hypre_boomeramg_interp_type ext+i -prefix_pop
531:     test:
532:       requires: ml
533:       suffix: bddc_approx_ml
534:       args: -pc_bddc_switch_static -prefix_push pc_bddc_dirichlet_ -approximate -pc_type ml -mg_levels_ksp_max_it 1 -mg_levels_ksp_type chebyshev -prefix_pop -prefix_push pc_bddc_neumann_ -approximate -pc_type ml -mg_levels_ksp_max_it 1 -mg_levels_ksp_type chebyshev -prefix_pop

536:   test:
537:     suffix: fetidp
538:     nsize: 4
539:     requires: !single
540:     args: -cells 2,2,1 -max_conv_its 2 -lx 1. -alpha .01 -petscspace_degree 2 -ksp_type fetidp -fetidp_ksp_type cg -ksp_monitor_short -ksp_rtol 1.e-8 -ksp_converged_reason -petscpartitioner_type simple -ex56_dm_mat_type is -matis_localmat_type {{sbaij baij aij}}

542:   test:
543:     suffix: bddc_elast
544:     nsize: 4
545:     requires: !single
546:     args: -cells 2,2,1 -max_conv_its 2 -lx 1. -alpha .01 -petscspace_degree 2 -ksp_type cg -ksp_monitor_short -ksp_rtol 1.e-8 -ksp_converged_reason -petscpartitioner_type simple -ex56_dm_mat_type is -matis_localmat_type sbaij -pc_type bddc -pc_bddc_monolithic -attach_mat_nearnullspace

548:   test:
549:     suffix: fetidp_elast
550:     nsize: 4
551:     requires: !single
552:     args: -cells 2,2,1 -max_conv_its 2 -lx 1. -alpha .01 -petscspace_degree 2 -ksp_type fetidp -fetidp_ksp_type cg -ksp_monitor_short -ksp_rtol 1.e-8 -ksp_converged_reason -petscpartitioner_type simple -ex56_dm_mat_type is -matis_localmat_type sbaij -fetidp_bddc_pc_bddc_monolithic -attach_mat_nearnullspace

554:   test:
555:     suffix: gdsw
556:     nsize: 4
557:     requires: !single
558:     args: -cells 2,2,1 -max_conv_its 2 -lx 1. -alpha .01 -petscspace_degree 2 -ksp_type cg -ksp_monitor_short -ksp_rtol 1.e-8 -ksp_converged_reason -petscpartitioner_type simple -ex56_dm_mat_type is -attach_mat_nearnullspace \
559:           -pc_type mg -pc_mg_galerkin -pc_mg_adapt_interp_coarse_space gdsw -pc_mg_levels 2 -mg_levels_pc_type bjacobi -mg_levels_sub_pc_type icc

561:   testset:
562:     nsize: 4
563:     requires: !single
564:     args: -cells 2,2,1 -max_conv_its 2 -petscspace_degree 2 -snes_max_it 2 -ksp_max_it 100 -ksp_type cg -ksp_rtol 1.e-10 -ksp_norm_type unpreconditioned -snes_rtol 1.e-10 -pc_type gamg -pc_gamg_esteig_ksp_max_it 10 -pc_gamg_type agg -pc_gamg_agg_nsmooths 1 -pc_gamg_coarse_eq_limit 10 -pc_gamg_reuse_interpolation true -pc_gamg_aggressive_coarsening 1 -pc_gamg_threshold 0.05 -pc_gamg_threshold_scale .0 -use_mat_nearnullspace true -mg_levels_ksp_max_it 2 -mg_levels_ksp_type chebyshev -mg_levels_ksp_chebyshev_esteig 0,0.05,0,1.05 -mg_levels_pc_type jacobi -ksp_monitor_short -ksp_converged_reason -snes_converged_reason -snes_monitor_short -ex56_dm_view -petscpartitioner_type simple -pc_gamg_process_eq_limit 20
565:     output_file: output/ex56_cuda.out

567:     test:
568:       suffix: cuda
569:       requires: cuda
570:       args: -ex56_dm_mat_type aijcusparse -ex56_dm_vec_type cuda

572:     test:
573:       suffix: hip
574:       requires: hip
575:       args: -ex56_dm_mat_type aijhipsparse -ex56_dm_vec_type hip

577:     test:
578:       suffix: viennacl
579:       requires: viennacl
580:       args: -ex56_dm_mat_type aijviennacl -ex56_dm_vec_type viennacl

582:     test:
583:       suffix: kokkos
584:       requires: kokkos_kernels
585:       args: -ex56_dm_mat_type aijkokkos -ex56_dm_vec_type kokkos
586:   # Don't run AIJMKL caes with complex scalars because of convergence issues.
587:   # Note that we need to test both single and multiple MPI rank cases, because these use different sparse MKL routines to implement the PtAP operation.
588:   test:
589:     suffix: seqaijmkl
590:     nsize: 1
591:     requires: defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) !single !complex
592:     args: -cells 2,2,1 -max_conv_its 2 -petscspace_degree 2 -snes_max_it 2 -ksp_max_it 100 -ksp_type cg -ksp_rtol 1.e-11 -ksp_norm_type unpreconditioned -snes_rtol 1.e-10 -pc_type gamg -pc_gamg_type agg -pc_gamg_agg_nsmooths 1 -pc_gamg_coarse_eq_limit 1000 -pc_gamg_reuse_interpolation true -pc_gamg_aggressive_coarsening 1 -pc_gamg_threshold 0.05 -pc_gamg_threshold_scale .0 -ksp_converged_reason -snes_monitor_short -ksp_monitor_short -snes_converged_reason -use_mat_nearnullspace true -mg_levels_ksp_max_it 1 -mg_levels_ksp_type chebyshev -pc_gamg_esteig_ksp_type cg -pc_gamg_esteig_ksp_max_it 10 -mg_levels_ksp_chebyshev_esteig 0,0.05,0,1.1 -mg_levels_pc_type jacobi -petscpartitioner_type simple -mat_block_size 3 -ex56_dm_view -run_type 1 -mat_seqaij_type seqaijmkl
593:     timeoutfactor: 2

595:   test:
596:     suffix: mpiaijmkl
597:     nsize: 2
598:     requires: defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) !single !complex
599:     args: -cells 2,2,1 -max_conv_its 2 -petscspace_degree 2 -snes_max_it 2 -ksp_max_it 100 -ksp_type cg -ksp_rtol 1.e-11 -ksp_norm_type unpreconditioned -snes_rtol 1.e-10 -pc_type gamg -pc_gamg_type agg -pc_gamg_agg_nsmooths 1 -pc_gamg_coarse_eq_limit 1000 -pc_gamg_reuse_interpolation true -pc_gamg_aggressive_coarsening 1 -pc_gamg_threshold 0.05 -pc_gamg_threshold_scale .0 -ksp_converged_reason -snes_monitor_short -ksp_monitor_short -snes_converged_reason -use_mat_nearnullspace true -mg_levels_ksp_max_it 1 -mg_levels_ksp_type chebyshev -pc_gamg_esteig_ksp_type cg -pc_gamg_esteig_ksp_max_it 10 -mg_levels_ksp_chebyshev_esteig 0,0.05,0,1.1 -mg_levels_pc_type jacobi -petscpartitioner_type simple -mat_block_size 3 -ex56_dm_view -run_type 1 -mat_seqaij_type seqaijmkl
600:     timeoutfactor: 2

602: TEST*/