Actual source code: ex48.c

  1: static const char help[] = "Toy hydrostatic ice flow with multigrid in 3D.\n\
  2: \n\
  3: Solves the hydrostatic (aka Blatter/Pattyn/First Order) equations for ice sheet flow\n\
  4: using multigrid.  The ice uses a power-law rheology with \"Glen\" exponent 3 (corresponds\n\
  5: to p=4/3 in a p-Laplacian).  The focus is on ISMIP-HOM experiments which assume periodic\n\
  6: boundary conditions in the x- and y-directions.\n\
  7: \n\
  8: Equations are rescaled so that the domain size and solution are O(1), details of this scaling\n\
  9: can be controlled by the options -units_meter, -units_second, and -units_kilogram.\n\
 10: \n\
 11: A VTK StructuredGrid output file can be written using the option -o filename.vts\n\
 12: \n\n";

 14: /*
 15: The equations for horizontal velocity (u,v) are

 17:   - [eta (4 u_x + 2 v_y)]_x - [eta (u_y + v_x)]_y - [eta u_z]_z + rho g s_x = 0
 18:   - [eta (4 v_y + 2 u_x)]_y - [eta (u_y + v_x)]_x - [eta v_z]_z + rho g s_y = 0

 20: where

 22:   eta = B/2 (epsilon + gamma)^((p-2)/2)

 24: is the nonlinear effective viscosity with regularization epsilon and hardness parameter B,
 25: written in terms of the second invariant

 27:   gamma = u_x^2 + v_y^2 + u_x v_y + (1/4) (u_y + v_x)^2 + (1/4) u_z^2 + (1/4) v_z^2

 29: The surface boundary conditions are the natural conditions.  The basal boundary conditions
 30: are either no-slip, or Navier (linear) slip with spatially variant friction coefficient beta^2.

 32: In the code, the equations for (u,v) are multiplied through by 1/(rho g) so that residuals are O(1).

 34: The discretization is Q1 finite elements, managed by a DMDA.  The grid is never distorted in the
 35: map (x,y) plane, but the bed and surface may be bumpy.  This is handled as usual in FEM, through
 36: the Jacobian of the coordinate transformation from a reference element to the physical element.

 38: Since ice-flow is tightly coupled in the z-direction (within columns), the DMDA is managed
 39: specially so that columns are never distributed, and are always contiguous in memory.
 40: This amounts to reversing the meaning of X,Y,Z compared to the DMDA's internal interpretation,
 41: and then indexing as vec[i][j][k].  The exotic coarse spaces require 2D DMDAs which are made to
 42: use compatible domain decomposition relative to the 3D DMDAs.

 44: There are two compile-time options:

 46:   NO_SSE2:
 47:     If the host supports SSE2, we use integration code that has been vectorized with SSE2
 48:     intrinsics, unless this macro is defined.  The intrinsics speed up integration by about
 49:     30% on my architecture (P8700, gcc-4.5 snapshot).

 51:   COMPUTE_LOWER_TRIANGULAR:
 52:     The element matrices we assemble are lower-triangular so it is not necessary to compute
 53:     all entries explicitly.  If this macro is defined, the lower-triangular entries are
 54:     computed explicitly.

 56: */

 58: #if defined(PETSC_APPLE_FRAMEWORK)
 59:   #import <PETSc/petscsnes.h>
 60:   #import <PETSc/petsc/private/dmdaimpl.h> /* There is not yet a public interface to manipulate dm->ops */
 61: #else

 63: #include <petscsnes.h>
 64: #include <petsc/private/dmdaimpl.h>
 65: #endif
 66: #include <ctype.h> /* toupper() */

 68: #if defined(__cplusplus) || defined(PETSC_HAVE_WINDOWS_COMPILERS) || defined(__PGI)
 69:   /*  c++ cannot handle  [_restrict_] notation like C does */
 70:   #undef PETSC_RESTRICT
 71:   #define PETSC_RESTRICT
 72: #endif

 74: #if defined __SSE2__
 75:   #include <emmintrin.h>
 76: #endif

 78: /* The SSE2 kernels are only for PetscScalar=double on architectures that support it */
 79: #if !defined NO_SSE2 && !defined PETSC_USE_COMPLEX && !defined PETSC_USE_REAL_SINGLE && !defined PETSC_USE_REAL___FLOAT128 && !defined PETSC_USE_REAL___FP16 && defined __SSE2__
 80:   #define USE_SSE2_KERNELS 1
 81: #else
 82:   #define USE_SSE2_KERNELS 0
 83: #endif

 85: static PetscClassId THI_CLASSID;

 87: typedef enum {
 88:   QUAD_GAUSS,
 89:   QUAD_LOBATTO
 90: } QuadratureType;
 91: static const char                  *QuadratureTypes[] = {"gauss", "lobatto", "QuadratureType", "QUAD_", 0};
 92: PETSC_UNUSED static const PetscReal HexQWeights[8]    = {1, 1, 1, 1, 1, 1, 1, 1};
 93: PETSC_UNUSED static const PetscReal HexQNodes[]       = {-0.57735026918962573, 0.57735026918962573};
 94: #define G 0.57735026918962573
 95: #define H (0.5 * (1. + G))
 96: #define L (0.5 * (1. - G))
 97: #define M (-0.5)
 98: #define P (0.5)
 99: /* Special quadrature: Lobatto in horizontal, Gauss in vertical */
100: static const PetscReal HexQInterp_Lobatto[8][8] = {
101:   {H, 0, 0, 0, L, 0, 0, 0},
102:   {0, H, 0, 0, 0, L, 0, 0},
103:   {0, 0, H, 0, 0, 0, L, 0},
104:   {0, 0, 0, H, 0, 0, 0, L},
105:   {L, 0, 0, 0, H, 0, 0, 0},
106:   {0, L, 0, 0, 0, H, 0, 0},
107:   {0, 0, L, 0, 0, 0, H, 0},
108:   {0, 0, 0, L, 0, 0, 0, H}
109: };
110: static const PetscReal HexQDeriv_Lobatto[8][8][3] = {
111:   {{M * H, M *H, M}, {P * H, 0, 0},    {0, 0, 0},        {0, P *H, 0},     {M * L, M *L, P}, {P * L, 0, 0},    {0, 0, 0},        {0, P *L, 0}    },
112:   {{M * H, 0, 0},    {P * H, M *H, M}, {0, P *H, 0},     {0, 0, 0},        {M * L, 0, 0},    {P * L, M *L, P}, {0, P *L, 0},     {0, 0, 0}       },
113:   {{0, 0, 0},        {0, M *H, 0},     {P * H, P *H, M}, {M * H, 0, 0},    {0, 0, 0},        {0, M *L, 0},     {P * L, P *L, P}, {M * L, 0, 0}   },
114:   {{0, M *H, 0},     {0, 0, 0},        {P * H, 0, 0},    {M * H, P *H, M}, {0, M *L, 0},     {0, 0, 0},        {P * L, 0, 0},    {M * L, P *L, P}},
115:   {{M * L, M *L, M}, {P * L, 0, 0},    {0, 0, 0},        {0, P *L, 0},     {M * H, M *H, P}, {P * H, 0, 0},    {0, 0, 0},        {0, P *H, 0}    },
116:   {{M * L, 0, 0},    {P * L, M *L, M}, {0, P *L, 0},     {0, 0, 0},        {M * H, 0, 0},    {P * H, M *H, P}, {0, P *H, 0},     {0, 0, 0}       },
117:   {{0, 0, 0},        {0, M *L, 0},     {P * L, P *L, M}, {M * L, 0, 0},    {0, 0, 0},        {0, M *H, 0},     {P * H, P *H, P}, {M * H, 0, 0}   },
118:   {{0, M *L, 0},     {0, 0, 0},        {P * L, 0, 0},    {M * L, P *L, M}, {0, M *H, 0},     {0, 0, 0},        {P * H, 0, 0},    {M * H, P *H, P}}
119: };
120: /* Stanndard Gauss */
121: static const PetscReal HexQInterp_Gauss[8][8] = {
122:   {H * H * H, L *H *H, L *L *H, H *L *H, H *H *L, L *H *L, L *L *L, H *L *L},
123:   {L * H * H, H *H *H, H *L *H, L *L *H, L *H *L, H *H *L, H *L *L, L *L *L},
124:   {L * L * H, H *L *H, H *H *H, L *H *H, L *L *L, H *L *L, H *H *L, L *H *L},
125:   {H * L * H, L *L *H, L *H *H, H *H *H, H *L *L, L *L *L, L *H *L, H *H *L},
126:   {H * H * L, L *H *L, L *L *L, H *L *L, H *H *H, L *H *H, L *L *H, H *L *H},
127:   {L * H * L, H *H *L, H *L *L, L *L *L, L *H *H, H *H *H, H *L *H, L *L *H},
128:   {L * L * L, H *L *L, H *H *L, L *H *L, L *L *H, H *L *H, H *H *H, L *H *H},
129:   {H * L * L, L *L *L, L *H *L, H *H *L, H *L *H, L *L *H, L *H *H, H *H *H}
130: };
131: static const PetscReal HexQDeriv_Gauss[8][8][3] = {
132:   {{M * H * H, H *M *H, H *H *M}, {P * H * H, L *M *H, L *H *M}, {P * L * H, L *P *H, L *L *M}, {M * L * H, H *P *H, H *L *M}, {M * H * L, H *M *L, H *H *P}, {P * H * L, L *M *L, L *H *P}, {P * L * L, L *P *L, L *L *P}, {M * L * L, H *P *L, H *L *P}},
133:   {{M * H * H, L *M *H, L *H *M}, {P * H * H, H *M *H, H *H *M}, {P * L * H, H *P *H, H *L *M}, {M * L * H, L *P *H, L *L *M}, {M * H * L, L *M *L, L *H *P}, {P * H * L, H *M *L, H *H *P}, {P * L * L, H *P *L, H *L *P}, {M * L * L, L *P *L, L *L *P}},
134:   {{M * L * H, L *M *H, L *L *M}, {P * L * H, H *M *H, H *L *M}, {P * H * H, H *P *H, H *H *M}, {M * H * H, L *P *H, L *H *M}, {M * L * L, L *M *L, L *L *P}, {P * L * L, H *M *L, H *L *P}, {P * H * L, H *P *L, H *H *P}, {M * H * L, L *P *L, L *H *P}},
135:   {{M * L * H, H *M *H, H *L *M}, {P * L * H, L *M *H, L *L *M}, {P * H * H, L *P *H, L *H *M}, {M * H * H, H *P *H, H *H *M}, {M * L * L, H *M *L, H *L *P}, {P * L * L, L *M *L, L *L *P}, {P * H * L, L *P *L, L *H *P}, {M * H * L, H *P *L, H *H *P}},
136:   {{M * H * L, H *M *L, H *H *M}, {P * H * L, L *M *L, L *H *M}, {P * L * L, L *P *L, L *L *M}, {M * L * L, H *P *L, H *L *M}, {M * H * H, H *M *H, H *H *P}, {P * H * H, L *M *H, L *H *P}, {P * L * H, L *P *H, L *L *P}, {M * L * H, H *P *H, H *L *P}},
137:   {{M * H * L, L *M *L, L *H *M}, {P * H * L, H *M *L, H *H *M}, {P * L * L, H *P *L, H *L *M}, {M * L * L, L *P *L, L *L *M}, {M * H * H, L *M *H, L *H *P}, {P * H * H, H *M *H, H *H *P}, {P * L * H, H *P *H, H *L *P}, {M * L * H, L *P *H, L *L *P}},
138:   {{M * L * L, L *M *L, L *L *M}, {P * L * L, H *M *L, H *L *M}, {P * H * L, H *P *L, H *H *M}, {M * H * L, L *P *L, L *H *M}, {M * L * H, L *M *H, L *L *P}, {P * L * H, H *M *H, H *L *P}, {P * H * H, H *P *H, H *H *P}, {M * H * H, L *P *H, L *H *P}},
139:   {{M * L * L, H *M *L, H *L *M}, {P * L * L, L *M *L, L *L *M}, {P * H * L, L *P *L, L *H *M}, {M * H * L, H *P *L, H *H *M}, {M * L * H, H *M *H, H *L *P}, {P * L * H, L *M *H, L *L *P}, {P * H * H, L *P *H, L *H *P}, {M * H * H, H *P *H, H *H *P}}
140: };
141: static const PetscReal (*HexQInterp)[8], (*HexQDeriv)[8][3];
142: /* Standard 2x2 Gauss quadrature for the bottom layer. */
143: static const PetscReal QuadQInterp[4][4] = {
144:   {H * H, L *H, L *L, H *L},
145:   {L * H, H *H, H *L, L *L},
146:   {L * L, H *L, H *H, L *H},
147:   {H * L, L *L, L *H, H *H}
148: };
149: static const PetscReal QuadQDeriv[4][4][2] = {
150:   {{M * H, M *H}, {P * H, M *L}, {P * L, P *L}, {M * L, P *H}},
151:   {{M * H, M *L}, {P * H, M *H}, {P * L, P *H}, {M * L, P *L}},
152:   {{M * L, M *L}, {P * L, M *H}, {P * H, P *H}, {M * H, P *L}},
153:   {{M * L, M *H}, {P * L, M *L}, {P * H, P *L}, {M * H, P *H}}
154: };
155: #undef G
156: #undef H
157: #undef L
158: #undef M
159: #undef P

161: #define HexExtract(x, i, j, k, n) \
162:   do { \
163:     (n)[0] = (x)[i][j][k]; \
164:     (n)[1] = (x)[i + 1][j][k]; \
165:     (n)[2] = (x)[i + 1][j + 1][k]; \
166:     (n)[3] = (x)[i][j + 1][k]; \
167:     (n)[4] = (x)[i][j][k + 1]; \
168:     (n)[5] = (x)[i + 1][j][k + 1]; \
169:     (n)[6] = (x)[i + 1][j + 1][k + 1]; \
170:     (n)[7] = (x)[i][j + 1][k + 1]; \
171:   } while (0)

173: #define HexExtractRef(x, i, j, k, n) \
174:   do { \
175:     (n)[0] = &(x)[i][j][k]; \
176:     (n)[1] = &(x)[i + 1][j][k]; \
177:     (n)[2] = &(x)[i + 1][j + 1][k]; \
178:     (n)[3] = &(x)[i][j + 1][k]; \
179:     (n)[4] = &(x)[i][j][k + 1]; \
180:     (n)[5] = &(x)[i + 1][j][k + 1]; \
181:     (n)[6] = &(x)[i + 1][j + 1][k + 1]; \
182:     (n)[7] = &(x)[i][j + 1][k + 1]; \
183:   } while (0)

185: #define QuadExtract(x, i, j, n) \
186:   do { \
187:     (n)[0] = (x)[i][j]; \
188:     (n)[1] = (x)[i + 1][j]; \
189:     (n)[2] = (x)[i + 1][j + 1]; \
190:     (n)[3] = (x)[i][j + 1]; \
191:   } while (0)

193: static void HexGrad(const PetscReal dphi[][3], const PetscReal zn[], PetscReal dz[])
194: {
195:   PetscInt i;
196:   dz[0] = dz[1] = dz[2] = 0;
197:   for (i = 0; i < 8; i++) {
198:     dz[0] += dphi[i][0] * zn[i];
199:     dz[1] += dphi[i][1] * zn[i];
200:     dz[2] += dphi[i][2] * zn[i];
201:   }
202: }

204: static void HexComputeGeometry(PetscInt q, PetscReal hx, PetscReal hy, const PetscReal dz[PETSC_RESTRICT], PetscReal phi[PETSC_RESTRICT], PetscReal dphi[PETSC_RESTRICT][3], PetscReal *PETSC_RESTRICT jw)
205: {
206:   const PetscReal jac[3][3] = {
207:     {hx / 2, 0,      0    },
208:     {0,      hy / 2, 0    },
209:     {dz[0],  dz[1],  dz[2]}
210:   };
211:   const PetscReal ijac[3][3] = {
212:     {1 / jac[0][0],                        0,                                    0            },
213:     {0,                                    1 / jac[1][1],                        0            },
214:     {-jac[2][0] / (jac[0][0] * jac[2][2]), -jac[2][1] / (jac[1][1] * jac[2][2]), 1 / jac[2][2]}
215:   };
216:   const PetscReal jdet = jac[0][0] * jac[1][1] * jac[2][2];
217:   PetscInt        i;

219:   for (i = 0; i < 8; i++) {
220:     const PetscReal *dphir = HexQDeriv[q][i];
221:     phi[i]                 = HexQInterp[q][i];
222:     dphi[i][0]             = dphir[0] * ijac[0][0] + dphir[1] * ijac[1][0] + dphir[2] * ijac[2][0];
223:     dphi[i][1]             = dphir[0] * ijac[0][1] + dphir[1] * ijac[1][1] + dphir[2] * ijac[2][1];
224:     dphi[i][2]             = dphir[0] * ijac[0][2] + dphir[1] * ijac[1][2] + dphir[2] * ijac[2][2];
225:   }
226:   *jw = 1.0 * jdet;
227: }

229: typedef struct _p_THI   *THI;
230: typedef struct _n_Units *Units;

232: typedef struct {
233:   PetscScalar u, v;
234: } Node;

236: typedef struct {
237:   PetscScalar b;     /* bed */
238:   PetscScalar h;     /* thickness */
239:   PetscScalar beta2; /* friction */
240: } PrmNode;

242: typedef struct {
243:   PetscReal min, max, cmin, cmax;
244: } PRange;

246: typedef enum {
247:   THIASSEMBLY_TRIDIAGONAL,
248:   THIASSEMBLY_FULL
249: } THIAssemblyMode;

251: struct _p_THI {
252:   PETSCHEADER(int);
253:   void (*initialize)(THI, PetscReal x, PetscReal y, PrmNode *p);
254:   PetscInt  zlevels;
255:   PetscReal Lx, Ly, Lz; /* Model domain */
256:   PetscReal alpha;      /* Bed angle */
257:   Units     units;
258:   PetscReal dirichlet_scale;
259:   PetscReal ssa_friction_scale;
260:   PRange    eta;
261:   PRange    beta2;
262:   struct {
263:     PetscReal Bd2, eps, exponent;
264:   } viscosity;
265:   struct {
266:     PetscReal irefgam, eps2, exponent, refvel, epsvel;
267:   } friction;
268:   PetscReal rhog;
269:   PetscBool no_slip;
270:   PetscBool tridiagonal;
271:   PetscBool coarse2d;
272:   PetscBool verbose;
273:   MatType   mattype;
274: };

276: struct _n_Units {
277:   /* fundamental */
278:   PetscReal meter;
279:   PetscReal kilogram;
280:   PetscReal second;
281:   /* derived */
282:   PetscReal Pascal;
283:   PetscReal year;
284: };

286: static PetscErrorCode THIJacobianLocal_3D_Full(DMDALocalInfo *, Node ***, Mat, Mat, THI);
287: static PetscErrorCode THIJacobianLocal_3D_Tridiagonal(DMDALocalInfo *, Node ***, Mat, Mat, THI);
288: static PetscErrorCode THIJacobianLocal_2D(DMDALocalInfo *, Node **, Mat, Mat, THI);

290: static void PrmHexGetZ(const PrmNode pn[], PetscInt k, PetscInt zm, PetscReal zn[])
291: {
292:   const PetscScalar zm1 = zm - 1, znl[8] = {pn[0].b + pn[0].h * (PetscScalar)k / zm1,       pn[1].b + pn[1].h * (PetscScalar)k / zm1,       pn[2].b + pn[2].h * (PetscScalar)k / zm1,       pn[3].b + pn[3].h * (PetscScalar)k / zm1,
293:                                             pn[0].b + pn[0].h * (PetscScalar)(k + 1) / zm1, pn[1].b + pn[1].h * (PetscScalar)(k + 1) / zm1, pn[2].b + pn[2].h * (PetscScalar)(k + 1) / zm1, pn[3].b + pn[3].h * (PetscScalar)(k + 1) / zm1};
294:   PetscInt          i;
295:   for (i = 0; i < 8; i++) zn[i] = PetscRealPart(znl[i]);
296: }

298: /* Tests A and C are from the ISMIP-HOM paper (Pattyn et al. 2008) */
299: static void THIInitialize_HOM_A(THI thi, PetscReal x, PetscReal y, PrmNode *p)
300: {
301:   Units     units = thi->units;
302:   PetscReal s     = -x * PetscSinReal(thi->alpha);

304:   p->b     = s - 1000 * units->meter + 500 * units->meter * PetscSinReal(x * 2 * PETSC_PI / thi->Lx) * PetscSinReal(y * 2 * PETSC_PI / thi->Ly);
305:   p->h     = s - p->b;
306:   p->beta2 = 1e30;
307: }

309: static void THIInitialize_HOM_C(THI thi, PetscReal x, PetscReal y, PrmNode *p)
310: {
311:   Units     units = thi->units;
312:   PetscReal s     = -x * PetscSinReal(thi->alpha);

314:   p->b = s - 1000 * units->meter;
315:   p->h = s - p->b;
316:   /* tau_b = beta2 v   is a stress (Pa) */
317:   p->beta2 = 1000 * (1 + PetscSinReal(x * 2 * PETSC_PI / thi->Lx) * PetscSinReal(y * 2 * PETSC_PI / thi->Ly)) * units->Pascal * units->year / units->meter;
318: }

320: /* These are just toys */

322: /* Same bed as test A, free slip everywhere except for a discontinuous jump to a circular sticky region in the middle. */
323: static void THIInitialize_HOM_X(THI thi, PetscReal xx, PetscReal yy, PrmNode *p)
324: {
325:   Units     units = thi->units;
326:   PetscReal x = xx * 2 * PETSC_PI / thi->Lx - PETSC_PI, y = yy * 2 * PETSC_PI / thi->Ly - PETSC_PI; /* [-pi,pi] */
327:   PetscReal r = PetscSqrtReal(x * x + y * y), s = -x * PetscSinReal(thi->alpha);
328:   p->b     = s - 1000 * units->meter + 500 * units->meter * PetscSinReal(x + PETSC_PI) * PetscSinReal(y + PETSC_PI);
329:   p->h     = s - p->b;
330:   p->beta2 = 1000 * (r < 1 ? 2 : 0) * units->Pascal * units->year / units->meter;
331: }

333: /* Like Z, but with 200 meter cliffs */
334: static void THIInitialize_HOM_Y(THI thi, PetscReal xx, PetscReal yy, PrmNode *p)
335: {
336:   Units     units = thi->units;
337:   PetscReal x = xx * 2 * PETSC_PI / thi->Lx - PETSC_PI, y = yy * 2 * PETSC_PI / thi->Ly - PETSC_PI; /* [-pi,pi] */
338:   PetscReal r = PetscSqrtReal(x * x + y * y), s = -x * PetscSinReal(thi->alpha);

340:   p->b = s - 1000 * units->meter + 500 * units->meter * PetscSinReal(x + PETSC_PI) * PetscSinReal(y + PETSC_PI);
341:   if (PetscRealPart(p->b) > -700 * units->meter) p->b += 200 * units->meter;
342:   p->h     = s - p->b;
343:   p->beta2 = 1000 * (1. + PetscSinReal(PetscSqrtReal(16 * r)) / PetscSqrtReal(1e-2 + 16 * r) * PetscCosReal(x * 3 / 2) * PetscCosReal(y * 3 / 2)) * units->Pascal * units->year / units->meter;
344: }

346: /* Same bed as A, smoothly varying slipperiness, similar to MATLAB's "sombrero" (uncorrelated with bathymetry) */
347: static void THIInitialize_HOM_Z(THI thi, PetscReal xx, PetscReal yy, PrmNode *p)
348: {
349:   Units     units = thi->units;
350:   PetscReal x = xx * 2 * PETSC_PI / thi->Lx - PETSC_PI, y = yy * 2 * PETSC_PI / thi->Ly - PETSC_PI; /* [-pi,pi] */
351:   PetscReal r = PetscSqrtReal(x * x + y * y), s = -x * PetscSinReal(thi->alpha);

353:   p->b     = s - 1000 * units->meter + 500 * units->meter * PetscSinReal(x + PETSC_PI) * PetscSinReal(y + PETSC_PI);
354:   p->h     = s - p->b;
355:   p->beta2 = 1000 * (1. + PetscSinReal(PetscSqrtReal(16 * r)) / PetscSqrtReal(1e-2 + 16 * r) * PetscCosReal(x * 3 / 2) * PetscCosReal(y * 3 / 2)) * units->Pascal * units->year / units->meter;
356: }

358: static void THIFriction(THI thi, PetscReal rbeta2, PetscReal gam, PetscReal *beta2, PetscReal *dbeta2)
359: {
360:   if (thi->friction.irefgam == 0) {
361:     Units units           = thi->units;
362:     thi->friction.irefgam = 1. / (0.5 * PetscSqr(thi->friction.refvel * units->meter / units->year));
363:     thi->friction.eps2    = 0.5 * PetscSqr(thi->friction.epsvel * units->meter / units->year) * thi->friction.irefgam;
364:   }
365:   if (thi->friction.exponent == 0) {
366:     *beta2  = rbeta2;
367:     *dbeta2 = 0;
368:   } else {
369:     *beta2  = rbeta2 * PetscPowReal(thi->friction.eps2 + gam * thi->friction.irefgam, thi->friction.exponent);
370:     *dbeta2 = thi->friction.exponent * *beta2 / (thi->friction.eps2 + gam * thi->friction.irefgam) * thi->friction.irefgam;
371:   }
372: }

374: static void THIViscosity(THI thi, PetscReal gam, PetscReal *eta, PetscReal *deta)
375: {
376:   PetscReal Bd2, eps, exponent;
377:   if (thi->viscosity.Bd2 == 0) {
378:     Units           units   = thi->units;
379:     const PetscReal n       = 3.,                                                     /* Glen exponent */
380:       p                     = 1. + 1. / n,                                            /* for Stokes */
381:       A                     = 1.e-16 * PetscPowReal(units->Pascal, -n) / units->year, /* softness parameter (Pa^{-n}/s) */
382:       B                     = PetscPowReal(A, -1. / n);                               /* hardness parameter */
383:     thi->viscosity.Bd2      = B / 2;
384:     thi->viscosity.exponent = (p - 2) / 2;
385:     thi->viscosity.eps      = 0.5 * PetscSqr(1e-5 / units->year);
386:   }
387:   Bd2      = thi->viscosity.Bd2;
388:   exponent = thi->viscosity.exponent;
389:   eps      = thi->viscosity.eps;
390:   *eta     = Bd2 * PetscPowReal(eps + gam, exponent);
391:   *deta    = exponent * (*eta) / (eps + gam);
392: }

394: static void RangeUpdate(PetscReal *min, PetscReal *max, PetscReal x)
395: {
396:   if (x < *min) *min = x;
397:   if (x > *max) *max = x;
398: }

400: static void PRangeClear(PRange *p)
401: {
402:   p->cmin = p->min = 1e100;
403:   p->cmax = p->max = -1e100;
404: }

406: static PetscErrorCode PRangeMinMax(PRange *p, PetscReal min, PetscReal max)
407: {
408:   PetscFunctionBeginUser;
409:   p->cmin = min;
410:   p->cmax = max;
411:   if (min < p->min) p->min = min;
412:   if (max > p->max) p->max = max;
413:   PetscFunctionReturn(PETSC_SUCCESS);
414: }

416: static PetscErrorCode THIDestroy(THI *thi)
417: {
418:   PetscFunctionBeginUser;
419:   if (!*thi) PetscFunctionReturn(PETSC_SUCCESS);
420:   if (--((PetscObject)*thi)->refct > 0) {
421:     *thi = 0;
422:     PetscFunctionReturn(PETSC_SUCCESS);
423:   }
424:   PetscCall(PetscFree((*thi)->units));
425:   PetscCall(PetscFree((*thi)->mattype));
426:   PetscCall(PetscHeaderDestroy(thi));
427:   PetscFunctionReturn(PETSC_SUCCESS);
428: }

430: static PetscErrorCode THICreate(MPI_Comm comm, THI *inthi)
431: {
432:   static PetscBool registered = PETSC_FALSE;
433:   THI              thi;
434:   Units            units;

436:   PetscFunctionBeginUser;
437:   *inthi = 0;
438:   if (!registered) {
439:     PetscCall(PetscClassIdRegister("Toy Hydrostatic Ice", &THI_CLASSID));
440:     registered = PETSC_TRUE;
441:   }
442:   PetscCall(PetscHeaderCreate(thi, THI_CLASSID, "THI", "Toy Hydrostatic Ice", "", comm, THIDestroy, 0));

444:   PetscCall(PetscNew(&thi->units));
445:   units           = thi->units;
446:   units->meter    = 1e-2;
447:   units->second   = 1e-7;
448:   units->kilogram = 1e-12;

450:   PetscOptionsBegin(comm, NULL, "Scaled units options", "");
451:   {
452:     PetscCall(PetscOptionsReal("-units_meter", "1 meter in scaled length units", "", units->meter, &units->meter, NULL));
453:     PetscCall(PetscOptionsReal("-units_second", "1 second in scaled time units", "", units->second, &units->second, NULL));
454:     PetscCall(PetscOptionsReal("-units_kilogram", "1 kilogram in scaled mass units", "", units->kilogram, &units->kilogram, NULL));
455:   }
456:   PetscOptionsEnd();
457:   units->Pascal = units->kilogram / (units->meter * PetscSqr(units->second));
458:   units->year   = 31556926. * units->second; /* seconds per year */

460:   thi->Lx              = 10.e3;
461:   thi->Ly              = 10.e3;
462:   thi->Lz              = 1000;
463:   thi->dirichlet_scale = 1;
464:   thi->verbose         = PETSC_FALSE;

466:   PetscOptionsBegin(comm, NULL, "Toy Hydrostatic Ice options", "");
467:   {
468:     QuadratureType quad       = QUAD_GAUSS;
469:     char           homexp[]   = "A";
470:     char           mtype[256] = MATSBAIJ;
471:     PetscReal      L, m = 1.0;
472:     PetscBool      flg;
473:     L = thi->Lx;
474:     PetscCall(PetscOptionsReal("-thi_L", "Domain size (m)", "", L, &L, &flg));
475:     if (flg) thi->Lx = thi->Ly = L;
476:     PetscCall(PetscOptionsReal("-thi_Lx", "X Domain size (m)", "", thi->Lx, &thi->Lx, NULL));
477:     PetscCall(PetscOptionsReal("-thi_Ly", "Y Domain size (m)", "", thi->Ly, &thi->Ly, NULL));
478:     PetscCall(PetscOptionsReal("-thi_Lz", "Z Domain size (m)", "", thi->Lz, &thi->Lz, NULL));
479:     PetscCall(PetscOptionsString("-thi_hom", "ISMIP-HOM experiment (A or C)", "", homexp, homexp, sizeof(homexp), NULL));
480:     switch (homexp[0] = toupper(homexp[0])) {
481:     case 'A':
482:       thi->initialize = THIInitialize_HOM_A;
483:       thi->no_slip    = PETSC_TRUE;
484:       thi->alpha      = 0.5;
485:       break;
486:     case 'C':
487:       thi->initialize = THIInitialize_HOM_C;
488:       thi->no_slip    = PETSC_FALSE;
489:       thi->alpha      = 0.1;
490:       break;
491:     case 'X':
492:       thi->initialize = THIInitialize_HOM_X;
493:       thi->no_slip    = PETSC_FALSE;
494:       thi->alpha      = 0.3;
495:       break;
496:     case 'Y':
497:       thi->initialize = THIInitialize_HOM_Y;
498:       thi->no_slip    = PETSC_FALSE;
499:       thi->alpha      = 0.5;
500:       break;
501:     case 'Z':
502:       thi->initialize = THIInitialize_HOM_Z;
503:       thi->no_slip    = PETSC_FALSE;
504:       thi->alpha      = 0.5;
505:       break;
506:     default:
507:       SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "HOM experiment '%c' not implemented", homexp[0]);
508:     }
509:     PetscCall(PetscOptionsEnum("-thi_quadrature", "Quadrature to use for 3D elements", "", QuadratureTypes, (PetscEnum)quad, (PetscEnum *)&quad, NULL));
510:     switch (quad) {
511:     case QUAD_GAUSS:
512:       HexQInterp = HexQInterp_Gauss;
513:       HexQDeriv  = HexQDeriv_Gauss;
514:       break;
515:     case QUAD_LOBATTO:
516:       HexQInterp = HexQInterp_Lobatto;
517:       HexQDeriv  = HexQDeriv_Lobatto;
518:       break;
519:     }
520:     PetscCall(PetscOptionsReal("-thi_alpha", "Bed angle (degrees)", "", thi->alpha, &thi->alpha, NULL));

522:     thi->friction.refvel = 100.;
523:     thi->friction.epsvel = 1.;

525:     PetscCall(PetscOptionsReal("-thi_friction_refvel", "Reference velocity for sliding", "", thi->friction.refvel, &thi->friction.refvel, NULL));
526:     PetscCall(PetscOptionsReal("-thi_friction_epsvel", "Regularization velocity for sliding", "", thi->friction.epsvel, &thi->friction.epsvel, NULL));
527:     PetscCall(PetscOptionsReal("-thi_friction_m", "Friction exponent, 0=Coulomb, 1=Navier", "", m, &m, NULL));

529:     thi->friction.exponent = (m - 1) / 2;

531:     PetscCall(PetscOptionsReal("-thi_dirichlet_scale", "Scale Dirichlet boundary conditions by this factor", "", thi->dirichlet_scale, &thi->dirichlet_scale, NULL));
532:     PetscCall(PetscOptionsReal("-thi_ssa_friction_scale", "Scale slip boundary conditions by this factor in SSA (2D) assembly", "", thi->ssa_friction_scale, &thi->ssa_friction_scale, NULL));
533:     PetscCall(PetscOptionsBool("-thi_coarse2d", "Use a 2D coarse space corresponding to SSA", "", thi->coarse2d, &thi->coarse2d, NULL));
534:     PetscCall(PetscOptionsBool("-thi_tridiagonal", "Assemble a tridiagonal system (column coupling only) on the finest level", "", thi->tridiagonal, &thi->tridiagonal, NULL));
535:     PetscCall(PetscOptionsFList("-thi_mat_type", "Matrix type", "MatSetType", MatList, mtype, (char *)mtype, sizeof(mtype), NULL));
536:     PetscCall(PetscStrallocpy(mtype, (char **)&thi->mattype));
537:     PetscCall(PetscOptionsBool("-thi_verbose", "Enable verbose output (like matrix sizes and statistics)", "", thi->verbose, &thi->verbose, NULL));
538:   }
539:   PetscOptionsEnd();

541:   /* dimensionalize */
542:   thi->Lx *= units->meter;
543:   thi->Ly *= units->meter;
544:   thi->Lz *= units->meter;
545:   thi->alpha *= PETSC_PI / 180;

547:   PRangeClear(&thi->eta);
548:   PRangeClear(&thi->beta2);

550:   {
551:     PetscReal u = 1000 * units->meter / (3e7 * units->second), gradu = u / (100 * units->meter), eta, deta, rho = 910 * units->kilogram / PetscPowReal(units->meter, 3), grav = 9.81 * units->meter / PetscSqr(units->second),
552:               driving = rho * grav * PetscSinReal(thi->alpha) * 1000 * units->meter;
553:     THIViscosity(thi, 0.5 * gradu * gradu, &eta, &deta);
554:     thi->rhog = rho * grav;
555:     if (thi->verbose) {
556:       PetscCall(PetscPrintf(PetscObjectComm((PetscObject)thi), "Units: meter %8.2g  second %8.2g  kg %8.2g  Pa %8.2g\n", (double)units->meter, (double)units->second, (double)units->kilogram, (double)units->Pascal));
557:       PetscCall(PetscPrintf(PetscObjectComm((PetscObject)thi), "Domain (%6.2g,%6.2g,%6.2g), pressure %8.2g, driving stress %8.2g\n", (double)thi->Lx, (double)thi->Ly, (double)thi->Lz, (double)(rho * grav * 1e3 * units->meter), (double)driving));
558:       PetscCall(PetscPrintf(PetscObjectComm((PetscObject)thi), "Large velocity 1km/a %8.2g, velocity gradient %8.2g, eta %8.2g, stress %8.2g, ratio %8.2g\n", (double)u, (double)gradu, (double)eta, (double)(2 * eta * gradu), (double)(2 * eta * gradu / driving)));
559:       THIViscosity(thi, 0.5 * PetscSqr(1e-3 * gradu), &eta, &deta);
560:       PetscCall(PetscPrintf(PetscObjectComm((PetscObject)thi), "Small velocity 1m/a  %8.2g, velocity gradient %8.2g, eta %8.2g, stress %8.2g, ratio %8.2g\n", (double)(1e-3 * u), (double)(1e-3 * gradu), (double)eta, (double)(2 * eta * 1e-3 * gradu), (double)(2 * eta * 1e-3 * gradu / driving)));
561:     }
562:   }

564:   *inthi = thi;
565:   PetscFunctionReturn(PETSC_SUCCESS);
566: }

568: static PetscErrorCode THIInitializePrm(THI thi, DM da2prm, Vec prm)
569: {
570:   PrmNode **p;
571:   PetscInt  i, j, xs, xm, ys, ym, mx, my;

573:   PetscFunctionBeginUser;
574:   PetscCall(DMDAGetGhostCorners(da2prm, &ys, &xs, 0, &ym, &xm, 0));
575:   PetscCall(DMDAGetInfo(da2prm, 0, &my, &mx, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0));
576:   PetscCall(DMDAVecGetArray(da2prm, prm, &p));
577:   for (i = xs; i < xs + xm; i++) {
578:     for (j = ys; j < ys + ym; j++) {
579:       PetscReal xx = thi->Lx * i / mx, yy = thi->Ly * j / my;
580:       thi->initialize(thi, xx, yy, &p[i][j]);
581:     }
582:   }
583:   PetscCall(DMDAVecRestoreArray(da2prm, prm, &p));
584:   PetscFunctionReturn(PETSC_SUCCESS);
585: }

587: static PetscErrorCode THISetUpDM(THI thi, DM dm)
588: {
589:   PetscInt        refinelevel, coarsenlevel, level, dim, Mx, My, Mz, mx, my, s;
590:   DMDAStencilType st;
591:   DM              da2prm;
592:   Vec             X;

594:   PetscFunctionBeginUser;
595:   PetscCall(DMDAGetInfo(dm, &dim, &Mz, &My, &Mx, 0, &my, &mx, 0, &s, 0, 0, 0, &st));
596:   if (dim == 2) PetscCall(DMDAGetInfo(dm, &dim, &My, &Mx, 0, &my, &mx, 0, 0, &s, 0, 0, 0, &st));
597:   PetscCall(DMGetRefineLevel(dm, &refinelevel));
598:   PetscCall(DMGetCoarsenLevel(dm, &coarsenlevel));
599:   level = refinelevel - coarsenlevel;
600:   PetscCall(DMDACreate2d(PetscObjectComm((PetscObject)thi), DM_BOUNDARY_PERIODIC, DM_BOUNDARY_PERIODIC, st, My, Mx, my, mx, sizeof(PrmNode) / sizeof(PetscScalar), s, 0, 0, &da2prm));
601:   PetscCall(DMSetUp(da2prm));
602:   PetscCall(DMCreateLocalVector(da2prm, &X));
603:   {
604:     PetscReal Lx = thi->Lx / thi->units->meter, Ly = thi->Ly / thi->units->meter, Lz = thi->Lz / thi->units->meter;
605:     if (dim == 2) {
606:       PetscCall(PetscPrintf(PetscObjectComm((PetscObject)thi), "Level %" PetscInt_FMT " domain size (m) %8.2g x %8.2g, num elements %" PetscInt_FMT " x %" PetscInt_FMT " (%" PetscInt_FMT "), size (m) %g x %g\n", level, (double)Lx, (double)Ly, Mx, My, Mx * My, (double)(Lx / Mx), (double)(Ly / My)));
607:     } else {
608:       PetscCall(PetscPrintf(PetscObjectComm((PetscObject)thi), "Level %" PetscInt_FMT " domain size (m) %8.2g x %8.2g x %8.2g, num elements %" PetscInt_FMT " x %" PetscInt_FMT " x %" PetscInt_FMT " (%" PetscInt_FMT "), size (m) %g x %g x %g\n", level, (double)Lx, (double)Ly, (double)Lz, Mx, My, Mz, Mx * My * Mz, (double)(Lx / Mx), (double)(Ly / My), (double)(1000. / (Mz - 1))));
609:     }
610:   }
611:   PetscCall(THIInitializePrm(thi, da2prm, X));
612:   if (thi->tridiagonal) { /* Reset coarse Jacobian evaluation */
613:     PetscCall(DMDASNESSetJacobianLocal(dm, (DMDASNESJacobianFn *)THIJacobianLocal_3D_Full, thi));
614:   }
615:   if (thi->coarse2d) PetscCall(DMDASNESSetJacobianLocal(dm, (DMDASNESJacobianFn *)THIJacobianLocal_2D, thi));
616:   PetscCall(PetscObjectCompose((PetscObject)dm, "DMDA2Prm", (PetscObject)da2prm));
617:   PetscCall(PetscObjectCompose((PetscObject)dm, "DMDA2Prm_Vec", (PetscObject)X));
618:   PetscCall(DMDestroy(&da2prm));
619:   PetscCall(VecDestroy(&X));
620:   PetscFunctionReturn(PETSC_SUCCESS);
621: }

623: static PetscErrorCode DMCoarsenHook_THI(DM dmf, DM dmc, void *ctx)
624: {
625:   THI      thi = (THI)ctx;
626:   PetscInt rlevel, clevel;

628:   PetscFunctionBeginUser;
629:   PetscCall(THISetUpDM(thi, dmc));
630:   PetscCall(DMGetRefineLevel(dmc, &rlevel));
631:   PetscCall(DMGetCoarsenLevel(dmc, &clevel));
632:   if (rlevel - clevel == 0) PetscCall(DMSetMatType(dmc, MATAIJ));
633:   PetscCall(DMCoarsenHookAdd(dmc, DMCoarsenHook_THI, NULL, thi));
634:   PetscFunctionReturn(PETSC_SUCCESS);
635: }

637: static PetscErrorCode DMRefineHook_THI(DM dmc, DM dmf, void *ctx)
638: {
639:   THI thi = (THI)ctx;

641:   PetscFunctionBeginUser;
642:   PetscCall(THISetUpDM(thi, dmf));
643:   PetscCall(DMSetMatType(dmf, thi->mattype));
644:   PetscCall(DMRefineHookAdd(dmf, DMRefineHook_THI, NULL, thi));
645:   /* With grid sequencing, a formerly-refined DM will later be coarsened by PCSetUp_MG */
646:   PetscCall(DMCoarsenHookAdd(dmf, DMCoarsenHook_THI, NULL, thi));
647:   PetscFunctionReturn(PETSC_SUCCESS);
648: }

650: static PetscErrorCode THIDAGetPrm(DM da, PrmNode ***prm)
651: {
652:   DM  da2prm;
653:   Vec X;

655:   PetscFunctionBeginUser;
656:   PetscCall(PetscObjectQuery((PetscObject)da, "DMDA2Prm", (PetscObject *)&da2prm));
657:   PetscCheck(da2prm, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "No DMDA2Prm composed with given DMDA");
658:   PetscCall(PetscObjectQuery((PetscObject)da, "DMDA2Prm_Vec", (PetscObject *)&X));
659:   PetscCheck(X, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "No DMDA2Prm_Vec composed with given DMDA");
660:   PetscCall(DMDAVecGetArray(da2prm, X, prm));
661:   PetscFunctionReturn(PETSC_SUCCESS);
662: }

664: static PetscErrorCode THIDARestorePrm(DM da, PrmNode ***prm)
665: {
666:   DM  da2prm;
667:   Vec X;

669:   PetscFunctionBeginUser;
670:   PetscCall(PetscObjectQuery((PetscObject)da, "DMDA2Prm", (PetscObject *)&da2prm));
671:   PetscCheck(da2prm, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "No DMDA2Prm composed with given DMDA");
672:   PetscCall(PetscObjectQuery((PetscObject)da, "DMDA2Prm_Vec", (PetscObject *)&X));
673:   PetscCheck(X, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "No DMDA2Prm_Vec composed with given DMDA");
674:   PetscCall(DMDAVecRestoreArray(da2prm, X, prm));
675:   PetscFunctionReturn(PETSC_SUCCESS);
676: }

678: static PetscErrorCode THIInitial(SNES snes, Vec X, void *ctx)
679: {
680:   THI       thi;
681:   PetscInt  i, j, k, xs, xm, ys, ym, zs, zm, mx, my;
682:   PetscReal hx, hy;
683:   PrmNode **prm;
684:   Node   ***x;
685:   DM        da;

687:   PetscFunctionBeginUser;
688:   PetscCall(SNESGetDM(snes, &da));
689:   PetscCall(DMGetApplicationContext(da, &thi));
690:   PetscCall(DMDAGetInfo(da, 0, 0, &my, &mx, 0, 0, 0, 0, 0, 0, 0, 0, 0));
691:   PetscCall(DMDAGetCorners(da, &zs, &ys, &xs, &zm, &ym, &xm));
692:   PetscCall(DMDAVecGetArray(da, X, &x));
693:   PetscCall(THIDAGetPrm(da, &prm));
694:   hx = thi->Lx / mx;
695:   hy = thi->Ly / my;
696:   for (i = xs; i < xs + xm; i++) {
697:     for (j = ys; j < ys + ym; j++) {
698:       for (k = zs; k < zs + zm; k++) {
699:         const PetscScalar zm1 = zm - 1, drivingx = thi->rhog * (prm[i + 1][j].b + prm[i + 1][j].h - prm[i - 1][j].b - prm[i - 1][j].h) / (2 * hx), drivingy = thi->rhog * (prm[i][j + 1].b + prm[i][j + 1].h - prm[i][j - 1].b - prm[i][j - 1].h) / (2 * hy);
700:         x[i][j][k].u = 0. * drivingx * prm[i][j].h * (PetscScalar)k / zm1;
701:         x[i][j][k].v = 0. * drivingy * prm[i][j].h * (PetscScalar)k / zm1;
702:       }
703:     }
704:   }
705:   PetscCall(DMDAVecRestoreArray(da, X, &x));
706:   PetscCall(THIDARestorePrm(da, &prm));
707:   PetscFunctionReturn(PETSC_SUCCESS);
708: }

710: static void PointwiseNonlinearity(THI thi, const Node n[PETSC_RESTRICT], const PetscReal phi[PETSC_RESTRICT], PetscReal dphi[PETSC_RESTRICT][3], PetscScalar *PETSC_RESTRICT u, PetscScalar *PETSC_RESTRICT v, PetscScalar du[PETSC_RESTRICT], PetscScalar dv[PETSC_RESTRICT], PetscReal *eta, PetscReal *deta)
711: {
712:   PetscInt    l, ll;
713:   PetscScalar gam;

715:   du[0] = du[1] = du[2] = 0;
716:   dv[0] = dv[1] = dv[2] = 0;
717:   *u                    = 0;
718:   *v                    = 0;
719:   for (l = 0; l < 8; l++) {
720:     *u += phi[l] * n[l].u;
721:     *v += phi[l] * n[l].v;
722:     for (ll = 0; ll < 3; ll++) {
723:       du[ll] += dphi[l][ll] * n[l].u;
724:       dv[ll] += dphi[l][ll] * n[l].v;
725:     }
726:   }
727:   gam = PetscSqr(du[0]) + PetscSqr(dv[1]) + du[0] * dv[1] + 0.25 * PetscSqr(du[1] + dv[0]) + 0.25 * PetscSqr(du[2]) + 0.25 * PetscSqr(dv[2]);
728:   THIViscosity(thi, PetscRealPart(gam), eta, deta);
729: }

731: static void PointwiseNonlinearity2D(THI thi, Node n[], PetscReal phi[], PetscReal dphi[4][2], PetscScalar *u, PetscScalar *v, PetscScalar du[], PetscScalar dv[], PetscReal *eta, PetscReal *deta)
732: {
733:   PetscInt    l, ll;
734:   PetscScalar gam;

736:   du[0] = du[1] = 0;
737:   dv[0] = dv[1] = 0;
738:   *u            = 0;
739:   *v            = 0;
740:   for (l = 0; l < 4; l++) {
741:     *u += phi[l] * n[l].u;
742:     *v += phi[l] * n[l].v;
743:     for (ll = 0; ll < 2; ll++) {
744:       du[ll] += dphi[l][ll] * n[l].u;
745:       dv[ll] += dphi[l][ll] * n[l].v;
746:     }
747:   }
748:   gam = PetscSqr(du[0]) + PetscSqr(dv[1]) + du[0] * dv[1] + 0.25 * PetscSqr(du[1] + dv[0]);
749:   THIViscosity(thi, PetscRealPart(gam), eta, deta);
750: }

752: static PetscErrorCode THIFunctionLocal(DMDALocalInfo *info, Node ***x, Node ***f, THI thi)
753: {
754:   PetscInt  xs, ys, xm, ym, zm, i, j, k, q, l;
755:   PetscReal hx, hy, etamin, etamax, beta2min, beta2max;
756:   PrmNode **prm;

758:   PetscFunctionBeginUser;
759:   xs = info->zs;
760:   ys = info->ys;
761:   xm = info->zm;
762:   ym = info->ym;
763:   zm = info->xm;
764:   hx = thi->Lx / info->mz;
765:   hy = thi->Ly / info->my;

767:   etamin   = 1e100;
768:   etamax   = 0;
769:   beta2min = 1e100;
770:   beta2max = 0;

772:   PetscCall(THIDAGetPrm(info->da, &prm));

774:   for (i = xs; i < xs + xm; i++) {
775:     for (j = ys; j < ys + ym; j++) {
776:       PrmNode pn[4];
777:       QuadExtract(prm, i, j, pn);
778:       for (k = 0; k < zm - 1; k++) {
779:         PetscInt  ls = 0;
780:         Node      n[8], *fn[8];
781:         PetscReal zn[8], etabase = 0;
782:         PrmHexGetZ(pn, k, zm, zn);
783:         HexExtract(x, i, j, k, n);
784:         HexExtractRef(f, i, j, k, fn);
785:         if (thi->no_slip && k == 0) {
786:           for (l = 0; l < 4; l++) n[l].u = n[l].v = 0;
787:           /* The first 4 basis functions lie on the bottom layer, so their contribution is exactly 0, hence we can skip them */
788:           ls = 4;
789:         }
790:         for (q = 0; q < 8; q++) {
791:           PetscReal   dz[3], phi[8], dphi[8][3], jw, eta, deta;
792:           PetscScalar du[3], dv[3], u, v;
793:           HexGrad(HexQDeriv[q], zn, dz);
794:           HexComputeGeometry(q, hx, hy, dz, phi, dphi, &jw);
795:           PointwiseNonlinearity(thi, n, phi, dphi, &u, &v, du, dv, &eta, &deta);
796:           jw /= thi->rhog; /* scales residuals to be O(1) */
797:           if (q == 0) etabase = eta;
798:           RangeUpdate(&etamin, &etamax, eta);
799:           for (l = ls; l < 8; l++) { /* test functions */
800:             const PetscReal ds[2] = {-PetscSinReal(thi->alpha), 0};
801:             const PetscReal pp = phi[l], *dp = dphi[l];
802:             fn[l]->u += dp[0] * jw * eta * (4. * du[0] + 2. * dv[1]) + dp[1] * jw * eta * (du[1] + dv[0]) + dp[2] * jw * eta * du[2] + pp * jw * thi->rhog * ds[0];
803:             fn[l]->v += dp[1] * jw * eta * (2. * du[0] + 4. * dv[1]) + dp[0] * jw * eta * (du[1] + dv[0]) + dp[2] * jw * eta * dv[2] + pp * jw * thi->rhog * ds[1];
804:           }
805:         }
806:         if (k == 0) { /* we are on a bottom face */
807:           if (thi->no_slip) {
808:             /* Note: Non-Galerkin coarse grid operators are very sensitive to the scaling of Dirichlet boundary
809:             * conditions.  After shenanigans above, etabase contains the effective viscosity at the closest quadrature
810:             * point to the bed.  We want the diagonal entry in the Dirichlet condition to have similar magnitude to the
811:             * diagonal entry corresponding to the adjacent node.  The fundamental scaling of the viscous part is in
812:             * diagu, diagv below.  This scaling is easy to recognize by considering the finite difference operator after
813:             * scaling by element size.  The no-slip Dirichlet condition is scaled by this factor, and also in the
814:             * assembled matrix (see the similar block in THIJacobianLocal).
815:             *
816:             * Note that the residual at this Dirichlet node is linear in the state at this node, but also depends
817:             * (nonlinearly in general) on the neighboring interior nodes through the local viscosity.  This will make
818:             * a matrix-free Jacobian have extra entries in the corresponding row.  We assemble only the diagonal part,
819:             * so the solution will exactly satisfy the boundary condition after the first linear iteration.
820:             */
821:             const PetscReal   hz    = PetscRealPart(pn[0].h) / (zm - 1.);
822:             const PetscScalar diagu = 2 * etabase / thi->rhog * (hx * hy / hz + hx * hz / hy + 4 * hy * hz / hx), diagv = 2 * etabase / thi->rhog * (hx * hy / hz + 4 * hx * hz / hy + hy * hz / hx);
823:             fn[0]->u = thi->dirichlet_scale * diagu * x[i][j][k].u;
824:             fn[0]->v = thi->dirichlet_scale * diagv * x[i][j][k].v;
825:           } else { /* Integrate over bottom face to apply boundary condition */
826:             for (q = 0; q < 4; q++) {
827:               const PetscReal jw = 0.25 * hx * hy / thi->rhog, *phi = QuadQInterp[q];
828:               PetscScalar     u = 0, v = 0, rbeta2 = 0;
829:               PetscReal       beta2, dbeta2;
830:               for (l = 0; l < 4; l++) {
831:                 u += phi[l] * n[l].u;
832:                 v += phi[l] * n[l].v;
833:                 rbeta2 += phi[l] * pn[l].beta2;
834:               }
835:               THIFriction(thi, PetscRealPart(rbeta2), PetscRealPart(u * u + v * v) / 2, &beta2, &dbeta2);
836:               RangeUpdate(&beta2min, &beta2max, beta2);
837:               for (l = 0; l < 4; l++) {
838:                 const PetscReal pp = phi[l];
839:                 fn[ls + l]->u += pp * jw * beta2 * u;
840:                 fn[ls + l]->v += pp * jw * beta2 * v;
841:               }
842:             }
843:           }
844:         }
845:       }
846:     }
847:   }

849:   PetscCall(THIDARestorePrm(info->da, &prm));

851:   PetscCall(PRangeMinMax(&thi->eta, etamin, etamax));
852:   PetscCall(PRangeMinMax(&thi->beta2, beta2min, beta2max));
853:   PetscFunctionReturn(PETSC_SUCCESS);
854: }

856: static PetscErrorCode THIMatrixStatistics(THI thi, Mat B, PetscViewer viewer)
857: {
858:   PetscReal   nrm;
859:   PetscInt    m;
860:   PetscMPIInt rank;

862:   PetscFunctionBeginUser;
863:   PetscCall(MatNorm(B, NORM_FROBENIUS, &nrm));
864:   PetscCall(MatGetSize(B, &m, 0));
865:   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B), &rank));
866:   if (rank == 0) {
867:     PetscScalar val0, val2;
868:     PetscCall(MatGetValue(B, 0, 0, &val0));
869:     PetscCall(MatGetValue(B, 2, 2, &val2));
870:     PetscCall(PetscViewerASCIIPrintf(viewer, "Matrix dim %" PetscInt_FMT " norm %8.2e (0,0) %8.2e  (2,2) %8.2e %8.2e <= eta <= %8.2e %8.2e <= beta2 <= %8.2e\n", m, (double)nrm, (double)PetscRealPart(val0), (double)PetscRealPart(val2),
871:                                      (double)thi->eta.cmin, (double)thi->eta.cmax, (double)thi->beta2.cmin, (double)thi->beta2.cmax));
872:   }
873:   PetscFunctionReturn(PETSC_SUCCESS);
874: }

876: static PetscErrorCode THISurfaceStatistics(DM da, Vec X, PetscReal *min, PetscReal *max, PetscReal *mean)
877: {
878:   Node     ***x;
879:   PetscInt    i, j, xs, ys, zs, xm, ym, zm, mx, my, mz;
880:   PetscReal   umin = 1e100, umax = -1e100;
881:   PetscScalar usum = 0.0, gusum;

883:   PetscFunctionBeginUser;
884:   *min = *max = *mean = 0;
885:   PetscCall(DMDAGetInfo(da, 0, &mz, &my, &mx, 0, 0, 0, 0, 0, 0, 0, 0, 0));
886:   PetscCall(DMDAGetCorners(da, &zs, &ys, &xs, &zm, &ym, &xm));
887:   PetscCheck(zs == 0 && zm == mz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected decomposition");
888:   PetscCall(DMDAVecGetArray(da, X, &x));
889:   for (i = xs; i < xs + xm; i++) {
890:     for (j = ys; j < ys + ym; j++) {
891:       PetscReal u = PetscRealPart(x[i][j][zm - 1].u);
892:       RangeUpdate(&umin, &umax, u);
893:       usum += u;
894:     }
895:   }
896:   PetscCall(DMDAVecRestoreArray(da, X, &x));
897:   PetscCall(MPIU_Allreduce(&umin, min, 1, MPIU_REAL, MPIU_MIN, PetscObjectComm((PetscObject)da)));
898:   PetscCall(MPIU_Allreduce(&umax, max, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)da)));
899:   PetscCall(MPIU_Allreduce(&usum, &gusum, 1, MPIU_SCALAR, MPIU_SUM, PetscObjectComm((PetscObject)da)));
900:   *mean = PetscRealPart(gusum) / (mx * my);
901:   PetscFunctionReturn(PETSC_SUCCESS);
902: }

904: static PetscErrorCode THISolveStatistics(THI thi, SNES snes, PetscInt coarsened, const char name[])
905: {
906:   MPI_Comm comm;
907:   Vec      X;
908:   DM       dm;

910:   PetscFunctionBeginUser;
911:   PetscCall(PetscObjectGetComm((PetscObject)thi, &comm));
912:   PetscCall(SNESGetSolution(snes, &X));
913:   PetscCall(SNESGetDM(snes, &dm));
914:   PetscCall(PetscPrintf(comm, "Solution statistics after solve: %s\n", name));
915:   {
916:     PetscInt            its, lits;
917:     SNESConvergedReason reason;
918:     PetscCall(SNESGetIterationNumber(snes, &its));
919:     PetscCall(SNESGetConvergedReason(snes, &reason));
920:     PetscCall(SNESGetLinearSolveIterations(snes, &lits));
921:     PetscCall(PetscPrintf(comm, "%s: Number of SNES iterations = %" PetscInt_FMT ", total linear iterations = %" PetscInt_FMT "\n", SNESConvergedReasons[reason], its, lits));
922:   }
923:   {
924:     PetscReal          nrm2, tmin[3] = {1e100, 1e100, 1e100}, tmax[3] = {-1e100, -1e100, -1e100}, min[3], max[3];
925:     PetscInt           i, j, m;
926:     const PetscScalar *x;
927:     PetscCall(VecNorm(X, NORM_2, &nrm2));
928:     PetscCall(VecGetLocalSize(X, &m));
929:     PetscCall(VecGetArrayRead(X, &x));
930:     for (i = 0; i < m; i += 2) {
931:       PetscReal u = PetscRealPart(x[i]), v = PetscRealPart(x[i + 1]), c = PetscSqrtReal(u * u + v * v);
932:       tmin[0] = PetscMin(u, tmin[0]);
933:       tmin[1] = PetscMin(v, tmin[1]);
934:       tmin[2] = PetscMin(c, tmin[2]);
935:       tmax[0] = PetscMax(u, tmax[0]);
936:       tmax[1] = PetscMax(v, tmax[1]);
937:       tmax[2] = PetscMax(c, tmax[2]);
938:     }
939:     PetscCall(VecRestoreArrayRead(X, &x));
940:     PetscCall(MPIU_Allreduce(tmin, min, 3, MPIU_REAL, MPIU_MIN, PetscObjectComm((PetscObject)thi)));
941:     PetscCall(MPIU_Allreduce(tmax, max, 3, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)thi)));
942:     /* Dimensionalize to meters/year */
943:     nrm2 *= thi->units->year / thi->units->meter;
944:     for (j = 0; j < 3; j++) {
945:       min[j] *= thi->units->year / thi->units->meter;
946:       max[j] *= thi->units->year / thi->units->meter;
947:     }
948:     if (min[0] == 0.0) min[0] = 0.0;
949:     PetscCall(PetscPrintf(comm, "|X|_2 %g   %g <= u <=  %g   %g <= v <=  %g   %g <= c <=  %g \n", (double)nrm2, (double)min[0], (double)max[0], (double)min[1], (double)max[1], (double)min[2], (double)max[2]));
950:     {
951:       PetscReal umin, umax, umean;
952:       PetscCall(THISurfaceStatistics(dm, X, &umin, &umax, &umean));
953:       umin *= thi->units->year / thi->units->meter;
954:       umax *= thi->units->year / thi->units->meter;
955:       umean *= thi->units->year / thi->units->meter;
956:       PetscCall(PetscPrintf(comm, "Surface statistics: u in [%12.6e, %12.6e] mean %12.6e\n", (double)umin, (double)umax, (double)umean));
957:     }
958:     /* These values stay nondimensional */
959:     PetscCall(PetscPrintf(comm, "Global eta range   %g to %g converged range %g to %g\n", (double)thi->eta.min, (double)thi->eta.max, (double)thi->eta.cmin, (double)thi->eta.cmax));
960:     PetscCall(PetscPrintf(comm, "Global beta2 range %g to %g converged range %g to %g\n", (double)thi->beta2.min, (double)thi->beta2.max, (double)thi->beta2.cmin, (double)thi->beta2.cmax));
961:   }
962:   PetscFunctionReturn(PETSC_SUCCESS);
963: }

965: static PetscErrorCode THIJacobianLocal_2D(DMDALocalInfo *info, Node **x, Mat J, Mat B, THI thi)
966: {
967:   PetscInt  xs, ys, xm, ym, i, j, q, l, ll;
968:   PetscReal hx, hy;
969:   PrmNode **prm;

971:   PetscFunctionBeginUser;
972:   xs = info->ys;
973:   ys = info->xs;
974:   xm = info->ym;
975:   ym = info->xm;
976:   hx = thi->Lx / info->my;
977:   hy = thi->Ly / info->mx;

979:   PetscCall(MatZeroEntries(B));
980:   PetscCall(THIDAGetPrm(info->da, &prm));

982:   for (i = xs; i < xs + xm; i++) {
983:     for (j = ys; j < ys + ym; j++) {
984:       Node        n[4];
985:       PrmNode     pn[4];
986:       PetscScalar Ke[4 * 2][4 * 2];
987:       QuadExtract(prm, i, j, pn);
988:       QuadExtract(x, i, j, n);
989:       PetscCall(PetscMemzero(Ke, sizeof(Ke)));
990:       for (q = 0; q < 4; q++) {
991:         PetscReal   phi[4], dphi[4][2], jw, eta, deta, beta2, dbeta2;
992:         PetscScalar u, v, du[2], dv[2], h = 0, rbeta2 = 0;
993:         for (l = 0; l < 4; l++) {
994:           phi[l]     = QuadQInterp[q][l];
995:           dphi[l][0] = QuadQDeriv[q][l][0] * 2. / hx;
996:           dphi[l][1] = QuadQDeriv[q][l][1] * 2. / hy;
997:           h += phi[l] * pn[l].h;
998:           rbeta2 += phi[l] * pn[l].beta2;
999:         }
1000:         jw = 0.25 * hx * hy / thi->rhog; /* rhog is only scaling */
1001:         PointwiseNonlinearity2D(thi, n, phi, dphi, &u, &v, du, dv, &eta, &deta);
1002:         THIFriction(thi, PetscRealPart(rbeta2), PetscRealPart(u * u + v * v) / 2, &beta2, &dbeta2);
1003:         for (l = 0; l < 4; l++) {
1004:           const PetscReal pp = phi[l], *dp = dphi[l];
1005:           for (ll = 0; ll < 4; ll++) {
1006:             const PetscReal ppl = phi[ll], *dpl = dphi[ll];
1007:             PetscScalar     dgdu, dgdv;
1008:             dgdu = 2. * du[0] * dpl[0] + dv[1] * dpl[0] + 0.5 * (du[1] + dv[0]) * dpl[1];
1009:             dgdv = 2. * dv[1] * dpl[1] + du[0] * dpl[1] + 0.5 * (du[1] + dv[0]) * dpl[0];
1010:             /* Picard part */
1011:             Ke[l * 2 + 0][ll * 2 + 0] += dp[0] * jw * eta * 4. * dpl[0] + dp[1] * jw * eta * dpl[1] + pp * jw * (beta2 / h) * ppl * thi->ssa_friction_scale;
1012:             Ke[l * 2 + 0][ll * 2 + 1] += dp[0] * jw * eta * 2. * dpl[1] + dp[1] * jw * eta * dpl[0];
1013:             Ke[l * 2 + 1][ll * 2 + 0] += dp[1] * jw * eta * 2. * dpl[0] + dp[0] * jw * eta * dpl[1];
1014:             Ke[l * 2 + 1][ll * 2 + 1] += dp[1] * jw * eta * 4. * dpl[1] + dp[0] * jw * eta * dpl[0] + pp * jw * (beta2 / h) * ppl * thi->ssa_friction_scale;
1015:             /* extra Newton terms */
1016:             Ke[l * 2 + 0][ll * 2 + 0] += dp[0] * jw * deta * dgdu * (4. * du[0] + 2. * dv[1]) + dp[1] * jw * deta * dgdu * (du[1] + dv[0]) + pp * jw * (dbeta2 / h) * u * u * ppl * thi->ssa_friction_scale;
1017:             Ke[l * 2 + 0][ll * 2 + 1] += dp[0] * jw * deta * dgdv * (4. * du[0] + 2. * dv[1]) + dp[1] * jw * deta * dgdv * (du[1] + dv[0]) + pp * jw * (dbeta2 / h) * u * v * ppl * thi->ssa_friction_scale;
1018:             Ke[l * 2 + 1][ll * 2 + 0] += dp[1] * jw * deta * dgdu * (4. * dv[1] + 2. * du[0]) + dp[0] * jw * deta * dgdu * (du[1] + dv[0]) + pp * jw * (dbeta2 / h) * v * u * ppl * thi->ssa_friction_scale;
1019:             Ke[l * 2 + 1][ll * 2 + 1] += dp[1] * jw * deta * dgdv * (4. * dv[1] + 2. * du[0]) + dp[0] * jw * deta * dgdv * (du[1] + dv[0]) + pp * jw * (dbeta2 / h) * v * v * ppl * thi->ssa_friction_scale;
1020:           }
1021:         }
1022:       }
1023:       {
1024:         const MatStencil rc[4] = {
1025:           {0, i,     j,     0},
1026:           {0, i + 1, j,     0},
1027:           {0, i + 1, j + 1, 0},
1028:           {0, i,     j + 1, 0}
1029:         };
1030:         PetscCall(MatSetValuesBlockedStencil(B, 4, rc, 4, rc, &Ke[0][0], ADD_VALUES));
1031:       }
1032:     }
1033:   }
1034:   PetscCall(THIDARestorePrm(info->da, &prm));

1036:   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
1037:   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
1038:   PetscCall(MatSetOption(B, MAT_SYMMETRIC, PETSC_TRUE));
1039:   if (thi->verbose) PetscCall(THIMatrixStatistics(thi, B, PETSC_VIEWER_STDOUT_WORLD));
1040:   PetscFunctionReturn(PETSC_SUCCESS);
1041: }

1043: static PetscErrorCode THIJacobianLocal_3D(DMDALocalInfo *info, Node ***x, Mat B, THI thi, THIAssemblyMode amode)
1044: {
1045:   PetscInt  xs, ys, xm, ym, zm, i, j, k, q, l, ll;
1046:   PetscReal hx, hy;
1047:   PrmNode **prm;

1049:   PetscFunctionBeginUser;
1050:   xs = info->zs;
1051:   ys = info->ys;
1052:   xm = info->zm;
1053:   ym = info->ym;
1054:   zm = info->xm;
1055:   hx = thi->Lx / info->mz;
1056:   hy = thi->Ly / info->my;

1058:   PetscCall(MatZeroEntries(B));
1059:   PetscCall(MatSetOption(B, MAT_SUBSET_OFF_PROC_ENTRIES, PETSC_TRUE));
1060:   PetscCall(THIDAGetPrm(info->da, &prm));

1062:   for (i = xs; i < xs + xm; i++) {
1063:     for (j = ys; j < ys + ym; j++) {
1064:       PrmNode pn[4];
1065:       QuadExtract(prm, i, j, pn);
1066:       for (k = 0; k < zm - 1; k++) {
1067:         Node        n[8];
1068:         PetscReal   zn[8], etabase = 0;
1069:         PetscScalar Ke[8 * 2][8 * 2];
1070:         PetscInt    ls = 0;

1072:         PrmHexGetZ(pn, k, zm, zn);
1073:         HexExtract(x, i, j, k, n);
1074:         PetscCall(PetscMemzero(Ke, sizeof(Ke)));
1075:         if (thi->no_slip && k == 0) {
1076:           for (l = 0; l < 4; l++) n[l].u = n[l].v = 0;
1077:           ls = 4;
1078:         }
1079:         for (q = 0; q < 8; q++) {
1080:           PetscReal   dz[3], phi[8], dphi[8][3], jw, eta, deta;
1081:           PetscScalar du[3], dv[3], u, v;
1082:           HexGrad(HexQDeriv[q], zn, dz);
1083:           HexComputeGeometry(q, hx, hy, dz, phi, dphi, &jw);
1084:           PointwiseNonlinearity(thi, n, phi, dphi, &u, &v, du, dv, &eta, &deta);
1085:           jw /= thi->rhog; /* residuals are scaled by this factor */
1086:           if (q == 0) etabase = eta;
1087:           for (l = ls; l < 8; l++) { /* test functions */
1088:             const PetscReal *PETSC_RESTRICT dp = dphi[l];
1089: #if USE_SSE2_KERNELS
1090:             /* gcc (up to my 4.5 snapshot) is really bad at hoisting intrinsics so we do it manually */
1091:             __m128d p4 = _mm_set1_pd(4), p2 = _mm_set1_pd(2), p05 = _mm_set1_pd(0.5), p42 = _mm_setr_pd(4, 2), p24 = _mm_shuffle_pd(p42, p42, _MM_SHUFFLE2(0, 1)), du0 = _mm_set1_pd(du[0]), du1 = _mm_set1_pd(du[1]), du2 = _mm_set1_pd(du[2]), dv0 = _mm_set1_pd(dv[0]), dv1 = _mm_set1_pd(dv[1]), dv2 = _mm_set1_pd(dv[2]), jweta = _mm_set1_pd(jw * eta), jwdeta = _mm_set1_pd(jw * deta), dp0 = _mm_set1_pd(dp[0]), dp1 = _mm_set1_pd(dp[1]), dp2 = _mm_set1_pd(dp[2]), dp0jweta = _mm_mul_pd(dp0, jweta), dp1jweta = _mm_mul_pd(dp1, jweta), dp2jweta = _mm_mul_pd(dp2, jweta), p4du0p2dv1 = _mm_add_pd(_mm_mul_pd(p4, du0), _mm_mul_pd(p2, dv1)), /* 4 du0 + 2 dv1 */
1092:               p4dv1p2du0 = _mm_add_pd(_mm_mul_pd(p4, dv1), _mm_mul_pd(p2, du0)), /* 4 dv1 + 2 du0 */
1093:               pdu2dv2    = _mm_unpacklo_pd(du2, dv2),                            /* [du2, dv2] */
1094:               du1pdv0    = _mm_add_pd(du1, dv0),                                 /* du1 + dv0 */
1095:               t1         = _mm_mul_pd(dp0, p4du0p2dv1),                          /* dp0 (4 du0 + 2 dv1) */
1096:               t2         = _mm_mul_pd(dp1, p4dv1p2du0);                          /* dp1 (4 dv1 + 2 du0) */

1098: #endif
1099: #if defined COMPUTE_LOWER_TRIANGULAR      /* The element matrices are always symmetric so computing the lower-triangular part is not necessary */
1100:             for (ll = ls; ll < 8; ll++) { /* trial functions */
1101: #else
1102:             for (ll = l; ll < 8; ll++) {
1103: #endif
1104:               const PetscReal *PETSC_RESTRICT dpl = dphi[ll];
1105:               if (amode == THIASSEMBLY_TRIDIAGONAL && (l - ll) % 4) continue; /* these entries would not be inserted */
1106: #if !USE_SSE2_KERNELS
1107:               /* The analytic Jacobian in nice, easy-to-read form */
1108:               {
1109:                 PetscScalar dgdu, dgdv;
1110:                 dgdu = 2. * du[0] * dpl[0] + dv[1] * dpl[0] + 0.5 * (du[1] + dv[0]) * dpl[1] + 0.5 * du[2] * dpl[2];
1111:                 dgdv = 2. * dv[1] * dpl[1] + du[0] * dpl[1] + 0.5 * (du[1] + dv[0]) * dpl[0] + 0.5 * dv[2] * dpl[2];
1112:                 /* Picard part */
1113:                 Ke[l * 2 + 0][ll * 2 + 0] += dp[0] * jw * eta * 4. * dpl[0] + dp[1] * jw * eta * dpl[1] + dp[2] * jw * eta * dpl[2];
1114:                 Ke[l * 2 + 0][ll * 2 + 1] += dp[0] * jw * eta * 2. * dpl[1] + dp[1] * jw * eta * dpl[0];
1115:                 Ke[l * 2 + 1][ll * 2 + 0] += dp[1] * jw * eta * 2. * dpl[0] + dp[0] * jw * eta * dpl[1];
1116:                 Ke[l * 2 + 1][ll * 2 + 1] += dp[1] * jw * eta * 4. * dpl[1] + dp[0] * jw * eta * dpl[0] + dp[2] * jw * eta * dpl[2];
1117:                 /* extra Newton terms */
1118:                 Ke[l * 2 + 0][ll * 2 + 0] += dp[0] * jw * deta * dgdu * (4. * du[0] + 2. * dv[1]) + dp[1] * jw * deta * dgdu * (du[1] + dv[0]) + dp[2] * jw * deta * dgdu * du[2];
1119:                 Ke[l * 2 + 0][ll * 2 + 1] += dp[0] * jw * deta * dgdv * (4. * du[0] + 2. * dv[1]) + dp[1] * jw * deta * dgdv * (du[1] + dv[0]) + dp[2] * jw * deta * dgdv * du[2];
1120:                 Ke[l * 2 + 1][ll * 2 + 0] += dp[1] * jw * deta * dgdu * (4. * dv[1] + 2. * du[0]) + dp[0] * jw * deta * dgdu * (du[1] + dv[0]) + dp[2] * jw * deta * dgdu * dv[2];
1121:                 Ke[l * 2 + 1][ll * 2 + 1] += dp[1] * jw * deta * dgdv * (4. * dv[1] + 2. * du[0]) + dp[0] * jw * deta * dgdv * (du[1] + dv[0]) + dp[2] * jw * deta * dgdv * dv[2];
1122:               }
1123: #else
1124:               /* This SSE2 code is an exact replica of above, but uses explicit packed instructions for some speed
1125:               * benefit.  On my hardware, these intrinsics are almost twice as fast as above, reducing total assembly cost
1126:               * by 25 to 30 percent. */
1127:               {
1128:                 __m128d keu = _mm_loadu_pd(&Ke[l * 2 + 0][ll * 2 + 0]), kev = _mm_loadu_pd(&Ke[l * 2 + 1][ll * 2 + 0]), dpl01 = _mm_loadu_pd(&dpl[0]), dpl10 = _mm_shuffle_pd(dpl01, dpl01, _MM_SHUFFLE2(0, 1)), dpl2 = _mm_set_sd(dpl[2]), t0, t3, pdgduv;
1129:                 keu    = _mm_add_pd(keu, _mm_add_pd(_mm_mul_pd(_mm_mul_pd(dp0jweta, p42), dpl01), _mm_add_pd(_mm_mul_pd(dp1jweta, dpl10), _mm_mul_pd(dp2jweta, dpl2))));
1130:                 kev    = _mm_add_pd(kev, _mm_add_pd(_mm_mul_pd(_mm_mul_pd(dp1jweta, p24), dpl01), _mm_add_pd(_mm_mul_pd(dp0jweta, dpl10), _mm_mul_pd(dp2jweta, _mm_shuffle_pd(dpl2, dpl2, _MM_SHUFFLE2(0, 1))))));
1131:                 pdgduv = _mm_mul_pd(p05, _mm_add_pd(_mm_add_pd(_mm_mul_pd(p42, _mm_mul_pd(du0, dpl01)), _mm_mul_pd(p24, _mm_mul_pd(dv1, dpl01))), _mm_add_pd(_mm_mul_pd(du1pdv0, dpl10), _mm_mul_pd(pdu2dv2, _mm_set1_pd(dpl[2]))))); /* [dgdu, dgdv] */
1132:                 t0     = _mm_mul_pd(jwdeta, pdgduv); /* jw deta [dgdu, dgdv] */
1133:                 t3     = _mm_mul_pd(t0, du1pdv0);    /* t0 (du1 + dv0) */
1134:                 _mm_storeu_pd(&Ke[l * 2 + 0][ll * 2 + 0], _mm_add_pd(keu, _mm_add_pd(_mm_mul_pd(t1, t0), _mm_add_pd(_mm_mul_pd(dp1, t3), _mm_mul_pd(t0, _mm_mul_pd(dp2, du2))))));
1135:                 _mm_storeu_pd(&Ke[l * 2 + 1][ll * 2 + 0], _mm_add_pd(kev, _mm_add_pd(_mm_mul_pd(t2, t0), _mm_add_pd(_mm_mul_pd(dp0, t3), _mm_mul_pd(t0, _mm_mul_pd(dp2, dv2))))));
1136:               }
1137: #endif
1138:             }
1139:           }
1140:         }
1141:         if (k == 0) { /* on a bottom face */
1142:           if (thi->no_slip) {
1143:             const PetscReal   hz    = PetscRealPart(pn[0].h) / (zm - 1);
1144:             const PetscScalar diagu = 2 * etabase / thi->rhog * (hx * hy / hz + hx * hz / hy + 4 * hy * hz / hx), diagv = 2 * etabase / thi->rhog * (hx * hy / hz + 4 * hx * hz / hy + hy * hz / hx);
1145:             Ke[0][0] = thi->dirichlet_scale * diagu;
1146:             Ke[1][1] = thi->dirichlet_scale * diagv;
1147:           } else {
1148:             for (q = 0; q < 4; q++) {
1149:               const PetscReal jw = 0.25 * hx * hy / thi->rhog, *phi = QuadQInterp[q];
1150:               PetscScalar     u = 0, v = 0, rbeta2 = 0;
1151:               PetscReal       beta2, dbeta2;
1152:               for (l = 0; l < 4; l++) {
1153:                 u += phi[l] * n[l].u;
1154:                 v += phi[l] * n[l].v;
1155:                 rbeta2 += phi[l] * pn[l].beta2;
1156:               }
1157:               THIFriction(thi, PetscRealPart(rbeta2), PetscRealPart(u * u + v * v) / 2, &beta2, &dbeta2);
1158:               for (l = 0; l < 4; l++) {
1159:                 const PetscReal pp = phi[l];
1160:                 for (ll = 0; ll < 4; ll++) {
1161:                   const PetscReal ppl = phi[ll];
1162:                   Ke[l * 2 + 0][ll * 2 + 0] += pp * jw * beta2 * ppl + pp * jw * dbeta2 * u * u * ppl;
1163:                   Ke[l * 2 + 0][ll * 2 + 1] += pp * jw * dbeta2 * u * v * ppl;
1164:                   Ke[l * 2 + 1][ll * 2 + 0] += pp * jw * dbeta2 * v * u * ppl;
1165:                   Ke[l * 2 + 1][ll * 2 + 1] += pp * jw * beta2 * ppl + pp * jw * dbeta2 * v * v * ppl;
1166:                 }
1167:               }
1168:             }
1169:           }
1170:         }
1171:         {
1172:           const MatStencil rc[8] = {
1173:             {i,     j,     k,     0},
1174:             {i + 1, j,     k,     0},
1175:             {i + 1, j + 1, k,     0},
1176:             {i,     j + 1, k,     0},
1177:             {i,     j,     k + 1, 0},
1178:             {i + 1, j,     k + 1, 0},
1179:             {i + 1, j + 1, k + 1, 0},
1180:             {i,     j + 1, k + 1, 0}
1181:           };
1182:           if (amode == THIASSEMBLY_TRIDIAGONAL) {
1183:             for (l = 0; l < 4; l++) { /* Copy out each of the blocks, discarding horizontal coupling */
1184:               const PetscInt   l4     = l + 4;
1185:               const MatStencil rcl[2] = {
1186:                 {rc[l].k,  rc[l].j,  rc[l].i,  0},
1187:                 {rc[l4].k, rc[l4].j, rc[l4].i, 0}
1188:               };
1189: #if defined COMPUTE_LOWER_TRIANGULAR
1190:               const PetscScalar Kel[4][4] = {
1191:                 {Ke[2 * l + 0][2 * l + 0],  Ke[2 * l + 0][2 * l + 1],  Ke[2 * l + 0][2 * l4 + 0],  Ke[2 * l + 0][2 * l4 + 1] },
1192:                 {Ke[2 * l + 1][2 * l + 0],  Ke[2 * l + 1][2 * l + 1],  Ke[2 * l + 1][2 * l4 + 0],  Ke[2 * l + 1][2 * l4 + 1] },
1193:                 {Ke[2 * l4 + 0][2 * l + 0], Ke[2 * l4 + 0][2 * l + 1], Ke[2 * l4 + 0][2 * l4 + 0], Ke[2 * l4 + 0][2 * l4 + 1]},
1194:                 {Ke[2 * l4 + 1][2 * l + 0], Ke[2 * l4 + 1][2 * l + 1], Ke[2 * l4 + 1][2 * l4 + 0], Ke[2 * l4 + 1][2 * l4 + 1]}
1195:               };
1196: #else
1197:               /* Same as above except for the lower-left block */
1198:               const PetscScalar Kel[4][4] = {
1199:                 {Ke[2 * l + 0][2 * l + 0],  Ke[2 * l + 0][2 * l + 1],  Ke[2 * l + 0][2 * l4 + 0],  Ke[2 * l + 0][2 * l4 + 1] },
1200:                 {Ke[2 * l + 1][2 * l + 0],  Ke[2 * l + 1][2 * l + 1],  Ke[2 * l + 1][2 * l4 + 0],  Ke[2 * l + 1][2 * l4 + 1] },
1201:                 {Ke[2 * l + 0][2 * l4 + 0], Ke[2 * l + 1][2 * l4 + 0], Ke[2 * l4 + 0][2 * l4 + 0], Ke[2 * l4 + 0][2 * l4 + 1]},
1202:                 {Ke[2 * l + 0][2 * l4 + 1], Ke[2 * l + 1][2 * l4 + 1], Ke[2 * l4 + 1][2 * l4 + 0], Ke[2 * l4 + 1][2 * l4 + 1]}
1203:               };
1204: #endif
1205:               PetscCall(MatSetValuesBlockedStencil(B, 2, rcl, 2, rcl, &Kel[0][0], ADD_VALUES));
1206:             }
1207:           } else {
1208: #if !defined COMPUTE_LOWER_TRIANGULAR /* fill in lower-triangular part, this is really cheap compared to computing the entries */
1209:             for (l = 0; l < 8; l++) {
1210:               for (ll = l + 1; ll < 8; ll++) {
1211:                 Ke[ll * 2 + 0][l * 2 + 0] = Ke[l * 2 + 0][ll * 2 + 0];
1212:                 Ke[ll * 2 + 1][l * 2 + 0] = Ke[l * 2 + 0][ll * 2 + 1];
1213:                 Ke[ll * 2 + 0][l * 2 + 1] = Ke[l * 2 + 1][ll * 2 + 0];
1214:                 Ke[ll * 2 + 1][l * 2 + 1] = Ke[l * 2 + 1][ll * 2 + 1];
1215:               }
1216:             }
1217: #endif
1218:             PetscCall(MatSetValuesBlockedStencil(B, 8, rc, 8, rc, &Ke[0][0], ADD_VALUES));
1219:           }
1220:         }
1221:       }
1222:     }
1223:   }
1224:   PetscCall(THIDARestorePrm(info->da, &prm));

1226:   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
1227:   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
1228:   PetscCall(MatSetOption(B, MAT_SYMMETRIC, PETSC_TRUE));
1229:   if (thi->verbose) PetscCall(THIMatrixStatistics(thi, B, PETSC_VIEWER_STDOUT_WORLD));
1230:   PetscFunctionReturn(PETSC_SUCCESS);
1231: }

1233: static PetscErrorCode THIJacobianLocal_3D_Full(DMDALocalInfo *info, Node ***x, Mat A, Mat B, THI thi)
1234: {
1235:   PetscFunctionBeginUser;
1236:   PetscCall(THIJacobianLocal_3D(info, x, B, thi, THIASSEMBLY_FULL));
1237:   PetscFunctionReturn(PETSC_SUCCESS);
1238: }

1240: static PetscErrorCode THIJacobianLocal_3D_Tridiagonal(DMDALocalInfo *info, Node ***x, Mat A, Mat B, THI thi)
1241: {
1242:   PetscFunctionBeginUser;
1243:   PetscCall(THIJacobianLocal_3D(info, x, B, thi, THIASSEMBLY_TRIDIAGONAL));
1244:   PetscFunctionReturn(PETSC_SUCCESS);
1245: }

1247: static PetscErrorCode DMRefineHierarchy_THI(DM dac0, PetscInt nlevels, DM hierarchy[])
1248: {
1249:   THI             thi;
1250:   PetscInt        dim, M, N, m, n, s, dof;
1251:   DM              dac, daf;
1252:   DMDAStencilType st;
1253:   DM_DA          *ddf, *ddc;

1255:   PetscFunctionBeginUser;
1256:   PetscCall(PetscObjectQuery((PetscObject)dac0, "THI", (PetscObject *)&thi));
1257:   PetscCheck(thi, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot refine this DMDA, missing composed THI instance");
1258:   if (nlevels > 1) {
1259:     PetscCall(DMRefineHierarchy(dac0, nlevels - 1, hierarchy));
1260:     dac = hierarchy[nlevels - 2];
1261:   } else {
1262:     dac = dac0;
1263:   }
1264:   PetscCall(DMDAGetInfo(dac, &dim, &N, &M, 0, &n, &m, 0, &dof, &s, 0, 0, 0, &st));
1265:   PetscCheck(dim == 2, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "This function can only refine 2D DMDAs");

1267:   /* Creates a 3D DMDA with the same map-plane layout as the 2D one, with contiguous columns */
1268:   PetscCall(DMDACreate3d(PetscObjectComm((PetscObject)dac), DM_BOUNDARY_NONE, DM_BOUNDARY_PERIODIC, DM_BOUNDARY_PERIODIC, st, thi->zlevels, N, M, 1, n, m, dof, s, NULL, NULL, NULL, &daf));
1269:   PetscCall(DMSetUp(daf));

1271:   daf->ops->creatematrix        = dac->ops->creatematrix;
1272:   daf->ops->createinterpolation = dac->ops->createinterpolation;
1273:   daf->ops->getcoloring         = dac->ops->getcoloring;
1274:   ddf                           = (DM_DA *)daf->data;
1275:   ddc                           = (DM_DA *)dac->data;
1276:   ddf->interptype               = ddc->interptype;

1278:   PetscCall(DMDASetFieldName(daf, 0, "x-velocity"));
1279:   PetscCall(DMDASetFieldName(daf, 1, "y-velocity"));

1281:   hierarchy[nlevels - 1] = daf;
1282:   PetscFunctionReturn(PETSC_SUCCESS);
1283: }

1285: static PetscErrorCode DMCreateInterpolation_DA_THI(DM dac, DM daf, Mat *A, Vec *scale)
1286: {
1287:   PetscInt dim;

1289:   PetscFunctionBeginUser;
1292:   PetscAssertPointer(A, 3);
1293:   if (scale) PetscAssertPointer(scale, 4);
1294:   PetscCall(DMDAGetInfo(daf, &dim, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0));
1295:   if (dim == 2) {
1296:     /* We are in the 2D problem and use normal DMDA interpolation */
1297:     PetscCall(DMCreateInterpolation(dac, daf, A, scale));
1298:   } else {
1299:     PetscInt i, j, k, xs, ys, zs, xm, ym, zm, mx, my, mz, rstart, cstart;
1300:     Mat      B;

1302:     PetscCall(DMDAGetInfo(daf, 0, &mz, &my, &mx, 0, 0, 0, 0, 0, 0, 0, 0, 0));
1303:     PetscCall(DMDAGetCorners(daf, &zs, &ys, &xs, &zm, &ym, &xm));
1304:     PetscCheck(!zs, PETSC_COMM_SELF, PETSC_ERR_PLIB, "unexpected");
1305:     PetscCall(MatCreate(PetscObjectComm((PetscObject)daf), &B));
1306:     PetscCall(MatSetSizes(B, xm * ym * zm, xm * ym, mx * my * mz, mx * my));

1308:     PetscCall(MatSetType(B, MATAIJ));
1309:     PetscCall(MatSeqAIJSetPreallocation(B, 1, NULL));
1310:     PetscCall(MatMPIAIJSetPreallocation(B, 1, NULL, 0, NULL));
1311:     PetscCall(MatGetOwnershipRange(B, &rstart, NULL));
1312:     PetscCall(MatGetOwnershipRangeColumn(B, &cstart, NULL));
1313:     for (i = xs; i < xs + xm; i++) {
1314:       for (j = ys; j < ys + ym; j++) {
1315:         for (k = zs; k < zs + zm; k++) {
1316:           PetscInt    i2 = i * ym + j, i3 = i2 * zm + k;
1317:           PetscScalar val = ((k == 0 || k == mz - 1) ? 0.5 : 1.) / (mz - 1.); /* Integration using trapezoid rule */
1318:           PetscCall(MatSetValue(B, cstart + i3, rstart + i2, val, INSERT_VALUES));
1319:         }
1320:       }
1321:     }
1322:     PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
1323:     PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
1324:     PetscCall(MatCreateMAIJ(B, sizeof(Node) / sizeof(PetscScalar), A));
1325:     PetscCall(MatDestroy(&B));
1326:   }
1327:   PetscFunctionReturn(PETSC_SUCCESS);
1328: }

1330: static PetscErrorCode DMCreateMatrix_THI_Tridiagonal(DM da, Mat *J)
1331: {
1332:   Mat                    A;
1333:   PetscInt               xm, ym, zm, dim, dof = 2, starts[3], dims[3];
1334:   ISLocalToGlobalMapping ltog;

1336:   PetscFunctionBeginUser;
1337:   PetscCall(DMDAGetInfo(da, &dim, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0));
1338:   PetscCheck(dim == 3, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Expected DMDA to be 3D");
1339:   PetscCall(DMDAGetCorners(da, 0, 0, 0, &zm, &ym, &xm));
1340:   PetscCall(DMGetLocalToGlobalMapping(da, &ltog));
1341:   PetscCall(MatCreate(PetscObjectComm((PetscObject)da), &A));
1342:   PetscCall(MatSetSizes(A, dof * xm * ym * zm, dof * xm * ym * zm, PETSC_DETERMINE, PETSC_DETERMINE));
1343:   PetscCall(MatSetType(A, da->mattype));
1344:   PetscCall(MatSetFromOptions(A));
1345:   PetscCall(MatSeqAIJSetPreallocation(A, 3 * 2, NULL));
1346:   PetscCall(MatMPIAIJSetPreallocation(A, 3 * 2, NULL, 0, NULL));
1347:   PetscCall(MatSeqBAIJSetPreallocation(A, 2, 3, NULL));
1348:   PetscCall(MatMPIBAIJSetPreallocation(A, 2, 3, NULL, 0, NULL));
1349:   PetscCall(MatSeqSBAIJSetPreallocation(A, 2, 2, NULL));
1350:   PetscCall(MatMPISBAIJSetPreallocation(A, 2, 2, NULL, 0, NULL));
1351:   PetscCall(MatSetLocalToGlobalMapping(A, ltog, ltog));
1352:   PetscCall(DMDAGetGhostCorners(da, &starts[0], &starts[1], &starts[2], &dims[0], &dims[1], &dims[2]));
1353:   PetscCall(MatSetStencil(A, dim, dims, starts, dof));
1354:   *J = A;
1355:   PetscFunctionReturn(PETSC_SUCCESS);
1356: }

1358: static PetscErrorCode THIDAVecView_VTK_XML(THI thi, DM da, Vec X, const char filename[])
1359: {
1360:   const PetscInt     dof   = 2;
1361:   Units              units = thi->units;
1362:   MPI_Comm           comm;
1363:   PetscViewer        viewer;
1364:   PetscMPIInt        rank, size, tag, nn, nmax;
1365:   PetscInt           mx, my, mz, r, range[6];
1366:   const PetscScalar *x;

1368:   PetscFunctionBeginUser;
1369:   PetscCall(PetscObjectGetComm((PetscObject)thi, &comm));
1370:   PetscCall(DMDAGetInfo(da, 0, &mz, &my, &mx, 0, 0, 0, 0, 0, 0, 0, 0, 0));
1371:   PetscCallMPI(MPI_Comm_size(comm, &size));
1372:   PetscCallMPI(MPI_Comm_rank(comm, &rank));
1373:   PetscCall(PetscViewerASCIIOpen(comm, filename, &viewer));
1374:   PetscCall(PetscViewerASCIIPrintf(viewer, "<VTKFile type=\"StructuredGrid\" version=\"0.1\" byte_order=\"LittleEndian\">\n"));
1375:   PetscCall(PetscViewerASCIIPrintf(viewer, "  <StructuredGrid WholeExtent=\"%d %" PetscInt_FMT " %d %" PetscInt_FMT " %d %" PetscInt_FMT "\">\n", 0, mz - 1, 0, my - 1, 0, mx - 1));

1377:   PetscCall(DMDAGetCorners(da, range, range + 1, range + 2, range + 3, range + 4, range + 5));
1378:   PetscCall(PetscMPIIntCast(range[3] * range[4] * range[5] * dof, &nn));
1379:   PetscCallMPI(MPI_Reduce(&nn, &nmax, 1, MPI_INT, MPI_MAX, 0, comm));
1380:   tag = ((PetscObject)viewer)->tag;
1381:   PetscCall(VecGetArrayRead(X, &x));
1382:   if (rank == 0) {
1383:     PetscScalar *array;
1384:     PetscCall(PetscMalloc1(nmax, &array));
1385:     for (r = 0; r < size; r++) {
1386:       PetscInt           i, j, k, xs, xm, ys, ym, zs, zm;
1387:       const PetscScalar *ptr;
1388:       MPI_Status         status;
1389:       if (r) PetscCallMPI(MPI_Recv(range, 6, MPIU_INT, r, tag, comm, MPI_STATUS_IGNORE));
1390:       zs = range[0];
1391:       ys = range[1];
1392:       xs = range[2];
1393:       zm = range[3];
1394:       ym = range[4];
1395:       xm = range[5];
1396:       PetscCheck(xm * ym * zm * dof <= nmax, PETSC_COMM_SELF, PETSC_ERR_PLIB, "should not happen");
1397:       if (r) {
1398:         PetscCallMPI(MPI_Recv(array, nmax, MPIU_SCALAR, r, tag, comm, &status));
1399:         PetscCallMPI(MPI_Get_count(&status, MPIU_SCALAR, &nn));
1400:         PetscCheck(nn == xm * ym * zm * dof, PETSC_COMM_SELF, PETSC_ERR_PLIB, "should not happen");
1401:         ptr = array;
1402:       } else ptr = x;
1403:       PetscCall(PetscViewerASCIIPrintf(viewer, "    <Piece Extent=\"%" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT "\">\n", zs, zs + zm - 1, ys, ys + ym - 1, xs, xs + xm - 1));

1405:       PetscCall(PetscViewerASCIIPrintf(viewer, "      <Points>\n"));
1406:       PetscCall(PetscViewerASCIIPrintf(viewer, "        <DataArray type=\"Float32\" NumberOfComponents=\"3\" format=\"ascii\">\n"));
1407:       for (i = xs; i < xs + xm; i++) {
1408:         for (j = ys; j < ys + ym; j++) {
1409:           for (k = zs; k < zs + zm; k++) {
1410:             PrmNode   p;
1411:             PetscReal xx = thi->Lx * i / mx, yy = thi->Ly * j / my, zz;
1412:             thi->initialize(thi, xx, yy, &p);
1413:             zz = PetscRealPart(p.b) + PetscRealPart(p.h) * k / (mz - 1);
1414:             PetscCall(PetscViewerASCIIPrintf(viewer, "%f %f %f\n", (double)xx, (double)yy, (double)zz));
1415:           }
1416:         }
1417:       }
1418:       PetscCall(PetscViewerASCIIPrintf(viewer, "        </DataArray>\n"));
1419:       PetscCall(PetscViewerASCIIPrintf(viewer, "      </Points>\n"));

1421:       PetscCall(PetscViewerASCIIPrintf(viewer, "      <PointData>\n"));
1422:       PetscCall(PetscViewerASCIIPrintf(viewer, "        <DataArray type=\"Float32\" Name=\"velocity\" NumberOfComponents=\"3\" format=\"ascii\">\n"));
1423:       for (i = 0; i < nn; i += dof) PetscCall(PetscViewerASCIIPrintf(viewer, "%f %f %f\n", (double)(PetscRealPart(ptr[i]) * units->year / units->meter), (double)(PetscRealPart(ptr[i + 1]) * units->year / units->meter), 0.0));
1424:       PetscCall(PetscViewerASCIIPrintf(viewer, "        </DataArray>\n"));

1426:       PetscCall(PetscViewerASCIIPrintf(viewer, "        <DataArray type=\"Int32\" Name=\"rank\" NumberOfComponents=\"1\" format=\"ascii\">\n"));
1427:       for (i = 0; i < nn; i += dof) PetscCall(PetscViewerASCIIPrintf(viewer, "%" PetscInt_FMT "\n", r));
1428:       PetscCall(PetscViewerASCIIPrintf(viewer, "        </DataArray>\n"));
1429:       PetscCall(PetscViewerASCIIPrintf(viewer, "      </PointData>\n"));

1431:       PetscCall(PetscViewerASCIIPrintf(viewer, "    </Piece>\n"));
1432:     }
1433:     PetscCall(PetscFree(array));
1434:   } else {
1435:     PetscCallMPI(MPI_Send(range, 6, MPIU_INT, 0, tag, comm));
1436:     PetscCallMPI(MPI_Send((PetscScalar *)x, nn, MPIU_SCALAR, 0, tag, comm));
1437:   }
1438:   PetscCall(VecRestoreArrayRead(X, &x));
1439:   PetscCall(PetscViewerASCIIPrintf(viewer, "  </StructuredGrid>\n"));
1440:   PetscCall(PetscViewerASCIIPrintf(viewer, "</VTKFile>\n"));
1441:   PetscCall(PetscViewerDestroy(&viewer));
1442:   PetscFunctionReturn(PETSC_SUCCESS);
1443: }

1445: int main(int argc, char *argv[])
1446: {
1447:   MPI_Comm comm;
1448:   THI      thi;
1449:   DM       da;
1450:   SNES     snes;

1452:   PetscFunctionBeginUser;
1453:   PetscCall(PetscInitialize(&argc, &argv, 0, help));
1454:   comm = PETSC_COMM_WORLD;

1456:   PetscCall(THICreate(comm, &thi));
1457:   {
1458:     PetscInt M = 3, N = 3, P = 2;
1459:     PetscOptionsBegin(comm, NULL, "Grid resolution options", "");
1460:     {
1461:       PetscCall(PetscOptionsInt("-M", "Number of elements in x-direction on coarse level", "", M, &M, NULL));
1462:       N = M;
1463:       PetscCall(PetscOptionsInt("-N", "Number of elements in y-direction on coarse level (if different from M)", "", N, &N, NULL));
1464:       if (thi->coarse2d) {
1465:         PetscCall(PetscOptionsInt("-zlevels", "Number of elements in z-direction on fine level", "", thi->zlevels, &thi->zlevels, NULL));
1466:       } else {
1467:         PetscCall(PetscOptionsInt("-P", "Number of elements in z-direction on coarse level", "", P, &P, NULL));
1468:       }
1469:     }
1470:     PetscOptionsEnd();
1471:     if (thi->coarse2d) {
1472:       PetscCall(DMDACreate2d(comm, DM_BOUNDARY_PERIODIC, DM_BOUNDARY_PERIODIC, DMDA_STENCIL_BOX, N, M, PETSC_DETERMINE, PETSC_DETERMINE, sizeof(Node) / sizeof(PetscScalar), 1, 0, 0, &da));
1473:       PetscCall(DMSetFromOptions(da));
1474:       PetscCall(DMSetUp(da));
1475:       da->ops->refinehierarchy     = DMRefineHierarchy_THI;
1476:       da->ops->createinterpolation = DMCreateInterpolation_DA_THI;

1478:       PetscCall(PetscObjectCompose((PetscObject)da, "THI", (PetscObject)thi));
1479:     } else {
1480:       PetscCall(DMDACreate3d(comm, DM_BOUNDARY_NONE, DM_BOUNDARY_PERIODIC, DM_BOUNDARY_PERIODIC, DMDA_STENCIL_BOX, P, N, M, 1, PETSC_DETERMINE, PETSC_DETERMINE, sizeof(Node) / sizeof(PetscScalar), 1, 0, 0, 0, &da));
1481:       PetscCall(DMSetFromOptions(da));
1482:       PetscCall(DMSetUp(da));
1483:     }
1484:     PetscCall(DMDASetFieldName(da, 0, "x-velocity"));
1485:     PetscCall(DMDASetFieldName(da, 1, "y-velocity"));
1486:   }
1487:   PetscCall(THISetUpDM(thi, da));
1488:   if (thi->tridiagonal) da->ops->creatematrix = DMCreateMatrix_THI_Tridiagonal;

1490:   { /* Set the fine level matrix type if -da_refine */
1491:     PetscInt rlevel, clevel;
1492:     PetscCall(DMGetRefineLevel(da, &rlevel));
1493:     PetscCall(DMGetCoarsenLevel(da, &clevel));
1494:     if (rlevel - clevel > 0) PetscCall(DMSetMatType(da, thi->mattype));
1495:   }

1497:   PetscCall(DMDASNESSetFunctionLocal(da, ADD_VALUES, (DMDASNESFunctionFn *)THIFunctionLocal, thi));
1498:   if (thi->tridiagonal) {
1499:     PetscCall(DMDASNESSetJacobianLocal(da, (DMDASNESJacobianFn *)THIJacobianLocal_3D_Tridiagonal, thi));
1500:   } else {
1501:     PetscCall(DMDASNESSetJacobianLocal(da, (DMDASNESJacobianFn *)THIJacobianLocal_3D_Full, thi));
1502:   }
1503:   PetscCall(DMCoarsenHookAdd(da, DMCoarsenHook_THI, NULL, thi));
1504:   PetscCall(DMRefineHookAdd(da, DMRefineHook_THI, NULL, thi));

1506:   PetscCall(DMSetApplicationContext(da, thi));

1508:   PetscCall(SNESCreate(comm, &snes));
1509:   PetscCall(SNESSetDM(snes, da));
1510:   PetscCall(DMDestroy(&da));
1511:   PetscCall(SNESSetComputeInitialGuess(snes, THIInitial, NULL));
1512:   PetscCall(SNESSetFromOptions(snes));

1514:   PetscCall(SNESSolve(snes, NULL, NULL));

1516:   PetscCall(THISolveStatistics(thi, snes, 0, "Full"));

1518:   {
1519:     PetscBool flg;
1520:     char      filename[PETSC_MAX_PATH_LEN] = "";
1521:     PetscCall(PetscOptionsGetString(NULL, NULL, "-o", filename, sizeof(filename), &flg));
1522:     if (flg) {
1523:       Vec X;
1524:       DM  dm;
1525:       PetscCall(SNESGetSolution(snes, &X));
1526:       PetscCall(SNESGetDM(snes, &dm));
1527:       PetscCall(THIDAVecView_VTK_XML(thi, dm, X, filename));
1528:     }
1529:   }

1531:   PetscCall(DMDestroy(&da));
1532:   PetscCall(SNESDestroy(&snes));
1533:   PetscCall(THIDestroy(&thi));
1534:   PetscCall(PetscFinalize());
1535:   return 0;
1536: }

1538: /*TEST

1540:    build:
1541:       requires: !single

1543:    test:
1544:       args: -M 6 -P 4 -da_refine 1 -snes_monitor_short -snes_converged_reason -ksp_monitor_short -ksp_converged_reason -thi_mat_type sbaij -ksp_type fgmres -pc_type mg -pc_mg_type full -mg_levels_ksp_type gmres -mg_levels_ksp_max_it 1 -mg_levels_pc_type icc

1546:    test:
1547:       suffix: 2
1548:       nsize: 2
1549:       args: -M 6 -P 4 -thi_hom z -snes_monitor_short -snes_converged_reason -ksp_monitor_short -ksp_converged_reason -thi_mat_type sbaij -ksp_type fgmres -pc_type mg -pc_mg_type full -mg_levels_ksp_type gmres -mg_levels_ksp_max_it 1 -mg_levels_pc_type asm -mg_levels_pc_asm_blocks 6 -mg_levels_0_pc_type redundant -snes_grid_sequence 1 -mat_partitioning_type current -ksp_atol -1

1551:    test:
1552:       suffix: 3
1553:       nsize: 3
1554:       args: -M 7 -P 4 -thi_hom z -da_refine 1 -snes_monitor_short -snes_converged_reason -ksp_monitor_short -ksp_converged_reason -thi_mat_type baij -ksp_type fgmres -pc_type mg -pc_mg_type full -mg_levels_pc_asm_type restrict -mg_levels_pc_type asm -mg_levels_pc_asm_blocks 9 -mg_levels_ksp_type gmres -mg_levels_ksp_max_it 1 -mat_partitioning_type current

1556:    test:
1557:       suffix: 4
1558:       nsize: 6
1559:       args: -M 4 -P 2 -da_refine_hierarchy_x 1,1,3 -da_refine_hierarchy_y 2,2,1 -da_refine_hierarchy_z 2,2,1 -snes_grid_sequence 3 -ksp_converged_reason -ksp_type fgmres -ksp_rtol 1e-2 -pc_type mg -mg_levels_ksp_type gmres -mg_levels_ksp_max_it 1 -mg_levels_pc_type bjacobi -mg_levels_1_sub_pc_type cholesky -pc_mg_type multiplicative -snes_converged_reason -snes_stol 1e-12 -thi_L 80e3 -thi_alpha 0.05 -thi_friction_m 1 -thi_hom x -snes_view -mg_levels_0_pc_type redundant -mg_levels_0_ksp_type preonly -ksp_atol -1

1561:    test:
1562:       suffix: 5
1563:       nsize: 6
1564:       args: -M 12 -P 5 -snes_monitor_short -ksp_converged_reason -pc_type asm -pc_asm_type restrict -dm_mat_type {{aij baij sbaij}}

1566: TEST*/