Actual source code: ex48.c
1: static const char help[] = "Toy hydrostatic ice flow with multigrid in 3D.\n\
2: \n\
3: Solves the hydrostatic (aka Blatter/Pattyn/First Order) equations for ice sheet flow\n\
4: using multigrid. The ice uses a power-law rheology with \"Glen\" exponent 3 (corresponds\n\
5: to p=4/3 in a p-Laplacian). The focus is on ISMIP-HOM experiments which assume periodic\n\
6: boundary conditions in the x- and y-directions.\n\
7: \n\
8: Equations are rescaled so that the domain size and solution are O(1), details of this scaling\n\
9: can be controlled by the options -units_meter, -units_second, and -units_kilogram.\n\
10: \n\
11: A VTK StructuredGrid output file can be written using the option -o filename.vts\n\
12: \n\n";
14: /*
15: The equations for horizontal velocity (u,v) are
17: - [eta (4 u_x + 2 v_y)]_x - [eta (u_y + v_x)]_y - [eta u_z]_z + rho g s_x = 0
18: - [eta (4 v_y + 2 u_x)]_y - [eta (u_y + v_x)]_x - [eta v_z]_z + rho g s_y = 0
20: where
22: eta = B/2 (epsilon + gamma)^((p-2)/2)
24: is the nonlinear effective viscosity with regularization epsilon and hardness parameter B,
25: written in terms of the second invariant
27: gamma = u_x^2 + v_y^2 + u_x v_y + (1/4) (u_y + v_x)^2 + (1/4) u_z^2 + (1/4) v_z^2
29: The surface boundary conditions are the natural conditions. The basal boundary conditions
30: are either no-slip, or Navier (linear) slip with spatially variant friction coefficient beta^2.
32: In the code, the equations for (u,v) are multiplied through by 1/(rho g) so that residuals are O(1).
34: The discretization is Q1 finite elements, managed by a DMDA. The grid is never distorted in the
35: map (x,y) plane, but the bed and surface may be bumpy. This is handled as usual in FEM, through
36: the Jacobian of the coordinate transformation from a reference element to the physical element.
38: Since ice-flow is tightly coupled in the z-direction (within columns), the DMDA is managed
39: specially so that columns are never distributed, and are always contiguous in memory.
40: This amounts to reversing the meaning of X,Y,Z compared to the DMDA's internal interpretation,
41: and then indexing as vec[i][j][k]. The exotic coarse spaces require 2D DMDAs which are made to
42: use compatible domain decomposition relative to the 3D DMDAs.
44: There are two compile-time options:
46: NO_SSE2:
47: If the host supports SSE2, we use integration code that has been vectorized with SSE2
48: intrinsics, unless this macro is defined. The intrinsics speed up integration by about
49: 30% on my architecture (P8700, gcc-4.5 snapshot).
51: COMPUTE_LOWER_TRIANGULAR:
52: The element matrices we assemble are lower-triangular so it is not necessary to compute
53: all entries explicitly. If this macro is defined, the lower-triangular entries are
54: computed explicitly.
56: */
58: #if defined(PETSC_APPLE_FRAMEWORK)
59: #import <PETSc/petscsnes.h>
60: #import <PETSc/petsc/private/dmdaimpl.h> /* There is not yet a public interface to manipulate dm->ops */
61: #else
63: #include <petscsnes.h>
64: #include <petsc/private/dmdaimpl.h>
65: #endif
66: #include <ctype.h> /* toupper() */
68: #if defined(__cplusplus) || defined (PETSC_HAVE_WINDOWS_COMPILERS) || defined (__PGI)
69: /* c++ cannot handle [_restrict_] notation like C does */
70: #undef PETSC_RESTRICT
71: #define PETSC_RESTRICT
72: #endif
74: #if defined __SSE2__
75: # include <emmintrin.h>
76: #endif
78: /* The SSE2 kernels are only for PetscScalar=double on architectures that support it */
79: #if !defined NO_SSE2 \
80: && !defined PETSC_USE_COMPLEX \
81: && !defined PETSC_USE_REAL_SINGLE \
82: && !defined PETSC_USE_REAL___FLOAT128 \
83: && !defined PETSC_USE_REAL___FP16 \
84: && defined __SSE2__
85: #define USE_SSE2_KERNELS 1
86: #else
87: #define USE_SSE2_KERNELS 0
88: #endif
90: static PetscClassId THI_CLASSID;
92: typedef enum {QUAD_GAUSS,QUAD_LOBATTO} QuadratureType;
93: static const char *QuadratureTypes[] = {"gauss","lobatto","QuadratureType","QUAD_",0};
94: PETSC_UNUSED static const PetscReal HexQWeights[8] = {1,1,1,1,1,1,1,1};
95: PETSC_UNUSED static const PetscReal HexQNodes[] = {-0.57735026918962573, 0.57735026918962573};
96: #define G 0.57735026918962573
97: #define H (0.5*(1.+G))
98: #define L (0.5*(1.-G))
99: #define M (-0.5)
100: #define P (0.5)
101: /* Special quadrature: Lobatto in horizontal, Gauss in vertical */
102: static const PetscReal HexQInterp_Lobatto[8][8] = {{H,0,0,0,L,0,0,0},
103: {0,H,0,0,0,L,0,0},
104: {0,0,H,0,0,0,L,0},
105: {0,0,0,H,0,0,0,L},
106: {L,0,0,0,H,0,0,0},
107: {0,L,0,0,0,H,0,0},
108: {0,0,L,0,0,0,H,0},
109: {0,0,0,L,0,0,0,H}};
110: static const PetscReal HexQDeriv_Lobatto[8][8][3] = {
111: {{M*H,M*H,M},{P*H,0,0} ,{0,0,0} ,{0,P*H,0} ,{M*L,M*L,P},{P*L,0,0} ,{0,0,0} ,{0,P*L,0} },
112: {{M*H,0,0} ,{P*H,M*H,M},{0,P*H,0} ,{0,0,0} ,{M*L,0,0} ,{P*L,M*L,P},{0,P*L,0} ,{0,0,0} },
113: {{0,0,0} ,{0,M*H,0} ,{P*H,P*H,M},{M*H,0,0} ,{0,0,0} ,{0,M*L,0} ,{P*L,P*L,P},{M*L,0,0} },
114: {{0,M*H,0} ,{0,0,0} ,{P*H,0,0} ,{M*H,P*H,M},{0,M*L,0} ,{0,0,0} ,{P*L,0,0} ,{M*L,P*L,P}},
115: {{M*L,M*L,M},{P*L,0,0} ,{0,0,0} ,{0,P*L,0} ,{M*H,M*H,P},{P*H,0,0} ,{0,0,0} ,{0,P*H,0} },
116: {{M*L,0,0} ,{P*L,M*L,M},{0,P*L,0} ,{0,0,0} ,{M*H,0,0} ,{P*H,M*H,P},{0,P*H,0} ,{0,0,0} },
117: {{0,0,0} ,{0,M*L,0} ,{P*L,P*L,M},{M*L,0,0} ,{0,0,0} ,{0,M*H,0} ,{P*H,P*H,P},{M*H,0,0} },
118: {{0,M*L,0} ,{0,0,0} ,{P*L,0,0} ,{M*L,P*L,M},{0,M*H,0} ,{0,0,0} ,{P*H,0,0} ,{M*H,P*H,P}}};
119: /* Stanndard Gauss */
120: static const PetscReal HexQInterp_Gauss[8][8] = {{H*H*H,L*H*H,L*L*H,H*L*H, H*H*L,L*H*L,L*L*L,H*L*L},
121: {L*H*H,H*H*H,H*L*H,L*L*H, L*H*L,H*H*L,H*L*L,L*L*L},
122: {L*L*H,H*L*H,H*H*H,L*H*H, L*L*L,H*L*L,H*H*L,L*H*L},
123: {H*L*H,L*L*H,L*H*H,H*H*H, H*L*L,L*L*L,L*H*L,H*H*L},
124: {H*H*L,L*H*L,L*L*L,H*L*L, H*H*H,L*H*H,L*L*H,H*L*H},
125: {L*H*L,H*H*L,H*L*L,L*L*L, L*H*H,H*H*H,H*L*H,L*L*H},
126: {L*L*L,H*L*L,H*H*L,L*H*L, L*L*H,H*L*H,H*H*H,L*H*H},
127: {H*L*L,L*L*L,L*H*L,H*H*L, H*L*H,L*L*H,L*H*H,H*H*H}};
128: static const PetscReal HexQDeriv_Gauss[8][8][3] = {
129: {{M*H*H,H*M*H,H*H*M},{P*H*H,L*M*H,L*H*M},{P*L*H,L*P*H,L*L*M},{M*L*H,H*P*H,H*L*M}, {M*H*L,H*M*L,H*H*P},{P*H*L,L*M*L,L*H*P},{P*L*L,L*P*L,L*L*P},{M*L*L,H*P*L,H*L*P}},
130: {{M*H*H,L*M*H,L*H*M},{P*H*H,H*M*H,H*H*M},{P*L*H,H*P*H,H*L*M},{M*L*H,L*P*H,L*L*M}, {M*H*L,L*M*L,L*H*P},{P*H*L,H*M*L,H*H*P},{P*L*L,H*P*L,H*L*P},{M*L*L,L*P*L,L*L*P}},
131: {{M*L*H,L*M*H,L*L*M},{P*L*H,H*M*H,H*L*M},{P*H*H,H*P*H,H*H*M},{M*H*H,L*P*H,L*H*M}, {M*L*L,L*M*L,L*L*P},{P*L*L,H*M*L,H*L*P},{P*H*L,H*P*L,H*H*P},{M*H*L,L*P*L,L*H*P}},
132: {{M*L*H,H*M*H,H*L*M},{P*L*H,L*M*H,L*L*M},{P*H*H,L*P*H,L*H*M},{M*H*H,H*P*H,H*H*M}, {M*L*L,H*M*L,H*L*P},{P*L*L,L*M*L,L*L*P},{P*H*L,L*P*L,L*H*P},{M*H*L,H*P*L,H*H*P}},
133: {{M*H*L,H*M*L,H*H*M},{P*H*L,L*M*L,L*H*M},{P*L*L,L*P*L,L*L*M},{M*L*L,H*P*L,H*L*M}, {M*H*H,H*M*H,H*H*P},{P*H*H,L*M*H,L*H*P},{P*L*H,L*P*H,L*L*P},{M*L*H,H*P*H,H*L*P}},
134: {{M*H*L,L*M*L,L*H*M},{P*H*L,H*M*L,H*H*M},{P*L*L,H*P*L,H*L*M},{M*L*L,L*P*L,L*L*M}, {M*H*H,L*M*H,L*H*P},{P*H*H,H*M*H,H*H*P},{P*L*H,H*P*H,H*L*P},{M*L*H,L*P*H,L*L*P}},
135: {{M*L*L,L*M*L,L*L*M},{P*L*L,H*M*L,H*L*M},{P*H*L,H*P*L,H*H*M},{M*H*L,L*P*L,L*H*M}, {M*L*H,L*M*H,L*L*P},{P*L*H,H*M*H,H*L*P},{P*H*H,H*P*H,H*H*P},{M*H*H,L*P*H,L*H*P}},
136: {{M*L*L,H*M*L,H*L*M},{P*L*L,L*M*L,L*L*M},{P*H*L,L*P*L,L*H*M},{M*H*L,H*P*L,H*H*M}, {M*L*H,H*M*H,H*L*P},{P*L*H,L*M*H,L*L*P},{P*H*H,L*P*H,L*H*P},{M*H*H,H*P*H,H*H*P}}};
137: static const PetscReal (*HexQInterp)[8],(*HexQDeriv)[8][3];
138: /* Standard 2x2 Gauss quadrature for the bottom layer. */
139: static const PetscReal QuadQInterp[4][4] = {{H*H,L*H,L*L,H*L},
140: {L*H,H*H,H*L,L*L},
141: {L*L,H*L,H*H,L*H},
142: {H*L,L*L,L*H,H*H}};
143: static const PetscReal QuadQDeriv[4][4][2] = {
144: {{M*H,M*H},{P*H,M*L},{P*L,P*L},{M*L,P*H}},
145: {{M*H,M*L},{P*H,M*H},{P*L,P*H},{M*L,P*L}},
146: {{M*L,M*L},{P*L,M*H},{P*H,P*H},{M*H,P*L}},
147: {{M*L,M*H},{P*L,M*L},{P*H,P*L},{M*H,P*H}}};
148: #undef G
149: #undef H
150: #undef L
151: #undef M
152: #undef P
154: #define HexExtract(x,i,j,k,n) do { \
155: (n)[0] = (x)[i][j][k]; \
156: (n)[1] = (x)[i+1][j][k]; \
157: (n)[2] = (x)[i+1][j+1][k]; \
158: (n)[3] = (x)[i][j+1][k]; \
159: (n)[4] = (x)[i][j][k+1]; \
160: (n)[5] = (x)[i+1][j][k+1]; \
161: (n)[6] = (x)[i+1][j+1][k+1]; \
162: (n)[7] = (x)[i][j+1][k+1]; \
163: } while (0)
165: #define HexExtractRef(x,i,j,k,n) do { \
166: (n)[0] = &(x)[i][j][k]; \
167: (n)[1] = &(x)[i+1][j][k]; \
168: (n)[2] = &(x)[i+1][j+1][k]; \
169: (n)[3] = &(x)[i][j+1][k]; \
170: (n)[4] = &(x)[i][j][k+1]; \
171: (n)[5] = &(x)[i+1][j][k+1]; \
172: (n)[6] = &(x)[i+1][j+1][k+1]; \
173: (n)[7] = &(x)[i][j+1][k+1]; \
174: } while (0)
176: #define QuadExtract(x,i,j,n) do { \
177: (n)[0] = (x)[i][j]; \
178: (n)[1] = (x)[i+1][j]; \
179: (n)[2] = (x)[i+1][j+1]; \
180: (n)[3] = (x)[i][j+1]; \
181: } while (0)
183: static void HexGrad(const PetscReal dphi[][3],const PetscReal zn[],PetscReal dz[])
184: {
185: PetscInt i;
186: dz[0] = dz[1] = dz[2] = 0;
187: for (i=0; i<8; i++) {
188: dz[0] += dphi[i][0] * zn[i];
189: dz[1] += dphi[i][1] * zn[i];
190: dz[2] += dphi[i][2] * zn[i];
191: }
192: }
194: static void HexComputeGeometry(PetscInt q,PetscReal hx,PetscReal hy,const PetscReal dz[PETSC_RESTRICT],PetscReal phi[PETSC_RESTRICT],PetscReal dphi[PETSC_RESTRICT][3],PetscReal *PETSC_RESTRICT jw)
195: {
196: const PetscReal jac[3][3] = {{hx/2,0,0}, {0,hy/2,0}, {dz[0],dz[1],dz[2]}};
197: const PetscReal ijac[3][3] = {{1/jac[0][0],0,0}, {0,1/jac[1][1],0}, {-jac[2][0]/(jac[0][0]*jac[2][2]),-jac[2][1]/(jac[1][1]*jac[2][2]),1/jac[2][2]}};
198: const PetscReal jdet = jac[0][0]*jac[1][1]*jac[2][2];
199: PetscInt i;
201: for (i=0; i<8; i++) {
202: const PetscReal *dphir = HexQDeriv[q][i];
203: phi[i] = HexQInterp[q][i];
204: dphi[i][0] = dphir[0]*ijac[0][0] + dphir[1]*ijac[1][0] + dphir[2]*ijac[2][0];
205: dphi[i][1] = dphir[0]*ijac[0][1] + dphir[1]*ijac[1][1] + dphir[2]*ijac[2][1];
206: dphi[i][2] = dphir[0]*ijac[0][2] + dphir[1]*ijac[1][2] + dphir[2]*ijac[2][2];
207: }
208: *jw = 1.0 * jdet;
209: }
211: typedef struct _p_THI *THI;
212: typedef struct _n_Units *Units;
214: typedef struct {
215: PetscScalar u,v;
216: } Node;
218: typedef struct {
219: PetscScalar b; /* bed */
220: PetscScalar h; /* thickness */
221: PetscScalar beta2; /* friction */
222: } PrmNode;
224: typedef struct {
225: PetscReal min,max,cmin,cmax;
226: } PRange;
228: typedef enum {THIASSEMBLY_TRIDIAGONAL,THIASSEMBLY_FULL} THIAssemblyMode;
230: struct _p_THI {
231: PETSCHEADER(int);
232: void (*initialize)(THI,PetscReal x,PetscReal y,PrmNode *p);
233: PetscInt zlevels;
234: PetscReal Lx,Ly,Lz; /* Model domain */
235: PetscReal alpha; /* Bed angle */
236: Units units;
237: PetscReal dirichlet_scale;
238: PetscReal ssa_friction_scale;
239: PRange eta;
240: PRange beta2;
241: struct {
242: PetscReal Bd2,eps,exponent;
243: } viscosity;
244: struct {
245: PetscReal irefgam,eps2,exponent,refvel,epsvel;
246: } friction;
247: PetscReal rhog;
248: PetscBool no_slip;
249: PetscBool tridiagonal;
250: PetscBool coarse2d;
251: PetscBool verbose;
252: MatType mattype;
253: };
255: struct _n_Units {
256: /* fundamental */
257: PetscReal meter;
258: PetscReal kilogram;
259: PetscReal second;
260: /* derived */
261: PetscReal Pascal;
262: PetscReal year;
263: };
265: static PetscErrorCode THIJacobianLocal_3D_Full(DMDALocalInfo*,Node***,Mat,Mat,THI);
266: static PetscErrorCode THIJacobianLocal_3D_Tridiagonal(DMDALocalInfo*,Node***,Mat,Mat,THI);
267: static PetscErrorCode THIJacobianLocal_2D(DMDALocalInfo*,Node**,Mat,Mat,THI);
269: static void PrmHexGetZ(const PrmNode pn[],PetscInt k,PetscInt zm,PetscReal zn[])
270: {
271: const PetscScalar zm1 = zm-1,
272: znl[8] = {pn[0].b + pn[0].h*(PetscScalar)k/zm1,
273: pn[1].b + pn[1].h*(PetscScalar)k/zm1,
274: pn[2].b + pn[2].h*(PetscScalar)k/zm1,
275: pn[3].b + pn[3].h*(PetscScalar)k/zm1,
276: pn[0].b + pn[0].h*(PetscScalar)(k+1)/zm1,
277: pn[1].b + pn[1].h*(PetscScalar)(k+1)/zm1,
278: pn[2].b + pn[2].h*(PetscScalar)(k+1)/zm1,
279: pn[3].b + pn[3].h*(PetscScalar)(k+1)/zm1};
280: PetscInt i;
281: for (i=0; i<8; i++) zn[i] = PetscRealPart(znl[i]);
282: }
284: /* Tests A and C are from the ISMIP-HOM paper (Pattyn et al. 2008) */
285: static void THIInitialize_HOM_A(THI thi,PetscReal x,PetscReal y,PrmNode *p)
286: {
287: Units units = thi->units;
288: PetscReal s = -x*PetscSinReal(thi->alpha);
290: p->b = s - 1000*units->meter + 500*units->meter * PetscSinReal(x*2*PETSC_PI/thi->Lx) * PetscSinReal(y*2*PETSC_PI/thi->Ly);
291: p->h = s - p->b;
292: p->beta2 = 1e30;
293: }
295: static void THIInitialize_HOM_C(THI thi,PetscReal x,PetscReal y,PrmNode *p)
296: {
297: Units units = thi->units;
298: PetscReal s = -x*PetscSinReal(thi->alpha);
300: p->b = s - 1000*units->meter;
301: p->h = s - p->b;
302: /* tau_b = beta2 v is a stress (Pa) */
303: p->beta2 = 1000 * (1 + PetscSinReal(x*2*PETSC_PI/thi->Lx)*PetscSinReal(y*2*PETSC_PI/thi->Ly)) * units->Pascal * units->year / units->meter;
304: }
306: /* These are just toys */
308: /* Same bed as test A, free slip everywhere except for a discontinuous jump to a circular sticky region in the middle. */
309: static void THIInitialize_HOM_X(THI thi,PetscReal xx,PetscReal yy,PrmNode *p)
310: {
311: Units units = thi->units;
312: PetscReal x = xx*2*PETSC_PI/thi->Lx - PETSC_PI,y = yy*2*PETSC_PI/thi->Ly - PETSC_PI; /* [-pi,pi] */
313: PetscReal r = PetscSqrtReal(x*x + y*y),s = -x*PetscSinReal(thi->alpha);
314: p->b = s - 1000*units->meter + 500*units->meter*PetscSinReal(x + PETSC_PI) * PetscSinReal(y + PETSC_PI);
315: p->h = s - p->b;
316: p->beta2 = 1000 * (r < 1 ? 2 : 0) * units->Pascal * units->year / units->meter;
317: }
319: /* Like Z, but with 200 meter cliffs */
320: static void THIInitialize_HOM_Y(THI thi,PetscReal xx,PetscReal yy,PrmNode *p)
321: {
322: Units units = thi->units;
323: PetscReal x = xx*2*PETSC_PI/thi->Lx - PETSC_PI,y = yy*2*PETSC_PI/thi->Ly - PETSC_PI; /* [-pi,pi] */
324: PetscReal r = PetscSqrtReal(x*x + y*y),s = -x*PetscSinReal(thi->alpha);
326: p->b = s - 1000*units->meter + 500*units->meter * PetscSinReal(x + PETSC_PI) * PetscSinReal(y + PETSC_PI);
327: if (PetscRealPart(p->b) > -700*units->meter) p->b += 200*units->meter;
328: p->h = s - p->b;
329: p->beta2 = 1000 * (1. + PetscSinReal(PetscSqrtReal(16*r))/PetscSqrtReal(1e-2 + 16*r)*PetscCosReal(x*3/2)*PetscCosReal(y*3/2)) * units->Pascal * units->year / units->meter;
330: }
332: /* Same bed as A, smoothly varying slipperiness, similar to MATLAB's "sombrero" (uncorrelated with bathymetry) */
333: static void THIInitialize_HOM_Z(THI thi,PetscReal xx,PetscReal yy,PrmNode *p)
334: {
335: Units units = thi->units;
336: PetscReal x = xx*2*PETSC_PI/thi->Lx - PETSC_PI,y = yy*2*PETSC_PI/thi->Ly - PETSC_PI; /* [-pi,pi] */
337: PetscReal r = PetscSqrtReal(x*x + y*y),s = -x*PetscSinReal(thi->alpha);
339: p->b = s - 1000*units->meter + 500*units->meter * PetscSinReal(x + PETSC_PI) * PetscSinReal(y + PETSC_PI);
340: p->h = s - p->b;
341: p->beta2 = 1000 * (1. + PetscSinReal(PetscSqrtReal(16*r))/PetscSqrtReal(1e-2 + 16*r)*PetscCosReal(x*3/2)*PetscCosReal(y*3/2)) * units->Pascal * units->year / units->meter;
342: }
344: static void THIFriction(THI thi,PetscReal rbeta2,PetscReal gam,PetscReal *beta2,PetscReal *dbeta2)
345: {
346: if (thi->friction.irefgam == 0) {
347: Units units = thi->units;
348: thi->friction.irefgam = 1./(0.5*PetscSqr(thi->friction.refvel * units->meter / units->year));
349: thi->friction.eps2 = 0.5*PetscSqr(thi->friction.epsvel * units->meter / units->year) * thi->friction.irefgam;
350: }
351: if (thi->friction.exponent == 0) {
352: *beta2 = rbeta2;
353: *dbeta2 = 0;
354: } else {
355: *beta2 = rbeta2 * PetscPowReal(thi->friction.eps2 + gam*thi->friction.irefgam,thi->friction.exponent);
356: *dbeta2 = thi->friction.exponent * *beta2 / (thi->friction.eps2 + gam*thi->friction.irefgam) * thi->friction.irefgam;
357: }
358: }
360: static void THIViscosity(THI thi,PetscReal gam,PetscReal *eta,PetscReal *deta)
361: {
362: PetscReal Bd2,eps,exponent;
363: if (thi->viscosity.Bd2 == 0) {
364: Units units = thi->units;
365: const PetscReal
366: n = 3., /* Glen exponent */
367: p = 1. + 1./n, /* for Stokes */
368: A = 1.e-16 * PetscPowReal(units->Pascal,-n) / units->year, /* softness parameter (Pa^{-n}/s) */
369: B = PetscPowReal(A,-1./n); /* hardness parameter */
370: thi->viscosity.Bd2 = B/2;
371: thi->viscosity.exponent = (p-2)/2;
372: thi->viscosity.eps = 0.5*PetscSqr(1e-5 / units->year);
373: }
374: Bd2 = thi->viscosity.Bd2;
375: exponent = thi->viscosity.exponent;
376: eps = thi->viscosity.eps;
377: *eta = Bd2 * PetscPowReal(eps + gam,exponent);
378: *deta = exponent * (*eta) / (eps + gam);
379: }
381: static void RangeUpdate(PetscReal *min,PetscReal *max,PetscReal x)
382: {
383: if (x < *min) *min = x;
384: if (x > *max) *max = x;
385: }
387: static void PRangeClear(PRange *p)
388: {
389: p->cmin = p->min = 1e100;
390: p->cmax = p->max = -1e100;
391: }
393: static PetscErrorCode PRangeMinMax(PRange *p,PetscReal min,PetscReal max)
394: {
396: p->cmin = min;
397: p->cmax = max;
398: if (min < p->min) p->min = min;
399: if (max > p->max) p->max = max;
400: return 0;
401: }
403: static PetscErrorCode THIDestroy(THI *thi)
404: {
406: if (!*thi) return 0;
407: if (--((PetscObject)(*thi))->refct > 0) {*thi = 0; return 0;}
408: PetscFree((*thi)->units);
409: PetscFree((*thi)->mattype);
410: PetscHeaderDestroy(thi);
411: return 0;
412: }
414: static PetscErrorCode THICreate(MPI_Comm comm,THI *inthi)
415: {
416: static PetscBool registered = PETSC_FALSE;
417: THI thi;
418: Units units;
419: PetscErrorCode ierr;
422: *inthi = 0;
423: if (!registered) {
424: PetscClassIdRegister("Toy Hydrostatic Ice",&THI_CLASSID);
425: registered = PETSC_TRUE;
426: }
427: PetscHeaderCreate(thi,THI_CLASSID,"THI","Toy Hydrostatic Ice","",comm,THIDestroy,0);
429: PetscNew(&thi->units);
430: units = thi->units;
431: units->meter = 1e-2;
432: units->second = 1e-7;
433: units->kilogram = 1e-12;
435: PetscOptionsBegin(comm,NULL,"Scaled units options","");
436: {
437: PetscOptionsReal("-units_meter","1 meter in scaled length units","",units->meter,&units->meter,NULL);
438: PetscOptionsReal("-units_second","1 second in scaled time units","",units->second,&units->second,NULL);
439: PetscOptionsReal("-units_kilogram","1 kilogram in scaled mass units","",units->kilogram,&units->kilogram,NULL);
440: }
441: PetscOptionsEnd();
442: units->Pascal = units->kilogram / (units->meter * PetscSqr(units->second));
443: units->year = 31556926. * units->second; /* seconds per year */
445: thi->Lx = 10.e3;
446: thi->Ly = 10.e3;
447: thi->Lz = 1000;
448: thi->dirichlet_scale = 1;
449: thi->verbose = PETSC_FALSE;
451: PetscOptionsBegin(comm,NULL,"Toy Hydrostatic Ice options","");
452: {
453: QuadratureType quad = QUAD_GAUSS;
454: char homexp[] = "A";
455: char mtype[256] = MATSBAIJ;
456: PetscReal L,m = 1.0;
457: PetscBool flg;
458: L = thi->Lx;
459: PetscOptionsReal("-thi_L","Domain size (m)","",L,&L,&flg);
460: if (flg) thi->Lx = thi->Ly = L;
461: PetscOptionsReal("-thi_Lx","X Domain size (m)","",thi->Lx,&thi->Lx,NULL);
462: PetscOptionsReal("-thi_Ly","Y Domain size (m)","",thi->Ly,&thi->Ly,NULL);
463: PetscOptionsReal("-thi_Lz","Z Domain size (m)","",thi->Lz,&thi->Lz,NULL);
464: PetscOptionsString("-thi_hom","ISMIP-HOM experiment (A or C)","",homexp,homexp,sizeof(homexp),NULL);
465: switch (homexp[0] = toupper(homexp[0])) {
466: case 'A':
467: thi->initialize = THIInitialize_HOM_A;
468: thi->no_slip = PETSC_TRUE;
469: thi->alpha = 0.5;
470: break;
471: case 'C':
472: thi->initialize = THIInitialize_HOM_C;
473: thi->no_slip = PETSC_FALSE;
474: thi->alpha = 0.1;
475: break;
476: case 'X':
477: thi->initialize = THIInitialize_HOM_X;
478: thi->no_slip = PETSC_FALSE;
479: thi->alpha = 0.3;
480: break;
481: case 'Y':
482: thi->initialize = THIInitialize_HOM_Y;
483: thi->no_slip = PETSC_FALSE;
484: thi->alpha = 0.5;
485: break;
486: case 'Z':
487: thi->initialize = THIInitialize_HOM_Z;
488: thi->no_slip = PETSC_FALSE;
489: thi->alpha = 0.5;
490: break;
491: default:
492: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"HOM experiment '%c' not implemented",homexp[0]);
493: }
494: PetscOptionsEnum("-thi_quadrature","Quadrature to use for 3D elements","",QuadratureTypes,(PetscEnum)quad,(PetscEnum*)&quad,NULL);
495: switch (quad) {
496: case QUAD_GAUSS:
497: HexQInterp = HexQInterp_Gauss;
498: HexQDeriv = HexQDeriv_Gauss;
499: break;
500: case QUAD_LOBATTO:
501: HexQInterp = HexQInterp_Lobatto;
502: HexQDeriv = HexQDeriv_Lobatto;
503: break;
504: }
505: PetscOptionsReal("-thi_alpha","Bed angle (degrees)","",thi->alpha,&thi->alpha,NULL);
507: thi->friction.refvel = 100.;
508: thi->friction.epsvel = 1.;
510: PetscOptionsReal("-thi_friction_refvel","Reference velocity for sliding","",thi->friction.refvel,&thi->friction.refvel,NULL);
511: PetscOptionsReal("-thi_friction_epsvel","Regularization velocity for sliding","",thi->friction.epsvel,&thi->friction.epsvel,NULL);
512: PetscOptionsReal("-thi_friction_m","Friction exponent, 0=Coulomb, 1=Navier","",m,&m,NULL);
514: thi->friction.exponent = (m-1)/2;
516: PetscOptionsReal("-thi_dirichlet_scale","Scale Dirichlet boundary conditions by this factor","",thi->dirichlet_scale,&thi->dirichlet_scale,NULL);
517: PetscOptionsReal("-thi_ssa_friction_scale","Scale slip boundary conditions by this factor in SSA (2D) assembly","",thi->ssa_friction_scale,&thi->ssa_friction_scale,NULL);
518: PetscOptionsBool("-thi_coarse2d","Use a 2D coarse space corresponding to SSA","",thi->coarse2d,&thi->coarse2d,NULL);
519: PetscOptionsBool("-thi_tridiagonal","Assemble a tridiagonal system (column coupling only) on the finest level","",thi->tridiagonal,&thi->tridiagonal,NULL);
520: PetscOptionsFList("-thi_mat_type","Matrix type","MatSetType",MatList,mtype,(char*)mtype,sizeof(mtype),NULL);
521: PetscStrallocpy(mtype,(char**)&thi->mattype);
522: PetscOptionsBool("-thi_verbose","Enable verbose output (like matrix sizes and statistics)","",thi->verbose,&thi->verbose,NULL);
523: }
524: PetscOptionsEnd();
526: /* dimensionalize */
527: thi->Lx *= units->meter;
528: thi->Ly *= units->meter;
529: thi->Lz *= units->meter;
530: thi->alpha *= PETSC_PI / 180;
532: PRangeClear(&thi->eta);
533: PRangeClear(&thi->beta2);
535: {
536: PetscReal u = 1000*units->meter/(3e7*units->second),
537: gradu = u / (100*units->meter),eta,deta,
538: rho = 910 * units->kilogram/PetscPowReal(units->meter,3),
539: grav = 9.81 * units->meter/PetscSqr(units->second),
540: driving = rho * grav * PetscSinReal(thi->alpha) * 1000*units->meter;
541: THIViscosity(thi,0.5*gradu*gradu,&eta,&deta);
542: thi->rhog = rho * grav;
543: if (thi->verbose) {
544: PetscPrintf(PetscObjectComm((PetscObject)thi),"Units: meter %8.2g second %8.2g kg %8.2g Pa %8.2g\n",(double)units->meter,(double)units->second,(double)units->kilogram,(double)units->Pascal);
545: PetscPrintf(PetscObjectComm((PetscObject)thi),"Domain (%6.2g,%6.2g,%6.2g), pressure %8.2g, driving stress %8.2g\n",(double)thi->Lx,(double)thi->Ly,(double)thi->Lz,(double)(rho*grav*1e3*units->meter),(double)driving);
546: PetscPrintf(PetscObjectComm((PetscObject)thi),"Large velocity 1km/a %8.2g, velocity gradient %8.2g, eta %8.2g, stress %8.2g, ratio %8.2g\n",(double)u,(double)gradu,(double)eta,(double)(2*eta*gradu),(double)(2*eta*gradu/driving));
547: THIViscosity(thi,0.5*PetscSqr(1e-3*gradu),&eta,&deta);
548: PetscPrintf(PetscObjectComm((PetscObject)thi),"Small velocity 1m/a %8.2g, velocity gradient %8.2g, eta %8.2g, stress %8.2g, ratio %8.2g\n",(double)(1e-3*u),(double)(1e-3*gradu),(double)eta,(double)(2*eta*1e-3*gradu),(double)(2*eta*1e-3*gradu/driving));
549: }
550: }
552: *inthi = thi;
553: return 0;
554: }
556: static PetscErrorCode THIInitializePrm(THI thi,DM da2prm,Vec prm)
557: {
558: PrmNode **p;
559: PetscInt i,j,xs,xm,ys,ym,mx,my;
562: DMDAGetGhostCorners(da2prm,&ys,&xs,0,&ym,&xm,0);
563: DMDAGetInfo(da2prm,0, &my,&mx,0, 0,0,0, 0,0,0,0,0,0);
564: DMDAVecGetArray(da2prm,prm,&p);
565: for (i=xs; i<xs+xm; i++) {
566: for (j=ys; j<ys+ym; j++) {
567: PetscReal xx = thi->Lx*i/mx,yy = thi->Ly*j/my;
568: thi->initialize(thi,xx,yy,&p[i][j]);
569: }
570: }
571: DMDAVecRestoreArray(da2prm,prm,&p);
572: return 0;
573: }
575: static PetscErrorCode THISetUpDM(THI thi,DM dm)
576: {
577: PetscInt refinelevel,coarsenlevel,level,dim,Mx,My,Mz,mx,my,s;
578: DMDAStencilType st;
579: DM da2prm;
580: Vec X;
583: DMDAGetInfo(dm,&dim, &Mz,&My,&Mx, 0,&my,&mx, 0,&s,0,0,0,&st);
584: if (dim == 2) {
585: DMDAGetInfo(dm,&dim, &My,&Mx,0, &my,&mx,0, 0,&s,0,0,0,&st);
586: }
587: DMGetRefineLevel(dm,&refinelevel);
588: DMGetCoarsenLevel(dm,&coarsenlevel);
589: level = refinelevel - coarsenlevel;
590: DMDACreate2d(PetscObjectComm((PetscObject)thi),DM_BOUNDARY_PERIODIC,DM_BOUNDARY_PERIODIC,st,My,Mx,my,mx,sizeof(PrmNode)/sizeof(PetscScalar),s,0,0,&da2prm);
591: DMSetUp(da2prm);
592: DMCreateLocalVector(da2prm,&X);
593: {
594: PetscReal Lx = thi->Lx / thi->units->meter,Ly = thi->Ly / thi->units->meter,Lz = thi->Lz / thi->units->meter;
595: if (dim == 2) {
596: PetscPrintf(PetscObjectComm((PetscObject)thi),"Level %D domain size (m) %8.2g x %8.2g, num elements %D x %D (%D), size (m) %g x %g\n",level,(double)Lx,(double)Ly,Mx,My,Mx*My,(double)(Lx/Mx),(double)(Ly/My));
597: } else {
598: PetscPrintf(PetscObjectComm((PetscObject)thi),"Level %D domain size (m) %8.2g x %8.2g x %8.2g, num elements %D x %D x %D (%D), size (m) %g x %g x %g\n",level,(double)Lx,(double)Ly,(double)Lz,Mx,My,Mz,Mx*My*Mz,(double)(Lx/Mx),(double)(Ly/My),(double)(1000./(Mz-1)));
599: }
600: }
601: THIInitializePrm(thi,da2prm,X);
602: if (thi->tridiagonal) { /* Reset coarse Jacobian evaluation */
603: DMDASNESSetJacobianLocal(dm,(DMDASNESJacobian)THIJacobianLocal_3D_Full,thi);
604: }
605: if (thi->coarse2d) {
606: DMDASNESSetJacobianLocal(dm,(DMDASNESJacobian)THIJacobianLocal_2D,thi);
607: }
608: PetscObjectCompose((PetscObject)dm,"DMDA2Prm",(PetscObject)da2prm);
609: PetscObjectCompose((PetscObject)dm,"DMDA2Prm_Vec",(PetscObject)X);
610: DMDestroy(&da2prm);
611: VecDestroy(&X);
612: return 0;
613: }
615: static PetscErrorCode DMCoarsenHook_THI(DM dmf,DM dmc,void *ctx)
616: {
617: THI thi = (THI)ctx;
618: PetscInt rlevel,clevel;
621: THISetUpDM(thi,dmc);
622: DMGetRefineLevel(dmc,&rlevel);
623: DMGetCoarsenLevel(dmc,&clevel);
624: if (rlevel-clevel == 0) DMSetMatType(dmc,MATAIJ);
625: DMCoarsenHookAdd(dmc,DMCoarsenHook_THI,NULL,thi);
626: return 0;
627: }
629: static PetscErrorCode DMRefineHook_THI(DM dmc,DM dmf,void *ctx)
630: {
631: THI thi = (THI)ctx;
634: THISetUpDM(thi,dmf);
635: DMSetMatType(dmf,thi->mattype);
636: DMRefineHookAdd(dmf,DMRefineHook_THI,NULL,thi);
637: /* With grid sequencing, a formerly-refined DM will later be coarsened by PCSetUp_MG */
638: DMCoarsenHookAdd(dmf,DMCoarsenHook_THI,NULL,thi);
639: return 0;
640: }
642: static PetscErrorCode THIDAGetPrm(DM da,PrmNode ***prm)
643: {
644: DM da2prm;
645: Vec X;
648: PetscObjectQuery((PetscObject)da,"DMDA2Prm",(PetscObject*)&da2prm);
650: PetscObjectQuery((PetscObject)da,"DMDA2Prm_Vec",(PetscObject*)&X);
652: DMDAVecGetArray(da2prm,X,prm);
653: return 0;
654: }
656: static PetscErrorCode THIDARestorePrm(DM da,PrmNode ***prm)
657: {
658: DM da2prm;
659: Vec X;
662: PetscObjectQuery((PetscObject)da,"DMDA2Prm",(PetscObject*)&da2prm);
664: PetscObjectQuery((PetscObject)da,"DMDA2Prm_Vec",(PetscObject*)&X);
666: DMDAVecRestoreArray(da2prm,X,prm);
667: return 0;
668: }
670: static PetscErrorCode THIInitial(SNES snes,Vec X,void *ctx)
671: {
672: THI thi;
673: PetscInt i,j,k,xs,xm,ys,ym,zs,zm,mx,my;
674: PetscReal hx,hy;
675: PrmNode **prm;
676: Node ***x;
677: DM da;
680: SNESGetDM(snes,&da);
681: DMGetApplicationContext(da,&thi);
682: DMDAGetInfo(da,0, 0,&my,&mx, 0,0,0, 0,0,0,0,0,0);
683: DMDAGetCorners(da,&zs,&ys,&xs,&zm,&ym,&xm);
684: DMDAVecGetArray(da,X,&x);
685: THIDAGetPrm(da,&prm);
686: hx = thi->Lx / mx;
687: hy = thi->Ly / my;
688: for (i=xs; i<xs+xm; i++) {
689: for (j=ys; j<ys+ym; j++) {
690: for (k=zs; k<zs+zm; k++) {
691: const PetscScalar zm1 = zm-1,
692: drivingx = thi->rhog * (prm[i+1][j].b+prm[i+1][j].h - prm[i-1][j].b-prm[i-1][j].h) / (2*hx),
693: drivingy = thi->rhog * (prm[i][j+1].b+prm[i][j+1].h - prm[i][j-1].b-prm[i][j-1].h) / (2*hy);
694: x[i][j][k].u = 0. * drivingx * prm[i][j].h*(PetscScalar)k/zm1;
695: x[i][j][k].v = 0. * drivingy * prm[i][j].h*(PetscScalar)k/zm1;
696: }
697: }
698: }
699: DMDAVecRestoreArray(da,X,&x);
700: THIDARestorePrm(da,&prm);
701: return 0;
702: }
704: static void PointwiseNonlinearity(THI thi,const Node n[PETSC_RESTRICT],const PetscReal phi[PETSC_RESTRICT],PetscReal dphi[PETSC_RESTRICT][3],PetscScalar *PETSC_RESTRICT u,PetscScalar *PETSC_RESTRICT v,PetscScalar du[PETSC_RESTRICT],PetscScalar dv[PETSC_RESTRICT],PetscReal *eta,PetscReal *deta)
705: {
706: PetscInt l,ll;
707: PetscScalar gam;
709: du[0] = du[1] = du[2] = 0;
710: dv[0] = dv[1] = dv[2] = 0;
711: *u = 0;
712: *v = 0;
713: for (l=0; l<8; l++) {
714: *u += phi[l] * n[l].u;
715: *v += phi[l] * n[l].v;
716: for (ll=0; ll<3; ll++) {
717: du[ll] += dphi[l][ll] * n[l].u;
718: dv[ll] += dphi[l][ll] * n[l].v;
719: }
720: }
721: gam = PetscSqr(du[0]) + PetscSqr(dv[1]) + du[0]*dv[1] + 0.25*PetscSqr(du[1]+dv[0]) + 0.25*PetscSqr(du[2]) + 0.25*PetscSqr(dv[2]);
722: THIViscosity(thi,PetscRealPart(gam),eta,deta);
723: }
725: static void PointwiseNonlinearity2D(THI thi,Node n[],PetscReal phi[],PetscReal dphi[4][2],PetscScalar *u,PetscScalar *v,PetscScalar du[],PetscScalar dv[],PetscReal *eta,PetscReal *deta)
726: {
727: PetscInt l,ll;
728: PetscScalar gam;
730: du[0] = du[1] = 0;
731: dv[0] = dv[1] = 0;
732: *u = 0;
733: *v = 0;
734: for (l=0; l<4; l++) {
735: *u += phi[l] * n[l].u;
736: *v += phi[l] * n[l].v;
737: for (ll=0; ll<2; ll++) {
738: du[ll] += dphi[l][ll] * n[l].u;
739: dv[ll] += dphi[l][ll] * n[l].v;
740: }
741: }
742: gam = PetscSqr(du[0]) + PetscSqr(dv[1]) + du[0]*dv[1] + 0.25*PetscSqr(du[1]+dv[0]);
743: THIViscosity(thi,PetscRealPart(gam),eta,deta);
744: }
746: static PetscErrorCode THIFunctionLocal(DMDALocalInfo *info,Node ***x,Node ***f,THI thi)
747: {
748: PetscInt xs,ys,xm,ym,zm,i,j,k,q,l;
749: PetscReal hx,hy,etamin,etamax,beta2min,beta2max;
750: PrmNode **prm;
753: xs = info->zs;
754: ys = info->ys;
755: xm = info->zm;
756: ym = info->ym;
757: zm = info->xm;
758: hx = thi->Lx / info->mz;
759: hy = thi->Ly / info->my;
761: etamin = 1e100;
762: etamax = 0;
763: beta2min = 1e100;
764: beta2max = 0;
766: THIDAGetPrm(info->da,&prm);
768: for (i=xs; i<xs+xm; i++) {
769: for (j=ys; j<ys+ym; j++) {
770: PrmNode pn[4];
771: QuadExtract(prm,i,j,pn);
772: for (k=0; k<zm-1; k++) {
773: PetscInt ls = 0;
774: Node n[8],*fn[8];
775: PetscReal zn[8],etabase = 0;
776: PrmHexGetZ(pn,k,zm,zn);
777: HexExtract(x,i,j,k,n);
778: HexExtractRef(f,i,j,k,fn);
779: if (thi->no_slip && k == 0) {
780: for (l=0; l<4; l++) n[l].u = n[l].v = 0;
781: /* The first 4 basis functions lie on the bottom layer, so their contribution is exactly 0, hence we can skip them */
782: ls = 4;
783: }
784: for (q=0; q<8; q++) {
785: PetscReal dz[3],phi[8],dphi[8][3],jw,eta,deta;
786: PetscScalar du[3],dv[3],u,v;
787: HexGrad(HexQDeriv[q],zn,dz);
788: HexComputeGeometry(q,hx,hy,dz,phi,dphi,&jw);
789: PointwiseNonlinearity(thi,n,phi,dphi,&u,&v,du,dv,&eta,&deta);
790: jw /= thi->rhog; /* scales residuals to be O(1) */
791: if (q == 0) etabase = eta;
792: RangeUpdate(&etamin,&etamax,eta);
793: for (l=ls; l<8; l++) { /* test functions */
794: const PetscReal ds[2] = {-PetscSinReal(thi->alpha),0};
795: const PetscReal pp = phi[l],*dp = dphi[l];
796: fn[l]->u += dp[0]*jw*eta*(4.*du[0]+2.*dv[1]) + dp[1]*jw*eta*(du[1]+dv[0]) + dp[2]*jw*eta*du[2] + pp*jw*thi->rhog*ds[0];
797: fn[l]->v += dp[1]*jw*eta*(2.*du[0]+4.*dv[1]) + dp[0]*jw*eta*(du[1]+dv[0]) + dp[2]*jw*eta*dv[2] + pp*jw*thi->rhog*ds[1];
798: }
799: }
800: if (k == 0) { /* we are on a bottom face */
801: if (thi->no_slip) {
802: /* Note: Non-Galerkin coarse grid operators are very sensitive to the scaling of Dirichlet boundary
803: * conditions. After shenanigans above, etabase contains the effective viscosity at the closest quadrature
804: * point to the bed. We want the diagonal entry in the Dirichlet condition to have similar magnitude to the
805: * diagonal entry corresponding to the adjacent node. The fundamental scaling of the viscous part is in
806: * diagu, diagv below. This scaling is easy to recognize by considering the finite difference operator after
807: * scaling by element size. The no-slip Dirichlet condition is scaled by this factor, and also in the
808: * assembled matrix (see the similar block in THIJacobianLocal).
809: *
810: * Note that the residual at this Dirichlet node is linear in the state at this node, but also depends
811: * (nonlinearly in general) on the neighboring interior nodes through the local viscosity. This will make
812: * a matrix-free Jacobian have extra entries in the corresponding row. We assemble only the diagonal part,
813: * so the solution will exactly satisfy the boundary condition after the first linear iteration.
814: */
815: const PetscReal hz = PetscRealPart(pn[0].h)/(zm-1.);
816: const PetscScalar diagu = 2*etabase/thi->rhog*(hx*hy/hz + hx*hz/hy + 4*hy*hz/hx),diagv = 2*etabase/thi->rhog*(hx*hy/hz + 4*hx*hz/hy + hy*hz/hx);
817: fn[0]->u = thi->dirichlet_scale*diagu*x[i][j][k].u;
818: fn[0]->v = thi->dirichlet_scale*diagv*x[i][j][k].v;
819: } else { /* Integrate over bottom face to apply boundary condition */
820: for (q=0; q<4; q++) {
821: const PetscReal jw = 0.25*hx*hy/thi->rhog,*phi = QuadQInterp[q];
822: PetscScalar u =0,v=0,rbeta2=0;
823: PetscReal beta2,dbeta2;
824: for (l=0; l<4; l++) {
825: u += phi[l]*n[l].u;
826: v += phi[l]*n[l].v;
827: rbeta2 += phi[l]*pn[l].beta2;
828: }
829: THIFriction(thi,PetscRealPart(rbeta2),PetscRealPart(u*u+v*v)/2,&beta2,&dbeta2);
830: RangeUpdate(&beta2min,&beta2max,beta2);
831: for (l=0; l<4; l++) {
832: const PetscReal pp = phi[l];
833: fn[ls+l]->u += pp*jw*beta2*u;
834: fn[ls+l]->v += pp*jw*beta2*v;
835: }
836: }
837: }
838: }
839: }
840: }
841: }
843: THIDARestorePrm(info->da,&prm);
845: PRangeMinMax(&thi->eta,etamin,etamax);
846: PRangeMinMax(&thi->beta2,beta2min,beta2max);
847: return 0;
848: }
850: static PetscErrorCode THIMatrixStatistics(THI thi,Mat B,PetscViewer viewer)
851: {
852: PetscReal nrm;
853: PetscInt m;
854: PetscMPIInt rank;
857: MatNorm(B,NORM_FROBENIUS,&nrm);
858: MatGetSize(B,&m,0);
859: MPI_Comm_rank(PetscObjectComm((PetscObject)B),&rank);
860: if (rank == 0) {
861: PetscScalar val0,val2;
862: MatGetValue(B,0,0,&val0);
863: MatGetValue(B,2,2,&val2);
864: PetscViewerASCIIPrintf(viewer,"Matrix dim %D norm %8.2e (0,0) %8.2e (2,2) %8.2e %8.2e <= eta <= %8.2e %8.2e <= beta2 <= %8.2e\n",m,(double)nrm,(double)PetscRealPart(val0),(double)PetscRealPart(val2),(double)thi->eta.cmin,(double)thi->eta.cmax,(double)thi->beta2.cmin,(double)thi->beta2.cmax);
865: }
866: return 0;
867: }
869: static PetscErrorCode THISurfaceStatistics(DM da,Vec X,PetscReal *min,PetscReal *max,PetscReal *mean)
870: {
871: Node ***x;
872: PetscInt i,j,xs,ys,zs,xm,ym,zm,mx,my,mz;
873: PetscReal umin = 1e100,umax=-1e100;
874: PetscScalar usum = 0.0,gusum;
877: *min = *max = *mean = 0;
878: DMDAGetInfo(da,0, &mz,&my,&mx, 0,0,0, 0,0,0,0,0,0);
879: DMDAGetCorners(da,&zs,&ys,&xs,&zm,&ym,&xm);
881: DMDAVecGetArray(da,X,&x);
882: for (i=xs; i<xs+xm; i++) {
883: for (j=ys; j<ys+ym; j++) {
884: PetscReal u = PetscRealPart(x[i][j][zm-1].u);
885: RangeUpdate(&umin,&umax,u);
886: usum += u;
887: }
888: }
889: DMDAVecRestoreArray(da,X,&x);
890: MPI_Allreduce(&umin,min,1,MPIU_REAL,MPIU_MIN,PetscObjectComm((PetscObject)da));
891: MPI_Allreduce(&umax,max,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)da));
892: MPI_Allreduce(&usum,&gusum,1,MPIU_SCALAR,MPIU_SUM,PetscObjectComm((PetscObject)da));
893: *mean = PetscRealPart(gusum) / (mx*my);
894: return 0;
895: }
897: static PetscErrorCode THISolveStatistics(THI thi,SNES snes,PetscInt coarsened,const char name[])
898: {
899: MPI_Comm comm;
900: Vec X;
901: DM dm;
904: PetscObjectGetComm((PetscObject)thi,&comm);
905: SNESGetSolution(snes,&X);
906: SNESGetDM(snes,&dm);
907: PetscPrintf(comm,"Solution statistics after solve: %s\n",name);
908: {
909: PetscInt its,lits;
910: SNESConvergedReason reason;
911: SNESGetIterationNumber(snes,&its);
912: SNESGetConvergedReason(snes,&reason);
913: SNESGetLinearSolveIterations(snes,&lits);
914: PetscPrintf(comm,"%s: Number of SNES iterations = %D, total linear iterations = %D\n",SNESConvergedReasons[reason],its,lits);
915: }
916: {
917: PetscReal nrm2,tmin[3]={1e100,1e100,1e100},tmax[3]={-1e100,-1e100,-1e100},min[3],max[3];
918: PetscInt i,j,m;
919: const PetscScalar *x;
920: VecNorm(X,NORM_2,&nrm2);
921: VecGetLocalSize(X,&m);
922: VecGetArrayRead(X,&x);
923: for (i=0; i<m; i+=2) {
924: PetscReal u = PetscRealPart(x[i]),v = PetscRealPart(x[i+1]),c = PetscSqrtReal(u*u+v*v);
925: tmin[0] = PetscMin(u,tmin[0]);
926: tmin[1] = PetscMin(v,tmin[1]);
927: tmin[2] = PetscMin(c,tmin[2]);
928: tmax[0] = PetscMax(u,tmax[0]);
929: tmax[1] = PetscMax(v,tmax[1]);
930: tmax[2] = PetscMax(c,tmax[2]);
931: }
932: VecRestoreArrayRead(X,&x);
933: MPI_Allreduce(tmin,min,3,MPIU_REAL,MPIU_MIN,PetscObjectComm((PetscObject)thi));
934: MPI_Allreduce(tmax,max,3,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)thi));
935: /* Dimensionalize to meters/year */
936: nrm2 *= thi->units->year / thi->units->meter;
937: for (j=0; j<3; j++) {
938: min[j] *= thi->units->year / thi->units->meter;
939: max[j] *= thi->units->year / thi->units->meter;
940: }
941: if (min[0] == 0.0) min[0] = 0.0;
942: PetscPrintf(comm,"|X|_2 %g %g <= u <= %g %g <= v <= %g %g <= c <= %g \n",(double)nrm2,(double)min[0],(double)max[0],(double)min[1],(double)max[1],(double)min[2],(double)max[2]);
943: {
944: PetscReal umin,umax,umean;
945: THISurfaceStatistics(dm,X,&umin,&umax,&umean);
946: umin *= thi->units->year / thi->units->meter;
947: umax *= thi->units->year / thi->units->meter;
948: umean *= thi->units->year / thi->units->meter;
949: PetscPrintf(comm,"Surface statistics: u in [%12.6e, %12.6e] mean %12.6e\n",(double)umin,(double)umax,(double)umean);
950: }
951: /* These values stay nondimensional */
952: PetscPrintf(comm,"Global eta range %g to %g converged range %g to %g\n",(double)thi->eta.min,(double)thi->eta.max,(double)thi->eta.cmin,(double)thi->eta.cmax);
953: PetscPrintf(comm,"Global beta2 range %g to %g converged range %g to %g\n",(double)thi->beta2.min,(double)thi->beta2.max,(double)thi->beta2.cmin,(double)thi->beta2.cmax);
954: }
955: return 0;
956: }
958: static PetscErrorCode THIJacobianLocal_2D(DMDALocalInfo *info,Node **x,Mat J,Mat B,THI thi)
959: {
960: PetscInt xs,ys,xm,ym,i,j,q,l,ll;
961: PetscReal hx,hy;
962: PrmNode **prm;
965: xs = info->ys;
966: ys = info->xs;
967: xm = info->ym;
968: ym = info->xm;
969: hx = thi->Lx / info->my;
970: hy = thi->Ly / info->mx;
972: MatZeroEntries(B);
973: THIDAGetPrm(info->da,&prm);
975: for (i=xs; i<xs+xm; i++) {
976: for (j=ys; j<ys+ym; j++) {
977: Node n[4];
978: PrmNode pn[4];
979: PetscScalar Ke[4*2][4*2];
980: QuadExtract(prm,i,j,pn);
981: QuadExtract(x,i,j,n);
982: PetscMemzero(Ke,sizeof(Ke));
983: for (q=0; q<4; q++) {
984: PetscReal phi[4],dphi[4][2],jw,eta,deta,beta2,dbeta2;
985: PetscScalar u,v,du[2],dv[2],h = 0,rbeta2 = 0;
986: for (l=0; l<4; l++) {
987: phi[l] = QuadQInterp[q][l];
988: dphi[l][0] = QuadQDeriv[q][l][0]*2./hx;
989: dphi[l][1] = QuadQDeriv[q][l][1]*2./hy;
990: h += phi[l] * pn[l].h;
991: rbeta2 += phi[l] * pn[l].beta2;
992: }
993: jw = 0.25*hx*hy / thi->rhog; /* rhog is only scaling */
994: PointwiseNonlinearity2D(thi,n,phi,dphi,&u,&v,du,dv,&eta,&deta);
995: THIFriction(thi,PetscRealPart(rbeta2),PetscRealPart(u*u+v*v)/2,&beta2,&dbeta2);
996: for (l=0; l<4; l++) {
997: const PetscReal pp = phi[l],*dp = dphi[l];
998: for (ll=0; ll<4; ll++) {
999: const PetscReal ppl = phi[ll],*dpl = dphi[ll];
1000: PetscScalar dgdu,dgdv;
1001: dgdu = 2.*du[0]*dpl[0] + dv[1]*dpl[0] + 0.5*(du[1]+dv[0])*dpl[1];
1002: dgdv = 2.*dv[1]*dpl[1] + du[0]*dpl[1] + 0.5*(du[1]+dv[0])*dpl[0];
1003: /* Picard part */
1004: Ke[l*2+0][ll*2+0] += dp[0]*jw*eta*4.*dpl[0] + dp[1]*jw*eta*dpl[1] + pp*jw*(beta2/h)*ppl*thi->ssa_friction_scale;
1005: Ke[l*2+0][ll*2+1] += dp[0]*jw*eta*2.*dpl[1] + dp[1]*jw*eta*dpl[0];
1006: Ke[l*2+1][ll*2+0] += dp[1]*jw*eta*2.*dpl[0] + dp[0]*jw*eta*dpl[1];
1007: Ke[l*2+1][ll*2+1] += dp[1]*jw*eta*4.*dpl[1] + dp[0]*jw*eta*dpl[0] + pp*jw*(beta2/h)*ppl*thi->ssa_friction_scale;
1008: /* extra Newton terms */
1009: Ke[l*2+0][ll*2+0] += dp[0]*jw*deta*dgdu*(4.*du[0]+2.*dv[1]) + dp[1]*jw*deta*dgdu*(du[1]+dv[0]) + pp*jw*(dbeta2/h)*u*u*ppl*thi->ssa_friction_scale;
1010: Ke[l*2+0][ll*2+1] += dp[0]*jw*deta*dgdv*(4.*du[0]+2.*dv[1]) + dp[1]*jw*deta*dgdv*(du[1]+dv[0]) + pp*jw*(dbeta2/h)*u*v*ppl*thi->ssa_friction_scale;
1011: Ke[l*2+1][ll*2+0] += dp[1]*jw*deta*dgdu*(4.*dv[1]+2.*du[0]) + dp[0]*jw*deta*dgdu*(du[1]+dv[0]) + pp*jw*(dbeta2/h)*v*u*ppl*thi->ssa_friction_scale;
1012: Ke[l*2+1][ll*2+1] += dp[1]*jw*deta*dgdv*(4.*dv[1]+2.*du[0]) + dp[0]*jw*deta*dgdv*(du[1]+dv[0]) + pp*jw*(dbeta2/h)*v*v*ppl*thi->ssa_friction_scale;
1013: }
1014: }
1015: }
1016: {
1017: const MatStencil rc[4] = {{0,i,j,0},{0,i+1,j,0},{0,i+1,j+1,0},{0,i,j+1,0}};
1018: MatSetValuesBlockedStencil(B,4,rc,4,rc,&Ke[0][0],ADD_VALUES);
1019: }
1020: }
1021: }
1022: THIDARestorePrm(info->da,&prm);
1024: MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
1025: MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
1026: MatSetOption(B,MAT_SYMMETRIC,PETSC_TRUE);
1027: if (thi->verbose) THIMatrixStatistics(thi,B,PETSC_VIEWER_STDOUT_WORLD);
1028: return 0;
1029: }
1031: static PetscErrorCode THIJacobianLocal_3D(DMDALocalInfo *info,Node ***x,Mat B,THI thi,THIAssemblyMode amode)
1032: {
1033: PetscInt xs,ys,xm,ym,zm,i,j,k,q,l,ll;
1034: PetscReal hx,hy;
1035: PrmNode **prm;
1038: xs = info->zs;
1039: ys = info->ys;
1040: xm = info->zm;
1041: ym = info->ym;
1042: zm = info->xm;
1043: hx = thi->Lx / info->mz;
1044: hy = thi->Ly / info->my;
1046: MatZeroEntries(B);
1047: MatSetOption(B,MAT_SUBSET_OFF_PROC_ENTRIES,PETSC_TRUE);
1048: THIDAGetPrm(info->da,&prm);
1050: for (i=xs; i<xs+xm; i++) {
1051: for (j=ys; j<ys+ym; j++) {
1052: PrmNode pn[4];
1053: QuadExtract(prm,i,j,pn);
1054: for (k=0; k<zm-1; k++) {
1055: Node n[8];
1056: PetscReal zn[8],etabase = 0;
1057: PetscScalar Ke[8*2][8*2];
1058: PetscInt ls = 0;
1060: PrmHexGetZ(pn,k,zm,zn);
1061: HexExtract(x,i,j,k,n);
1062: PetscMemzero(Ke,sizeof(Ke));
1063: if (thi->no_slip && k == 0) {
1064: for (l=0; l<4; l++) n[l].u = n[l].v = 0;
1065: ls = 4;
1066: }
1067: for (q=0; q<8; q++) {
1068: PetscReal dz[3],phi[8],dphi[8][3],jw,eta,deta;
1069: PetscScalar du[3],dv[3],u,v;
1070: HexGrad(HexQDeriv[q],zn,dz);
1071: HexComputeGeometry(q,hx,hy,dz,phi,dphi,&jw);
1072: PointwiseNonlinearity(thi,n,phi,dphi,&u,&v,du,dv,&eta,&deta);
1073: jw /= thi->rhog; /* residuals are scaled by this factor */
1074: if (q == 0) etabase = eta;
1075: for (l=ls; l<8; l++) { /* test functions */
1076: const PetscReal *PETSC_RESTRICT dp = dphi[l];
1077: #if USE_SSE2_KERNELS
1078: /* gcc (up to my 4.5 snapshot) is really bad at hoisting intrinsics so we do it manually */
1079: __m128d
1080: p4 = _mm_set1_pd(4),p2 = _mm_set1_pd(2),p05 = _mm_set1_pd(0.5),
1081: p42 = _mm_setr_pd(4,2),p24 = _mm_shuffle_pd(p42,p42,_MM_SHUFFLE2(0,1)),
1082: du0 = _mm_set1_pd(du[0]),du1 = _mm_set1_pd(du[1]),du2 = _mm_set1_pd(du[2]),
1083: dv0 = _mm_set1_pd(dv[0]),dv1 = _mm_set1_pd(dv[1]),dv2 = _mm_set1_pd(dv[2]),
1084: jweta = _mm_set1_pd(jw*eta),jwdeta = _mm_set1_pd(jw*deta),
1085: dp0 = _mm_set1_pd(dp[0]),dp1 = _mm_set1_pd(dp[1]),dp2 = _mm_set1_pd(dp[2]),
1086: dp0jweta = _mm_mul_pd(dp0,jweta),dp1jweta = _mm_mul_pd(dp1,jweta),dp2jweta = _mm_mul_pd(dp2,jweta),
1087: p4du0p2dv1 = _mm_add_pd(_mm_mul_pd(p4,du0),_mm_mul_pd(p2,dv1)), /* 4 du0 + 2 dv1 */
1088: p4dv1p2du0 = _mm_add_pd(_mm_mul_pd(p4,dv1),_mm_mul_pd(p2,du0)), /* 4 dv1 + 2 du0 */
1089: pdu2dv2 = _mm_unpacklo_pd(du2,dv2), /* [du2, dv2] */
1090: du1pdv0 = _mm_add_pd(du1,dv0), /* du1 + dv0 */
1091: t1 = _mm_mul_pd(dp0,p4du0p2dv1), /* dp0 (4 du0 + 2 dv1) */
1092: t2 = _mm_mul_pd(dp1,p4dv1p2du0); /* dp1 (4 dv1 + 2 du0) */
1094: #endif
1095: #if defined COMPUTE_LOWER_TRIANGULAR /* The element matrices are always symmetric so computing the lower-triangular part is not necessary */
1096: for (ll=ls; ll<8; ll++) { /* trial functions */
1097: #else
1098: for (ll=l; ll<8; ll++) {
1099: #endif
1100: const PetscReal *PETSC_RESTRICT dpl = dphi[ll];
1101: if (amode == THIASSEMBLY_TRIDIAGONAL && (l-ll)%4) continue; /* these entries would not be inserted */
1102: #if !USE_SSE2_KERNELS
1103: /* The analytic Jacobian in nice, easy-to-read form */
1104: {
1105: PetscScalar dgdu,dgdv;
1106: dgdu = 2.*du[0]*dpl[0] + dv[1]*dpl[0] + 0.5*(du[1]+dv[0])*dpl[1] + 0.5*du[2]*dpl[2];
1107: dgdv = 2.*dv[1]*dpl[1] + du[0]*dpl[1] + 0.5*(du[1]+dv[0])*dpl[0] + 0.5*dv[2]*dpl[2];
1108: /* Picard part */
1109: Ke[l*2+0][ll*2+0] += dp[0]*jw*eta*4.*dpl[0] + dp[1]*jw*eta*dpl[1] + dp[2]*jw*eta*dpl[2];
1110: Ke[l*2+0][ll*2+1] += dp[0]*jw*eta*2.*dpl[1] + dp[1]*jw*eta*dpl[0];
1111: Ke[l*2+1][ll*2+0] += dp[1]*jw*eta*2.*dpl[0] + dp[0]*jw*eta*dpl[1];
1112: Ke[l*2+1][ll*2+1] += dp[1]*jw*eta*4.*dpl[1] + dp[0]*jw*eta*dpl[0] + dp[2]*jw*eta*dpl[2];
1113: /* extra Newton terms */
1114: Ke[l*2+0][ll*2+0] += dp[0]*jw*deta*dgdu*(4.*du[0]+2.*dv[1]) + dp[1]*jw*deta*dgdu*(du[1]+dv[0]) + dp[2]*jw*deta*dgdu*du[2];
1115: Ke[l*2+0][ll*2+1] += dp[0]*jw*deta*dgdv*(4.*du[0]+2.*dv[1]) + dp[1]*jw*deta*dgdv*(du[1]+dv[0]) + dp[2]*jw*deta*dgdv*du[2];
1116: Ke[l*2+1][ll*2+0] += dp[1]*jw*deta*dgdu*(4.*dv[1]+2.*du[0]) + dp[0]*jw*deta*dgdu*(du[1]+dv[0]) + dp[2]*jw*deta*dgdu*dv[2];
1117: Ke[l*2+1][ll*2+1] += dp[1]*jw*deta*dgdv*(4.*dv[1]+2.*du[0]) + dp[0]*jw*deta*dgdv*(du[1]+dv[0]) + dp[2]*jw*deta*dgdv*dv[2];
1118: }
1119: #else
1120: /* This SSE2 code is an exact replica of above, but uses explicit packed instructions for some speed
1121: * benefit. On my hardware, these intrinsics are almost twice as fast as above, reducing total assembly cost
1122: * by 25 to 30 percent. */
1123: {
1124: __m128d
1125: keu = _mm_loadu_pd(&Ke[l*2+0][ll*2+0]),
1126: kev = _mm_loadu_pd(&Ke[l*2+1][ll*2+0]),
1127: dpl01 = _mm_loadu_pd(&dpl[0]),dpl10 = _mm_shuffle_pd(dpl01,dpl01,_MM_SHUFFLE2(0,1)),dpl2 = _mm_set_sd(dpl[2]),
1128: t0,t3,pdgduv;
1129: keu = _mm_add_pd(keu,_mm_add_pd(_mm_mul_pd(_mm_mul_pd(dp0jweta,p42),dpl01),
1130: _mm_add_pd(_mm_mul_pd(dp1jweta,dpl10),
1131: _mm_mul_pd(dp2jweta,dpl2))));
1132: kev = _mm_add_pd(kev,_mm_add_pd(_mm_mul_pd(_mm_mul_pd(dp1jweta,p24),dpl01),
1133: _mm_add_pd(_mm_mul_pd(dp0jweta,dpl10),
1134: _mm_mul_pd(dp2jweta,_mm_shuffle_pd(dpl2,dpl2,_MM_SHUFFLE2(0,1))))));
1135: pdgduv = _mm_mul_pd(p05,_mm_add_pd(_mm_add_pd(_mm_mul_pd(p42,_mm_mul_pd(du0,dpl01)),
1136: _mm_mul_pd(p24,_mm_mul_pd(dv1,dpl01))),
1137: _mm_add_pd(_mm_mul_pd(du1pdv0,dpl10),
1138: _mm_mul_pd(pdu2dv2,_mm_set1_pd(dpl[2]))))); /* [dgdu, dgdv] */
1139: t0 = _mm_mul_pd(jwdeta,pdgduv); /* jw deta [dgdu, dgdv] */
1140: t3 = _mm_mul_pd(t0,du1pdv0); /* t0 (du1 + dv0) */
1141: _mm_storeu_pd(&Ke[l*2+0][ll*2+0],_mm_add_pd(keu,_mm_add_pd(_mm_mul_pd(t1,t0),
1142: _mm_add_pd(_mm_mul_pd(dp1,t3),
1143: _mm_mul_pd(t0,_mm_mul_pd(dp2,du2))))));
1144: _mm_storeu_pd(&Ke[l*2+1][ll*2+0],_mm_add_pd(kev,_mm_add_pd(_mm_mul_pd(t2,t0),
1145: _mm_add_pd(_mm_mul_pd(dp0,t3),
1146: _mm_mul_pd(t0,_mm_mul_pd(dp2,dv2))))));
1147: }
1148: #endif
1149: }
1150: }
1151: }
1152: if (k == 0) { /* on a bottom face */
1153: if (thi->no_slip) {
1154: const PetscReal hz = PetscRealPart(pn[0].h)/(zm-1);
1155: const PetscScalar diagu = 2*etabase/thi->rhog*(hx*hy/hz + hx*hz/hy + 4*hy*hz/hx),diagv = 2*etabase/thi->rhog*(hx*hy/hz + 4*hx*hz/hy + hy*hz/hx);
1156: Ke[0][0] = thi->dirichlet_scale*diagu;
1157: Ke[1][1] = thi->dirichlet_scale*diagv;
1158: } else {
1159: for (q=0; q<4; q++) {
1160: const PetscReal jw = 0.25*hx*hy/thi->rhog,*phi = QuadQInterp[q];
1161: PetscScalar u =0,v=0,rbeta2=0;
1162: PetscReal beta2,dbeta2;
1163: for (l=0; l<4; l++) {
1164: u += phi[l]*n[l].u;
1165: v += phi[l]*n[l].v;
1166: rbeta2 += phi[l]*pn[l].beta2;
1167: }
1168: THIFriction(thi,PetscRealPart(rbeta2),PetscRealPart(u*u+v*v)/2,&beta2,&dbeta2);
1169: for (l=0; l<4; l++) {
1170: const PetscReal pp = phi[l];
1171: for (ll=0; ll<4; ll++) {
1172: const PetscReal ppl = phi[ll];
1173: Ke[l*2+0][ll*2+0] += pp*jw*beta2*ppl + pp*jw*dbeta2*u*u*ppl;
1174: Ke[l*2+0][ll*2+1] += pp*jw*dbeta2*u*v*ppl;
1175: Ke[l*2+1][ll*2+0] += pp*jw*dbeta2*v*u*ppl;
1176: Ke[l*2+1][ll*2+1] += pp*jw*beta2*ppl + pp*jw*dbeta2*v*v*ppl;
1177: }
1178: }
1179: }
1180: }
1181: }
1182: {
1183: const MatStencil rc[8] = {{i,j,k,0},{i+1,j,k,0},{i+1,j+1,k,0},{i,j+1,k,0},{i,j,k+1,0},{i+1,j,k+1,0},{i+1,j+1,k+1,0},{i,j+1,k+1,0}};
1184: if (amode == THIASSEMBLY_TRIDIAGONAL) {
1185: for (l=0; l<4; l++) { /* Copy out each of the blocks, discarding horizontal coupling */
1186: const PetscInt l4 = l+4;
1187: const MatStencil rcl[2] = {{rc[l].k,rc[l].j,rc[l].i,0},{rc[l4].k,rc[l4].j,rc[l4].i,0}};
1188: #if defined COMPUTE_LOWER_TRIANGULAR
1189: const PetscScalar Kel[4][4] = {{Ke[2*l+0][2*l+0] ,Ke[2*l+0][2*l+1] ,Ke[2*l+0][2*l4+0] ,Ke[2*l+0][2*l4+1]},
1190: {Ke[2*l+1][2*l+0] ,Ke[2*l+1][2*l+1] ,Ke[2*l+1][2*l4+0] ,Ke[2*l+1][2*l4+1]},
1191: {Ke[2*l4+0][2*l+0],Ke[2*l4+0][2*l+1],Ke[2*l4+0][2*l4+0],Ke[2*l4+0][2*l4+1]},
1192: {Ke[2*l4+1][2*l+0],Ke[2*l4+1][2*l+1],Ke[2*l4+1][2*l4+0],Ke[2*l4+1][2*l4+1]}};
1193: #else
1194: /* Same as above except for the lower-left block */
1195: const PetscScalar Kel[4][4] = {{Ke[2*l+0][2*l+0] ,Ke[2*l+0][2*l+1] ,Ke[2*l+0][2*l4+0] ,Ke[2*l+0][2*l4+1]},
1196: {Ke[2*l+1][2*l+0] ,Ke[2*l+1][2*l+1] ,Ke[2*l+1][2*l4+0] ,Ke[2*l+1][2*l4+1]},
1197: {Ke[2*l+0][2*l4+0],Ke[2*l+1][2*l4+0],Ke[2*l4+0][2*l4+0],Ke[2*l4+0][2*l4+1]},
1198: {Ke[2*l+0][2*l4+1],Ke[2*l+1][2*l4+1],Ke[2*l4+1][2*l4+0],Ke[2*l4+1][2*l4+1]}};
1199: #endif
1200: MatSetValuesBlockedStencil(B,2,rcl,2,rcl,&Kel[0][0],ADD_VALUES);
1201: }
1202: } else {
1203: #if !defined COMPUTE_LOWER_TRIANGULAR /* fill in lower-triangular part, this is really cheap compared to computing the entries */
1204: for (l=0; l<8; l++) {
1205: for (ll=l+1; ll<8; ll++) {
1206: Ke[ll*2+0][l*2+0] = Ke[l*2+0][ll*2+0];
1207: Ke[ll*2+1][l*2+0] = Ke[l*2+0][ll*2+1];
1208: Ke[ll*2+0][l*2+1] = Ke[l*2+1][ll*2+0];
1209: Ke[ll*2+1][l*2+1] = Ke[l*2+1][ll*2+1];
1210: }
1211: }
1212: #endif
1213: MatSetValuesBlockedStencil(B,8,rc,8,rc,&Ke[0][0],ADD_VALUES);
1214: }
1215: }
1216: }
1217: }
1218: }
1219: THIDARestorePrm(info->da,&prm);
1221: MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
1222: MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
1223: MatSetOption(B,MAT_SYMMETRIC,PETSC_TRUE);
1224: if (thi->verbose) THIMatrixStatistics(thi,B,PETSC_VIEWER_STDOUT_WORLD);
1225: return 0;
1226: }
1228: static PetscErrorCode THIJacobianLocal_3D_Full(DMDALocalInfo *info,Node ***x,Mat A,Mat B,THI thi)
1229: {
1231: THIJacobianLocal_3D(info,x,B,thi,THIASSEMBLY_FULL);
1232: return 0;
1233: }
1235: static PetscErrorCode THIJacobianLocal_3D_Tridiagonal(DMDALocalInfo *info,Node ***x,Mat A,Mat B,THI thi)
1236: {
1238: THIJacobianLocal_3D(info,x,B,thi,THIASSEMBLY_TRIDIAGONAL);
1239: return 0;
1240: }
1242: static PetscErrorCode DMRefineHierarchy_THI(DM dac0,PetscInt nlevels,DM hierarchy[])
1243: {
1244: THI thi;
1245: PetscInt dim,M,N,m,n,s,dof;
1246: DM dac,daf;
1247: DMDAStencilType st;
1248: DM_DA *ddf,*ddc;
1251: PetscObjectQuery((PetscObject)dac0,"THI",(PetscObject*)&thi);
1253: if (nlevels > 1) {
1254: DMRefineHierarchy(dac0,nlevels-1,hierarchy);
1255: dac = hierarchy[nlevels-2];
1256: } else {
1257: dac = dac0;
1258: }
1259: DMDAGetInfo(dac,&dim, &N,&M,0, &n,&m,0, &dof,&s,0,0,0,&st);
1262: /* Creates a 3D DMDA with the same map-plane layout as the 2D one, with contiguous columns */
1263: DMDACreate3d(PetscObjectComm((PetscObject)dac),DM_BOUNDARY_NONE,DM_BOUNDARY_PERIODIC,DM_BOUNDARY_PERIODIC,st,thi->zlevels,N,M,1,n,m,dof,s,NULL,NULL,NULL,&daf);
1264: DMSetUp(daf);
1266: daf->ops->creatematrix = dac->ops->creatematrix;
1267: daf->ops->createinterpolation = dac->ops->createinterpolation;
1268: daf->ops->getcoloring = dac->ops->getcoloring;
1269: ddf = (DM_DA*)daf->data;
1270: ddc = (DM_DA*)dac->data;
1271: ddf->interptype = ddc->interptype;
1273: DMDASetFieldName(daf,0,"x-velocity");
1274: DMDASetFieldName(daf,1,"y-velocity");
1276: hierarchy[nlevels-1] = daf;
1277: return 0;
1278: }
1280: static PetscErrorCode DMCreateInterpolation_DA_THI(DM dac,DM daf,Mat *A,Vec *scale)
1281: {
1282: PetscInt dim;
1289: DMDAGetInfo(daf,&dim,0,0,0,0,0,0,0,0,0,0,0,0);
1290: if (dim == 2) {
1291: /* We are in the 2D problem and use normal DMDA interpolation */
1292: DMCreateInterpolation(dac,daf,A,scale);
1293: } else {
1294: PetscInt i,j,k,xs,ys,zs,xm,ym,zm,mx,my,mz,rstart,cstart;
1295: Mat B;
1297: DMDAGetInfo(daf,0, &mz,&my,&mx, 0,0,0, 0,0,0,0,0,0);
1298: DMDAGetCorners(daf,&zs,&ys,&xs,&zm,&ym,&xm);
1300: MatCreate(PetscObjectComm((PetscObject)daf),&B);
1301: MatSetSizes(B,xm*ym*zm,xm*ym,mx*my*mz,mx*my);
1303: MatSetType(B,MATAIJ);
1304: MatSeqAIJSetPreallocation(B,1,NULL);
1305: MatMPIAIJSetPreallocation(B,1,NULL,0,NULL);
1306: MatGetOwnershipRange(B,&rstart,NULL);
1307: MatGetOwnershipRangeColumn(B,&cstart,NULL);
1308: for (i=xs; i<xs+xm; i++) {
1309: for (j=ys; j<ys+ym; j++) {
1310: for (k=zs; k<zs+zm; k++) {
1311: PetscInt i2 = i*ym+j,i3 = i2*zm+k;
1312: PetscScalar val = ((k == 0 || k == mz-1) ? 0.5 : 1.) / (mz-1.); /* Integration using trapezoid rule */
1313: MatSetValue(B,cstart+i3,rstart+i2,val,INSERT_VALUES);
1314: }
1315: }
1316: }
1317: MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
1318: MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
1319: MatCreateMAIJ(B,sizeof(Node)/sizeof(PetscScalar),A);
1320: MatDestroy(&B);
1321: }
1322: return 0;
1323: }
1325: static PetscErrorCode DMCreateMatrix_THI_Tridiagonal(DM da,Mat *J)
1326: {
1327: Mat A;
1328: PetscInt xm,ym,zm,dim,dof = 2,starts[3],dims[3];
1329: ISLocalToGlobalMapping ltog;
1332: DMDAGetInfo(da,&dim, 0,0,0, 0,0,0, 0,0,0,0,0,0);
1334: DMDAGetCorners(da,0,0,0,&zm,&ym,&xm);
1335: DMGetLocalToGlobalMapping(da,<og);
1336: MatCreate(PetscObjectComm((PetscObject)da),&A);
1337: MatSetSizes(A,dof*xm*ym*zm,dof*xm*ym*zm,PETSC_DETERMINE,PETSC_DETERMINE);
1338: MatSetType(A,da->mattype);
1339: MatSetFromOptions(A);
1340: MatSeqAIJSetPreallocation(A,3*2,NULL);
1341: MatMPIAIJSetPreallocation(A,3*2,NULL,0,NULL);
1342: MatSeqBAIJSetPreallocation(A,2,3,NULL);
1343: MatMPIBAIJSetPreallocation(A,2,3,NULL,0,NULL);
1344: MatSeqSBAIJSetPreallocation(A,2,2,NULL);
1345: MatMPISBAIJSetPreallocation(A,2,2,NULL,0,NULL);
1346: MatSetLocalToGlobalMapping(A,ltog,ltog);
1347: DMDAGetGhostCorners(da,&starts[0],&starts[1],&starts[2],&dims[0],&dims[1],&dims[2]);
1348: MatSetStencil(A,dim,dims,starts,dof);
1349: *J = A;
1350: return 0;
1351: }
1353: static PetscErrorCode THIDAVecView_VTK_XML(THI thi,DM da,Vec X,const char filename[])
1354: {
1355: const PetscInt dof = 2;
1356: Units units = thi->units;
1357: MPI_Comm comm;
1358: PetscViewer viewer;
1359: PetscMPIInt rank,size,tag,nn,nmax;
1360: PetscInt mx,my,mz,r,range[6];
1361: const PetscScalar *x;
1364: PetscObjectGetComm((PetscObject)thi,&comm);
1365: DMDAGetInfo(da,0, &mz,&my,&mx, 0,0,0, 0,0,0,0,0,0);
1366: MPI_Comm_size(comm,&size);
1367: MPI_Comm_rank(comm,&rank);
1368: PetscViewerASCIIOpen(comm,filename,&viewer);
1369: PetscViewerASCIIPrintf(viewer,"<VTKFile type=\"StructuredGrid\" version=\"0.1\" byte_order=\"LittleEndian\">\n");
1370: PetscViewerASCIIPrintf(viewer," <StructuredGrid WholeExtent=\"%d %D %d %D %d %D\">\n",0,mz-1,0,my-1,0,mx-1);
1372: DMDAGetCorners(da,range,range+1,range+2,range+3,range+4,range+5);
1373: PetscMPIIntCast(range[3]*range[4]*range[5]*dof,&nn);
1374: MPI_Reduce(&nn,&nmax,1,MPI_INT,MPI_MAX,0,comm);
1375: tag = ((PetscObject) viewer)->tag;
1376: VecGetArrayRead(X,&x);
1377: if (rank == 0) {
1378: PetscScalar *array;
1379: PetscMalloc1(nmax,&array);
1380: for (r=0; r<size; r++) {
1381: PetscInt i,j,k,xs,xm,ys,ym,zs,zm;
1382: const PetscScalar *ptr;
1383: MPI_Status status;
1384: if (r) {
1385: MPI_Recv(range,6,MPIU_INT,r,tag,comm,MPI_STATUS_IGNORE);
1386: }
1387: zs = range[0];ys = range[1];xs = range[2];zm = range[3];ym = range[4];xm = range[5];
1389: if (r) {
1390: MPI_Recv(array,nmax,MPIU_SCALAR,r,tag,comm,&status);
1391: MPI_Get_count(&status,MPIU_SCALAR,&nn);
1393: ptr = array;
1394: } else ptr = x;
1395: PetscViewerASCIIPrintf(viewer," <Piece Extent=\"%D %D %D %D %D %D\">\n",zs,zs+zm-1,ys,ys+ym-1,xs,xs+xm-1);
1397: PetscViewerASCIIPrintf(viewer," <Points>\n");
1398: PetscViewerASCIIPrintf(viewer," <DataArray type=\"Float32\" NumberOfComponents=\"3\" format=\"ascii\">\n");
1399: for (i=xs; i<xs+xm; i++) {
1400: for (j=ys; j<ys+ym; j++) {
1401: for (k=zs; k<zs+zm; k++) {
1402: PrmNode p;
1403: PetscReal xx = thi->Lx*i/mx,yy = thi->Ly*j/my,zz;
1404: thi->initialize(thi,xx,yy,&p);
1405: zz = PetscRealPart(p.b) + PetscRealPart(p.h)*k/(mz-1);
1406: PetscViewerASCIIPrintf(viewer,"%f %f %f\n",(double)xx,(double)yy,(double)zz);
1407: }
1408: }
1409: }
1410: PetscViewerASCIIPrintf(viewer," </DataArray>\n");
1411: PetscViewerASCIIPrintf(viewer," </Points>\n");
1413: PetscViewerASCIIPrintf(viewer," <PointData>\n");
1414: PetscViewerASCIIPrintf(viewer," <DataArray type=\"Float32\" Name=\"velocity\" NumberOfComponents=\"3\" format=\"ascii\">\n");
1415: for (i=0; i<nn; i+=dof) {
1416: PetscViewerASCIIPrintf(viewer,"%f %f %f\n",(double)(PetscRealPart(ptr[i])*units->year/units->meter),(double)(PetscRealPart(ptr[i+1])*units->year/units->meter),0.0);
1417: }
1418: PetscViewerASCIIPrintf(viewer," </DataArray>\n");
1420: PetscViewerASCIIPrintf(viewer," <DataArray type=\"Int32\" Name=\"rank\" NumberOfComponents=\"1\" format=\"ascii\">\n");
1421: for (i=0; i<nn; i+=dof) {
1422: PetscViewerASCIIPrintf(viewer,"%D\n",r);
1423: }
1424: PetscViewerASCIIPrintf(viewer," </DataArray>\n");
1425: PetscViewerASCIIPrintf(viewer," </PointData>\n");
1427: PetscViewerASCIIPrintf(viewer," </Piece>\n");
1428: }
1429: PetscFree(array);
1430: } else {
1431: MPI_Send(range,6,MPIU_INT,0,tag,comm);
1432: MPI_Send((PetscScalar*)x,nn,MPIU_SCALAR,0,tag,comm);
1433: }
1434: VecRestoreArrayRead(X,&x);
1435: PetscViewerASCIIPrintf(viewer," </StructuredGrid>\n");
1436: PetscViewerASCIIPrintf(viewer,"</VTKFile>\n");
1437: PetscViewerDestroy(&viewer);
1438: return 0;
1439: }
1441: int main(int argc,char *argv[])
1442: {
1443: MPI_Comm comm;
1444: THI thi;
1446: DM da;
1447: SNES snes;
1449: PetscInitialize(&argc,&argv,0,help);
1450: comm = PETSC_COMM_WORLD;
1452: THICreate(comm,&thi);
1453: {
1454: PetscInt M = 3,N = 3,P = 2;
1455: PetscOptionsBegin(comm,NULL,"Grid resolution options","");
1456: {
1457: PetscOptionsInt("-M","Number of elements in x-direction on coarse level","",M,&M,NULL);
1458: N = M;
1459: PetscOptionsInt("-N","Number of elements in y-direction on coarse level (if different from M)","",N,&N,NULL);
1460: if (thi->coarse2d) {
1461: PetscOptionsInt("-zlevels","Number of elements in z-direction on fine level","",thi->zlevels,&thi->zlevels,NULL);
1462: } else {
1463: PetscOptionsInt("-P","Number of elements in z-direction on coarse level","",P,&P,NULL);
1464: }
1465: }
1466: PetscOptionsEnd();
1467: if (thi->coarse2d) {
1468: DMDACreate2d(comm,DM_BOUNDARY_PERIODIC,DM_BOUNDARY_PERIODIC,DMDA_STENCIL_BOX,N,M,PETSC_DETERMINE,PETSC_DETERMINE,sizeof(Node)/sizeof(PetscScalar),1,0,0,&da);
1469: DMSetFromOptions(da);
1470: DMSetUp(da);
1471: da->ops->refinehierarchy = DMRefineHierarchy_THI;
1472: da->ops->createinterpolation = DMCreateInterpolation_DA_THI;
1474: PetscObjectCompose((PetscObject)da,"THI",(PetscObject)thi);
1475: } else {
1476: DMDACreate3d(comm,DM_BOUNDARY_NONE,DM_BOUNDARY_PERIODIC,DM_BOUNDARY_PERIODIC, DMDA_STENCIL_BOX,P,N,M,1,PETSC_DETERMINE,PETSC_DETERMINE,sizeof(Node)/sizeof(PetscScalar),1,0,0,0,&da);
1477: DMSetFromOptions(da);
1478: DMSetUp(da);
1479: }
1480: DMDASetFieldName(da,0,"x-velocity");
1481: DMDASetFieldName(da,1,"y-velocity");
1482: }
1483: THISetUpDM(thi,da);
1484: if (thi->tridiagonal) da->ops->creatematrix = DMCreateMatrix_THI_Tridiagonal;
1486: { /* Set the fine level matrix type if -da_refine */
1487: PetscInt rlevel,clevel;
1488: DMGetRefineLevel(da,&rlevel);
1489: DMGetCoarsenLevel(da,&clevel);
1490: if (rlevel - clevel > 0) DMSetMatType(da,thi->mattype);
1491: }
1493: DMDASNESSetFunctionLocal(da,ADD_VALUES,(DMDASNESFunction)THIFunctionLocal,thi);
1494: if (thi->tridiagonal) {
1495: DMDASNESSetJacobianLocal(da,(DMDASNESJacobian)THIJacobianLocal_3D_Tridiagonal,thi);
1496: } else {
1497: DMDASNESSetJacobianLocal(da,(DMDASNESJacobian)THIJacobianLocal_3D_Full,thi);
1498: }
1499: DMCoarsenHookAdd(da,DMCoarsenHook_THI,NULL,thi);
1500: DMRefineHookAdd(da,DMRefineHook_THI,NULL,thi);
1502: DMSetApplicationContext(da,thi);
1504: SNESCreate(comm,&snes);
1505: SNESSetDM(snes,da);
1506: DMDestroy(&da);
1507: SNESSetComputeInitialGuess(snes,THIInitial,NULL);
1508: SNESSetFromOptions(snes);
1510: SNESSolve(snes,NULL,NULL);
1512: THISolveStatistics(thi,snes,0,"Full");
1514: {
1515: PetscBool flg;
1516: char filename[PETSC_MAX_PATH_LEN] = "";
1517: PetscOptionsGetString(NULL,NULL,"-o",filename,sizeof(filename),&flg);
1518: if (flg) {
1519: Vec X;
1520: DM dm;
1521: SNESGetSolution(snes,&X);
1522: SNESGetDM(snes,&dm);
1523: THIDAVecView_VTK_XML(thi,dm,X,filename);
1524: }
1525: }
1527: DMDestroy(&da);
1528: SNESDestroy(&snes);
1529: THIDestroy(&thi);
1530: PetscFinalize();
1531: return 0;
1532: }
1534: /*TEST
1536: build:
1537: requires: !single
1539: test:
1540: args: -M 6 -P 4 -da_refine 1 -snes_monitor_short -snes_converged_reason -ksp_monitor_short -ksp_converged_reason -thi_mat_type sbaij -ksp_type fgmres -pc_type mg -pc_mg_type full -mg_levels_ksp_type gmres -mg_levels_ksp_max_it 1 -mg_levels_pc_type icc
1542: test:
1543: suffix: 2
1544: nsize: 2
1545: args: -M 6 -P 4 -thi_hom z -snes_monitor_short -snes_converged_reason -ksp_monitor_short -ksp_converged_reason -thi_mat_type sbaij -ksp_type fgmres -pc_type mg -pc_mg_type full -mg_levels_ksp_type gmres -mg_levels_ksp_max_it 1 -mg_levels_pc_type asm -mg_levels_pc_asm_blocks 6 -mg_levels_0_pc_type redundant -snes_grid_sequence 1 -mat_partitioning_type current -ksp_atol -1
1547: test:
1548: suffix: 3
1549: nsize: 3
1550: args: -M 7 -P 4 -thi_hom z -da_refine 1 -snes_monitor_short -snes_converged_reason -ksp_monitor_short -ksp_converged_reason -thi_mat_type baij -ksp_type fgmres -pc_type mg -pc_mg_type full -mg_levels_pc_asm_type restrict -mg_levels_pc_type asm -mg_levels_pc_asm_blocks 9 -mg_levels_ksp_type gmres -mg_levels_ksp_max_it 1 -mat_partitioning_type current
1552: test:
1553: suffix: 4
1554: nsize: 6
1555: args: -M 4 -P 2 -da_refine_hierarchy_x 1,1,3 -da_refine_hierarchy_y 2,2,1 -da_refine_hierarchy_z 2,2,1 -snes_grid_sequence 3 -ksp_converged_reason -ksp_type fgmres -ksp_rtol 1e-2 -pc_type mg -mg_levels_ksp_type gmres -mg_levels_ksp_max_it 1 -mg_levels_pc_type bjacobi -mg_levels_1_sub_pc_type cholesky -pc_mg_type multiplicative -snes_converged_reason -snes_stol 1e-12 -thi_L 80e3 -thi_alpha 0.05 -thi_friction_m 1 -thi_hom x -snes_view -mg_levels_0_pc_type redundant -mg_levels_0_ksp_type preonly -ksp_atol -1
1557: test:
1558: suffix: 5
1559: nsize: 6
1560: args: -M 12 -P 5 -snes_monitor_short -ksp_converged_reason -pc_type asm -pc_asm_type restrict -dm_mat_type {{aij baij sbaij}}
1562: TEST*/