Actual source code: ex48.c
petsc-3.3-p7 2013-05-11
1: static const char help[] = "Toy hydrostatic ice flow with multigrid in 3D.\n\
2: \n\
3: Solves the hydrostatic (aka Blatter/Pattyn/First Order) equations for ice sheet flow\n\
4: using multigrid. The ice uses a power-law rheology with \"Glen\" exponent 3 (corresponds\n\
5: to p=4/3 in a p-Laplacian). The focus is on ISMIP-HOM experiments which assume periodic\n\
6: boundary conditions in the x- and y-directions.\n\
7: \n\
8: Equations are rescaled so that the domain size and solution are O(1), details of this scaling\n\
9: can be controlled by the options -units_meter, -units_second, and -units_kilogram.\n\
10: \n\
11: A VTK StructuredGrid output file can be written using the option -o filename.vts\n\
12: \n\n";
14: /*
15: The equations for horizontal velocity (u,v) are
17: - [eta (4 u_x + 2 v_y)]_x - [eta (u_y + v_x)]_y - [eta u_z]_z + rho g s_x = 0
18: - [eta (4 v_y + 2 u_x)]_y - [eta (u_y + v_x)]_x - [eta v_z]_z + rho g s_y = 0
20: where
22: eta = B/2 (epsilon + gamma)^((p-2)/2)
24: is the nonlinear effective viscosity with regularization epsilon and hardness parameter B,
25: written in terms of the second invariant
27: gamma = u_x^2 + v_y^2 + u_x v_y + (1/4) (u_y + v_x)^2 + (1/4) u_z^2 + (1/4) v_z^2
29: The surface boundary conditions are the natural conditions. The basal boundary conditions
30: are either no-slip, or Navier (linear) slip with spatially variant friction coefficient beta^2.
32: In the code, the equations for (u,v) are multiplied through by 1/(rho g) so that residuals are O(1).
34: The discretization is Q1 finite elements, managed by a DMDA. The grid is never distorted in the
35: map (x,y) plane, but the bed and surface may be bumpy. This is handled as usual in FEM, through
36: the Jacobian of the coordinate transformation from a reference element to the physical element.
38: Since ice-flow is tightly coupled in the z-direction (within columns), the DMDA is managed
39: specially so that columns are never distributed, and are always contiguous in memory.
40: This amounts to reversing the meaning of X,Y,Z compared to the DMDA's internal interpretation,
41: and then indexing as vec[i][j][k]. The exotic coarse spaces require 2D DMDAs which are made to
42: use compatible domain decomposition relative to the 3D DMDAs.
44: There are two compile-time options:
46: NO_SSE2:
47: If the host supports SSE2, we use integration code that has been vectorized with SSE2
48: intrinsics, unless this macro is defined. The intrinsics speed up integration by about
49: 30% on my architecture (P8700, gcc-4.5 snapshot).
51: COMPUTE_LOWER_TRIANGULAR:
52: The element matrices we assemble are lower-triangular so it is not necessary to compute
53: all entries explicitly. If this macro is defined, the lower-triangular entries are
54: computed explicitly.
56: */
58: #include <petscsnes.h>
59: #include <ctype.h> /* toupper() */
60: #include <petsc-private/daimpl.h> /* There is not yet a public interface to manipulate dm->ops */
62: #if !defined __STDC_VERSION__ || __STDC_VERSION__ < 199901L
63: # if defined __cplusplus /* C++ restrict is nonstandard and compilers have inconsistent rules about where it can be used */
64: # define restrict
65: # else
66: # define restrict PETSC_RESTRICT
67: # endif
68: #endif
69: #if defined __SSE2__
70: # include <emmintrin.h>
71: #endif
73: /* The SSE2 kernels are only for PetscScalar=double on architectures that support it */
74: #define USE_SSE2_KERNELS (!defined NO_SSE2 \
75: && !defined PETSC_USE_COMPLEX \
76: && !defined PETSC_USE_REAL_SINGLE \
77: && defined __SSE2__)
79: static PetscClassId THI_CLASSID;
81: typedef enum {QUAD_GAUSS,QUAD_LOBATTO} QuadratureType;
82: static const char *QuadratureTypes[] = {"gauss","lobatto","QuadratureType","QUAD_",0};
83: static const PetscReal HexQWeights[8] = {1,1,1,1,1,1,1,1};
84: static const PetscReal HexQNodes[] = {-0.57735026918962573, 0.57735026918962573};
85: #define G 0.57735026918962573
86: #define H (0.5*(1.+G))
87: #define L (0.5*(1.-G))
88: #define M (-0.5)
89: #define P (0.5)
90: /* Special quadrature: Lobatto in horizontal, Gauss in vertical */
91: static const PetscReal HexQInterp_Lobatto[8][8] = {{H,0,0,0,L,0,0,0},
92: {0,H,0,0,0,L,0,0},
93: {0,0,H,0,0,0,L,0},
94: {0,0,0,H,0,0,0,L},
95: {L,0,0,0,H,0,0,0},
96: {0,L,0,0,0,H,0,0},
97: {0,0,L,0,0,0,H,0},
98: {0,0,0,L,0,0,0,H}};
99: static const PetscReal HexQDeriv_Lobatto[8][8][3] = {
100: {{M*H,M*H,M},{P*H,0,0} ,{0,0,0} ,{0,P*H,0} ,{M*L,M*L,P},{P*L,0,0} ,{0,0,0} ,{0,P*L,0} },
101: {{M*H,0,0} ,{P*H,M*H,M},{0,P*H,0} ,{0,0,0} ,{M*L,0,0} ,{P*L,M*L,P},{0,P*L,0} ,{0,0,0} },
102: {{0,0,0} ,{0,M*H,0} ,{P*H,P*H,M},{M*H,0,0} ,{0,0,0} ,{0,M*L,0} ,{P*L,P*L,P},{M*L,0,0} },
103: {{0,M*H,0} ,{0,0,0} ,{P*H,0,0} ,{M*H,P*H,M},{0,M*L,0} ,{0,0,0} ,{P*L,0,0} ,{M*L,P*L,P}},
104: {{M*L,M*L,M},{P*L,0,0} ,{0,0,0} ,{0,P*L,0} ,{M*H,M*H,P},{P*H,0,0} ,{0,0,0} ,{0,P*H,0} },
105: {{M*L,0,0} ,{P*L,M*L,M},{0,P*L,0} ,{0,0,0} ,{M*H,0,0} ,{P*H,M*H,P},{0,P*H,0} ,{0,0,0} },
106: {{0,0,0} ,{0,M*L,0} ,{P*L,P*L,M},{M*L,0,0} ,{0,0,0} ,{0,M*H,0} ,{P*H,P*H,P},{M*H,0,0} },
107: {{0,M*L,0} ,{0,0,0} ,{P*L,0,0} ,{M*L,P*L,M},{0,M*H,0} ,{0,0,0} ,{P*H,0,0} ,{M*H,P*H,P}}};
108: /* Stanndard Gauss */
109: static const PetscReal HexQInterp_Gauss[8][8] = {{H*H*H,L*H*H,L*L*H,H*L*H, H*H*L,L*H*L,L*L*L,H*L*L},
110: {L*H*H,H*H*H,H*L*H,L*L*H, L*H*L,H*H*L,H*L*L,L*L*L},
111: {L*L*H,H*L*H,H*H*H,L*H*H, L*L*L,H*L*L,H*H*L,L*H*L},
112: {H*L*H,L*L*H,L*H*H,H*H*H, H*L*L,L*L*L,L*H*L,H*H*L},
113: {H*H*L,L*H*L,L*L*L,H*L*L, H*H*H,L*H*H,L*L*H,H*L*H},
114: {L*H*L,H*H*L,H*L*L,L*L*L, L*H*H,H*H*H,H*L*H,L*L*H},
115: {L*L*L,H*L*L,H*H*L,L*H*L, L*L*H,H*L*H,H*H*H,L*H*H},
116: {H*L*L,L*L*L,L*H*L,H*H*L, H*L*H,L*L*H,L*H*H,H*H*H}};
117: static const PetscReal HexQDeriv_Gauss[8][8][3] = {
118: {{M*H*H,H*M*H,H*H*M},{P*H*H,L*M*H,L*H*M},{P*L*H,L*P*H,L*L*M},{M*L*H,H*P*H,H*L*M}, {M*H*L,H*M*L,H*H*P},{P*H*L,L*M*L,L*H*P},{P*L*L,L*P*L,L*L*P},{M*L*L,H*P*L,H*L*P}},
119: {{M*H*H,L*M*H,L*H*M},{P*H*H,H*M*H,H*H*M},{P*L*H,H*P*H,H*L*M},{M*L*H,L*P*H,L*L*M}, {M*H*L,L*M*L,L*H*P},{P*H*L,H*M*L,H*H*P},{P*L*L,H*P*L,H*L*P},{M*L*L,L*P*L,L*L*P}},
120: {{M*L*H,L*M*H,L*L*M},{P*L*H,H*M*H,H*L*M},{P*H*H,H*P*H,H*H*M},{M*H*H,L*P*H,L*H*M}, {M*L*L,L*M*L,L*L*P},{P*L*L,H*M*L,H*L*P},{P*H*L,H*P*L,H*H*P},{M*H*L,L*P*L,L*H*P}},
121: {{M*L*H,H*M*H,H*L*M},{P*L*H,L*M*H,L*L*M},{P*H*H,L*P*H,L*H*M},{M*H*H,H*P*H,H*H*M}, {M*L*L,H*M*L,H*L*P},{P*L*L,L*M*L,L*L*P},{P*H*L,L*P*L,L*H*P},{M*H*L,H*P*L,H*H*P}},
122: {{M*H*L,H*M*L,H*H*M},{P*H*L,L*M*L,L*H*M},{P*L*L,L*P*L,L*L*M},{M*L*L,H*P*L,H*L*M}, {M*H*H,H*M*H,H*H*P},{P*H*H,L*M*H,L*H*P},{P*L*H,L*P*H,L*L*P},{M*L*H,H*P*H,H*L*P}},
123: {{M*H*L,L*M*L,L*H*M},{P*H*L,H*M*L,H*H*M},{P*L*L,H*P*L,H*L*M},{M*L*L,L*P*L,L*L*M}, {M*H*H,L*M*H,L*H*P},{P*H*H,H*M*H,H*H*P},{P*L*H,H*P*H,H*L*P},{M*L*H,L*P*H,L*L*P}},
124: {{M*L*L,L*M*L,L*L*M},{P*L*L,H*M*L,H*L*M},{P*H*L,H*P*L,H*H*M},{M*H*L,L*P*L,L*H*M}, {M*L*H,L*M*H,L*L*P},{P*L*H,H*M*H,H*L*P},{P*H*H,H*P*H,H*H*P},{M*H*H,L*P*H,L*H*P}},
125: {{M*L*L,H*M*L,H*L*M},{P*L*L,L*M*L,L*L*M},{P*H*L,L*P*L,L*H*M},{M*H*L,H*P*L,H*H*M}, {M*L*H,H*M*H,H*L*P},{P*L*H,L*M*H,L*L*P},{P*H*H,L*P*H,L*H*P},{M*H*H,H*P*H,H*H*P}}};
126: static const PetscReal (*HexQInterp)[8],(*HexQDeriv)[8][3];
127: /* Standard 2x2 Gauss quadrature for the bottom layer. */
128: static const PetscReal QuadQInterp[4][4] = {{H*H,L*H,L*L,H*L},
129: {L*H,H*H,H*L,L*L},
130: {L*L,H*L,H*H,L*H},
131: {H*L,L*L,L*H,H*H}};
132: static const PetscReal QuadQDeriv[4][4][2] = {
133: {{M*H,M*H},{P*H,M*L},{P*L,P*L},{M*L,P*H}},
134: {{M*H,M*L},{P*H,M*H},{P*L,P*H},{M*L,P*L}},
135: {{M*L,M*L},{P*L,M*H},{P*H,P*H},{M*H,P*L}},
136: {{M*L,M*H},{P*L,M*L},{P*H,P*L},{M*H,P*H}}};
137: #undef G
138: #undef H
139: #undef L
140: #undef M
141: #undef P
143: #define HexExtract(x,i,j,k,n) do { \
144: (n)[0] = (x)[i][j][k]; \
145: (n)[1] = (x)[i+1][j][k]; \
146: (n)[2] = (x)[i+1][j+1][k]; \
147: (n)[3] = (x)[i][j+1][k]; \
148: (n)[4] = (x)[i][j][k+1]; \
149: (n)[5] = (x)[i+1][j][k+1]; \
150: (n)[6] = (x)[i+1][j+1][k+1]; \
151: (n)[7] = (x)[i][j+1][k+1]; \
152: } while (0)
154: #define HexExtractRef(x,i,j,k,n) do { \
155: (n)[0] = &(x)[i][j][k]; \
156: (n)[1] = &(x)[i+1][j][k]; \
157: (n)[2] = &(x)[i+1][j+1][k]; \
158: (n)[3] = &(x)[i][j+1][k]; \
159: (n)[4] = &(x)[i][j][k+1]; \
160: (n)[5] = &(x)[i+1][j][k+1]; \
161: (n)[6] = &(x)[i+1][j+1][k+1]; \
162: (n)[7] = &(x)[i][j+1][k+1]; \
163: } while (0)
165: #define QuadExtract(x,i,j,n) do { \
166: (n)[0] = (x)[i][j]; \
167: (n)[1] = (x)[i+1][j]; \
168: (n)[2] = (x)[i+1][j+1]; \
169: (n)[3] = (x)[i][j+1]; \
170: } while (0)
172: static PetscScalar Sqr(PetscScalar a) {return a*a;}
174: static void HexGrad(const PetscReal dphi[][3],const PetscReal zn[],PetscReal dz[])
175: {
176: PetscInt i;
177: dz[0] = dz[1] = dz[2] = 0;
178: for (i=0; i<8; i++) {
179: dz[0] += dphi[i][0] * zn[i];
180: dz[1] += dphi[i][1] * zn[i];
181: dz[2] += dphi[i][2] * zn[i];
182: }
183: }
185: static void HexComputeGeometry(PetscInt q,PetscReal hx,PetscReal hy,const PetscReal dz[restrict],PetscReal phi[restrict],PetscReal dphi[restrict][3],PetscReal *restrict jw)
186: {
187: const PetscReal
188: jac[3][3] = {{hx/2,0,0}, {0,hy/2,0}, {dz[0],dz[1],dz[2]}}
189: ,ijac[3][3] = {{1/jac[0][0],0,0}, {0,1/jac[1][1],0}, {-jac[2][0]/(jac[0][0]*jac[2][2]),-jac[2][1]/(jac[1][1]*jac[2][2]),1/jac[2][2]}}
190: ,jdet = jac[0][0]*jac[1][1]*jac[2][2];
191: PetscInt i;
193: for (i=0; i<8; i++) {
194: const PetscReal *dphir = HexQDeriv[q][i];
195: phi[i] = HexQInterp[q][i];
196: dphi[i][0] = dphir[0]*ijac[0][0] + dphir[1]*ijac[1][0] + dphir[2]*ijac[2][0];
197: dphi[i][1] = dphir[0]*ijac[0][1] + dphir[1]*ijac[1][1] + dphir[2]*ijac[2][1];
198: dphi[i][2] = dphir[0]*ijac[0][2] + dphir[1]*ijac[1][2] + dphir[2]*ijac[2][2];
199: }
200: *jw = 1.0 * jdet;
201: }
203: typedef struct _p_THI *THI;
204: typedef struct _n_Units *Units;
206: typedef struct {
207: PetscScalar u,v;
208: } Node;
210: typedef struct {
211: PetscScalar b; /* bed */
212: PetscScalar h; /* thickness */
213: PetscScalar beta2; /* friction */
214: } PrmNode;
216: typedef struct {
217: PetscReal min,max,cmin,cmax;
218: } PRange;
220: typedef enum {THIASSEMBLY_TRIDIAGONAL,THIASSEMBLY_FULL} THIAssemblyMode;
222: struct _p_THI {
223: PETSCHEADER(int);
224: void (*initialize)(THI,PetscReal x,PetscReal y,PrmNode *p);
225: PetscInt zlevels;
226: PetscReal Lx,Ly,Lz; /* Model domain */
227: PetscReal alpha; /* Bed angle */
228: Units units;
229: PetscReal dirichlet_scale;
230: PetscReal ssa_friction_scale;
231: PRange eta;
232: PRange beta2;
233: struct {
234: PetscReal Bd2,eps,exponent;
235: } viscosity;
236: struct {
237: PetscReal irefgam,eps2,exponent,refvel,epsvel;
238: } friction;
239: PetscReal rhog;
240: PetscBool no_slip;
241: PetscBool tridiagonal;
242: PetscBool coarse2d;
243: PetscBool verbose;
244: MatType mattype;
245: };
247: struct _n_Units {
248: /* fundamental */
249: PetscReal meter;
250: PetscReal kilogram;
251: PetscReal second;
252: /* derived */
253: PetscReal Pascal;
254: PetscReal year;
255: };
257: typedef PetscErrorCode (*DMDASNESJacobianLocal)(DMDALocalInfo*,void*,Mat,Mat,MatStructure*,void*);
258: typedef PetscErrorCode (*DMDASNESFunctionLocal)(DMDALocalInfo*,void*,void*,void*);
259: static PetscErrorCode THIJacobianLocal_3D_Full(DMDALocalInfo*,Node***,Mat,Mat,MatStructure*,THI);
260: static PetscErrorCode THIJacobianLocal_3D_Tridiagonal(DMDALocalInfo*,Node***,Mat,THI);
261: static PetscErrorCode THIJacobianLocal_2D(DMDALocalInfo*,Node**,Mat,THI);
263: static void PrmHexGetZ(const PrmNode pn[],PetscInt k,PetscInt zm,PetscReal zn[])
264: {
265: const PetscScalar zm1 = zm-1,
266: znl[8] = {pn[0].b + pn[0].h*(PetscScalar)k/zm1,
267: pn[1].b + pn[1].h*(PetscScalar)k/zm1,
268: pn[2].b + pn[2].h*(PetscScalar)k/zm1,
269: pn[3].b + pn[3].h*(PetscScalar)k/zm1,
270: pn[0].b + pn[0].h*(PetscScalar)(k+1)/zm1,
271: pn[1].b + pn[1].h*(PetscScalar)(k+1)/zm1,
272: pn[2].b + pn[2].h*(PetscScalar)(k+1)/zm1,
273: pn[3].b + pn[3].h*(PetscScalar)(k+1)/zm1};
274: PetscInt i;
275: for (i=0; i<8; i++) zn[i] = PetscRealPart(znl[i]);
276: }
278: /* Tests A and C are from the ISMIP-HOM paper (Pattyn et al. 2008) */
279: static void THIInitialize_HOM_A(THI thi,PetscReal x,PetscReal y,PrmNode *p)
280: {
281: Units units = thi->units;
282: PetscReal s = -x*sin(thi->alpha);
283: p->b = s - 1000*units->meter + 500*units->meter * sin(x*2*PETSC_PI/thi->Lx) * sin(y*2*PETSC_PI/thi->Ly);
284: p->h = s - p->b;
285: p->beta2 = 1e30;
286: }
288: static void THIInitialize_HOM_C(THI thi,PetscReal x,PetscReal y,PrmNode *p)
289: {
290: Units units = thi->units;
291: PetscReal s = -x*sin(thi->alpha);
292: p->b = s - 1000*units->meter;
293: p->h = s - p->b;
294: /* tau_b = beta2 v is a stress (Pa) */
295: p->beta2 = 1000 * (1 + sin(x*2*PETSC_PI/thi->Lx)*sin(y*2*PETSC_PI/thi->Ly)) * units->Pascal * units->year / units->meter;
296: }
298: /* These are just toys */
300: /* Same bed as test A, free slip everywhere except for a discontinuous jump to a circular sticky region in the middle. */
301: static void THIInitialize_HOM_X(THI thi,PetscReal xx,PetscReal yy,PrmNode *p)
302: {
303: Units units = thi->units;
304: PetscReal x = xx*2*PETSC_PI/thi->Lx - PETSC_PI,y = yy*2*PETSC_PI/thi->Ly - PETSC_PI; /* [-pi,pi] */
305: PetscReal r = sqrt(x*x + y*y),s = -x*sin(thi->alpha);
306: p->b = s - 1000*units->meter + 500*units->meter * sin(x + PETSC_PI) * sin(y + PETSC_PI);
307: p->h = s - p->b;
308: p->beta2 = 1000 * (r < 1 ? 2 : 0) * units->Pascal * units->year / units->meter;
309: }
311: /* Like Z, but with 200 meter cliffs */
312: static void THIInitialize_HOM_Y(THI thi,PetscReal xx,PetscReal yy,PrmNode *p)
313: {
314: Units units = thi->units;
315: PetscReal x = xx*2*PETSC_PI/thi->Lx - PETSC_PI,y = yy*2*PETSC_PI/thi->Ly - PETSC_PI; /* [-pi,pi] */
316: PetscReal r = sqrt(x*x + y*y),s = -x*sin(thi->alpha);
317: p->b = s - 1000*units->meter + 500*units->meter * sin(x + PETSC_PI) * sin(y + PETSC_PI);
318: if (PetscRealPart(p->b) > -700*units->meter) p->b += 200*units->meter;
319: p->h = s - p->b;
320: p->beta2 = 1000 * (1. + sin(sqrt(16*r))/sqrt(1e-2 + 16*r)*cos(x*3/2)*cos(y*3/2)) * units->Pascal * units->year / units->meter;
321: }
323: /* Same bed as A, smoothly varying slipperiness, similar to MATLAB's "sombrero" (uncorrelated with bathymetry) */
324: static void THIInitialize_HOM_Z(THI thi,PetscReal xx,PetscReal yy,PrmNode *p)
325: {
326: Units units = thi->units;
327: PetscReal x = xx*2*PETSC_PI/thi->Lx - PETSC_PI,y = yy*2*PETSC_PI/thi->Ly - PETSC_PI; /* [-pi,pi] */
328: PetscReal r = sqrt(x*x + y*y),s = -x*sin(thi->alpha);
329: p->b = s - 1000*units->meter + 500*units->meter * sin(x + PETSC_PI) * sin(y + PETSC_PI);
330: p->h = s - p->b;
331: p->beta2 = 1000 * (1. + sin(sqrt(16*r))/sqrt(1e-2 + 16*r)*cos(x*3/2)*cos(y*3/2)) * units->Pascal * units->year / units->meter;
332: }
334: static void THIFriction(THI thi,PetscReal rbeta2,PetscReal gam,PetscReal *beta2,PetscReal *dbeta2)
335: {
336: if (thi->friction.irefgam == 0) {
337: Units units = thi->units;
338: thi->friction.irefgam = 1./(0.5*PetscSqr(thi->friction.refvel * units->meter / units->year));
339: thi->friction.eps2 = 0.5*PetscSqr(thi->friction.epsvel * units->meter / units->year) * thi->friction.irefgam;
340: }
341: if (thi->friction.exponent == 0) {
342: *beta2 = rbeta2;
343: *dbeta2 = 0;
344: } else {
345: *beta2 = rbeta2 * pow(thi->friction.eps2 + gam*thi->friction.irefgam,thi->friction.exponent);
346: *dbeta2 = thi->friction.exponent * *beta2 / (thi->friction.eps2 + gam*thi->friction.irefgam) * thi->friction.irefgam;
347: }
348: }
350: static void THIViscosity(THI thi,PetscReal gam,PetscReal *eta,PetscReal *deta)
351: {
352: PetscReal Bd2,eps,exponent;
353: if (thi->viscosity.Bd2 == 0) {
354: Units units = thi->units;
355: const PetscReal
356: n = 3., /* Glen exponent */
357: p = 1. + 1./n, /* for Stokes */
358: A = 1.e-16 * pow(units->Pascal,-n) / units->year, /* softness parameter (Pa^{-n}/s) */
359: B = pow(A,-1./n); /* hardness parameter */
360: thi->viscosity.Bd2 = B/2;
361: thi->viscosity.exponent = (p-2)/2;
362: thi->viscosity.eps = 0.5*PetscSqr(1e-5 / units->year);
363: }
364: Bd2 = thi->viscosity.Bd2;
365: exponent = thi->viscosity.exponent;
366: eps = thi->viscosity.eps;
367: *eta = Bd2 * pow(eps + gam,exponent);
368: *deta = exponent * (*eta) / (eps + gam);
369: }
371: static void RangeUpdate(PetscReal *min,PetscReal *max,PetscReal x)
372: {
373: if (x < *min) *min = x;
374: if (x > *max) *max = x;
375: }
377: static void PRangeClear(PRange *p)
378: {
379: p->cmin = p->min = 1e100;
380: p->cmax = p->max = -1e100;
381: }
385: static PetscErrorCode PRangeMinMax(PRange *p,PetscReal min,PetscReal max)
386: {
389: p->cmin = min;
390: p->cmax = max;
391: if (min < p->min) p->min = min;
392: if (max > p->max) p->max = max;
393: return(0);
394: }
398: static PetscErrorCode THIDestroy(THI *thi)
399: {
403: if (!*thi) return(0);
404: if (--((PetscObject)(*thi))->refct > 0) {*thi = 0; return(0);}
405: PetscFree((*thi)->units);
406: PetscFree((*thi)->mattype);
407: PetscHeaderDestroy(thi);
408: return(0);
409: }
413: static PetscErrorCode THICreate(MPI_Comm comm,THI *inthi)
414: {
415: static PetscBool registered = PETSC_FALSE;
416: THI thi;
417: Units units;
421: *inthi = 0;
422: if (!registered) {
423: PetscClassIdRegister("Toy Hydrostatic Ice",&THI_CLASSID);
424: registered = PETSC_TRUE;
425: }
426: PetscHeaderCreate(thi,_p_THI,0,THI_CLASSID,-1,"THI","Toy Hydrostatic Ice","",comm,THIDestroy,0);
428: PetscNew(struct _n_Units,&thi->units);
429: units = thi->units;
430: units->meter = 1e-2;
431: units->second = 1e-7;
432: units->kilogram = 1e-12;
433: PetscOptionsBegin(comm,NULL,"Scaled units options","");
434: {
435: PetscOptionsReal("-units_meter","1 meter in scaled length units","",units->meter,&units->meter,NULL);
436: PetscOptionsReal("-units_second","1 second in scaled time units","",units->second,&units->second,NULL);
437: PetscOptionsReal("-units_kilogram","1 kilogram in scaled mass units","",units->kilogram,&units->kilogram,NULL);
438: }
439: PetscOptionsEnd();
440: units->Pascal = units->kilogram / (units->meter * PetscSqr(units->second));
441: units->year = 31556926. * units->second, /* seconds per year */
443: thi->Lx = 10.e3;
444: thi->Ly = 10.e3;
445: thi->Lz = 1000;
446: thi->dirichlet_scale = 1;
447: thi->verbose = PETSC_FALSE;
449: PetscOptionsBegin(comm,NULL,"Toy Hydrostatic Ice options","");
450: {
451: QuadratureType quad = QUAD_GAUSS;
452: char homexp[] = "A";
453: char mtype[256] = MATSBAIJ;
454: PetscReal L,m = 1.0;
455: PetscBool flg;
456: L = thi->Lx;
457: PetscOptionsReal("-thi_L","Domain size (m)","",L,&L,&flg);
458: if (flg) thi->Lx = thi->Ly = L;
459: PetscOptionsReal("-thi_Lx","X Domain size (m)","",thi->Lx,&thi->Lx,NULL);
460: PetscOptionsReal("-thi_Ly","Y Domain size (m)","",thi->Ly,&thi->Ly,NULL);
461: PetscOptionsReal("-thi_Lz","Z Domain size (m)","",thi->Lz,&thi->Lz,NULL);
462: PetscOptionsString("-thi_hom","ISMIP-HOM experiment (A or C)","",homexp,homexp,sizeof(homexp),NULL);
463: switch (homexp[0] = toupper(homexp[0])) {
464: case 'A':
465: thi->initialize = THIInitialize_HOM_A;
466: thi->no_slip = PETSC_TRUE;
467: thi->alpha = 0.5;
468: break;
469: case 'C':
470: thi->initialize = THIInitialize_HOM_C;
471: thi->no_slip = PETSC_FALSE;
472: thi->alpha = 0.1;
473: break;
474: case 'X':
475: thi->initialize = THIInitialize_HOM_X;
476: thi->no_slip = PETSC_FALSE;
477: thi->alpha = 0.3;
478: break;
479: case 'Y':
480: thi->initialize = THIInitialize_HOM_Y;
481: thi->no_slip = PETSC_FALSE;
482: thi->alpha = 0.5;
483: break;
484: case 'Z':
485: thi->initialize = THIInitialize_HOM_Z;
486: thi->no_slip = PETSC_FALSE;
487: thi->alpha = 0.5;
488: break;
489: default:
490: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"HOM experiment '%c' not implemented",homexp[0]);
491: }
492: PetscOptionsEnum("-thi_quadrature","Quadrature to use for 3D elements","",QuadratureTypes,(PetscEnum)quad,(PetscEnum*)&quad,NULL);
493: switch (quad) {
494: case QUAD_GAUSS:
495: HexQInterp = HexQInterp_Gauss;
496: HexQDeriv = HexQDeriv_Gauss;
497: break;
498: case QUAD_LOBATTO:
499: HexQInterp = HexQInterp_Lobatto;
500: HexQDeriv = HexQDeriv_Lobatto;
501: break;
502: }
503: PetscOptionsReal("-thi_alpha","Bed angle (degrees)","",thi->alpha,&thi->alpha,NULL);
504: thi->friction.refvel = 100.;
505: thi->friction.epsvel = 1.;
506: PetscOptionsReal("-thi_friction_refvel","Reference velocity for sliding","",thi->friction.refvel,&thi->friction.refvel,NULL);
507: PetscOptionsReal("-thi_friction_epsvel","Regularization velocity for sliding","",thi->friction.epsvel,&thi->friction.epsvel,NULL);
508: PetscOptionsReal("-thi_friction_m","Friction exponent, 0=Coulomb, 1=Navier","",m,&m,NULL);
509: thi->friction.exponent = (m-1)/2;
510: PetscOptionsReal("-thi_dirichlet_scale","Scale Dirichlet boundary conditions by this factor","",thi->dirichlet_scale,&thi->dirichlet_scale,NULL);
511: PetscOptionsReal("-thi_ssa_friction_scale","Scale slip boundary conditions by this factor in SSA (2D) assembly","",thi->ssa_friction_scale,&thi->ssa_friction_scale,NULL);
512: PetscOptionsBool("-thi_coarse2d","Use a 2D coarse space corresponding to SSA","",thi->coarse2d,&thi->coarse2d,NULL);
513: PetscOptionsBool("-thi_tridiagonal","Assemble a tridiagonal system (column coupling only) on the finest level","",thi->tridiagonal,&thi->tridiagonal,NULL);
514: PetscOptionsList("-thi_mat_type","Matrix type","MatSetType",MatList,mtype,(char*)mtype,sizeof(mtype),NULL);
515: PetscStrallocpy(mtype,&thi->mattype);
516: PetscOptionsBool("-thi_verbose","Enable verbose output (like matrix sizes and statistics)","",thi->verbose,&thi->verbose,NULL);
517: }
518: PetscOptionsEnd();
520: /* dimensionalize */
521: thi->Lx *= units->meter;
522: thi->Ly *= units->meter;
523: thi->Lz *= units->meter;
524: thi->alpha *= PETSC_PI / 180;
526: PRangeClear(&thi->eta);
527: PRangeClear(&thi->beta2);
529: {
530: PetscReal u = 1000*units->meter/(3e7*units->second),
531: gradu = u / (100*units->meter),eta,deta,
532: rho = 910 * units->kilogram/pow(units->meter,3),
533: grav = 9.81 * units->meter/PetscSqr(units->second),
534: driving = rho * grav * sin(thi->alpha) * 1000*units->meter;
535: THIViscosity(thi,0.5*gradu*gradu,&eta,&deta);
536: thi->rhog = rho * grav;
537: if (thi->verbose) {
538: PetscPrintf(((PetscObject)thi)->comm,"Units: meter %8.2g second %8.2g kg %8.2g Pa %8.2g\n",units->meter,units->second,units->kilogram,units->Pascal);
539: PetscPrintf(((PetscObject)thi)->comm,"Domain (%6.2g,%6.2g,%6.2g), pressure %8.2g, driving stress %8.2g\n",thi->Lx,thi->Ly,thi->Lz,rho*grav*1e3*units->meter,driving);
540: PetscPrintf(((PetscObject)thi)->comm,"Large velocity 1km/a %8.2g, velocity gradient %8.2g, eta %8.2g, stress %8.2g, ratio %8.2g\n",u,gradu,eta,2*eta*gradu,2*eta*gradu/driving);
541: THIViscosity(thi,0.5*PetscSqr(1e-3*gradu),&eta,&deta);
542: PetscPrintf(((PetscObject)thi)->comm,"Small velocity 1m/a %8.2g, velocity gradient %8.2g, eta %8.2g, stress %8.2g, ratio %8.2g\n",1e-3*u,1e-3*gradu,eta,2*eta*1e-3*gradu,2*eta*1e-3*gradu/driving);
543: }
544: }
546: *inthi = thi;
547: return(0);
548: }
552: static PetscErrorCode THIInitializePrm(THI thi,DM da2prm,Vec prm)
553: {
554: PrmNode **p;
555: PetscInt i,j,xs,xm,ys,ym,mx,my;
559: DMDAGetGhostCorners(da2prm,&ys,&xs,0,&ym,&xm,0);
560: DMDAGetInfo(da2prm,0, &my,&mx,0, 0,0,0, 0,0,0,0,0,0);
561: DMDAVecGetArray(da2prm,prm,&p);
562: for (i=xs; i<xs+xm; i++) {
563: for (j=ys; j<ys+ym; j++) {
564: PetscReal xx = thi->Lx*i/mx,yy = thi->Ly*j/my;
565: thi->initialize(thi,xx,yy,&p[i][j]);
566: }
567: }
568: DMDAVecRestoreArray(da2prm,prm,&p);
569: return(0);
570: }
574: static PetscErrorCode THISetUpDM(THI thi,DM dm)
575: {
577: PetscInt refinelevel,coarsenlevel,level,dim,Mx,My,Mz,mx,my,s;
578: DMDAStencilType st;
579: DM da2prm;
580: Vec X;
583: DMDAGetInfo(dm,&dim, &Mz,&My,&Mx, 0,&my,&mx, 0,&s,0,0,0,&st);
584: if (dim == 2) {
585: DMDAGetInfo(dm,&dim, &My,&Mx,0, &my,&mx,0, 0,&s,0,0,0,&st);
586: }
587: DMGetRefineLevel(dm,&refinelevel);
588: DMGetCoarsenLevel(dm,&coarsenlevel);
589: level = refinelevel - coarsenlevel;
590: DMDACreate2d(((PetscObject)thi)->comm,DMDA_BOUNDARY_PERIODIC,DMDA_BOUNDARY_PERIODIC,st,My,Mx,my,mx,sizeof(PrmNode)/sizeof(PetscScalar),s,0,0,&da2prm);
591: DMCreateLocalVector(da2prm,&X);
592: {
593: PetscReal Lx = thi->Lx / thi->units->meter,Ly = thi->Ly / thi->units->meter,Lz = thi->Lz / thi->units->meter;
594: if (dim == 2) {
595: PetscPrintf(((PetscObject)thi)->comm,"Level %D domain size (m) %8.2g x %8.2g, num elements %3d x %3d (%8d), size (m) %g x %g\n",level,Lx,Ly,Mx,My,Mx*My,Lx/Mx,Ly/My);
596: } else {
597: PetscPrintf(((PetscObject)thi)->comm,"Level %D domain size (m) %8.2g x %8.2g x %8.2g, num elements %3d x %3d x %3d (%8d), size (m) %g x %g x %g\n",level,Lx,Ly,Lz,Mx,My,Mz,Mx*My*Mz,Lx/Mx,Ly/My,1000./(Mz-1));
598: }
599: }
600: THIInitializePrm(thi,da2prm,X);
601: if (thi->tridiagonal) { /* Reset coarse Jacobian evaluation */
602: DMDASNESSetJacobianLocal(dm,(DMDASNESJacobianLocal)THIJacobianLocal_3D_Full,thi);
603: }
604: if (thi->coarse2d) {
605: DMDASNESSetJacobianLocal(dm,(DMDASNESJacobianLocal)THIJacobianLocal_2D,thi);
606: }
607: PetscObjectCompose((PetscObject)dm,"DMDA2Prm",(PetscObject)da2prm);
608: PetscObjectCompose((PetscObject)dm,"DMDA2Prm_Vec",(PetscObject)X);
609: DMDestroy(&da2prm);
610: VecDestroy(&X);
611: return(0);
612: }
616: static PetscErrorCode DMCoarsenHook_THI(DM dmf,DM dmc,void *ctx)
617: {
618: THI thi = (THI)ctx;
620: PetscInt rlevel,clevel;
623: THISetUpDM(thi,dmc);
624: DMGetRefineLevel(dmc,&rlevel);
625: DMGetCoarsenLevel(dmc,&clevel);
626: if (rlevel-clevel == 0) {DMSetMatType(dmc,MATAIJ);}
627: DMCoarsenHookAdd(dmc,DMCoarsenHook_THI,PETSC_NULL,thi);
628: return(0);
629: }
633: static PetscErrorCode DMRefineHook_THI(DM dmc,DM dmf,void *ctx)
634: {
635: THI thi = (THI)ctx;
639: THISetUpDM(thi,dmf);
640: DMSetMatType(dmf,thi->mattype);
641: DMRefineHookAdd(dmf,DMRefineHook_THI,PETSC_NULL,thi);
642: /* With grid sequencing, a formerly-refined DM will later be coarsened by PCSetUp_MG */
643: DMCoarsenHookAdd(dmf,DMCoarsenHook_THI,PETSC_NULL,thi);
644: return(0);
645: }
649: static PetscErrorCode THIDAGetPrm(DM da,PrmNode ***prm)
650: {
652: DM da2prm;
653: Vec X;
656: PetscObjectQuery((PetscObject)da,"DMDA2Prm",(PetscObject*)&da2prm);
657: if (!da2prm) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"No DMDA2Prm composed with given DMDA");
658: PetscObjectQuery((PetscObject)da,"DMDA2Prm_Vec",(PetscObject*)&X);
659: if (!X) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"No DMDA2Prm_Vec composed with given DMDA");
660: DMDAVecGetArray(da2prm,X,prm);
661: return(0);
662: }
666: static PetscErrorCode THIDARestorePrm(DM da,PrmNode ***prm)
667: {
669: DM da2prm;
670: Vec X;
673: PetscObjectQuery((PetscObject)da,"DMDA2Prm",(PetscObject*)&da2prm);
674: if (!da2prm) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"No DMDA2Prm composed with given DMDA");
675: PetscObjectQuery((PetscObject)da,"DMDA2Prm_Vec",(PetscObject*)&X);
676: if (!X) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"No DMDA2Prm_Vec composed with given DMDA");
677: DMDAVecRestoreArray(da2prm,X,prm);
678: return(0);
679: }
683: static PetscErrorCode THIInitial(DM da,Vec X)
684: {
685: THI thi;
686: PetscInt i,j,k,xs,xm,ys,ym,zs,zm,mx,my;
687: PetscReal hx,hy;
688: PrmNode **prm;
689: Node ***x;
693: DMGetApplicationContext(da,&thi);
694: DMDAGetInfo(da,0, 0,&my,&mx, 0,0,0, 0,0,0,0,0,0);
695: DMDAGetCorners(da,&zs,&ys,&xs,&zm,&ym,&xm);
696: DMDAVecGetArray(da,X,&x);
697: THIDAGetPrm(da,&prm);
698: hx = thi->Lx / mx;
699: hy = thi->Ly / my;
700: for (i=xs; i<xs+xm; i++) {
701: for (j=ys; j<ys+ym; j++) {
702: for (k=zs; k<zs+zm; k++) {
703: const PetscScalar zm1 = zm-1,
704: drivingx = thi->rhog * (prm[i+1][j].b+prm[i+1][j].h - prm[i-1][j].b-prm[i-1][j].h) / (2*hx),
705: drivingy = thi->rhog * (prm[i][j+1].b+prm[i][j+1].h - prm[i][j-1].b-prm[i][j-1].h) / (2*hy);
706: x[i][j][k].u = 0. * drivingx * prm[i][j].h*(PetscScalar)k/zm1;
707: x[i][j][k].v = 0. * drivingy * prm[i][j].h*(PetscScalar)k/zm1;
708: }
709: }
710: }
711: DMDAVecRestoreArray(da,X,&x);
712: THIDARestorePrm(da,&prm);
713: return(0);
714: }
716: static void PointwiseNonlinearity(THI thi,const Node n[restrict],const PetscReal phi[restrict],PetscReal dphi[restrict][3],PetscScalar *restrict u,PetscScalar *restrict v,PetscScalar du[restrict],PetscScalar dv[restrict],PetscReal *eta,PetscReal *deta)
717: {
718: PetscInt l,ll;
719: PetscScalar gam;
721: du[0] = du[1] = du[2] = 0;
722: dv[0] = dv[1] = dv[2] = 0;
723: *u = 0;
724: *v = 0;
725: for (l=0; l<8; l++) {
726: *u += phi[l] * n[l].u;
727: *v += phi[l] * n[l].v;
728: for (ll=0; ll<3; ll++) {
729: du[ll] += dphi[l][ll] * n[l].u;
730: dv[ll] += dphi[l][ll] * n[l].v;
731: }
732: }
733: gam = Sqr(du[0]) + Sqr(dv[1]) + du[0]*dv[1] + 0.25*Sqr(du[1]+dv[0]) + 0.25*Sqr(du[2]) + 0.25*Sqr(dv[2]);
734: THIViscosity(thi,PetscRealPart(gam),eta,deta);
735: }
737: static void PointwiseNonlinearity2D(THI thi,Node n[],PetscReal phi[],PetscReal dphi[4][2],PetscScalar *u,PetscScalar *v,PetscScalar du[],PetscScalar dv[],PetscReal *eta,PetscReal *deta)
738: {
739: PetscInt l,ll;
740: PetscScalar gam;
742: du[0] = du[1] = 0;
743: dv[0] = dv[1] = 0;
744: *u = 0;
745: *v = 0;
746: for (l=0; l<4; l++) {
747: *u += phi[l] * n[l].u;
748: *v += phi[l] * n[l].v;
749: for (ll=0; ll<2; ll++) {
750: du[ll] += dphi[l][ll] * n[l].u;
751: dv[ll] += dphi[l][ll] * n[l].v;
752: }
753: }
754: gam = Sqr(du[0]) + Sqr(dv[1]) + du[0]*dv[1] + 0.25*Sqr(du[1]+dv[0]);
755: THIViscosity(thi,PetscRealPart(gam),eta,deta);
756: }
760: static PetscErrorCode THIFunctionLocal(DMDALocalInfo *info,Node ***x,Node ***f,THI thi)
761: {
762: PetscInt xs,ys,xm,ym,zm,i,j,k,q,l;
763: PetscReal hx,hy,etamin,etamax,beta2min,beta2max;
764: PrmNode **prm;
768: xs = info->zs;
769: ys = info->ys;
770: xm = info->zm;
771: ym = info->ym;
772: zm = info->xm;
773: hx = thi->Lx / info->mz;
774: hy = thi->Ly / info->my;
776: etamin = 1e100;
777: etamax = 0;
778: beta2min = 1e100;
779: beta2max = 0;
781: THIDAGetPrm(info->da,&prm);
783: for (i=xs; i<xs+xm; i++) {
784: for (j=ys; j<ys+ym; j++) {
785: PrmNode pn[4];
786: QuadExtract(prm,i,j,pn);
787: for (k=0; k<zm-1; k++) {
788: PetscInt ls = 0;
789: Node n[8],*fn[8];
790: PetscReal zn[8],etabase = 0;
791: PrmHexGetZ(pn,k,zm,zn);
792: HexExtract(x,i,j,k,n);
793: HexExtractRef(f,i,j,k,fn);
794: if (thi->no_slip && k == 0) {
795: for (l=0; l<4; l++) n[l].u = n[l].v = 0;
796: /* The first 4 basis functions lie on the bottom layer, so their contribution is exactly 0, hence we can skip them */
797: ls = 4;
798: }
799: for (q=0; q<8; q++) {
800: PetscReal dz[3],phi[8],dphi[8][3],jw,eta,deta;
801: PetscScalar du[3],dv[3],u,v;
802: HexGrad(HexQDeriv[q],zn,dz);
803: HexComputeGeometry(q,hx,hy,dz,phi,dphi,&jw);
804: PointwiseNonlinearity(thi,n,phi,dphi,&u,&v,du,dv,&eta,&deta);
805: jw /= thi->rhog; /* scales residuals to be O(1) */
806: if (q == 0) etabase = eta;
807: RangeUpdate(&etamin,&etamax,eta);
808: for (l=ls; l<8; l++) { /* test functions */
809: const PetscReal ds[2] = {-sin(thi->alpha),0};
810: const PetscReal pp=phi[l],*dp = dphi[l];
811: fn[l]->u += dp[0]*jw*eta*(4.*du[0]+2.*dv[1]) + dp[1]*jw*eta*(du[1]+dv[0]) + dp[2]*jw*eta*du[2] + pp*jw*thi->rhog*ds[0];
812: fn[l]->v += dp[1]*jw*eta*(2.*du[0]+4.*dv[1]) + dp[0]*jw*eta*(du[1]+dv[0]) + dp[2]*jw*eta*dv[2] + pp*jw*thi->rhog*ds[1];
813: }
814: }
815: if (k == 0) { /* we are on a bottom face */
816: if (thi->no_slip) {
817: /* Note: Non-Galerkin coarse grid operators are very sensitive to the scaling of Dirichlet boundary
818: * conditions. After shenanigans above, etabase contains the effective viscosity at the closest quadrature
819: * point to the bed. We want the diagonal entry in the Dirichlet condition to have similar magnitude to the
820: * diagonal entry corresponding to the adjacent node. The fundamental scaling of the viscous part is in
821: * diagu, diagv below. This scaling is easy to recognize by considering the finite difference operator after
822: * scaling by element size. The no-slip Dirichlet condition is scaled by this factor, and also in the
823: * assembled matrix (see the similar block in THIJacobianLocal).
824: *
825: * Note that the residual at this Dirichlet node is linear in the state at this node, but also depends
826: * (nonlinearly in general) on the neighboring interior nodes through the local viscosity. This will make
827: * a matrix-free Jacobian have extra entries in the corresponding row. We assemble only the diagonal part,
828: * so the solution will exactly satisfy the boundary condition after the first linear iteration.
829: */
830: const PetscReal hz = PetscRealPart(pn[0].h)/(zm-1.);
831: const PetscScalar diagu = 2*etabase/thi->rhog*(hx*hy/hz + hx*hz/hy + 4*hy*hz/hx),diagv = 2*etabase/thi->rhog*(hx*hy/hz + 4*hx*hz/hy + hy*hz/hx);
832: fn[0]->u = thi->dirichlet_scale*diagu*x[i][j][k].u;
833: fn[0]->v = thi->dirichlet_scale*diagv*x[i][j][k].v;
834: } else { /* Integrate over bottom face to apply boundary condition */
835: for (q=0; q<4; q++) {
836: const PetscReal jw = 0.25*hx*hy/thi->rhog,*phi = QuadQInterp[q];
837: PetscScalar u=0,v=0,rbeta2=0;
838: PetscReal beta2,dbeta2;
839: for (l=0; l<4; l++) {
840: u += phi[l]*n[l].u;
841: v += phi[l]*n[l].v;
842: rbeta2 += phi[l]*pn[l].beta2;
843: }
844: THIFriction(thi,PetscRealPart(rbeta2),PetscRealPart(u*u+v*v)/2,&beta2,&dbeta2);
845: RangeUpdate(&beta2min,&beta2max,beta2);
846: for (l=0; l<4; l++) {
847: const PetscReal pp = phi[l];
848: fn[ls+l]->u += pp*jw*beta2*u;
849: fn[ls+l]->v += pp*jw*beta2*v;
850: }
851: }
852: }
853: }
854: }
855: }
856: }
858: THIDARestorePrm(info->da,&prm);
860: PRangeMinMax(&thi->eta,etamin,etamax);
861: PRangeMinMax(&thi->beta2,beta2min,beta2max);
862: return(0);
863: }
867: static PetscErrorCode THIMatrixStatistics(THI thi,Mat B,PetscViewer viewer)
868: {
870: PetscReal nrm;
871: PetscInt m;
872: PetscMPIInt rank;
875: MatNorm(B,NORM_FROBENIUS,&nrm);
876: MatGetSize(B,&m,0);
877: MPI_Comm_rank(((PetscObject)B)->comm,&rank);
878: if (!rank) {
879: PetscScalar val0,val2;
880: MatGetValue(B,0,0,&val0);
881: MatGetValue(B,2,2,&val2);
882: PetscViewerASCIIPrintf(viewer,"Matrix dim %8d norm %8.2e (0,0) %8.2e (2,2) %8.2e %8.2e <= eta <= %8.2e %8.2e <= beta2 <= %8.2e\n",m,nrm,PetscRealPart(val0),PetscRealPart(val2),thi->eta.cmin,thi->eta.cmax,thi->beta2.cmin,thi->beta2.cmax);
883: }
884: return(0);
885: }
889: static PetscErrorCode THISurfaceStatistics(DM da,Vec X,PetscReal *min,PetscReal *max,PetscReal *mean)
890: {
892: Node ***x;
893: PetscInt i,j,xs,ys,zs,xm,ym,zm,mx,my,mz;
894: PetscReal umin = 1e100,umax=-1e100;
895: PetscScalar usum=0.0,gusum;
898: *min = *max = *mean = 0;
899: DMDAGetInfo(da,0, &mz,&my,&mx, 0,0,0, 0,0,0,0,0,0);
900: DMDAGetCorners(da,&zs,&ys,&xs,&zm,&ym,&xm);
901: if (zs != 0 || zm != mz) SETERRQ(PETSC_COMM_SELF,1,"Unexpected decomposition");
902: DMDAVecGetArray(da,X,&x);
903: for (i=xs; i<xs+xm; i++) {
904: for (j=ys; j<ys+ym; j++) {
905: PetscReal u = PetscRealPart(x[i][j][zm-1].u);
906: RangeUpdate(&umin,&umax,u);
907: usum += u;
908: }
909: }
910: DMDAVecRestoreArray(da,X,&x);
911: MPI_Allreduce(&umin,min,1,MPIU_REAL,MPIU_MIN,((PetscObject)da)->comm);
912: MPI_Allreduce(&umax,max,1,MPIU_REAL,MPIU_MAX,((PetscObject)da)->comm);
913: MPI_Allreduce(&usum,&gusum,1,MPIU_SCALAR,MPIU_SUM,((PetscObject)da)->comm);
914: *mean = PetscRealPart(gusum) / (mx*my);
915: return(0);
916: }
920: static PetscErrorCode THISolveStatistics(THI thi,SNES snes,PetscInt coarsened,const char name[])
921: {
922: MPI_Comm comm = ((PetscObject)thi)->comm;
923: Vec X;
924: DM dm;
928: SNESGetSolution(snes,&X);
929: SNESGetDM(snes,&dm);
930: PetscPrintf(comm,"Solution statistics after solve: %s\n",name);
931: {
932: PetscInt its,lits;
933: SNESConvergedReason reason;
934: SNESGetIterationNumber(snes,&its);
935: SNESGetConvergedReason(snes,&reason);
936: SNESGetLinearSolveIterations(snes,&lits);
937: PetscPrintf(comm,"%s: Number of SNES iterations = %d, total linear iterations = %d\n",SNESConvergedReasons[reason],its,lits);
938: }
939: {
940: PetscReal nrm2,tmin[3]={1e100,1e100,1e100},tmax[3]={-1e100,-1e100,-1e100},min[3],max[3];
941: PetscInt i,j,m;
942: PetscScalar *x;
943: VecNorm(X,NORM_2,&nrm2);
944: VecGetLocalSize(X,&m);
945: VecGetArray(X,&x);
946: for (i=0; i<m; i+=2) {
947: PetscReal u = PetscRealPart(x[i]),v = PetscRealPart(x[i+1]),c = sqrt(u*u+v*v);
948: tmin[0] = PetscMin(u,tmin[0]);
949: tmin[1] = PetscMin(v,tmin[1]);
950: tmin[2] = PetscMin(c,tmin[2]);
951: tmax[0] = PetscMax(u,tmax[0]);
952: tmax[1] = PetscMax(v,tmax[1]);
953: tmax[2] = PetscMax(c,tmax[2]);
954: }
955: VecRestoreArray(X,&x);
956: MPI_Allreduce(tmin,min,3,MPIU_REAL,MPIU_MIN,((PetscObject)thi)->comm);
957: MPI_Allreduce(tmax,max,3,MPIU_REAL,MPIU_MAX,((PetscObject)thi)->comm);
958: /* Dimensionalize to meters/year */
959: nrm2 *= thi->units->year / thi->units->meter;
960: for (j=0; j<3; j++) {
961: min[j] *= thi->units->year / thi->units->meter;
962: max[j] *= thi->units->year / thi->units->meter;
963: }
964: PetscPrintf(comm,"|X|_2 %g %g <= u <= %g %g <= v <= %g %g <= c <= %g \n",nrm2,min[0],max[0],min[1],max[1],min[2],max[2]);
965: {
966: PetscReal umin,umax,umean;
967: THISurfaceStatistics(dm,X,&umin,&umax,&umean);
968: umin *= thi->units->year / thi->units->meter;
969: umax *= thi->units->year / thi->units->meter;
970: umean *= thi->units->year / thi->units->meter;
971: PetscPrintf(comm,"Surface statistics: u in [%12.6e, %12.6e] mean %12.6e\n",umin,umax,umean);
972: }
973: /* These values stay nondimensional */
974: PetscPrintf(comm,"Global eta range %g to %g converged range %g to %g\n",thi->eta.min,thi->eta.max,thi->eta.cmin,thi->eta.cmax);
975: PetscPrintf(comm,"Global beta2 range %g to %g converged range %g to %g\n",thi->beta2.min,thi->beta2.max,thi->beta2.cmin,thi->beta2.cmax);
976: }
977: PetscPrintf(comm,"\n");
978: return(0);
979: }
983: static PetscErrorCode THIJacobianLocal_2D(DMDALocalInfo *info,Node **x,Mat B,THI thi)
984: {
985: PetscInt xs,ys,xm,ym,i,j,q,l,ll;
986: PetscReal hx,hy;
987: PrmNode **prm;
991: xs = info->ys;
992: ys = info->xs;
993: xm = info->ym;
994: ym = info->xm;
995: hx = thi->Lx / info->my;
996: hy = thi->Ly / info->mx;
998: MatZeroEntries(B);
999: THIDAGetPrm(info->da,&prm);
1001: for (i=xs; i<xs+xm; i++) {
1002: for (j=ys; j<ys+ym; j++) {
1003: Node n[4];
1004: PrmNode pn[4];
1005: PetscScalar Ke[4*2][4*2];
1006: QuadExtract(prm,i,j,pn);
1007: QuadExtract(x,i,j,n);
1008: PetscMemzero(Ke,sizeof(Ke));
1009: for (q=0; q<4; q++) {
1010: PetscReal phi[4],dphi[4][2],jw,eta,deta,beta2,dbeta2;
1011: PetscScalar u,v,du[2],dv[2],h = 0,rbeta2 = 0;
1012: for (l=0; l<4; l++) {
1013: phi[l] = QuadQInterp[q][l];
1014: dphi[l][0] = QuadQDeriv[q][l][0]*2./hx;
1015: dphi[l][1] = QuadQDeriv[q][l][1]*2./hy;
1016: h += phi[l] * pn[l].h;
1017: rbeta2 += phi[l] * pn[l].beta2;
1018: }
1019: jw = 0.25*hx*hy / thi->rhog; /* rhog is only scaling */
1020: PointwiseNonlinearity2D(thi,n,phi,dphi,&u,&v,du,dv,&eta,&deta);
1021: THIFriction(thi,PetscRealPart(rbeta2),PetscRealPart(u*u+v*v)/2,&beta2,&dbeta2);
1022: for (l=0; l<4; l++) {
1023: const PetscReal pp = phi[l],*dp = dphi[l];
1024: for (ll=0; ll<4; ll++) {
1025: const PetscReal ppl = phi[ll],*dpl = dphi[ll];
1026: PetscScalar dgdu,dgdv;
1027: dgdu = 2.*du[0]*dpl[0] + dv[1]*dpl[0] + 0.5*(du[1]+dv[0])*dpl[1];
1028: dgdv = 2.*dv[1]*dpl[1] + du[0]*dpl[1] + 0.5*(du[1]+dv[0])*dpl[0];
1029: /* Picard part */
1030: Ke[l*2+0][ll*2+0] += dp[0]*jw*eta*4.*dpl[0] + dp[1]*jw*eta*dpl[1] + pp*jw*(beta2/h)*ppl*thi->ssa_friction_scale;
1031: Ke[l*2+0][ll*2+1] += dp[0]*jw*eta*2.*dpl[1] + dp[1]*jw*eta*dpl[0];
1032: Ke[l*2+1][ll*2+0] += dp[1]*jw*eta*2.*dpl[0] + dp[0]*jw*eta*dpl[1];
1033: Ke[l*2+1][ll*2+1] += dp[1]*jw*eta*4.*dpl[1] + dp[0]*jw*eta*dpl[0] + pp*jw*(beta2/h)*ppl*thi->ssa_friction_scale;
1034: /* extra Newton terms */
1035: Ke[l*2+0][ll*2+0] += dp[0]*jw*deta*dgdu*(4.*du[0]+2.*dv[1]) + dp[1]*jw*deta*dgdu*(du[1]+dv[0]) + pp*jw*(dbeta2/h)*u*u*ppl*thi->ssa_friction_scale;
1036: Ke[l*2+0][ll*2+1] += dp[0]*jw*deta*dgdv*(4.*du[0]+2.*dv[1]) + dp[1]*jw*deta*dgdv*(du[1]+dv[0]) + pp*jw*(dbeta2/h)*u*v*ppl*thi->ssa_friction_scale;
1037: Ke[l*2+1][ll*2+0] += dp[1]*jw*deta*dgdu*(4.*dv[1]+2.*du[0]) + dp[0]*jw*deta*dgdu*(du[1]+dv[0]) + pp*jw*(dbeta2/h)*v*u*ppl*thi->ssa_friction_scale;
1038: Ke[l*2+1][ll*2+1] += dp[1]*jw*deta*dgdv*(4.*dv[1]+2.*du[0]) + dp[0]*jw*deta*dgdv*(du[1]+dv[0]) + pp*jw*(dbeta2/h)*v*v*ppl*thi->ssa_friction_scale;
1039: }
1040: }
1041: }
1042: {
1043: const MatStencil rc[4] = {{0,i,j,0},{0,i+1,j,0},{0,i+1,j+1,0},{0,i,j+1,0}};
1044: MatSetValuesBlockedStencil(B,4,rc,4,rc,&Ke[0][0],ADD_VALUES);
1045: }
1046: }
1047: }
1048: THIDARestorePrm(info->da,&prm);
1050: MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
1051: MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
1052: MatSetOption(B,MAT_SYMMETRIC,PETSC_TRUE);
1053: if (thi->verbose) {THIMatrixStatistics(thi,B,PETSC_VIEWER_STDOUT_WORLD);}
1054: return(0);
1055: }
1059: static PetscErrorCode THIJacobianLocal_3D(DMDALocalInfo *info,Node ***x,Mat B,THI thi,THIAssemblyMode amode)
1060: {
1061: PetscInt xs,ys,xm,ym,zm,i,j,k,q,l,ll;
1062: PetscReal hx,hy;
1063: PrmNode **prm;
1067: xs = info->zs;
1068: ys = info->ys;
1069: xm = info->zm;
1070: ym = info->ym;
1071: zm = info->xm;
1072: hx = thi->Lx / info->mz;
1073: hy = thi->Ly / info->my;
1075: MatZeroEntries(B);
1076: THIDAGetPrm(info->da,&prm);
1078: for (i=xs; i<xs+xm; i++) {
1079: for (j=ys; j<ys+ym; j++) {
1080: PrmNode pn[4];
1081: QuadExtract(prm,i,j,pn);
1082: for (k=0; k<zm-1; k++) {
1083: Node n[8];
1084: PetscReal zn[8],etabase = 0;
1085: PetscScalar Ke[8*2][8*2];
1086: PetscInt ls = 0;
1088: PrmHexGetZ(pn,k,zm,zn);
1089: HexExtract(x,i,j,k,n);
1090: PetscMemzero(Ke,sizeof(Ke));
1091: if (thi->no_slip && k == 0) {
1092: for (l=0; l<4; l++) n[l].u = n[l].v = 0;
1093: ls = 4;
1094: }
1095: for (q=0; q<8; q++) {
1096: PetscReal dz[3],phi[8],dphi[8][3],jw,eta,deta;
1097: PetscScalar du[3],dv[3],u,v;
1098: HexGrad(HexQDeriv[q],zn,dz);
1099: HexComputeGeometry(q,hx,hy,dz,phi,dphi,&jw);
1100: PointwiseNonlinearity(thi,n,phi,dphi,&u,&v,du,dv,&eta,&deta);
1101: jw /= thi->rhog; /* residuals are scaled by this factor */
1102: if (q == 0) etabase = eta;
1103: for (l=ls; l<8; l++) { /* test functions */
1104: const PetscReal *restrict dp = dphi[l];
1105: #if USE_SSE2_KERNELS
1106: /* gcc (up to my 4.5 snapshot) is really bad at hoisting intrinsics so we do it manually */
1107: __m128d
1108: p4 = _mm_set1_pd(4),p2 = _mm_set1_pd(2),p05 = _mm_set1_pd(0.5),
1109: p42 = _mm_setr_pd(4,2),p24 = _mm_shuffle_pd(p42,p42,_MM_SHUFFLE2(0,1)),
1110: du0 = _mm_set1_pd(du[0]),du1 = _mm_set1_pd(du[1]),du2 = _mm_set1_pd(du[2]),
1111: dv0 = _mm_set1_pd(dv[0]),dv1 = _mm_set1_pd(dv[1]),dv2 = _mm_set1_pd(dv[2]),
1112: jweta = _mm_set1_pd(jw*eta),jwdeta = _mm_set1_pd(jw*deta),
1113: dp0 = _mm_set1_pd(dp[0]),dp1 = _mm_set1_pd(dp[1]),dp2 = _mm_set1_pd(dp[2]),
1114: dp0jweta = _mm_mul_pd(dp0,jweta),dp1jweta = _mm_mul_pd(dp1,jweta),dp2jweta = _mm_mul_pd(dp2,jweta),
1115: p4du0p2dv1 = _mm_add_pd(_mm_mul_pd(p4,du0),_mm_mul_pd(p2,dv1)), /* 4 du0 + 2 dv1 */
1116: p4dv1p2du0 = _mm_add_pd(_mm_mul_pd(p4,dv1),_mm_mul_pd(p2,du0)), /* 4 dv1 + 2 du0 */
1117: pdu2dv2 = _mm_unpacklo_pd(du2,dv2), /* [du2, dv2] */
1118: du1pdv0 = _mm_add_pd(du1,dv0), /* du1 + dv0 */
1119: t1 = _mm_mul_pd(dp0,p4du0p2dv1), /* dp0 (4 du0 + 2 dv1) */
1120: t2 = _mm_mul_pd(dp1,p4dv1p2du0); /* dp1 (4 dv1 + 2 du0) */
1122: #endif
1123: #if defined COMPUTE_LOWER_TRIANGULAR /* The element matrices are always symmetric so computing the lower-triangular part is not necessary */
1124: for (ll=ls; ll<8; ll++) { /* trial functions */
1125: #else
1126: for (ll=l; ll<8; ll++) {
1127: #endif
1128: const PetscReal *restrict dpl = dphi[ll];
1129: if (amode == THIASSEMBLY_TRIDIAGONAL && (l-ll)%4) continue; /* these entries would not be inserted */
1130: #if !USE_SSE2_KERNELS
1131: /* The analytic Jacobian in nice, easy-to-read form */
1132: {
1133: PetscScalar dgdu,dgdv;
1134: dgdu = 2.*du[0]*dpl[0] + dv[1]*dpl[0] + 0.5*(du[1]+dv[0])*dpl[1] + 0.5*du[2]*dpl[2];
1135: dgdv = 2.*dv[1]*dpl[1] + du[0]*dpl[1] + 0.5*(du[1]+dv[0])*dpl[0] + 0.5*dv[2]*dpl[2];
1136: /* Picard part */
1137: Ke[l*2+0][ll*2+0] += dp[0]*jw*eta*4.*dpl[0] + dp[1]*jw*eta*dpl[1] + dp[2]*jw*eta*dpl[2];
1138: Ke[l*2+0][ll*2+1] += dp[0]*jw*eta*2.*dpl[1] + dp[1]*jw*eta*dpl[0];
1139: Ke[l*2+1][ll*2+0] += dp[1]*jw*eta*2.*dpl[0] + dp[0]*jw*eta*dpl[1];
1140: Ke[l*2+1][ll*2+1] += dp[1]*jw*eta*4.*dpl[1] + dp[0]*jw*eta*dpl[0] + dp[2]*jw*eta*dpl[2];
1141: /* extra Newton terms */
1142: Ke[l*2+0][ll*2+0] += dp[0]*jw*deta*dgdu*(4.*du[0]+2.*dv[1]) + dp[1]*jw*deta*dgdu*(du[1]+dv[0]) + dp[2]*jw*deta*dgdu*du[2];
1143: Ke[l*2+0][ll*2+1] += dp[0]*jw*deta*dgdv*(4.*du[0]+2.*dv[1]) + dp[1]*jw*deta*dgdv*(du[1]+dv[0]) + dp[2]*jw*deta*dgdv*du[2];
1144: Ke[l*2+1][ll*2+0] += dp[1]*jw*deta*dgdu*(4.*dv[1]+2.*du[0]) + dp[0]*jw*deta*dgdu*(du[1]+dv[0]) + dp[2]*jw*deta*dgdu*dv[2];
1145: Ke[l*2+1][ll*2+1] += dp[1]*jw*deta*dgdv*(4.*dv[1]+2.*du[0]) + dp[0]*jw*deta*dgdv*(du[1]+dv[0]) + dp[2]*jw*deta*dgdv*dv[2];
1146: }
1147: #else
1148: /* This SSE2 code is an exact replica of above, but uses explicit packed instructions for some speed
1149: * benefit. On my hardware, these intrinsics are almost twice as fast as above, reducing total assembly cost
1150: * by 25 to 30 percent. */
1151: {
1152: __m128d
1153: keu = _mm_loadu_pd(&Ke[l*2+0][ll*2+0]),
1154: kev = _mm_loadu_pd(&Ke[l*2+1][ll*2+0]),
1155: dpl01 = _mm_loadu_pd(&dpl[0]),dpl10 = _mm_shuffle_pd(dpl01,dpl01,_MM_SHUFFLE2(0,1)),dpl2 = _mm_set_sd(dpl[2]),
1156: t0,t3,pdgduv;
1157: keu = _mm_add_pd(keu,_mm_add_pd(_mm_mul_pd(_mm_mul_pd(dp0jweta,p42),dpl01),
1158: _mm_add_pd(_mm_mul_pd(dp1jweta,dpl10),
1159: _mm_mul_pd(dp2jweta,dpl2))));
1160: kev = _mm_add_pd(kev,_mm_add_pd(_mm_mul_pd(_mm_mul_pd(dp1jweta,p24),dpl01),
1161: _mm_add_pd(_mm_mul_pd(dp0jweta,dpl10),
1162: _mm_mul_pd(dp2jweta,_mm_shuffle_pd(dpl2,dpl2,_MM_SHUFFLE2(0,1))))));
1163: pdgduv = _mm_mul_pd(p05,_mm_add_pd(_mm_add_pd(_mm_mul_pd(p42,_mm_mul_pd(du0,dpl01)),
1164: _mm_mul_pd(p24,_mm_mul_pd(dv1,dpl01))),
1165: _mm_add_pd(_mm_mul_pd(du1pdv0,dpl10),
1166: _mm_mul_pd(pdu2dv2,_mm_set1_pd(dpl[2]))))); /* [dgdu, dgdv] */
1167: t0 = _mm_mul_pd(jwdeta,pdgduv); /* jw deta [dgdu, dgdv] */
1168: t3 = _mm_mul_pd(t0,du1pdv0); /* t0 (du1 + dv0) */
1169: _mm_storeu_pd(&Ke[l*2+0][ll*2+0],_mm_add_pd(keu,_mm_add_pd(_mm_mul_pd(t1,t0),
1170: _mm_add_pd(_mm_mul_pd(dp1,t3),
1171: _mm_mul_pd(t0,_mm_mul_pd(dp2,du2))))));
1172: _mm_storeu_pd(&Ke[l*2+1][ll*2+0],_mm_add_pd(kev,_mm_add_pd(_mm_mul_pd(t2,t0),
1173: _mm_add_pd(_mm_mul_pd(dp0,t3),
1174: _mm_mul_pd(t0,_mm_mul_pd(dp2,dv2))))));
1175: }
1176: #endif
1177: }
1178: }
1179: }
1180: if (k == 0) { /* on a bottom face */
1181: if (thi->no_slip) {
1182: const PetscReal hz = PetscRealPart(pn[0].h)/(zm-1);
1183: const PetscScalar diagu = 2*etabase/thi->rhog*(hx*hy/hz + hx*hz/hy + 4*hy*hz/hx),diagv = 2*etabase/thi->rhog*(hx*hy/hz + 4*hx*hz/hy + hy*hz/hx);
1184: Ke[0][0] = thi->dirichlet_scale*diagu;
1185: Ke[1][1] = thi->dirichlet_scale*diagv;
1186: } else {
1187: for (q=0; q<4; q++) {
1188: const PetscReal jw = 0.25*hx*hy/thi->rhog,*phi = QuadQInterp[q];
1189: PetscScalar u=0,v=0,rbeta2=0;
1190: PetscReal beta2,dbeta2;
1191: for (l=0; l<4; l++) {
1192: u += phi[l]*n[l].u;
1193: v += phi[l]*n[l].v;
1194: rbeta2 += phi[l]*pn[l].beta2;
1195: }
1196: THIFriction(thi,PetscRealPart(rbeta2),PetscRealPart(u*u+v*v)/2,&beta2,&dbeta2);
1197: for (l=0; l<4; l++) {
1198: const PetscReal pp = phi[l];
1199: for (ll=0; ll<4; ll++) {
1200: const PetscReal ppl = phi[ll];
1201: Ke[l*2+0][ll*2+0] += pp*jw*beta2*ppl + pp*jw*dbeta2*u*u*ppl;
1202: Ke[l*2+0][ll*2+1] += pp*jw*dbeta2*u*v*ppl;
1203: Ke[l*2+1][ll*2+0] += pp*jw*dbeta2*v*u*ppl;
1204: Ke[l*2+1][ll*2+1] += pp*jw*beta2*ppl + pp*jw*dbeta2*v*v*ppl;
1205: }
1206: }
1207: }
1208: }
1209: }
1210: {
1211: const MatStencil rc[8] = {{i,j,k,0},{i+1,j,k,0},{i+1,j+1,k,0},{i,j+1,k,0},{i,j,k+1,0},{i+1,j,k+1,0},{i+1,j+1,k+1,0},{i,j+1,k+1,0}};
1212: if (amode == THIASSEMBLY_TRIDIAGONAL) {
1213: for (l=0; l<4; l++) { /* Copy out each of the blocks, discarding horizontal coupling */
1214: const PetscInt l4 = l+4;
1215: const MatStencil rcl[2] = {{rc[l].k,rc[l].j,rc[l].i,0},{rc[l4].k,rc[l4].j,rc[l4].i,0}};
1216: #if defined COMPUTE_LOWER_TRIANGULAR
1217: const PetscScalar Kel[4][4] = {{Ke[2*l+0][2*l+0] ,Ke[2*l+0][2*l+1] ,Ke[2*l+0][2*l4+0] ,Ke[2*l+0][2*l4+1]},
1218: {Ke[2*l+1][2*l+0] ,Ke[2*l+1][2*l+1] ,Ke[2*l+1][2*l4+0] ,Ke[2*l+1][2*l4+1]},
1219: {Ke[2*l4+0][2*l+0],Ke[2*l4+0][2*l+1],Ke[2*l4+0][2*l4+0],Ke[2*l4+0][2*l4+1]},
1220: {Ke[2*l4+1][2*l+0],Ke[2*l4+1][2*l+1],Ke[2*l4+1][2*l4+0],Ke[2*l4+1][2*l4+1]}};
1221: #else
1222: /* Same as above except for the lower-left block */
1223: const PetscScalar Kel[4][4] = {{Ke[2*l+0][2*l+0] ,Ke[2*l+0][2*l+1] ,Ke[2*l+0][2*l4+0] ,Ke[2*l+0][2*l4+1]},
1224: {Ke[2*l+1][2*l+0] ,Ke[2*l+1][2*l+1] ,Ke[2*l+1][2*l4+0] ,Ke[2*l+1][2*l4+1]},
1225: {Ke[2*l+0][2*l4+0],Ke[2*l+1][2*l4+0],Ke[2*l4+0][2*l4+0],Ke[2*l4+0][2*l4+1]},
1226: {Ke[2*l+0][2*l4+1],Ke[2*l+1][2*l4+1],Ke[2*l4+1][2*l4+0],Ke[2*l4+1][2*l4+1]}};
1227: #endif
1228: MatSetValuesBlockedStencil(B,2,rcl,2,rcl,&Kel[0][0],ADD_VALUES);
1229: }
1230: } else {
1231: #if !defined COMPUTE_LOWER_TRIANGULAR /* fill in lower-triangular part, this is really cheap compared to computing the entries */
1232: for (l=0; l<8; l++) {
1233: for (ll=l+1; ll<8; ll++) {
1234: Ke[ll*2+0][l*2+0] = Ke[l*2+0][ll*2+0];
1235: Ke[ll*2+1][l*2+0] = Ke[l*2+0][ll*2+1];
1236: Ke[ll*2+0][l*2+1] = Ke[l*2+1][ll*2+0];
1237: Ke[ll*2+1][l*2+1] = Ke[l*2+1][ll*2+1];
1238: }
1239: }
1240: #endif
1241: MatSetValuesBlockedStencil(B,8,rc,8,rc,&Ke[0][0],ADD_VALUES);
1242: }
1243: }
1244: }
1245: }
1246: }
1247: THIDARestorePrm(info->da,&prm);
1249: MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
1250: MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
1251: MatSetOption(B,MAT_SYMMETRIC,PETSC_TRUE);
1252: if (thi->verbose) {THIMatrixStatistics(thi,B,PETSC_VIEWER_STDOUT_WORLD);}
1253: return(0);
1254: }
1258: static PetscErrorCode THIJacobianLocal_3D_Full(DMDALocalInfo *info,Node ***x,Mat A,Mat B,MatStructure *mstr,THI thi)
1259: {
1263: THIJacobianLocal_3D(info,x,B,thi,THIASSEMBLY_FULL);
1264: *mstr = SAME_NONZERO_PATTERN;
1265: return(0);
1266: }
1270: static PetscErrorCode THIJacobianLocal_3D_Tridiagonal(DMDALocalInfo *info,Node ***x,Mat B,THI thi)
1271: {
1275: THIJacobianLocal_3D(info,x,B,thi,THIASSEMBLY_TRIDIAGONAL);
1276: return(0);
1277: }
1281: static PetscErrorCode DMRefineHierarchy_THI(DM dac0,PetscInt nlevels,DM hierarchy[])
1282: {
1284: THI thi;
1285: PetscInt dim,M,N,m,n,s,dof;
1286: DM dac,daf;
1287: DMDAStencilType st;
1288: DM_DA *ddf,*ddc;
1291: PetscObjectQuery((PetscObject)dac0,"THI",(PetscObject*)&thi);
1292: if (!thi) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot refine this DMDA, missing composed THI instance");
1293: if (nlevels > 1) {
1294: DMRefineHierarchy(dac0,nlevels-1,hierarchy);
1295: dac = hierarchy[nlevels-2];
1296: } else {
1297: dac = dac0;
1298: }
1299: DMDAGetInfo(dac,&dim, &N,&M,0, &n,&m,0, &dof,&s,0,0,0,&st);
1300: if (dim != 2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"This function can only refine 2D DMDAs");
1301: /* Creates a 3D DMDA with the same map-plane layout as the 2D one, with contiguous columns */
1302: DMDACreate3d(((PetscObject)dac)->comm,DMDA_BOUNDARY_NONE,DMDA_BOUNDARY_PERIODIC,DMDA_BOUNDARY_PERIODIC,st,thi->zlevels,N,M,1,n,m,dof,s,PETSC_NULL,PETSC_NULL,PETSC_NULL,&daf);
1303: daf->ops->creatematrix = dac->ops->creatematrix;
1304: daf->ops->createinterpolation = dac->ops->createinterpolation;
1305: daf->ops->getcoloring = dac->ops->getcoloring;
1306: ddf = (DM_DA*)daf->data;
1307: ddc = (DM_DA*)dac->data;
1308: ddf->interptype = ddc->interptype;
1310: DMDASetFieldName(daf,0,"x-velocity");
1311: DMDASetFieldName(daf,1,"y-velocity");
1312: hierarchy[nlevels-1] = daf;
1313: return(0);
1314: }
1318: static PetscErrorCode DMCreateInterpolation_DA_THI(DM dac,DM daf,Mat *A,Vec *scale)
1319: {
1321: PetscInt dim;
1328: DMDAGetInfo(daf,&dim,0,0,0,0,0,0,0,0,0,0,0,0);
1329: if (dim == 2) {
1330: /* We are in the 2D problem and use normal DMDA interpolation */
1331: DMCreateInterpolation(dac,daf,A,scale);
1332: } else {
1333: PetscInt i,j,k,xs,ys,zs,xm,ym,zm,mx,my,mz,rstart,cstart;
1334: Mat B;
1336: DMDAGetInfo(daf,0, &mz,&my,&mx, 0,0,0, 0,0,0,0,0,0);
1337: DMDAGetCorners(daf,&zs,&ys,&xs,&zm,&ym,&xm);
1338: if (zs != 0) SETERRQ(PETSC_COMM_SELF,1,"unexpected");
1339: MatCreate(((PetscObject)daf)->comm,&B);
1340: MatSetSizes(B,xm*ym*zm,xm*ym,mx*my*mz,mx*my);
1341:
1342: MatSetType(B,MATAIJ);
1343: MatSeqAIJSetPreallocation(B,1,NULL);
1344: MatMPIAIJSetPreallocation(B,1,NULL,0,NULL);
1345: MatGetOwnershipRange(B,&rstart,NULL);
1346: MatGetOwnershipRangeColumn(B,&cstart,NULL);
1347: for (i=xs; i<xs+xm; i++) {
1348: for (j=ys; j<ys+ym; j++) {
1349: for (k=zs; k<zs+zm; k++) {
1350: PetscInt i2 = i*ym+j,i3 = i2*zm+k;
1351: PetscScalar val = ((k == 0 || k == mz-1) ? 0.5 : 1.) / (mz-1.); /* Integration using trapezoid rule */
1352: MatSetValue(B,cstart+i3,rstart+i2,val,INSERT_VALUES);
1353: }
1354: }
1355: }
1356: MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
1357: MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
1358: MatCreateMAIJ(B,sizeof(Node)/sizeof(PetscScalar),A);
1359: MatDestroy(&B);
1360: }
1361: return(0);
1362: }
1366: static PetscErrorCode DMCreateMatrix_THI_Tridiagonal(DM da,const MatType mtype,Mat *J)
1367: {
1369: Mat A;
1370: PetscInt xm,ym,zm,dim,dof = 2,starts[3],dims[3];
1371: ISLocalToGlobalMapping ltog,ltogb;
1374: DMDAGetInfo(da,&dim, 0,0,0, 0,0,0, 0,0,0,0,0,0);
1375: if (dim != 3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Expected DMDA to be 3D");
1376: DMDAGetCorners(da,0,0,0,&zm,&ym,&xm);
1377: DMGetLocalToGlobalMapping(da,<og);
1378: DMGetLocalToGlobalMappingBlock(da,<ogb);
1379: MatCreate(((PetscObject)da)->comm,&A);
1380: MatSetSizes(A,dof*xm*ym*zm,dof*xm*ym*zm,PETSC_DETERMINE,PETSC_DETERMINE);
1381: MatSetType(A,mtype);
1382: MatSetFromOptions(A);
1383: MatSeqAIJSetPreallocation(A,3*2,PETSC_NULL);
1384: MatMPIAIJSetPreallocation(A,3*2,PETSC_NULL,0,PETSC_NULL);
1385: MatSeqBAIJSetPreallocation(A,2,3,PETSC_NULL);
1386: MatMPIBAIJSetPreallocation(A,2,3,PETSC_NULL,0,PETSC_NULL);
1387: MatSeqSBAIJSetPreallocation(A,2,2,PETSC_NULL);
1388: MatMPISBAIJSetPreallocation(A,2,2,PETSC_NULL,0,PETSC_NULL);
1389: MatSetLocalToGlobalMapping(A,ltog,ltog);
1390: MatSetLocalToGlobalMappingBlock(A,ltogb,ltogb);
1391: DMDAGetGhostCorners(da,&starts[0],&starts[1],&starts[2],&dims[0],&dims[1],&dims[2]);
1392: MatSetStencil(A,dim,dims,starts,dof);
1393: *J = A;
1394: return(0);
1395: }
1399: static PetscErrorCode THIDAVecView_VTK_XML(THI thi,DM da,Vec X,const char filename[])
1400: {
1401: const PetscInt dof = 2;
1402: Units units = thi->units;
1403: MPI_Comm comm;
1405: PetscViewer viewer;
1406: PetscMPIInt rank,size,tag,nn,nmax;
1407: PetscInt mx,my,mz,r,range[6];
1408: PetscScalar *x;
1411: comm = ((PetscObject)thi)->comm;
1412: DMDAGetInfo(da,0, &mz,&my,&mx, 0,0,0, 0,0,0,0,0,0);
1413: MPI_Comm_size(comm,&size);
1414: MPI_Comm_rank(comm,&rank);
1415: PetscViewerASCIIOpen(comm,filename,&viewer);
1416: PetscViewerASCIIPrintf(viewer,"<VTKFile type=\"StructuredGrid\" version=\"0.1\" byte_order=\"LittleEndian\">\n");
1417: PetscViewerASCIIPrintf(viewer," <StructuredGrid WholeExtent=\"%d %d %d %d %d %d\">\n",0,mz-1,0,my-1,0,mx-1);
1419: DMDAGetCorners(da,range,range+1,range+2,range+3,range+4,range+5);
1420: nn = PetscMPIIntCast(range[3]*range[4]*range[5]*dof);
1421: MPI_Reduce(&nn,&nmax,1,MPI_INT,MPI_MAX,0,comm);
1422: tag = ((PetscObject) viewer)->tag;
1423: VecGetArray(X,&x);
1424: if (!rank) {
1425: PetscScalar *array;
1426: PetscMalloc(nmax*sizeof(PetscScalar),&array);
1427: for (r=0; r<size; r++) {
1428: PetscInt i,j,k,xs,xm,ys,ym,zs,zm;
1429: PetscScalar *ptr;
1430: MPI_Status status;
1431: if (r) {
1432: MPI_Recv(range,6,MPIU_INT,r,tag,comm,MPI_STATUS_IGNORE);
1433: }
1434: zs = range[0];ys = range[1];xs = range[2];zm = range[3];ym = range[4];xm = range[5];
1435: if (xm*ym*zm*dof > nmax) SETERRQ(PETSC_COMM_SELF,1,"should not happen");
1436: if (r) {
1437: MPI_Recv(array,nmax,MPIU_SCALAR,r,tag,comm,&status);
1438: MPI_Get_count(&status,MPIU_SCALAR,&nn);
1439: if (nn != xm*ym*zm*dof) SETERRQ(PETSC_COMM_SELF,1,"should not happen");
1440: ptr = array;
1441: } else ptr = x;
1442: PetscViewerASCIIPrintf(viewer," <Piece Extent=\"%d %d %d %d %d %d\">\n",zs,zs+zm-1,ys,ys+ym-1,xs,xs+xm-1);
1444: PetscViewerASCIIPrintf(viewer," <Points>\n");
1445: PetscViewerASCIIPrintf(viewer," <DataArray type=\"Float32\" NumberOfComponents=\"3\" format=\"ascii\">\n");
1446: for (i=xs; i<xs+xm; i++) {
1447: for (j=ys; j<ys+ym; j++) {
1448: for (k=zs; k<zs+zm; k++) {
1449: PrmNode p;
1450: PetscReal xx = thi->Lx*i/mx,yy = thi->Ly*j/my,zz;
1451: thi->initialize(thi,xx,yy,&p);
1452: zz = PetscRealPart(p.b) + PetscRealPart(p.h)*k/(mz-1);
1453: PetscViewerASCIIPrintf(viewer,"%f %f %f\n",xx,yy,zz);
1454: }
1455: }
1456: }
1457: PetscViewerASCIIPrintf(viewer," </DataArray>\n");
1458: PetscViewerASCIIPrintf(viewer," </Points>\n");
1460: PetscViewerASCIIPrintf(viewer," <PointData>\n");
1461: PetscViewerASCIIPrintf(viewer," <DataArray type=\"Float32\" Name=\"velocity\" NumberOfComponents=\"3\" format=\"ascii\">\n");
1462: for (i=0; i<nn; i+=dof) {
1463: PetscViewerASCIIPrintf(viewer,"%f %f %f\n",PetscRealPart(ptr[i])*units->year/units->meter,PetscRealPart(ptr[i+1])*units->year/units->meter,0.0);
1464: }
1465: PetscViewerASCIIPrintf(viewer," </DataArray>\n");
1467: PetscViewerASCIIPrintf(viewer," <DataArray type=\"Int32\" Name=\"rank\" NumberOfComponents=\"1\" format=\"ascii\">\n");
1468: for (i=0; i<nn; i+=dof) {
1469: PetscViewerASCIIPrintf(viewer,"%d\n",r);
1470: }
1471: PetscViewerASCIIPrintf(viewer," </DataArray>\n");
1472: PetscViewerASCIIPrintf(viewer," </PointData>\n");
1474: PetscViewerASCIIPrintf(viewer," </Piece>\n");
1475: }
1476: PetscFree(array);
1477: } else {
1478: MPI_Send(range,6,MPIU_INT,0,tag,comm);
1479: MPI_Send(x,nn,MPIU_SCALAR,0,tag,comm);
1480: }
1481: VecRestoreArray(X,&x);
1482: PetscViewerASCIIPrintf(viewer," </StructuredGrid>\n");
1483: PetscViewerASCIIPrintf(viewer,"</VTKFile>\n");
1484: PetscViewerDestroy(&viewer);
1485: return(0);
1486: }
1490: int main(int argc,char *argv[])
1491: {
1492: MPI_Comm comm;
1493: THI thi;
1495: DM da;
1496: SNES snes;
1498: PetscInitialize(&argc,&argv,0,help);
1499: comm = PETSC_COMM_WORLD;
1501: THICreate(comm,&thi);
1502: {
1503: PetscInt M = 3,N = 3,P = 2;
1504: PetscOptionsBegin(comm,NULL,"Grid resolution options","");
1505: {
1506: PetscOptionsInt("-M","Number of elements in x-direction on coarse level","",M,&M,NULL);
1507: N = M;
1508: PetscOptionsInt("-N","Number of elements in y-direction on coarse level (if different from M)","",N,&N,NULL);
1509: if (thi->coarse2d) {
1510: PetscOptionsInt("-zlevels","Number of elements in z-direction on fine level","",thi->zlevels,&thi->zlevels,NULL);
1511: } else {
1512: PetscOptionsInt("-P","Number of elements in z-direction on coarse level","",P,&P,NULL);
1513: }
1514: }
1515: PetscOptionsEnd();
1516: if (thi->coarse2d) {
1517: DMDACreate2d(comm,DMDA_BOUNDARY_PERIODIC,DMDA_BOUNDARY_PERIODIC,DMDA_STENCIL_BOX,-N,-M,PETSC_DETERMINE,PETSC_DETERMINE,sizeof(Node)/sizeof(PetscScalar),1,0,0,&da);
1518: da->ops->refinehierarchy = DMRefineHierarchy_THI;
1519: da->ops->createinterpolation = DMCreateInterpolation_DA_THI;
1520: PetscObjectCompose((PetscObject)da,"THI",(PetscObject)thi);
1521: } else {
1522: DMDACreate3d(comm,DMDA_BOUNDARY_NONE,DMDA_BOUNDARY_PERIODIC,DMDA_BOUNDARY_PERIODIC, DMDA_STENCIL_BOX,-P,-N,-M,1,PETSC_DETERMINE,PETSC_DETERMINE,sizeof(Node)/sizeof(PetscScalar),1,0,0,0,&da);
1523: }
1524: DMDASetFieldName(da,0,"x-velocity");
1525: DMDASetFieldName(da,1,"y-velocity");
1526: }
1527: THISetUpDM(thi,da);
1528: if (thi->tridiagonal) {
1529: da->ops->creatematrix = DMCreateMatrix_THI_Tridiagonal;
1530: }
1531: { /* Set the fine level matrix type if -da_refine */
1532: PetscInt rlevel,clevel;
1533: DMGetRefineLevel(da,&rlevel);
1534: DMGetCoarsenLevel(da,&clevel);
1535: if (rlevel - clevel > 0) {DMSetMatType(da,thi->mattype);}
1536: }
1538: DMDASNESSetFunctionLocal(da,ADD_VALUES,(DMDASNESFunctionLocal)THIFunctionLocal,thi);
1539: if (thi->tridiagonal) {
1540: DMDASNESSetJacobianLocal(da,(DMDASNESJacobianLocal)THIJacobianLocal_3D_Tridiagonal,thi);
1541: } else {
1542: DMDASNESSetJacobianLocal(da,(DMDASNESJacobianLocal)THIJacobianLocal_3D_Full,thi);
1543: }
1544: DMCoarsenHookAdd(da,DMCoarsenHook_THI,PETSC_NULL,thi);
1545: DMRefineHookAdd(da,DMRefineHook_THI,PETSC_NULL,thi);
1547: DMSetApplicationContext(da,thi);
1548: DMSetInitialGuess(da,THIInitial);
1550: SNESCreate(comm,&snes);
1551: SNESSetDM(snes,da);
1552: DMDestroy(&da);
1553: SNESSetFromOptions(snes);
1555: SNESSolve(snes,PETSC_NULL,PETSC_NULL);
1557: THISolveStatistics(thi,snes,0,"Full");
1559: {
1560: PetscBool flg;
1561: char filename[PETSC_MAX_PATH_LEN] = "";
1562: PetscOptionsGetString(PETSC_NULL,"-o",filename,sizeof(filename),&flg);
1563: if (flg) {
1564: Vec X;
1565: DM dm;
1566: SNESGetSolution(snes,&X);
1567: SNESGetDM(snes,&dm);
1568: THIDAVecView_VTK_XML(thi,dm,X,filename);
1569: }
1570: }
1572: DMDestroy(&da);
1573: SNESDestroy(&snes);
1574: THIDestroy(&thi);
1575: PetscFinalize();
1576: return 0;
1577: }