2: static char help[] = "2d Bratu problem in shared memory parallel with SNES.\n\
3: We solve the Bratu (SFI - solid fuel ignition) problem in a 2D rectangular\n\
4: domain, uses SHARED MEMORY to evaluate the user function.\n\
5: The command line options include:\n\
6: -par <parameter>, where <parameter> indicates the problem's nonlinearity\n\
7: problem SFI: <parameter> = Bratu parameter (0 <= par <= 6.81)\n\
8: -mx <xg>, where <xg> = number of grid points in the x-direction\n\
9: -my <yg>, where <yg> = number of grid points in the y-direction\n\
10: -use_fortran_function: use Fortran coded function, rather than C\n";
12: /*
13: This code compiles ONLY on SGI systems
14: ========================================
15: */
16: /*T
17: Concepts: SNES^parallel Bratu example
18: Concepts: shared memory
19: Processors: n
20: T*/
22: /*
24: Programming model: Combination of
25: 1) MPI message passing for PETSc routines
26: 2) automatic loop parallelism (using shared memory) for user
27: provided function.
29: While the user function is being evaluated all MPI processes except process
30: 0 blocks. Process zero spawns nt threads to evaluate the user function. Once
31: the user function is complete, the worker threads are suspended and all the MPI processes
32: continue.
34: Other useful options:
36: -snes_mf : use matrix free operator and no preconditioner
37: -snes_mf_operator : use matrix free operator but compute Jacobian via
38: finite differences to form preconditioner
40: Environmental variable:
42: setenv MPC_NUM_THREADS nt <- set number of threads processor 0 should
43: use to evaluate user provided function
45: Note: The number of MPI processes (set with the mpiexec option -np) can
46: be set completely independently from the number of threads process 0
47: uses to evaluate the function (though usually one would make them the same).
48: */
50: /* ------------------------------------------------------------------------
52: Solid Fuel Ignition (SFI) problem. This problem is modeled by
53: the partial differential equation
55: -Laplacian u - lambda*exp(u) = 0, 0 < x,y < 1,
57: with boundary conditions
59: u = 0 for x = 0, x = 1, y = 0, y = 1.
61: A finite difference approximation with the usual 5-point stencil
62: is used to discretize the boundary value problem to obtain a nonlinear
63: system of equations.
65: The uniprocessor version of this code is snes/examples/tutorials/ex4.c
66: A parallel distributed memory version is snes/examples/tutorials/ex5.c and ex5f.F
68: ------------------------------------------------------------------------- */
70: /*
71: Include "petscsnes.h" so that we can use SNES solvers. Note that this
72: file automatically includes:
73: petscsys.h - base PETSc routines petscvec.h - vectors
74: petscmat.h - matrices
75: petscis.h - index sets petscksp.h - Krylov subspace methods
76: petscviewer.h - viewers petscpc.h - preconditioners
77: petscksp.h - linear solvers
78: */
79: #include <petscsnes.h> 81: /*
82: User-defined application context - contains data needed by the
83: application-provided call-back routines FormFunction().
84: */
85: typedef struct {
86: PetscReal param; /* test problem parameter */
87: int mx,my; /* discretization in x, y directions */
88: int rank; /* processor rank */
89: } AppCtx;
91: /*
92: User-defined routines
93: */
94: extern int FormFunction(SNES,Vec,Vec,void*),FormInitialGuess(AppCtx*,Vec);
95: extern int FormFunctionFortran(SNES,Vec,Vec,void*);
97: /*
98: The main program is written in C while the user provided function
99: is given in both Fortran and C. The main program could also be written
100: in Fortran; the ONE PROBLEM is that VecGetArray() cannot be called from
101: Fortran on the SGI machines; thus the routine FormFunctionFortran() must
102: be written in C.
103: */
104: int main(int argc,char **argv)105: {
106: SNES snes; /* nonlinear solver */
107: Vec x,r; /* solution, residual vectors */
108: AppCtx user; /* user-defined work context */
109: int its; /* iterations for convergence */
110: int N,ierr,rstart,rend,*colors,i,ii,ri,rj;
111: PetscErrorCode (*fnc)(SNES,Vec,Vec,void*);
112: PetscReal bratu_lambda_max = 6.81,bratu_lambda_min = 0.;
113: MatFDColoring fdcoloring;
114: ISColoring iscoloring;
115: Mat J;
116: PetscScalar zero = 0.0;
117: PetscBool flg;
119: PetscInitialize(&argc,&argv,(char*)0,help);if (ierr) return ierr;
120: MPI_Comm_rank(PETSC_COMM_WORLD,&user.rank);
122: /*
123: Initialize problem parameters
124: */
125: user.mx = 4; user.my = 4; user.param = 6.0;
126: PetscOptionsGetInt(NULL,NULL,"-mx",&user.mx,NULL);
127: PetscOptionsGetInt(NULL,NULL,"-my",&user.my,NULL);
128: PetscOptionsGetReal(NULL,NULL,"-par",&user.param,NULL);
129: if (user.param >= bratu_lambda_max || user.param <= bratu_lambda_min) SETERRQ(PETSC_COMM_SELF,1,"Lambda is out of range");
130: N = user.mx*user.my;
132: /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
133: Create nonlinear solver context
134: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
136: SNESCreate(PETSC_COMM_WORLD,&snes);
138: /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
139: Create vector data structures; set function evaluation routine
140: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
142: /*
143: The routine VecCreateShared() creates a parallel vector with each processor
144: assigned its own segment, BUT, in addition, the first processor has access to the
145: entire array. This is to allow the users function to be based on loop level
146: parallelism rather than MPI.
147: */
148: VecCreateShared(PETSC_COMM_WORLD,PETSC_DECIDE,N,&x);
149: VecDuplicate(x,&r);
151: PetscOptionsHasName(NULL,NULL,"-use_fortran_function",&flg);
152: if (flg) fnc = FormFunctionFortran;
153: else fnc = FormFunction;
155: /*
156: Set function evaluation routine and vector
157: */
158: SNESSetFunction(snes,r,fnc,&user);
160: /*
161: Currently when using VecCreateShared() and using loop level parallelism
162: to automatically parallelise the user function it makes no sense for the
163: Jacobian to be computed via loop level parallelism, because all the threads
164: would be simultaneously calling MatSetValues() causing a bottle-neck.
166: Thus this example uses the PETSc Jacobian calculations via finite differencing
167: to approximate the Jacobian
168: */
170: /*
172: */
173: VecGetOwnershipRange(r,&rstart,&rend);
174: PetscMalloc1(rend-rstart,&colors);
175: for (i=rstart; i<rend; i++) colors[i - rstart] = 3*((i/user.mx) % 3) + (i % 3);
177: ISColoringCreate(PETSC_COMM_WORLD,3*2+2,rend-rstart,colors,&iscoloring);
178: PetscFree(colors);
180: /*
181: Create and set the nonzero pattern for the Jacobian: This is not done
182: particularly efficiently. One should process the boundary nodes separately and
183: then use a simple loop for the interior nodes.
184: Note that for this code we use the "natural" number of the nodes on the
185: grid (since that is what is good for the user provided function). In the
186: DMDA examples we must use the DMDA numbering where each processor is assigned a
187: chunk of data.
188: */
189: MatCreateAIJ(PETSC_COMM_WORLD,rend-rstart,rend-rstart,N,N,5,0,0,0,&J);
190: for (i=rstart; i<rend; i++) {
191: rj = i % user.mx; /* column in grid */
192: ri = i / user.mx; /* row in grid */
193: if (ri != 0) { /* first row does not have neighbor below */
194: ii = i - user.mx;
195: MatSetValues(J,1,&i,1,&ii,&zero,INSERT_VALUES);
196: }
197: if (ri != user.my - 1) { /* last row does not have neighbors above */
198: ii = i + user.mx;
199: MatSetValues(J,1,&i,1,&ii,&zero,INSERT_VALUES);
200: }
201: if (rj != 0) { /* first column does not have neighbor to left */
202: ii = i - 1;
203: MatSetValues(J,1,&i,1,&ii,&zero,INSERT_VALUES);
204: }
205: if (rj != user.mx - 1) { /* last column does not have neighbor to right */
206: ii = i + 1;
207: MatSetValues(J,1,&i,1,&ii,&zero,INSERT_VALUES);
208: }
209: MatSetValues(J,1,&i,1,&i,&zero,INSERT_VALUES);
210: }
211: MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);
212: MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);
214: /*
215: Create the data structure that SNESComputeJacobianDefaultColor() uses
216: to compute the actual Jacobians via finite differences.
217: */
218: MatFDColoringCreate(J,iscoloring,&fdcoloring);
219: MatFDColoringSetFunction(fdcoloring,(PetscErrorCode (*)(void))fnc,&user);
220: MatFDColoringSetFromOptions(fdcoloring);
221: MatFDColoringSetUp(J,iscoloring,fdcoloring);
222: /*
223: Tell SNES to use the routine SNESComputeJacobianDefaultColor()
224: to compute Jacobians.
225: */
226: SNESSetJacobian(snes,J,J,SNESComputeJacobianDefaultColor,fdcoloring);
227: ISColoringDestroy(&iscoloring);
230: /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
231: Customize nonlinear solver; set runtime options
232: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
234: /*
235: Set runtime options (e.g., -snes_monitor -snes_rtol <rtol> -ksp_type <type>)
236: */
237: SNESSetFromOptions(snes);
239: /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
240: Evaluate initial guess; then solve nonlinear system
241: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
242: /*
243: Note: The user should initialize the vector, x, with the initial guess
244: for the nonlinear solver prior to calling SNESSolve(). In particular,
245: to employ an initial guess of zero, the user should explicitly set
246: this vector to zero by calling VecSet().
247: */
248: FormInitialGuess(&user,x);
249: SNESSolve(snes,NULL,x);
250: SNESGetIterationNumber(snes,&its);
251: PetscPrintf(PETSC_COMM_WORLD,"Number of SNES iterations = %D\n",its);
253: /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
254: Free work space. All PETSc objects should be destroyed when they
255: are no longer needed.
256: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
257: VecDestroy(&x);
258: VecDestroy(&r);
259: SNESDestroy(&snes);
260: PetscFinalize();
262: return 0;
263: }
264: /* ------------------------------------------------------------------- */
266: /*
267: FormInitialGuess - Forms initial approximation.
269: Input Parameters:
270: user - user-defined application context
271: X - vector
273: Output Parameter:
274: X - vector
275: */
276: int FormInitialGuess(AppCtx *user,Vec X)277: {
278: int i,j,row,mx,my,ierr;
279: PetscReal one = 1.0,lambda,temp1,temp,hx,hy,hxdhy,hydhx,sc;
280: PetscScalar *x;
282: /*
283: Process 0 has to wait for all other processes to get here
284: before proceeding to write in the shared vector
285: */
286: PetscBarrier((PetscObject)X);
287: if (user->rank) {
288: /*
289: All the non-busy processors have to wait here for process 0 to finish
290: evaluating the function; otherwise they will start using the vector values
291: before they have been computed
292: */
293: PetscBarrier((PetscObject)X);
294: return 0;
295: }
297: mx = user->mx; my = user->my; lambda = user->param;
298: hx = one/(PetscReal)(mx-1); hy = one/(PetscReal)(my-1);
299: sc = hx*hy*lambda; hxdhy = hx/hy; hydhx = hy/hx;
301: temp1 = lambda/(lambda + one);
303: /*
304: Get a pointer to vector data.
305: - For default PETSc vectors, VecGetArray() returns a pointer to
306: the data array. Otherwise, the routine is implementation dependent.
307: - You MUST call VecRestoreArray() when you no longer need access to
308: the array.
309: */
310: VecGetArray(X,&x);
312: /*
313: Compute initial guess over the locally owned part of the grid
314: */
315: #pragma arl(4)
316: #pragma distinct (*x,*f)
317: #pragma no side effects (sqrt)
318: for (j=0; j<my; j++) {
319: temp = (PetscReal)(PetscMin(j,my-j-1))*hy;
320: for (i=0; i<mx; i++) {
321: row = i + j*mx;
322: if (i == 0 || j == 0 || i == mx-1 || j == my-1) {
323: x[row] = 0.0;
324: continue;
325: }
326: x[row] = temp1*PetscSqrtReal(PetscMin((PetscReal)(PetscMin(i,mx-i-1))*hx,temp));
327: }
328: }
330: /*
331: Restore vector
332: */
333: VecRestoreArray(X,&x);
335: PetscBarrier((PetscObject)X);
336: return 0;
337: }
338: /* ------------------------------------------------------------------- */
339: /*
340: FormFunction - Evaluates nonlinear function, F(x).
342: Input Parameters:
343: . snes - the SNES context
344: . X - input vector
345: . ptr - optional user-defined context, as set by SNESSetFunction()
347: Output Parameter:
348: . F - function vector
349: */
350: int FormFunction(SNES snes,Vec X,Vec F,void *ptr)351: {
352: AppCtx *user = (AppCtx*)ptr;
353: int ierr,i,j,row,mx,my;
354: PetscReal two = 2.0,one = 1.0,lambda,hx,hy,hxdhy,hydhx,sc;
355: PetscScalar u,uxx,uyy,*f;
356: const PetscScalar *x;
358: /*
359: Process 0 has to wait for all other processes to get here
360: before proceeding to write in the shared vector
361: */
362: PetscBarrier((PetscObject)X);
364: if (user->rank) {
365: /*
366: All the non-busy processors have to wait here for process 0 to finish
367: evaluating the function; otherwise they will start using the vector values
368: before they have been computed
369: */
370: PetscBarrier((PetscObject)X);
371: return 0;
372: }
374: mx = user->mx; my = user->my; lambda = user->param;
375: hx = one/(PetscReal)(mx-1); hy = one/(PetscReal)(my-1);
376: sc = hx*hy*lambda; hxdhy = hx/hy; hydhx = hy/hx;
378: /*
379: Get pointers to vector data
380: */
381: VecGetArrayRead(X,&x);
382: VecGetArray(F,&f);
384: /*
385: The next line tells the SGI compiler that x and f contain no overlapping
386: regions and thus it can use addition optimizations.
387: */
388: #pragma arl(4)
389: #pragma distinct (*x,*f)
390: #pragma no side effects (exp)
392: /*
393: Compute function over the entire grid
394: */
395: for (j=0; j<my; j++) {
396: for (i=0; i<mx; i++) {
397: row = i + j*mx;
398: if (i == 0 || j == 0 || i == mx-1 || j == my-1) {
399: f[row] = x[row];
400: continue;
401: }
402: u = x[row];
403: uxx = (two*u - x[row-1] - x[row+1])*hydhx;
404: uyy = (two*u - x[row-mx] - x[row+mx])*hxdhy;
405: f[row] = uxx + uyy - sc*PetscExpScalar(u);
406: }
407: }
409: /*
410: Restore vectors
411: */
412: VecRestoreArrayRead(X,&x);
413: VecRestoreArray(F,&f);
415: PetscLogFlops(11.0*(mx-2)*(my-2))
416: PetscBarrier((PetscObject)X);
417: return 0;
418: }
420: #if defined(PETSC_HAVE_FORTRAN_CAPS)
421: #define applicationfunctionfortran_ APPLICATIONFUNCTIONFORTRAN422: #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
423: #define applicationfunctionfortran_ applicationfunctionfortran424: #endif
426: /* ------------------------------------------------------------------- */
427: /*
428: FormFunctionFortran - Evaluates nonlinear function, F(x) in Fortran.
430: */
431: int FormFunctionFortran(SNES snes,Vec X,Vec F,void *ptr)432: {
433: AppCtx *user = (AppCtx*)ptr;
434: int ierr;
435: PetscScalar *f;
436: const PetscScalar *x;
438: /*
439: Process 0 has to wait for all other processes to get here
440: before proceeding to write in the shared vector
441: */
442: PetscBarrier((PetscObject)snes);
443: if (!user->rank) {
444: VecGetArrayRead(X,&x);
445: VecGetArray(F,&f);
446: applicationfunctionfortran_(&user->param,&user->mx,&user->my,x,f,&ierr);
447: VecRestoreArrayRead(X,&x);
448: VecRestoreArray(F,&f);
449: PetscLogFlops(11.0*(user->mx-2)*(user->my-2))
450: }
451: /*
452: All the non-busy processors have to wait here for process 0 to finish
453: evaluating the function; otherwise they will start using the vector values
454: before they have been computed
455: */
456: PetscBarrier((PetscObject)snes);
457: return 0;
458: }