2: static char help[] = "2d Bratu problem in shared memory parallel with SNES.\n\
3: We solve the Bratu (SFI - solid fuel ignition) problem in a 2D rectangular\n\
4: domain, uses SHARED MEMORY to evaluate the user function.\n\
5: The command line options include:\n\
6: -par <parameter>, where <parameter> indicates the problem's nonlinearity\n\
7: problem SFI: <parameter> = Bratu parameter (0 <= par <= 6.81)\n\
8: -mx <xg>, where <xg> = number of grid points in the x-direction\n\
9: -my <yg>, where <yg> = number of grid points in the y-direction\n\
10: -use_fortran_function: use Fortran coded function, rather than C\n";
12: /*
13: This code compiles ONLY on SGI systems
14: ========================================
15: */
16: /*T
17: Concepts: SNES^parallel Bratu example
18: Concepts: shared memory
19: Processors: n
20: T*/
22: /*
24: Programming model: Combination of
25: 1) MPI message passing for PETSc routines
26: 2) automatic loop parallism (using shared memory) for user
27: provided function.
29: While the user function is being evaluated all MPI processes except process
30: 0 blocks. Process zero spawns nt threads to evaluate the user function. Once
31: the user function is complete, the worker threads are suspended and all the MPI processes
32: continue.
34: Other useful options:
36: -snes_mf : use matrix free operator and no preconditioner
37: -snes_mf_operator : use matrix free operator but compute Jacobian via
38: finite differences to form preconditioner
40: Environmental variable:
42: setenv MPC_NUM_THREADS nt <- set number of threads processor 0 should
43: use to evaluate user provided function
45: Note: The number of MPI processes (set with the mpiexec option -np) can
46: be set completely independently from the number of threads process 0
47: uses to evaluate the function (though usually one would make them the same).
48: */
49: 50: /* ------------------------------------------------------------------------
52: Solid Fuel Ignition (SFI) problem. This problem is modeled by
53: the partial differential equation
54: 55: -Laplacian u - lambda*exp(u) = 0, 0 < x,y < 1,
56: 57: with boundary conditions
58: 59: u = 0 for x = 0, x = 1, y = 0, y = 1.
60: 61: A finite difference approximation with the usual 5-point stencil
62: is used to discretize the boundary value problem to obtain a nonlinear
63: system of equations.
65: The uniprocessor version of this code is snes/examples/tutorials/ex4.c
66: A parallel distributed memory version is snes/examples/tutorials/ex5.c and ex5f.F
68: ------------------------------------------------------------------------- */
70: /*
71: Include "petscsnes.h" so that we can use SNES solvers. Note that this
72: file automatically includes:
73: petscsys.h - base PETSc routines petscvec.h - vectors
74: petscmat.h - matrices
75: petscis.h - index sets petscksp.h - Krylov subspace methods
76: petscviewer.h - viewers petscpc.h - preconditioners
77: petscksp.h - linear solvers
78: */
79: #include <petscsnes.h>
81: /*
82: User-defined application context - contains data needed by the
83: application-provided call-back routines FormFunction().
84: */
85: typedef struct {
86: PetscReal param; /* test problem parameter */
87: int mx,my; /* discretization in x, y directions */
88: int rank; /* processor rank */
89: } AppCtx;
91: /*
92: User-defined routines
93: */
94: extern int FormFunction(SNES,Vec,Vec,void*),FormInitialGuess(AppCtx*,Vec);
95: extern int FormFunctionFortran(SNES,Vec,Vec,void*);
99: /*
100: The main program is written in C while the user provided function
101: is given in both Fortran and C. The main program could also be written
102: in Fortran; the ONE PROBLEM is that VecGetArray() cannot be called from
103: Fortran on the SGI machines; thus the routine FormFunctionFortran() must
104: be written in C.
105: */
106: int main(int argc,char **argv)107: {
108: SNES snes; /* nonlinear solver */
109: Vec x,r; /* solution, residual vectors */
110: AppCtx user; /* user-defined work context */
111: int its; /* iterations for convergence */
112: int N,ierr,rstart,rend,*colors,i,ii,ri,rj;
113: PetscErrorCode (*fnc)(SNES,Vec,Vec,void*);
114: PetscReal bratu_lambda_max = 6.81,bratu_lambda_min = 0.;
115: MatFDColoring fdcoloring;
116: ISColoring iscoloring;
117: Mat J;
118: PetscScalar zero = 0.0;
119: PetscBool flg;
121: PetscInitialize(&argc,&argv,(char *)0,help);
122: MPI_Comm_rank(PETSC_COMM_WORLD,&user.rank);
124: /*
125: Initialize problem parameters
126: */
127: user.mx = 4; user.my = 4; user.param = 6.0;
128: PetscOptionsGetInt(PETSC_NULL,"-mx",&user.mx,PETSC_NULL);
129: PetscOptionsGetInt(PETSC_NULL,"-my",&user.my,PETSC_NULL);
130: PetscOptionsGetReal(PETSC_NULL,"-par",&user.param,PETSC_NULL);
131: if (user.param >= bratu_lambda_max || user.param <= bratu_lambda_min) SETERRQ(PETSC_COMM_SELF,1,"Lambda is out of range");
132: N = user.mx*user.my;
134: /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
135: Create nonlinear solver context
136: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
138: SNESCreate(PETSC_COMM_WORLD,&snes);
140: /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
141: Create vector data structures; set function evaluation routine
142: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
144: /*
145: The routine VecCreateShared() creates a parallel vector with each processor
146: assigned its own segment, BUT, in addition, the first processor has access to the
147: entire array. This is to allow the users function to be based on loop level
148: parallelism rather than MPI.
149: */
150: VecCreateShared(PETSC_COMM_WORLD,PETSC_DECIDE,N,&x);
151: VecDuplicate(x,&r);
153: PetscOptionsHasName(PETSC_NULL,"-use_fortran_function",&flg);
154: if (flg) {
155: fnc = FormFunctionFortran;
156: } else {
157: fnc = FormFunction;
158: }
160: /*
161: Set function evaluation routine and vector
162: */
163: SNESSetFunction(snes,r,fnc,&user);
165: /*
166: Currently when using VecCreateShared() and using loop level parallelism
167: to automatically parallelise the user function it makes no sense for the
168: Jacobian to be computed via loop level parallelism, because all the threads
169: would be simultaneously calling MatSetValues() causing a bottle-neck.
171: Thus this example uses the PETSc Jacobian calculations via finite differencing
172: to approximate the Jacobian
173: */
175: /*
177: */
178: VecGetOwnershipRange(r,&rstart,&rend);
179: PetscMalloc((rend-rstart)*sizeof(PetscInt),&colors);
180: for (i=rstart; i<rend; i++) {
181: colors[i - rstart] = 3*((i/user.mx) % 3) + (i % 3);
182: }
183: ISColoringCreate(PETSC_COMM_WORLD,3*2+2,rend-rstart,colors,&iscoloring);
184: PetscFree(colors);
186: /*
187: Create and set the nonzero pattern for the Jacobian: This is not done
188: particularly efficiently. One should process the boundary nodes separately and
189: then use a simple loop for the interior nodes.
190: Note that for this code we use the "natural" number of the nodes on the
191: grid (since that is what is good for the user provided function). In the
192: DMDA examples we must use the DMDA numbering where each processor is assigned a
193: chunk of data.
194: */
195: MatCreateAIJ(PETSC_COMM_WORLD,rend-rstart,rend-rstart,N,N,5,0,0,0,&J);
196: for (i=rstart; i<rend; i++) {
197: rj = i % user.mx; /* column in grid */
198: ri = i / user.mx; /* row in grid */
199: if (ri != 0) { /* first row does not have neighbor below */
200: ii = i - user.mx;
201: MatSetValues(J,1,&i,1,&ii,&zero,INSERT_VALUES);
202: }
203: if (ri != user.my - 1) { /* last row does not have neighbors above */
204: ii = i + user.mx;
205: MatSetValues(J,1,&i,1,&ii,&zero,INSERT_VALUES);
206: }
207: if (rj != 0) { /* first column does not have neighbor to left */
208: ii = i - 1;
209: MatSetValues(J,1,&i,1,&ii,&zero,INSERT_VALUES);
210: }
211: if (rj != user.mx - 1) { /* last column does not have neighbor to right */
212: ii = i + 1;
213: MatSetValues(J,1,&i,1,&ii,&zero,INSERT_VALUES);
214: }
215: MatSetValues(J,1,&i,1,&i,&zero,INSERT_VALUES);
216: }
217: MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);
218: MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);
220: /*
221: Create the data structure that SNESDefaultComputeJacobianColor() uses
222: to compute the actual Jacobians via finite differences.
223: */
224: MatFDColoringCreate(J,iscoloring,&fdcoloring);
225: MatFDColoringSetFunction(fdcoloring,(PetscErrorCode (*)(void))fnc,&user);
226: MatFDColoringSetFromOptions(fdcoloring);
227: /*
228: Tell SNES to use the routine SNESDefaultComputeJacobianColor()
229: to compute Jacobians.
230: */
231: SNESSetJacobian(snes,J,J,SNESDefaultComputeJacobianColor,fdcoloring);
232: ISColoringDestroy(&iscoloring);
235: /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
236: Customize nonlinear solver; set runtime options
237: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
239: /*
240: Set runtime options (e.g., -snes_monitor -snes_rtol <rtol> -ksp_type <type>)
241: */
242: SNESSetFromOptions(snes);
244: /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
245: Evaluate initial guess; then solve nonlinear system
246: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
247: /*
248: Note: The user should initialize the vector, x, with the initial guess
249: for the nonlinear solver prior to calling SNESSolve(). In particular,
250: to employ an initial guess of zero, the user should explicitly set
251: this vector to zero by calling VecSet().
252: */
253: FormInitialGuess(&user,x);
254: SNESSolve(snes,PETSC_NULL,x);
255: SNESGetIterationNumber(snes,&its);
256: PetscPrintf(PETSC_COMM_WORLD,"Number of SNES iterations = %D\n",its);
258: /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
259: Free work space. All PETSc objects should be destroyed when they
260: are no longer needed.
261: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
262: VecDestroy(&x);
263: VecDestroy(&r);
264: SNESDestroy(&snes);
265: PetscFinalize();
267: return 0;
268: }
269: /* ------------------------------------------------------------------- */
273: /*
274: FormInitialGuess - Forms initial approximation.
276: Input Parameters:
277: user - user-defined application context
278: X - vector
280: Output Parameter:
281: X - vector
282: */
283: int FormInitialGuess(AppCtx *user,Vec X)284: {
285: int i,j,row,mx,my,ierr;
286: PetscReal one = 1.0,lambda,temp1,temp,hx,hy,hxdhy,hydhx,sc;
287: PetscScalar *x;
289: /*
290: Process 0 has to wait for all other processes to get here
291: before proceeding to write in the shared vector
292: */
293: PetscBarrier((PetscObject)X);
294: if (user->rank) {
295: /*
296: All the non-busy processors have to wait here for process 0 to finish
297: evaluating the function; otherwise they will start using the vector values
298: before they have been computed
299: */
300: PetscBarrier((PetscObject)X);
301: return 0;
302: }
304: mx = user->mx; my = user->my; lambda = user->param;
305: hx = one/(PetscReal)(mx-1); hy = one/(PetscReal)(my-1);
306: sc = hx*hy*lambda; hxdhy = hx/hy; hydhx = hy/hx;
307: temp1 = lambda/(lambda + one);
309: /*
310: Get a pointer to vector data.
311: - For default PETSc vectors, VecGetArray() returns a pointer to
312: the data array. Otherwise, the routine is implementation dependent.
313: - You MUST call VecRestoreArray() when you no longer need access to
314: the array.
315: */
316: VecGetArray(X,&x);
318: /*
319: Compute initial guess over the locally owned part of the grid
320: */
321: #pragma arl(4)
322: #pragma distinct (*x,*f)
323: #pragma no side effects (sqrt)
324: for (j=0; j<my; j++) {
325: temp = (PetscReal)(PetscMin(j,my-j-1))*hy;
326: for (i=0; i<mx; i++) {
327: row = i + j*mx;
328: if (i == 0 || j == 0 || i == mx-1 || j == my-1) {
329: x[row] = 0.0;
330: continue;
331: }
332: x[row] = temp1*sqrt(PetscMin((PetscReal)(PetscMin(i,mx-i-1))*hx,temp));
333: }
334: }
336: /*
337: Restore vector
338: */
339: VecRestoreArray(X,&x);
341: PetscBarrier((PetscObject)X);
342: return 0;
343: }
344: /* ------------------------------------------------------------------- */
347: /*
348: FormFunction - Evaluates nonlinear function, F(x).
350: Input Parameters:
351: . snes - the SNES context
352: . X - input vector
353: . ptr - optional user-defined context, as set by SNESSetFunction()
355: Output Parameter:
356: . F - function vector
357: */
358: int FormFunction(SNES snes,Vec X,Vec F,void *ptr)359: {
360: AppCtx *user = (AppCtx*)ptr;
361: int ierr,i,j,row,mx,my;
362: PetscReal two = 2.0,one = 1.0,lambda,hx,hy,hxdhy,hydhx,sc;
363: PetscScalar u,uxx,uyy,*x,*f;
365: /*
366: Process 0 has to wait for all other processes to get here
367: before proceeding to write in the shared vector
368: */
369: PetscBarrier((PetscObject)X);
371: if (user->rank) {
372: /*
373: All the non-busy processors have to wait here for process 0 to finish
374: evaluating the function; otherwise they will start using the vector values
375: before they have been computed
376: */
377: PetscBarrier((PetscObject)X);
378: return 0;
379: }
381: mx = user->mx; my = user->my; lambda = user->param;
382: hx = one/(PetscReal)(mx-1); hy = one/(PetscReal)(my-1);
383: sc = hx*hy*lambda; hxdhy = hx/hy; hydhx = hy/hx;
385: /*
386: Get pointers to vector data
387: */
388: VecGetArray(X,&x);
389: VecGetArray(F,&f);
391: /*
392: The next line tells the SGI compiler that x and f contain no overlapping
393: regions and thus it can use addition optimizations.
394: */
395: #pragma arl(4)
396: #pragma distinct (*x,*f)
397: #pragma no side effects (exp)
399: /*
400: Compute function over the entire grid
401: */
402: for (j=0; j<my; j++) {
403: for (i=0; i<mx; i++) {
404: row = i + j*mx;
405: if (i == 0 || j == 0 || i == mx-1 || j == my-1) {
406: f[row] = x[row];
407: continue;
408: }
409: u = x[row];
410: uxx = (two*u - x[row-1] - x[row+1])*hydhx;
411: uyy = (two*u - x[row-mx] - x[row+mx])*hxdhy;
412: f[row] = uxx + uyy - sc*exp(u);
413: }
414: }
416: /*
417: Restore vectors
418: */
419: VecRestoreArray(X,&x);
420: VecRestoreArray(F,&f);
422: PetscLogFlops(11.0*(mx-2)*(my-2))
423: PetscBarrier((PetscObject)X);
424: return 0;
425: }
427: #if defined(PETSC_HAVE_FORTRAN_CAPS)
428: #define applicationfunctionfortran_ APPLICATIONFUNCTIONFORTRAN429: #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
430: #define applicationfunctionfortran_ applicationfunctionfortran431: #endif
433: /* ------------------------------------------------------------------- */
436: /*
437: FormFunctionFortran - Evaluates nonlinear function, F(x) in Fortran.
439: */
440: int FormFunctionFortran(SNES snes,Vec X,Vec F,void *ptr)441: {
442: AppCtx *user = (AppCtx*)ptr;
443: int ierr;
444: PetscScalar *x,*f;
446: /*
447: Process 0 has to wait for all other processes to get here
448: before proceeding to write in the shared vector
449: */
450: PetscBarrier((PetscObject)snes);
451: if (!user->rank) {
452: VecGetArray(X,&x);
453: VecGetArray(F,&f);
454: applicationfunctionfortran_(&user->param,&user->mx,&user->my,x,f,&ierr);
455: VecRestoreArray(X,&x);
456: VecRestoreArray(F,&f);
457: PetscLogFlops(11.0*(user->mx-2)*(user->my-2))
458: }
459: /*
460: All the non-busy processors have to wait here for process 0 to finish
461: evaluating the function; otherwise they will start using the vector values
462: before they have been computed
463: */
464: PetscBarrier((PetscObject)snes);
465: return 0;
466: }