Actual source code: ex5s.c

petsc-3.3-p7 2013-05-11
  2: static char help[] = "2d Bratu problem in shared memory parallel with SNES.\n\
  3: We solve the  Bratu (SFI - solid fuel ignition) problem in a 2D rectangular\n\
  4: domain, uses SHARED MEMORY to evaluate the user function.\n\
  5: The command line options include:\n\
  6:   -par <parameter>, where <parameter> indicates the problem's nonlinearity\n\
  7:      problem SFI:  <parameter> = Bratu parameter (0 <= par <= 6.81)\n\
  8:   -mx <xg>, where <xg> = number of grid points in the x-direction\n\
  9:   -my <yg>, where <yg> = number of grid points in the y-direction\n\
 10:   -use_fortran_function: use Fortran coded function, rather than C\n";

 12: /*
 13:              This code compiles ONLY on SGI systems
 14:             ========================================
 15: */
 16: /*T
 17:    Concepts: SNES^parallel Bratu example
 18:    Concepts: shared memory
 19:    Processors: n
 20: T*/

 22: /*

 24:      Programming model: Combination of 
 25:         1) MPI message passing for PETSc routines
 26:         2) automatic loop parallism (using shared memory) for user
 27:            provided function.

 29:        While the user function is being evaluated all MPI processes except process
 30:      0 blocks. Process zero spawns nt threads to evaluate the user function. Once 
 31:      the user function is complete, the worker threads are suspended and all the MPI processes
 32:      continue.

 34:      Other useful options:

 36:        -snes_mf : use matrix free operator and no preconditioner
 37:        -snes_mf_operator : use matrix free operator but compute Jacobian via
 38:                            finite differences to form preconditioner

 40:        Environmental variable:

 42:          setenv MPC_NUM_THREADS nt <- set number of threads processor 0 should 
 43:                                       use to evaluate user provided function

 45:        Note: The number of MPI processes (set with the mpiexec option -np) can 
 46:        be set completely independently from the number of threads process 0 
 47:        uses to evaluate the function (though usually one would make them the same).
 48: */
 49: 
 50: /* ------------------------------------------------------------------------

 52:     Solid Fuel Ignition (SFI) problem.  This problem is modeled by
 53:     the partial differential equation
 54:   
 55:             -Laplacian u - lambda*exp(u) = 0,  0 < x,y < 1,
 56:   
 57:     with boundary conditions
 58:    
 59:              u = 0  for  x = 0, x = 1, y = 0, y = 1.
 60:   
 61:     A finite difference approximation with the usual 5-point stencil
 62:     is used to discretize the boundary value problem to obtain a nonlinear 
 63:     system of equations.

 65:     The uniprocessor version of this code is snes/examples/tutorials/ex4.c
 66:     A parallel distributed memory version is snes/examples/tutorials/ex5.c and ex5f.F

 68:   ------------------------------------------------------------------------- */

 70: /* 
 71:    Include "petscsnes.h" so that we can use SNES solvers.  Note that this
 72:    file automatically includes:
 73:      petscsys.h       - base PETSc routines   petscvec.h - vectors
 74:      petscmat.h - matrices
 75:      petscis.h     - index sets            petscksp.h - Krylov subspace methods
 76:      petscviewer.h - viewers               petscpc.h  - preconditioners
 77:      petscksp.h   - linear solvers
 78: */
 79: #include <petscsnes.h>

 81: /* 
 82:    User-defined application context - contains data needed by the 
 83:    application-provided call-back routines   FormFunction().
 84: */
 85: typedef struct {
 86:    PetscReal   param;          /* test problem parameter */
 87:    int         mx,my;          /* discretization in x, y directions */
 88:    int         rank;           /* processor rank */
 89: } AppCtx;

 91: /* 
 92:    User-defined routines
 93: */
 94: extern int FormFunction(SNES,Vec,Vec,void*),FormInitialGuess(AppCtx*,Vec);
 95: extern int FormFunctionFortran(SNES,Vec,Vec,void*);

 99: /* 
100:     The main program is written in C while the user provided function
101:  is given in both Fortran and C. The main program could also be written 
102:  in Fortran; the ONE PROBLEM is that VecGetArray() cannot be called from 
103:  Fortran on the SGI machines; thus the routine FormFunctionFortran() must
104:  be written in C.
105: */
106: int main(int argc,char **argv)
107: {
108:   SNES           snes;                /* nonlinear solver */
109:   Vec            x,r;                 /* solution, residual vectors */
110:   AppCtx         user;                /* user-defined work context */
111:   int            its;                 /* iterations for convergence */
112:   int            N,ierr,rstart,rend,*colors,i,ii,ri,rj;
113:   PetscErrorCode (*fnc)(SNES,Vec,Vec,void*);
114:   PetscReal      bratu_lambda_max = 6.81,bratu_lambda_min = 0.;
115:   MatFDColoring  fdcoloring;
116:   ISColoring     iscoloring;
117:   Mat            J;
118:   PetscScalar    zero = 0.0;
119:   PetscBool      flg;

121:   PetscInitialize(&argc,&argv,(char *)0,help);
122:   MPI_Comm_rank(PETSC_COMM_WORLD,&user.rank);

124:   /*
125:      Initialize problem parameters
126:   */
127:   user.mx = 4; user.my = 4; user.param = 6.0;
128:   PetscOptionsGetInt(PETSC_NULL,"-mx",&user.mx,PETSC_NULL);
129:   PetscOptionsGetInt(PETSC_NULL,"-my",&user.my,PETSC_NULL);
130:   PetscOptionsGetReal(PETSC_NULL,"-par",&user.param,PETSC_NULL);
131:   if (user.param >= bratu_lambda_max || user.param <= bratu_lambda_min) SETERRQ(PETSC_COMM_SELF,1,"Lambda is out of range");
132:   N = user.mx*user.my;

134:   /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
135:      Create nonlinear solver context
136:      - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */

138:   SNESCreate(PETSC_COMM_WORLD,&snes);

140:   /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
141:      Create vector data structures; set function evaluation routine
142:      - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */

144:   /*
145:       The routine VecCreateShared() creates a parallel vector with each processor
146:     assigned its own segment, BUT, in addition, the first processor has access to the 
147:     entire array. This is to allow the users function to be based on loop level
148:     parallelism rather than MPI.
149:   */
150:   VecCreateShared(PETSC_COMM_WORLD,PETSC_DECIDE,N,&x);
151:   VecDuplicate(x,&r);

153:   PetscOptionsHasName(PETSC_NULL,"-use_fortran_function",&flg);
154:   if (flg) {
155:     fnc = FormFunctionFortran;
156:   } else {
157:     fnc = FormFunction;
158:   }

160:   /* 
161:      Set function evaluation routine and vector
162:   */
163:   SNESSetFunction(snes,r,fnc,&user);

165:   /*
166:        Currently when using VecCreateShared() and using loop level parallelism
167:     to automatically parallelise the user function it makes no sense for the 
168:     Jacobian to be computed via loop level parallelism, because all the threads
169:     would be simultaneously calling MatSetValues() causing a bottle-neck.

171:     Thus this example uses the PETSc Jacobian calculations via finite differencing
172:     to approximate the Jacobian
173:   */

175:   /*

177:   */
178:   VecGetOwnershipRange(r,&rstart,&rend);
179:   PetscMalloc((rend-rstart)*sizeof(PetscInt),&colors);
180:   for (i=rstart; i<rend; i++) {
181:     colors[i - rstart] = 3*((i/user.mx) % 3) + (i % 3);
182:   }
183:   ISColoringCreate(PETSC_COMM_WORLD,3*2+2,rend-rstart,colors,&iscoloring);
184:   PetscFree(colors);

186:   /*
187:      Create and set the nonzero pattern for the Jacobian: This is not done 
188:      particularly efficiently. One should process the boundary nodes separately and 
189:      then use a simple loop for the interior nodes.
190:        Note that for this code we use the "natural" number of the nodes on the 
191:      grid (since that is what is good for the user provided function). In the 
192:      DMDA examples we must use the DMDA numbering where each processor is assigned a
193:      chunk of data.
194:   */
195:   MatCreateAIJ(PETSC_COMM_WORLD,rend-rstart,rend-rstart,N,N,5,0,0,0,&J);
196:   for (i=rstart; i<rend; i++) {
197:     rj = i % user.mx;         /* column in grid */
198:     ri = i / user.mx;         /* row in grid */
199:     if (ri != 0) {     /* first row does not have neighbor below */
200:       ii   = i - user.mx;
201:       MatSetValues(J,1,&i,1,&ii,&zero,INSERT_VALUES);
202:     }
203:     if (ri != user.my - 1) { /* last row does not have neighbors above */
204:       ii   = i + user.mx;
205:       MatSetValues(J,1,&i,1,&ii,&zero,INSERT_VALUES);
206:     }
207:     if (rj != 0) {     /* first column does not have neighbor to left */
208:       ii   = i - 1;
209:       MatSetValues(J,1,&i,1,&ii,&zero,INSERT_VALUES);
210:     }
211:     if (rj != user.mx - 1) {     /* last column does not have neighbor to right */
212:       ii   = i + 1;
213:       MatSetValues(J,1,&i,1,&ii,&zero,INSERT_VALUES);
214:     }
215:     MatSetValues(J,1,&i,1,&i,&zero,INSERT_VALUES);
216:   }
217:   MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);
218:   MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);

220:   /*
221:        Create the data structure that SNESDefaultComputeJacobianColor() uses
222:        to compute the actual Jacobians via finite differences.
223:   */
224:   MatFDColoringCreate(J,iscoloring,&fdcoloring);
225:   MatFDColoringSetFunction(fdcoloring,(PetscErrorCode (*)(void))fnc,&user);
226:   MatFDColoringSetFromOptions(fdcoloring);
227:   /*
228:         Tell SNES to use the routine SNESDefaultComputeJacobianColor()
229:       to compute Jacobians.
230:   */
231:   SNESSetJacobian(snes,J,J,SNESDefaultComputeJacobianColor,fdcoloring);
232:   ISColoringDestroy(&iscoloring);


235:   /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
236:      Customize nonlinear solver; set runtime options
237:    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */

239:   /*
240:      Set runtime options (e.g., -snes_monitor -snes_rtol <rtol> -ksp_type <type>)
241:   */
242:   SNESSetFromOptions(snes);

244:   /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
245:      Evaluate initial guess; then solve nonlinear system
246:    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
247:   /*
248:      Note: The user should initialize the vector, x, with the initial guess
249:      for the nonlinear solver prior to calling SNESSolve().  In particular,
250:      to employ an initial guess of zero, the user should explicitly set
251:      this vector to zero by calling VecSet().
252:   */
253:   FormInitialGuess(&user,x);
254:   SNESSolve(snes,PETSC_NULL,x);
255:   SNESGetIterationNumber(snes,&its);
256:   PetscPrintf(PETSC_COMM_WORLD,"Number of SNES iterations = %D\n",its);

258:   /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
259:      Free work space.  All PETSc objects should be destroyed when they
260:      are no longer needed.
261:    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
262:   VecDestroy(&x);
263:   VecDestroy(&r);
264:   SNESDestroy(&snes);
265:   PetscFinalize();

267:   return 0;
268: }
269: /* ------------------------------------------------------------------- */

273: /* 
274:    FormInitialGuess - Forms initial approximation.

276:    Input Parameters:
277:    user - user-defined application context
278:    X - vector

280:    Output Parameter:
281:    X - vector
282:  */
283: int FormInitialGuess(AppCtx *user,Vec X)
284: {
285:   int          i,j,row,mx,my,ierr;
286:   PetscReal    one = 1.0,lambda,temp1,temp,hx,hy,hxdhy,hydhx,sc;
287:   PetscScalar  *x;

289:   /*
290:       Process 0 has to wait for all other processes to get here 
291:    before proceeding to write in the shared vector
292:   */
293:   PetscBarrier((PetscObject)X);
294:   if (user->rank) {
295:      /*
296:         All the non-busy processors have to wait here for process 0 to finish
297:         evaluating the function; otherwise they will start using the vector values
298:         before they have been computed
299:      */
300:      PetscBarrier((PetscObject)X);
301:      return 0;
302:   }

304:   mx = user->mx;            my = user->my;            lambda = user->param;
305:   hx = one/(PetscReal)(mx-1);  hy = one/(PetscReal)(my-1);
306:   sc = hx*hy*lambda;        hxdhy = hx/hy;            hydhx = hy/hx;
307:   temp1 = lambda/(lambda + one);

309:   /*
310:      Get a pointer to vector data.
311:        - For default PETSc vectors, VecGetArray() returns a pointer to
312:          the data array.  Otherwise, the routine is implementation dependent.
313:        - You MUST call VecRestoreArray() when you no longer need access to
314:          the array.
315:   */
316:   VecGetArray(X,&x);

318:   /*
319:      Compute initial guess over the locally owned part of the grid
320:   */
321: #pragma arl(4)
322: #pragma distinct (*x,*f)
323: #pragma no side effects (sqrt)
324:   for (j=0; j<my; j++) {
325:     temp = (PetscReal)(PetscMin(j,my-j-1))*hy;
326:     for (i=0; i<mx; i++) {
327:       row = i + j*mx;
328:       if (i == 0 || j == 0 || i == mx-1 || j == my-1) {
329:         x[row] = 0.0;
330:         continue;
331:       }
332:       x[row] = temp1*sqrt(PetscMin((PetscReal)(PetscMin(i,mx-i-1))*hx,temp));
333:     }
334:   }

336:   /*
337:      Restore vector
338:   */
339:   VecRestoreArray(X,&x);

341:   PetscBarrier((PetscObject)X);
342:   return 0;
343: }
344: /* ------------------------------------------------------------------- */
347: /* 
348:    FormFunction - Evaluates nonlinear function, F(x).

350:    Input Parameters:
351: .  snes - the SNES context
352: .  X - input vector
353: .  ptr - optional user-defined context, as set by SNESSetFunction()

355:    Output Parameter:
356: .  F - function vector
357:  */
358: int FormFunction(SNES snes,Vec X,Vec F,void *ptr)
359: {
360:   AppCtx       *user = (AppCtx*)ptr;
361:   int          ierr,i,j,row,mx,my;
362:   PetscReal    two = 2.0,one = 1.0,lambda,hx,hy,hxdhy,hydhx,sc;
363:   PetscScalar  u,uxx,uyy,*x,*f;

365:   /*
366:       Process 0 has to wait for all other processes to get here 
367:    before proceeding to write in the shared vector
368:   */
369:   PetscBarrier((PetscObject)X);

371:   if (user->rank) {
372:      /*
373:         All the non-busy processors have to wait here for process 0 to finish
374:         evaluating the function; otherwise they will start using the vector values
375:         before they have been computed
376:      */
377:      PetscBarrier((PetscObject)X);
378:      return 0;
379:   }

381:   mx = user->mx;            my = user->my;            lambda = user->param;
382:   hx = one/(PetscReal)(mx-1);  hy = one/(PetscReal)(my-1);
383:   sc = hx*hy*lambda;        hxdhy = hx/hy;            hydhx = hy/hx;

385:   /*
386:      Get pointers to vector data
387:   */
388:   VecGetArray(X,&x);
389:   VecGetArray(F,&f);

391:   /*
392:       The next line tells the SGI compiler that x and f contain no overlapping 
393:     regions and thus it can use addition optimizations.
394:   */
395: #pragma arl(4)
396: #pragma distinct (*x,*f)
397: #pragma no side effects (exp)

399:   /*
400:      Compute function over the entire  grid
401:   */
402:   for (j=0; j<my; j++) {
403:     for (i=0; i<mx; i++) {
404:       row = i + j*mx;
405:       if (i == 0 || j == 0 || i == mx-1 || j == my-1) {
406:         f[row] = x[row];
407:         continue;
408:       }
409:       u = x[row];
410:       uxx = (two*u - x[row-1] - x[row+1])*hydhx;
411:       uyy = (two*u - x[row-mx] - x[row+mx])*hxdhy;
412:       f[row] = uxx + uyy - sc*exp(u);
413:     }
414:   }

416:   /*
417:      Restore vectors
418:   */
419:   VecRestoreArray(X,&x);
420:   VecRestoreArray(F,&f);

422:   PetscLogFlops(11.0*(mx-2)*(my-2))
423:   PetscBarrier((PetscObject)X);
424:   return 0;
425: }

427: #if defined(PETSC_HAVE_FORTRAN_CAPS)
428: #define applicationfunctionfortran_ APPLICATIONFUNCTIONFORTRAN
429: #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
430: #define applicationfunctionfortran_ applicationfunctionfortran
431: #endif

433: /* ------------------------------------------------------------------- */
436: /* 
437:    FormFunctionFortran - Evaluates nonlinear function, F(x) in Fortran.

439: */
440: int FormFunctionFortran(SNES snes,Vec X,Vec F,void *ptr)
441: {
442:   AppCtx  *user = (AppCtx*)ptr;
443:   int     ierr;
444:   PetscScalar  *x,*f;

446:   /*
447:       Process 0 has to wait for all other processes to get here 
448:    before proceeding to write in the shared vector
449:   */
450:   PetscBarrier((PetscObject)snes);
451:   if (!user->rank) {
452:     VecGetArray(X,&x);
453:     VecGetArray(F,&f);
454:     applicationfunctionfortran_(&user->param,&user->mx,&user->my,x,f,&ierr);
455:     VecRestoreArray(X,&x);
456:     VecRestoreArray(F,&f);
457:     PetscLogFlops(11.0*(user->mx-2)*(user->my-2))
458:   }
459:   /*
460:       All the non-busy processors have to wait here for process 0 to finish
461:       evaluating the function; otherwise they will start using the vector values
462:       before they have been computed
463:   */
464:   PetscBarrier((PetscObject)snes);
465:   return 0;
466: }