Actual source code: ex5s.c

petsc-3.8.4 2018-03-24
Report Typos and Errors

  2: static char help[] = "2d Bratu problem in shared memory parallel with SNES.\n\
  3: We solve the  Bratu (SFI - solid fuel ignition) problem in a 2D rectangular\n\
  4: domain, uses SHARED MEMORY to evaluate the user function.\n\
  5: The command line options include:\n\
  6:   -par <parameter>, where <parameter> indicates the problem's nonlinearity\n\
  7:      problem SFI:  <parameter> = Bratu parameter (0 <= par <= 6.81)\n\
  8:   -mx <xg>, where <xg> = number of grid points in the x-direction\n\
  9:   -my <yg>, where <yg> = number of grid points in the y-direction\n\
 10:   -use_fortran_function: use Fortran coded function, rather than C\n";

 12: /*
 13:              This code compiles ONLY on SGI systems
 14:             ========================================
 15: */
 16: /*T
 17:    Concepts: SNES^parallel Bratu example
 18:    Concepts: shared memory
 19:    Processors: n
 20: T*/

 22: /*

 24:      Programming model: Combination of
 25:         1) MPI message passing for PETSc routines
 26:         2) automatic loop parallelism (using shared memory) for user
 27:            provided function.

 29:        While the user function is being evaluated all MPI processes except process
 30:      0 blocks. Process zero spawns nt threads to evaluate the user function. Once
 31:      the user function is complete, the worker threads are suspended and all the MPI processes
 32:      continue.

 34:      Other useful options:

 36:        -snes_mf : use matrix free operator and no preconditioner
 37:        -snes_mf_operator : use matrix free operator but compute Jacobian via
 38:                            finite differences to form preconditioner

 40:        Environmental variable:

 42:          setenv MPC_NUM_THREADS nt <- set number of threads processor 0 should
 43:                                       use to evaluate user provided function

 45:        Note: The number of MPI processes (set with the mpiexec option -np) can
 46:        be set completely independently from the number of threads process 0
 47:        uses to evaluate the function (though usually one would make them the same).
 48: */

 50: /* ------------------------------------------------------------------------

 52:     Solid Fuel Ignition (SFI) problem.  This problem is modeled by
 53:     the partial differential equation

 55:             -Laplacian u - lambda*exp(u) = 0,  0 < x,y < 1,

 57:     with boundary conditions

 59:              u = 0  for  x = 0, x = 1, y = 0, y = 1.

 61:     A finite difference approximation with the usual 5-point stencil
 62:     is used to discretize the boundary value problem to obtain a nonlinear
 63:     system of equations.

 65:     The uniprocessor version of this code is snes/examples/tutorials/ex4.c
 66:     A parallel distributed memory version is snes/examples/tutorials/ex5.c and ex5f.F

 68:   ------------------------------------------------------------------------- */

 70: /*
 71:    Include "petscsnes.h" so that we can use SNES solvers.  Note that this
 72:    file automatically includes:
 73:      petscsys.h       - base PETSc routines   petscvec.h - vectors
 74:      petscmat.h - matrices
 75:      petscis.h     - index sets            petscksp.h - Krylov subspace methods
 76:      petscviewer.h - viewers               petscpc.h  - preconditioners
 77:      petscksp.h   - linear solvers
 78: */
 79:  #include <petscsnes.h>

 81: /*
 82:    User-defined application context - contains data needed by the
 83:    application-provided call-back routines   FormFunction().
 84: */
 85: typedef struct {
 86:   PetscReal param;             /* test problem parameter */
 87:   int       mx,my;             /* discretization in x, y directions */
 88:   int       rank;              /* processor rank */
 89: } AppCtx;

 91: /*
 92:    User-defined routines
 93: */
 94: extern int FormFunction(SNES,Vec,Vec,void*),FormInitialGuess(AppCtx*,Vec);
 95: extern int FormFunctionFortran(SNES,Vec,Vec,void*);

 97: /*
 98:     The main program is written in C while the user provided function
 99:  is given in both Fortran and C. The main program could also be written
100:  in Fortran; the ONE PROBLEM is that VecGetArray() cannot be called from
101:  Fortran on the SGI machines; thus the routine FormFunctionFortran() must
102:  be written in C.
103: */
104: int main(int argc,char **argv)
105: {
106:   SNES           snes;                /* nonlinear solver */
107:   Vec            x,r;                 /* solution, residual vectors */
108:   AppCtx         user;                /* user-defined work context */
109:   int            its;                 /* iterations for convergence */
110:   int            N,ierr,rstart,rend,*colors,i,ii,ri,rj;
111:   PetscErrorCode (*fnc)(SNES,Vec,Vec,void*);
112:   PetscReal      bratu_lambda_max = 6.81,bratu_lambda_min = 0.;
113:   MatFDColoring  fdcoloring;
114:   ISColoring     iscoloring;
115:   Mat            J;
116:   PetscScalar    zero = 0.0;
117:   PetscBool      flg;

119:   PetscInitialize(&argc,&argv,(char*)0,help);if (ierr) return ierr;
120:   MPI_Comm_rank(PETSC_COMM_WORLD,&user.rank);

122:   /*
123:      Initialize problem parameters
124:   */
125:   user.mx = 4; user.my = 4; user.param = 6.0;
126:   PetscOptionsGetInt(NULL,NULL,"-mx",&user.mx,NULL);
127:   PetscOptionsGetInt(NULL,NULL,"-my",&user.my,NULL);
128:   PetscOptionsGetReal(NULL,NULL,"-par",&user.param,NULL);
129:   if (user.param >= bratu_lambda_max || user.param <= bratu_lambda_min) SETERRQ(PETSC_COMM_SELF,1,"Lambda is out of range");
130:   N = user.mx*user.my;

132:   /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
133:      Create nonlinear solver context
134:      - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */

136:   SNESCreate(PETSC_COMM_WORLD,&snes);

138:   /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
139:      Create vector data structures; set function evaluation routine
140:      - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */

142:   /*
143:       The routine VecCreateShared() creates a parallel vector with each processor
144:     assigned its own segment, BUT, in addition, the first processor has access to the
145:     entire array. This is to allow the users function to be based on loop level
146:     parallelism rather than MPI.
147:   */
148:   VecCreateShared(PETSC_COMM_WORLD,PETSC_DECIDE,N,&x);
149:   VecDuplicate(x,&r);

151:   PetscOptionsHasName(NULL,NULL,"-use_fortran_function",&flg);
152:   if (flg) fnc = FormFunctionFortran;
153:   else     fnc = FormFunction;

155:   /*
156:      Set function evaluation routine and vector
157:   */
158:   SNESSetFunction(snes,r,fnc,&user);

160:   /*
161:        Currently when using VecCreateShared() and using loop level parallelism
162:     to automatically parallelise the user function it makes no sense for the
163:     Jacobian to be computed via loop level parallelism, because all the threads
164:     would be simultaneously calling MatSetValues() causing a bottle-neck.

166:     Thus this example uses the PETSc Jacobian calculations via finite differencing
167:     to approximate the Jacobian
168:   */

170:   /*

172:   */
173:   VecGetOwnershipRange(r,&rstart,&rend);
174:   PetscMalloc1(rend-rstart,&colors);
175:   for (i=rstart; i<rend; i++) colors[i - rstart] = 3*((i/user.mx) % 3) + (i % 3);

177:   ISColoringCreate(PETSC_COMM_WORLD,3*2+2,rend-rstart,colors,&iscoloring);
178:   PetscFree(colors);

180:   /*
181:      Create and set the nonzero pattern for the Jacobian: This is not done
182:      particularly efficiently. One should process the boundary nodes separately and
183:      then use a simple loop for the interior nodes.
184:        Note that for this code we use the "natural" number of the nodes on the
185:      grid (since that is what is good for the user provided function). In the
186:      DMDA examples we must use the DMDA numbering where each processor is assigned a
187:      chunk of data.
188:   */
189:   MatCreateAIJ(PETSC_COMM_WORLD,rend-rstart,rend-rstart,N,N,5,0,0,0,&J);
190:   for (i=rstart; i<rend; i++) {
191:     rj = i % user.mx;         /* column in grid */
192:     ri = i / user.mx;         /* row in grid */
193:     if (ri != 0) {     /* first row does not have neighbor below */
194:       ii   = i - user.mx;
195:       MatSetValues(J,1,&i,1,&ii,&zero,INSERT_VALUES);
196:     }
197:     if (ri != user.my - 1) { /* last row does not have neighbors above */
198:       ii   = i + user.mx;
199:       MatSetValues(J,1,&i,1,&ii,&zero,INSERT_VALUES);
200:     }
201:     if (rj != 0) {     /* first column does not have neighbor to left */
202:       ii   = i - 1;
203:       MatSetValues(J,1,&i,1,&ii,&zero,INSERT_VALUES);
204:     }
205:     if (rj != user.mx - 1) {     /* last column does not have neighbor to right */
206:       ii   = i + 1;
207:       MatSetValues(J,1,&i,1,&ii,&zero,INSERT_VALUES);
208:     }
209:     MatSetValues(J,1,&i,1,&i,&zero,INSERT_VALUES);
210:   }
211:   MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);
212:   MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);

214:   /*
215:        Create the data structure that SNESComputeJacobianDefaultColor() uses
216:        to compute the actual Jacobians via finite differences.
217:   */
218:   MatFDColoringCreate(J,iscoloring,&fdcoloring);
219:   MatFDColoringSetFunction(fdcoloring,(PetscErrorCode (*)(void))fnc,&user);
220:   MatFDColoringSetFromOptions(fdcoloring);
221:   MatFDColoringSetUp(J,iscoloring,fdcoloring);
222:   /*
223:         Tell SNES to use the routine SNESComputeJacobianDefaultColor()
224:       to compute Jacobians.
225:   */
226:   SNESSetJacobian(snes,J,J,SNESComputeJacobianDefaultColor,fdcoloring);
227:   ISColoringDestroy(&iscoloring);


230:   /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
231:      Customize nonlinear solver; set runtime options
232:    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */

234:   /*
235:      Set runtime options (e.g., -snes_monitor -snes_rtol <rtol> -ksp_type <type>)
236:   */
237:   SNESSetFromOptions(snes);

239:   /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
240:      Evaluate initial guess; then solve nonlinear system
241:    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
242:   /*
243:      Note: The user should initialize the vector, x, with the initial guess
244:      for the nonlinear solver prior to calling SNESSolve().  In particular,
245:      to employ an initial guess of zero, the user should explicitly set
246:      this vector to zero by calling VecSet().
247:   */
248:   FormInitialGuess(&user,x);
249:   SNESSolve(snes,NULL,x);
250:   SNESGetIterationNumber(snes,&its);
251:   PetscPrintf(PETSC_COMM_WORLD,"Number of SNES iterations = %D\n",its);

253:   /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
254:      Free work space.  All PETSc objects should be destroyed when they
255:      are no longer needed.
256:    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
257:   VecDestroy(&x);
258:   VecDestroy(&r);
259:   SNESDestroy(&snes);
260:   PetscFinalize();

262:   return 0;
263: }
264: /* ------------------------------------------------------------------- */

266: /*
267:    FormInitialGuess - Forms initial approximation.

269:    Input Parameters:
270:    user - user-defined application context
271:    X - vector

273:    Output Parameter:
274:    X - vector
275:  */
276: int FormInitialGuess(AppCtx *user,Vec X)
277: {
278:   int         i,j,row,mx,my,ierr;
279:   PetscReal   one = 1.0,lambda,temp1,temp,hx,hy,hxdhy,hydhx,sc;
280:   PetscScalar *x;

282:   /*
283:       Process 0 has to wait for all other processes to get here
284:    before proceeding to write in the shared vector
285:   */
286:   PetscBarrier((PetscObject)X);
287:   if (user->rank) {
288:     /*
289:        All the non-busy processors have to wait here for process 0 to finish
290:        evaluating the function; otherwise they will start using the vector values
291:        before they have been computed
292:     */
293:     PetscBarrier((PetscObject)X);
294:     return 0;
295:   }

297:   mx = user->mx;               my = user->my;        lambda = user->param;
298:   hx = one/(PetscReal)(mx-1);  hy = one/(PetscReal)(my-1);
299:   sc = hx*hy*lambda;        hxdhy = hx/hy;            hydhx = hy/hx;

301:   temp1 = lambda/(lambda + one);

303:   /*
304:      Get a pointer to vector data.
305:        - For default PETSc vectors, VecGetArray() returns a pointer to
306:          the data array.  Otherwise, the routine is implementation dependent.
307:        - You MUST call VecRestoreArray() when you no longer need access to
308:          the array.
309:   */
310:   VecGetArray(X,&x);

312:   /*
313:      Compute initial guess over the locally owned part of the grid
314:   */
315: #pragma arl(4)
316: #pragma distinct (*x,*f)
317: #pragma no side effects (sqrt)
318:   for (j=0; j<my; j++) {
319:     temp = (PetscReal)(PetscMin(j,my-j-1))*hy;
320:     for (i=0; i<mx; i++) {
321:       row = i + j*mx;
322:       if (i == 0 || j == 0 || i == mx-1 || j == my-1) {
323:         x[row] = 0.0;
324:         continue;
325:       }
326:       x[row] = temp1*PetscSqrtReal(PetscMin((PetscReal)(PetscMin(i,mx-i-1))*hx,temp));
327:     }
328:   }

330:   /*
331:      Restore vector
332:   */
333:   VecRestoreArray(X,&x);

335:   PetscBarrier((PetscObject)X);
336:   return 0;
337: }
338: /* ------------------------------------------------------------------- */
339: /*
340:    FormFunction - Evaluates nonlinear function, F(x).

342:    Input Parameters:
343: .  snes - the SNES context
344: .  X - input vector
345: .  ptr - optional user-defined context, as set by SNESSetFunction()

347:    Output Parameter:
348: .  F - function vector
349:  */
350: int FormFunction(SNES snes,Vec X,Vec F,void *ptr)
351: {
352:   AppCtx            *user = (AppCtx*)ptr;
353:   int               ierr,i,j,row,mx,my;
354:   PetscReal         two = 2.0,one = 1.0,lambda,hx,hy,hxdhy,hydhx,sc;
355:   PetscScalar       u,uxx,uyy,*f;
356:   const PetscScalar *x;

358:   /*
359:       Process 0 has to wait for all other processes to get here
360:    before proceeding to write in the shared vector
361:   */
362:   PetscBarrier((PetscObject)X);

364:   if (user->rank) {
365:     /*
366:        All the non-busy processors have to wait here for process 0 to finish
367:        evaluating the function; otherwise they will start using the vector values
368:        before they have been computed
369:     */
370:     PetscBarrier((PetscObject)X);
371:     return 0;
372:   }

374:   mx = user->mx;            my = user->my;            lambda = user->param;
375:   hx = one/(PetscReal)(mx-1);  hy = one/(PetscReal)(my-1);
376:   sc = hx*hy*lambda;        hxdhy = hx/hy;            hydhx = hy/hx;

378:   /*
379:      Get pointers to vector data
380:   */
381:   VecGetArrayRead(X,&x);
382:   VecGetArray(F,&f);

384:   /*
385:       The next line tells the SGI compiler that x and f contain no overlapping
386:     regions and thus it can use addition optimizations.
387:   */
388: #pragma arl(4)
389: #pragma distinct (*x,*f)
390: #pragma no side effects (exp)

392:   /*
393:      Compute function over the entire  grid
394:   */
395:   for (j=0; j<my; j++) {
396:     for (i=0; i<mx; i++) {
397:       row = i + j*mx;
398:       if (i == 0 || j == 0 || i == mx-1 || j == my-1) {
399:         f[row] = x[row];
400:         continue;
401:       }
402:       u      = x[row];
403:       uxx    = (two*u - x[row-1] - x[row+1])*hydhx;
404:       uyy    = (two*u - x[row-mx] - x[row+mx])*hxdhy;
405:       f[row] = uxx + uyy - sc*PetscExpScalar(u);
406:     }
407:   }

409:   /*
410:      Restore vectors
411:   */
412:   VecRestoreArrayRead(X,&x);
413:   VecRestoreArray(F,&f);

415:   PetscLogFlops(11.0*(mx-2)*(my-2))
416:   PetscBarrier((PetscObject)X);
417:   return 0;
418: }

420: #if defined(PETSC_HAVE_FORTRAN_CAPS)
421: #define applicationfunctionfortran_ APPLICATIONFUNCTIONFORTRAN
422: #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
423: #define applicationfunctionfortran_ applicationfunctionfortran
424: #endif

426: /* ------------------------------------------------------------------- */
427: /*
428:    FormFunctionFortran - Evaluates nonlinear function, F(x) in Fortran.

430: */
431: int FormFunctionFortran(SNES snes,Vec X,Vec F,void *ptr)
432: {
433:   AppCtx            *user = (AppCtx*)ptr;
434:   int               ierr;
435:   PetscScalar       *f;
436:   const PetscScalar *x;

438:   /*
439:       Process 0 has to wait for all other processes to get here
440:    before proceeding to write in the shared vector
441:   */
442:   PetscBarrier((PetscObject)snes);
443:   if (!user->rank) {
444:     VecGetArrayRead(X,&x);
445:     VecGetArray(F,&f);
446:     applicationfunctionfortran_(&user->param,&user->mx,&user->my,x,f,&ierr);
447:     VecRestoreArrayRead(X,&x);
448:     VecRestoreArray(F,&f);
449:     PetscLogFlops(11.0*(user->mx-2)*(user->my-2))
450:   }
451:   /*
452:       All the non-busy processors have to wait here for process 0 to finish
453:       evaluating the function; otherwise they will start using the vector values
454:       before they have been computed
455:   */
456:   PetscBarrier((PetscObject)snes);
457:   return 0;
458: }