Actual source code: hyperbolic.c

petsc-3.12.5 2020-03-29
Report Typos and Errors
  1: #include <petsctao.h>

  3: /*T
  4:    Concepts: TAO^Solving a system of nonlinear equations, nonlinear least squares
  5:    Routines: TaoCreate();
  6:    Routines: TaoSetType();
  7:    Routines: TaoSetInitialVector();
  8:    Routines: TaoSetObjectiveRoutine();
  9:    Routines: TaoSetGradientRoutine();
 10:    Routines: TaoSetConstraintsRoutine();
 11:    Routines: TaoSetJacobianStateRoutine();
 12:    Routines: TaoSetJacobianDesignRoutine();
 13:    Routines: TaoSetStateDesignIS();
 14:    Routines: TaoSetFromOptions();
 15:    Routines: TaoSolve();
 16:    Routines: TaoDestroy();
 17:    Processors: 1
 18: T*/



 22: typedef struct {
 23:   PetscInt n; /*  Number of variables */
 24:   PetscInt m; /*  Number of constraints */
 25:   PetscInt mx; /*  grid points in each direction */
 26:   PetscInt nt; /*  Number of time steps */
 27:   PetscInt ndata; /*  Number of data points per sample */
 28:   IS       s_is;
 29:   IS       d_is;
 30:   VecScatter state_scatter;
 31:   VecScatter design_scatter;
 32:   VecScatter *uxi_scatter,*uyi_scatter,*ux_scatter,*uy_scatter,*ui_scatter;
 33:   VecScatter *yi_scatter;

 35:   Mat       Js,Jd,JsBlockPrec,JsInv,JsBlock;
 36:   PetscBool jformed,c_formed;

 38:   PetscReal alpha; /*  Regularization parameter */
 39:   PetscReal gamma;
 40:   PetscReal ht; /*  Time step */
 41:   PetscReal T; /*  Final time */
 42:   Mat Q,QT;
 43:   Mat L,LT;
 44:   Mat Div,Divwork,Divxy[2];
 45:   Mat Grad,Gradxy[2];
 46:   Mat M;
 47:   Mat *C,*Cwork;
 48:   /* Mat Hs,Hd,Hsd; */
 49:   Vec q;
 50:   Vec ur; /*  reference */

 52:   Vec d;
 53:   Vec dwork;

 55:   Vec y; /*  state variables */
 56:   Vec ywork;
 57:   Vec ytrue;
 58:   Vec *yi,*yiwork,*ziwork;
 59:   Vec *uxi,*uyi,*uxiwork,*uyiwork,*ui,*uiwork;

 61:   Vec u; /*  design variables */
 62:   Vec uwork,vwork;
 63:   Vec utrue;

 65:   Vec js_diag;

 67:   Vec c; /*  constraint vector */
 68:   Vec cwork;

 70:   Vec lwork;

 72:   KSP      solver;
 73:   PC       prec;
 74:   PetscInt block_index;

 76:   PetscInt ksp_its;
 77:   PetscInt ksp_its_initial;
 78: } AppCtx;


 81: PetscErrorCode FormFunction(Tao, Vec, PetscReal*, void*);
 82: PetscErrorCode FormGradient(Tao, Vec, Vec, void*);
 83: PetscErrorCode FormFunctionGradient(Tao, Vec, PetscReal*, Vec, void*);
 84: PetscErrorCode FormJacobianState(Tao, Vec, Mat, Mat, Mat, void*);
 85: PetscErrorCode FormJacobianDesign(Tao, Vec, Mat,void*);
 86: PetscErrorCode FormConstraints(Tao, Vec, Vec, void*);
 87: PetscErrorCode FormHessian(Tao, Vec, Mat, Mat, void*);
 88: PetscErrorCode Gather(Vec x, Vec state, VecScatter s_scat, Vec design, VecScatter d_scat);
 89: PetscErrorCode Scatter(Vec x, Vec state, VecScatter s_scat, Vec design, VecScatter d_scat);
 90: PetscErrorCode HyperbolicInitialize(AppCtx *user);
 91: PetscErrorCode HyperbolicDestroy(AppCtx *user);
 92: PetscErrorCode HyperbolicMonitor(Tao, void*);

 94: PetscErrorCode StateMatMult(Mat,Vec,Vec);
 95: PetscErrorCode StateMatBlockMult(Mat,Vec,Vec);
 96: PetscErrorCode StateMatBlockMultTranspose(Mat,Vec,Vec);
 97: PetscErrorCode StateMatMultTranspose(Mat,Vec,Vec);
 98: PetscErrorCode StateMatGetDiagonal(Mat,Vec);
 99: PetscErrorCode StateMatDuplicate(Mat,MatDuplicateOption,Mat*);
100: PetscErrorCode StateMatInvMult(Mat,Vec,Vec);
101: PetscErrorCode StateMatInvTransposeMult(Mat,Vec,Vec);
102: PetscErrorCode StateMatBlockPrecMult(PC,Vec,Vec);

104: PetscErrorCode DesignMatMult(Mat,Vec,Vec);
105: PetscErrorCode DesignMatMultTranspose(Mat,Vec,Vec);

107: PetscErrorCode Scatter_yi(Vec,Vec*,VecScatter*,PetscInt); /*  y to y1,y2,...,y_nt */
108: PetscErrorCode Gather_yi(Vec,Vec*,VecScatter*,PetscInt);
109: PetscErrorCode Scatter_uxi_uyi(Vec,Vec*,VecScatter*,Vec*,VecScatter*,PetscInt); /*  u to ux_1,uy_1,ux_2,uy_2,...,u */
110: PetscErrorCode Gather_uxi_uyi(Vec,Vec*,VecScatter*,Vec*,VecScatter*,PetscInt);

112: static  char help[]="";

114: int main(int argc, char **argv)
115: {
116:   PetscErrorCode     ierr;
117:   Vec                x,x0;
118:   Tao                tao;
119:   AppCtx             user;
120:   IS                 is_allstate,is_alldesign;
121:   PetscInt           lo,hi,hi2,lo2,ksp_old;
122:   PetscInt           ntests = 1;
123:   PetscInt           i;
124: #if defined(PETSC_USE_LOG)
125:   PetscLogStage      stages[1];
126: #endif

128:   PetscInitialize(&argc, &argv, (char*)0,help);if (ierr) return ierr;
129:   user.mx = 32;
130:   PetscOptionsBegin(PETSC_COMM_WORLD,NULL,NULL,NULL);
131:   PetscOptionsInt("-mx","Number of grid points in each direction","",user.mx,&user.mx,NULL);
132:   user.nt = 16;
133:   PetscOptionsInt("-nt","Number of time steps","",user.nt,&user.nt,NULL);
134:   user.ndata = 64;
135:   PetscOptionsInt("-ndata","Numbers of data points per sample","",user.ndata,&user.ndata,NULL);
136:   user.alpha = 10.0;
137:   PetscOptionsReal("-alpha","Regularization parameter","",user.alpha,&user.alpha,NULL);
138:   user.T = 1.0/32.0;
139:   PetscOptionsReal("-Tfinal","Final time","",user.T,&user.T,NULL);

141:   user.m = user.mx*user.mx*user.nt; /*  number of constraints */
142:   user.n = user.mx*user.mx*3*user.nt; /*  number of variables */
143:   user.ht = user.T/user.nt; /*  Time step */
144:   user.gamma = user.T*user.ht / (user.mx*user.mx);

146:   VecCreate(PETSC_COMM_WORLD,&user.u);
147:   VecCreate(PETSC_COMM_WORLD,&user.y);
148:   VecCreate(PETSC_COMM_WORLD,&user.c);
149:   VecSetSizes(user.u,PETSC_DECIDE,user.n-user.m);
150:   VecSetSizes(user.y,PETSC_DECIDE,user.m);
151:   VecSetSizes(user.c,PETSC_DECIDE,user.m);
152:   VecSetFromOptions(user.u);
153:   VecSetFromOptions(user.y);
154:   VecSetFromOptions(user.c);

156:   /* Create scatters for reduced spaces.
157:      If the state vector y and design vector u are partitioned as
158:      [y_1; y_2; ...; y_np] and [u_1; u_2; ...; u_np] (with np = # of processors),
159:      then the solution vector x is organized as
160:      [y_1; u_1; y_2; u_2; ...; y_np; u_np].
161:      The index sets user.s_is and user.d_is correspond to the indices of the
162:      state and design variables owned by the current processor.
163:   */
164:   VecCreate(PETSC_COMM_WORLD,&x);

166:   VecGetOwnershipRange(user.y,&lo,&hi);
167:   VecGetOwnershipRange(user.u,&lo2,&hi2);

169:   ISCreateStride(PETSC_COMM_SELF,hi-lo,lo,1,&is_allstate);
170:   ISCreateStride(PETSC_COMM_SELF,hi-lo,lo+lo2,1,&user.s_is);

172:   ISCreateStride(PETSC_COMM_SELF,hi2-lo2,lo2,1,&is_alldesign);
173:   ISCreateStride(PETSC_COMM_SELF,hi2-lo2,hi+lo2,1,&user.d_is);

175:   VecSetSizes(x,hi-lo+hi2-lo2,user.n);
176:   VecSetFromOptions(x);

178:   VecScatterCreate(x,user.s_is,user.y,is_allstate,&user.state_scatter);
179:   VecScatterCreate(x,user.d_is,user.u,is_alldesign,&user.design_scatter);
180:   ISDestroy(&is_alldesign);
181:   ISDestroy(&is_allstate);

183:   /* Create TAO solver and set desired solution method */
184:   TaoCreate(PETSC_COMM_WORLD,&tao);
185:   TaoSetType(tao,TAOLCL);

187:   /* Set up initial vectors and matrices */
188:   HyperbolicInitialize(&user);

190:   Gather(x,user.y,user.state_scatter,user.u,user.design_scatter);
191:   VecDuplicate(x,&x0);
192:   VecCopy(x,x0);

194:   /* Set solution vector with an initial guess */
195:   TaoSetInitialVector(tao,x);
196:   TaoSetObjectiveRoutine(tao, FormFunction, (void *)&user);
197:   TaoSetGradientRoutine(tao, FormGradient, (void *)&user);
198:   TaoSetConstraintsRoutine(tao, user.c, FormConstraints, (void *)&user);
199:   TaoSetJacobianStateRoutine(tao, user.Js, user.Js, user.JsInv, FormJacobianState, (void *)&user);
200:   TaoSetJacobianDesignRoutine(tao, user.Jd, FormJacobianDesign, (void *)&user);
201:   TaoSetFromOptions(tao);
202:   TaoSetStateDesignIS(tao,user.s_is,user.d_is);

204:   /* SOLVE THE APPLICATION */
205:   PetscOptionsInt("-ntests","Number of times to repeat TaoSolve","",ntests,&ntests,NULL);
206:   PetscOptionsEnd();
207:   PetscLogStageRegister("Trials",&stages[0]);
208:   PetscLogStagePush(stages[0]);
209:   user.ksp_its_initial = user.ksp_its;
210:   ksp_old = user.ksp_its;
211:   for (i=0; i<ntests; i++){
212:     TaoSolve(tao);
213:     PetscPrintf(PETSC_COMM_WORLD,"KSP Iterations = %D\n",user.ksp_its-ksp_old);
214:     VecCopy(x0,x);
215:     TaoSetInitialVector(tao,x);
216:   }
217:   PetscLogStagePop();
218:   PetscBarrier((PetscObject)x);
219:   PetscPrintf(PETSC_COMM_WORLD,"KSP iterations within initialization: ");
220:   PetscPrintf(PETSC_COMM_WORLD,"%D\n",user.ksp_its_initial);
221:   PetscPrintf(PETSC_COMM_WORLD,"Total KSP iterations over %D trial(s): ",ntests);
222:   PetscPrintf(PETSC_COMM_WORLD,"%D\n",user.ksp_its);
223:   PetscPrintf(PETSC_COMM_WORLD,"KSP iterations per trial: ");
224:   PetscPrintf(PETSC_COMM_WORLD,"%D\n",(user.ksp_its-user.ksp_its_initial)/ntests);

226:   TaoDestroy(&tao);
227:   VecDestroy(&x);
228:   VecDestroy(&x0);
229:   HyperbolicDestroy(&user);
230:   PetscFinalize();
231:   return ierr;
232: }
233: /* ------------------------------------------------------------------- */
234: /*
235:    dwork = Qy - d
236:    lwork = L*(u-ur).^2
237:    f = 1/2 * (dwork.dork + alpha*y.lwork)
238: */
239: PetscErrorCode FormFunction(Tao tao,Vec X,PetscReal *f,void *ptr)
240: {
242:   PetscReal      d1=0,d2=0;
243:   AppCtx         *user = (AppCtx*)ptr;

246:   Scatter(X,user->y,user->state_scatter,user->u,user->design_scatter);
247:   MatMult(user->Q,user->y,user->dwork);
248:   VecAXPY(user->dwork,-1.0,user->d);
249:   VecDot(user->dwork,user->dwork,&d1);

251:   VecWAXPY(user->uwork,-1.0,user->ur,user->u);
252:   VecPointwiseMult(user->uwork,user->uwork,user->uwork);
253:   MatMult(user->L,user->uwork,user->lwork);
254:   VecDot(user->y,user->lwork,&d2);
255:   *f = 0.5 * (d1 + user->alpha*d2);
256:   return(0);
257: }

259: /* ------------------------------------------------------------------- */
260: /*
261:     state: g_s = Q' *(Qy - d) + 0.5*alpha*L*(u-ur).^2
262:     design: g_d = alpha*(L'y).*(u-ur)
263: */
264: PetscErrorCode FormGradient(Tao tao,Vec X,Vec G,void *ptr)
265: {
267:   AppCtx         *user = (AppCtx*)ptr;

270:   Scatter(X,user->y,user->state_scatter,user->u,user->design_scatter);
271:   MatMult(user->Q,user->y,user->dwork);
272:   VecAXPY(user->dwork,-1.0,user->d);

274:   MatMult(user->QT,user->dwork,user->ywork);

276:   MatMult(user->LT,user->y,user->uwork);
277:   VecWAXPY(user->vwork,-1.0,user->ur,user->u);
278:   VecPointwiseMult(user->uwork,user->vwork,user->uwork);
279:   VecScale(user->uwork,user->alpha);

281:   VecPointwiseMult(user->vwork,user->vwork,user->vwork);
282:   MatMult(user->L,user->vwork,user->lwork);
283:   VecAXPY(user->ywork,0.5*user->alpha,user->lwork);

285:   Gather(G,user->ywork,user->state_scatter,user->uwork,user->design_scatter);
286:   return(0);
287: }

289: PetscErrorCode FormFunctionGradient(Tao tao, Vec X, PetscReal *f, Vec G, void *ptr)
290: {
292:   PetscReal      d1,d2;
293:   AppCtx         *user = (AppCtx*)ptr;

296:   Scatter(X,user->y,user->state_scatter,user->u,user->design_scatter);
297:   MatMult(user->Q,user->y,user->dwork);
298:   VecAXPY(user->dwork,-1.0,user->d);

300:   MatMult(user->QT,user->dwork,user->ywork);

302:   VecDot(user->dwork,user->dwork,&d1);

304:   MatMult(user->LT,user->y,user->uwork);
305:   VecWAXPY(user->vwork,-1.0,user->ur,user->u);
306:   VecPointwiseMult(user->uwork,user->vwork,user->uwork);
307:   VecScale(user->uwork,user->alpha);

309:   VecPointwiseMult(user->vwork,user->vwork,user->vwork);
310:   MatMult(user->L,user->vwork,user->lwork);
311:   VecAXPY(user->ywork,0.5*user->alpha,user->lwork);

313:   VecDot(user->y,user->lwork,&d2);

315:   *f = 0.5 * (d1 + user->alpha*d2);
316:   Gather(G,user->ywork,user->state_scatter,user->uwork,user->design_scatter);
317:   return(0);
318: }

320: /* ------------------------------------------------------------------- */
321: /* A
322: MatShell object
323: */
324: PetscErrorCode FormJacobianState(Tao tao, Vec X, Mat J, Mat JPre, Mat JInv, void *ptr)
325: {
327:   PetscInt       i;
328:   AppCtx         *user = (AppCtx*)ptr;

331:   Scatter(X,user->y,user->state_scatter,user->u,user->design_scatter);
332:   Scatter_yi(user->u,user->ui,user->ui_scatter,user->nt);
333:   Scatter_uxi_uyi(user->u,user->uxi,user->uxi_scatter,user->uyi,user->uyi_scatter,user->nt);
334:   for (i=0; i<user->nt; i++){
335:     MatCopy(user->Divxy[0],user->C[i],SAME_NONZERO_PATTERN);
336:     MatCopy(user->Divxy[1],user->Cwork[i],SAME_NONZERO_PATTERN);

338:     MatDiagonalScale(user->C[i],NULL,user->uxi[i]);
339:     MatDiagonalScale(user->Cwork[i],NULL,user->uyi[i]);
340:     MatAXPY(user->C[i],1.0,user->Cwork[i],SUBSET_NONZERO_PATTERN);
341:     MatScale(user->C[i],user->ht);
342:     MatShift(user->C[i],1.0);
343:   }
344:   return(0);
345: }

347: /* ------------------------------------------------------------------- */
348: /* B */
349: PetscErrorCode FormJacobianDesign(Tao tao, Vec X, Mat J, void *ptr)
350: {
352:   AppCtx         *user = (AppCtx*)ptr;

355:   Scatter(X,user->y,user->state_scatter,user->u,user->design_scatter);
356:   return(0);
357: }

359: PetscErrorCode StateMatMult(Mat J_shell, Vec X, Vec Y)
360: {
362:   PetscInt       i;
363:   AppCtx         *user;

366:   MatShellGetContext(J_shell,(void**)&user);
367:   Scatter_yi(X,user->yi,user->yi_scatter,user->nt);
368:   user->block_index = 0;
369:   MatMult(user->JsBlock,user->yi[0],user->yiwork[0]);

371:   for (i=1; i<user->nt; i++){
372:     user->block_index = i;
373:     MatMult(user->JsBlock,user->yi[i],user->yiwork[i]);
374:     MatMult(user->M,user->yi[i-1],user->ziwork[i-1]);
375:     VecAXPY(user->yiwork[i],-1.0,user->ziwork[i-1]);
376:   }
377:   Gather_yi(Y,user->yiwork,user->yi_scatter,user->nt);
378:   return(0);
379: }

381: PetscErrorCode StateMatMultTranspose(Mat J_shell, Vec X, Vec Y)
382: {
384:   PetscInt       i;
385:   AppCtx         *user;

388:   MatShellGetContext(J_shell,(void**)&user);
389:   Scatter_yi(X,user->yi,user->yi_scatter,user->nt);

391:   for (i=0; i<user->nt-1; i++){
392:     user->block_index = i;
393:     MatMultTranspose(user->JsBlock,user->yi[i],user->yiwork[i]);
394:     MatMult(user->M,user->yi[i+1],user->ziwork[i+1]);
395:     VecAXPY(user->yiwork[i],-1.0,user->ziwork[i+1]);
396:   }

398:   i = user->nt-1;
399:   user->block_index = i;
400:   MatMultTranspose(user->JsBlock,user->yi[i],user->yiwork[i]);
401:   Gather_yi(Y,user->yiwork,user->yi_scatter,user->nt);
402:   return(0);
403: }

405: PetscErrorCode StateMatBlockMult(Mat J_shell, Vec X, Vec Y)
406: {
408:   PetscInt       i;
409:   AppCtx         *user;

412:   MatShellGetContext(J_shell,(void**)&user);
413:   i = user->block_index;
414:   VecPointwiseMult(user->uxiwork[i],X,user->uxi[i]);
415:   VecPointwiseMult(user->uyiwork[i],X,user->uyi[i]);
416:   Gather(user->uiwork[i],user->uxiwork[i],user->ux_scatter[i],user->uyiwork[i],user->uy_scatter[i]);
417:   MatMult(user->Div,user->uiwork[i],Y);
418:   VecAYPX(Y,user->ht,X);
419:   return(0);
420: }

422: PetscErrorCode StateMatBlockMultTranspose(Mat J_shell, Vec X, Vec Y)
423: {
425:   PetscInt       i;
426:   AppCtx         *user;

429:   MatShellGetContext(J_shell,(void**)&user);
430:   i = user->block_index;
431:   MatMult(user->Grad,X,user->uiwork[i]);
432:   Scatter(user->uiwork[i],user->uxiwork[i],user->ux_scatter[i],user->uyiwork[i],user->uy_scatter[i]);
433:   VecPointwiseMult(user->uxiwork[i],user->uxi[i],user->uxiwork[i]);
434:   VecPointwiseMult(user->uyiwork[i],user->uyi[i],user->uyiwork[i]);
435:   VecWAXPY(Y,1.0,user->uxiwork[i],user->uyiwork[i]);
436:   VecAYPX(Y,user->ht,X);
437:   return(0);
438: }

440: PetscErrorCode DesignMatMult(Mat J_shell, Vec X, Vec Y)
441: {
443:   PetscInt       i;
444:   AppCtx         *user;

447:   MatShellGetContext(J_shell,(void**)&user);
448:   Scatter_yi(user->y,user->yi,user->yi_scatter,user->nt);
449:   Scatter_uxi_uyi(X,user->uxiwork,user->uxi_scatter,user->uyiwork,user->uyi_scatter,user->nt);
450:   for (i=0; i<user->nt; i++){
451:     VecPointwiseMult(user->uxiwork[i],user->yi[i],user->uxiwork[i]);
452:     VecPointwiseMult(user->uyiwork[i],user->yi[i],user->uyiwork[i]);
453:     Gather(user->uiwork[i],user->uxiwork[i],user->ux_scatter[i],user->uyiwork[i],user->uy_scatter[i]);
454:     MatMult(user->Div,user->uiwork[i],user->ziwork[i]);
455:     VecScale(user->ziwork[i],user->ht);
456:   }
457:   Gather_yi(Y,user->ziwork,user->yi_scatter,user->nt);
458:   return(0);
459: }

461: PetscErrorCode DesignMatMultTranspose(Mat J_shell, Vec X, Vec Y)
462: {
464:   PetscInt       i;
465:   AppCtx         *user;

468:   MatShellGetContext(J_shell,(void**)&user);
469:   Scatter_yi(user->y,user->yi,user->yi_scatter,user->nt);
470:   Scatter_yi(X,user->yiwork,user->yi_scatter,user->nt);
471:   for (i=0; i<user->nt; i++){
472:     MatMult(user->Grad,user->yiwork[i],user->uiwork[i]);
473:     Scatter(user->uiwork[i],user->uxiwork[i],user->ux_scatter[i],user->uyiwork[i],user->uy_scatter[i]);
474:     VecPointwiseMult(user->uxiwork[i],user->yi[i],user->uxiwork[i]);
475:     VecPointwiseMult(user->uyiwork[i],user->yi[i],user->uyiwork[i]);
476:     Gather(user->uiwork[i],user->uxiwork[i],user->ux_scatter[i],user->uyiwork[i],user->uy_scatter[i]);
477:     VecScale(user->uiwork[i],user->ht);
478:   }
479:   Gather_yi(Y,user->uiwork,user->ui_scatter,user->nt);
480:   return(0);
481: }

483: PetscErrorCode StateMatBlockPrecMult(PC PC_shell, Vec X, Vec Y)
484: {
486:   PetscInt       i;
487:   AppCtx         *user;

490:   PCShellGetContext(PC_shell,(void**)&user);
491:   i = user->block_index;
492:   if (user->c_formed) {
493:     MatSOR(user->C[i],X,1.0,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_SYMMETRIC_SWEEP),0.0,1,1,Y);
494:   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not formed");
495:   return(0);
496: }

498: PetscErrorCode StateMatBlockPrecMultTranspose(PC PC_shell, Vec X, Vec Y)
499: {
501:   PetscInt       i;
502:   AppCtx         *user;

505:   PCShellGetContext(PC_shell,(void**)&user);

507:   i = user->block_index;
508:   if (user->c_formed) {
509:     MatSOR(user->C[i],X,1.0,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_SYMMETRIC_SWEEP),0.0,1,1,Y);
510:   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not formed");
511:   return(0);
512: }

514: PetscErrorCode StateMatInvMult(Mat J_shell, Vec X, Vec Y)
515: {
517:   AppCtx         *user;
518:   PetscInt       its,i;

521:   MatShellGetContext(J_shell,(void**)&user);

523:   if (Y == user->ytrue) {
524:     /* First solve is done using true solution to set up problem */
525:     KSPSetTolerances(user->solver,1e-4,1e-20,PETSC_DEFAULT,PETSC_DEFAULT);
526:   } else {
527:     KSPSetTolerances(user->solver,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT);
528:   }
529:   Scatter_yi(X,user->yi,user->yi_scatter,user->nt);
530:   Scatter_yi(Y,user->yiwork,user->yi_scatter,user->nt);
531:   Scatter_uxi_uyi(user->u,user->uxi,user->uxi_scatter,user->uyi,user->uyi_scatter,user->nt);

533:   user->block_index = 0;
534:   KSPSolve(user->solver,user->yi[0],user->yiwork[0]);

536:   KSPGetIterationNumber(user->solver,&its);
537:   user->ksp_its = user->ksp_its + its;
538:   for (i=1; i<user->nt; i++){
539:     MatMult(user->M,user->yiwork[i-1],user->ziwork[i-1]);
540:     VecAXPY(user->yi[i],1.0,user->ziwork[i-1]);
541:     user->block_index = i;
542:     KSPSolve(user->solver,user->yi[i],user->yiwork[i]);

544:     KSPGetIterationNumber(user->solver,&its);
545:     user->ksp_its = user->ksp_its + its;
546:   }
547:   Gather_yi(Y,user->yiwork,user->yi_scatter,user->nt);
548:   return(0);
549: }

551: PetscErrorCode StateMatInvTransposeMult(Mat J_shell, Vec X, Vec Y)
552: {
554:   AppCtx         *user;
555:   PetscInt       its,i;

558:   MatShellGetContext(J_shell,(void**)&user);

560:   Scatter_yi(X,user->yi,user->yi_scatter,user->nt);
561:   Scatter_yi(Y,user->yiwork,user->yi_scatter,user->nt);
562:   Scatter_uxi_uyi(user->u,user->uxi,user->uxi_scatter,user->uyi,user->uyi_scatter,user->nt);

564:   i = user->nt - 1;
565:   user->block_index = i;
566:   KSPSolveTranspose(user->solver,user->yi[i],user->yiwork[i]);

568:   KSPGetIterationNumber(user->solver,&its);
569:   user->ksp_its = user->ksp_its + its;

571:   for (i=user->nt-2; i>=0; i--){
572:     MatMult(user->M,user->yiwork[i+1],user->ziwork[i+1]);
573:     VecAXPY(user->yi[i],1.0,user->ziwork[i+1]);
574:     user->block_index = i;
575:     KSPSolveTranspose(user->solver,user->yi[i],user->yiwork[i]);

577:     KSPGetIterationNumber(user->solver,&its);
578:     user->ksp_its = user->ksp_its + its;
579:   }
580:   Gather_yi(Y,user->yiwork,user->yi_scatter,user->nt);
581:   return(0);
582: }

584: PetscErrorCode StateMatDuplicate(Mat J_shell, MatDuplicateOption opt, Mat *new_shell)
585: {
587:   AppCtx         *user;

590:   MatShellGetContext(J_shell,(void**)&user);

592:   MatCreateShell(PETSC_COMM_WORLD,PETSC_DETERMINE,PETSC_DETERMINE,user->m,user->m,user,new_shell);
593:   MatShellSetOperation(*new_shell,MATOP_MULT,(void(*)(void))StateMatMult);
594:   MatShellSetOperation(*new_shell,MATOP_DUPLICATE,(void(*)(void))StateMatDuplicate);
595:   MatShellSetOperation(*new_shell,MATOP_MULT_TRANSPOSE,(void(*)(void))StateMatMultTranspose);
596:   MatShellSetOperation(*new_shell,MATOP_GET_DIAGONAL,(void(*)(void))StateMatGetDiagonal);
597:   return(0);
598: }

600: PetscErrorCode StateMatGetDiagonal(Mat J_shell, Vec X)
601: {
603:   AppCtx         *user;

606:   MatShellGetContext(J_shell,(void**)&user);
607:    VecCopy(user->js_diag,X);
608:   return(0);
609: }

611: PetscErrorCode FormConstraints(Tao tao, Vec X, Vec C, void *ptr)
612: {
613:   /* con = Ay - q, A = [C(u1)  0     0     ...   0;
614:                          -M  C(u2)   0     ...   0;
615:                           0   -M   C(u3)   ...   0;
616:                                       ...         ;
617:                           0    ...      -M C(u_nt)]
618:      C(u) = eye + ht*Div*[diag(u1); diag(u2)]       */
620:   PetscInt       i;
621:   AppCtx         *user = (AppCtx*)ptr;

624:   Scatter(X,user->y,user->state_scatter,user->u,user->design_scatter);
625:   Scatter_yi(user->y,user->yi,user->yi_scatter,user->nt);
626:   Scatter_uxi_uyi(user->u,user->uxi,user->uxi_scatter,user->uyi,user->uyi_scatter,user->nt);

628:   user->block_index = 0;
629:   MatMult(user->JsBlock,user->yi[0],user->yiwork[0]);

631:   for (i=1; i<user->nt; i++){
632:     user->block_index = i;
633:     MatMult(user->JsBlock,user->yi[i],user->yiwork[i]);
634:     MatMult(user->M,user->yi[i-1],user->ziwork[i-1]);
635:     VecAXPY(user->yiwork[i],-1.0,user->ziwork[i-1]);
636:   }

638:   Gather_yi(C,user->yiwork,user->yi_scatter,user->nt);
639:   VecAXPY(C,-1.0,user->q);

641:   return(0);
642: }


645: PetscErrorCode Scatter(Vec x, Vec state, VecScatter s_scat, Vec design, VecScatter d_scat)
646: {

650:   VecScatterBegin(s_scat,x,state,INSERT_VALUES,SCATTER_FORWARD);
651:   VecScatterEnd(s_scat,x,state,INSERT_VALUES,SCATTER_FORWARD);
652:   VecScatterBegin(d_scat,x,design,INSERT_VALUES,SCATTER_FORWARD);
653:   VecScatterEnd(d_scat,x,design,INSERT_VALUES,SCATTER_FORWARD);
654:   return(0);
655: }

657: PetscErrorCode Scatter_uxi_uyi(Vec u, Vec *uxi, VecScatter *scatx, Vec *uyi, VecScatter *scaty, PetscInt nt)
658: {
660:   PetscInt       i;

663:   for (i=0; i<nt; i++){
664:     VecScatterBegin(scatx[i],u,uxi[i],INSERT_VALUES,SCATTER_FORWARD);
665:     VecScatterEnd(scatx[i],u,uxi[i],INSERT_VALUES,SCATTER_FORWARD);
666:     VecScatterBegin(scaty[i],u,uyi[i],INSERT_VALUES,SCATTER_FORWARD);
667:     VecScatterEnd(scaty[i],u,uyi[i],INSERT_VALUES,SCATTER_FORWARD);
668:   }
669:   return(0);
670: }

672: PetscErrorCode Gather(Vec x, Vec state, VecScatter s_scat, Vec design, VecScatter d_scat)
673: {

677:   VecScatterBegin(s_scat,state,x,INSERT_VALUES,SCATTER_REVERSE);
678:   VecScatterEnd(s_scat,state,x,INSERT_VALUES,SCATTER_REVERSE);
679:   VecScatterBegin(d_scat,design,x,INSERT_VALUES,SCATTER_REVERSE);
680:   VecScatterEnd(d_scat,design,x,INSERT_VALUES,SCATTER_REVERSE);
681:   return(0);
682: }

684: PetscErrorCode Gather_uxi_uyi(Vec u, Vec *uxi, VecScatter *scatx, Vec *uyi, VecScatter *scaty, PetscInt nt)
685: {
687:   PetscInt       i;

690:   for (i=0; i<nt; i++){
691:     VecScatterBegin(scatx[i],uxi[i],u,INSERT_VALUES,SCATTER_REVERSE);
692:     VecScatterEnd(scatx[i],uxi[i],u,INSERT_VALUES,SCATTER_REVERSE);
693:     VecScatterBegin(scaty[i],uyi[i],u,INSERT_VALUES,SCATTER_REVERSE);
694:     VecScatterEnd(scaty[i],uyi[i],u,INSERT_VALUES,SCATTER_REVERSE);
695:   }
696:   return(0);
697: }

699: PetscErrorCode Scatter_yi(Vec y, Vec *yi, VecScatter *scat, PetscInt nt)
700: {
702:   PetscInt       i;

705:   for (i=0; i<nt; i++){
706:     VecScatterBegin(scat[i],y,yi[i],INSERT_VALUES,SCATTER_FORWARD);
707:     VecScatterEnd(scat[i],y,yi[i],INSERT_VALUES,SCATTER_FORWARD);
708:   }
709:   return(0);
710: }

712: PetscErrorCode Gather_yi(Vec y, Vec *yi, VecScatter *scat, PetscInt nt)
713: {
715:   PetscInt       i;

718:   for (i=0; i<nt; i++){
719:     VecScatterBegin(scat[i],yi[i],y,INSERT_VALUES,SCATTER_REVERSE);
720:     VecScatterEnd(scat[i],yi[i],y,INSERT_VALUES,SCATTER_REVERSE);
721:   }
722:   return(0);
723: }

725: PetscErrorCode HyperbolicInitialize(AppCtx *user)
726: {
728:   PetscInt       n,i,j,linear_index,istart,iend,iblock,lo,hi;
729:   Vec            XX,YY,XXwork,YYwork,yi,uxi,ui,bc;
730:   PetscReal      h,sum;
731:   PetscScalar    hinv,neg_hinv,quarter=0.25,one=1.0,half_hinv,neg_half_hinv;
732:   PetscScalar    vx,vy,zero=0.0;
733:   IS             is_from_y,is_to_yi,is_from_u,is_to_uxi,is_to_uyi;

736:   user->jformed = PETSC_FALSE;
737:   user->c_formed = PETSC_FALSE;

739:   user->ksp_its = 0;
740:   user->ksp_its_initial = 0;

742:   n = user->mx * user->mx;

744:   h = 1.0/user->mx;
745:   hinv = user->mx;
746:   neg_hinv = -hinv;
747:   half_hinv = hinv / 2.0;
748:   neg_half_hinv = neg_hinv / 2.0;

750:   /* Generate Grad matrix */
751:   MatCreate(PETSC_COMM_WORLD,&user->Grad);
752:   MatSetSizes(user->Grad,PETSC_DECIDE,PETSC_DECIDE,2*n,n);
753:   MatSetFromOptions(user->Grad);
754:   MatMPIAIJSetPreallocation(user->Grad,3,NULL,3,NULL);
755:   MatSeqAIJSetPreallocation(user->Grad,3,NULL);
756:   MatGetOwnershipRange(user->Grad,&istart,&iend);

758:   for (i=istart; i<iend; i++){
759:     if (i<n){
760:       iblock = i / user->mx;
761:       j = iblock*user->mx + ((i+user->mx-1) % user->mx);
762:       MatSetValues(user->Grad,1,&i,1,&j,&half_hinv,INSERT_VALUES);
763:       j = iblock*user->mx + ((i+1) % user->mx);
764:       MatSetValues(user->Grad,1,&i,1,&j,&neg_half_hinv,INSERT_VALUES);
765:     }
766:     if (i>=n){
767:       j = (i - user->mx) % n;
768:       MatSetValues(user->Grad,1,&i,1,&j,&half_hinv,INSERT_VALUES);
769:       j = (j + 2*user->mx) % n;
770:       MatSetValues(user->Grad,1,&i,1,&j,&neg_half_hinv,INSERT_VALUES);
771:     }
772:   }

774:   MatAssemblyBegin(user->Grad,MAT_FINAL_ASSEMBLY);
775:   MatAssemblyEnd(user->Grad,MAT_FINAL_ASSEMBLY);

777:   MatCreate(PETSC_COMM_WORLD,&user->Gradxy[0]);
778:   MatSetSizes(user->Gradxy[0],PETSC_DECIDE,PETSC_DECIDE,n,n);
779:   MatSetFromOptions(user->Gradxy[0]);
780:   MatMPIAIJSetPreallocation(user->Gradxy[0],3,NULL,3,NULL);
781:   MatSeqAIJSetPreallocation(user->Gradxy[0],3,NULL);
782:   MatGetOwnershipRange(user->Gradxy[0],&istart,&iend);

784:   for (i=istart; i<iend; i++){
785:     iblock = i / user->mx;
786:     j = iblock*user->mx + ((i+user->mx-1) % user->mx);
787:     MatSetValues(user->Gradxy[0],1,&i,1,&j,&half_hinv,INSERT_VALUES);
788:     j = iblock*user->mx + ((i+1) % user->mx);
789:     MatSetValues(user->Gradxy[0],1,&i,1,&j,&neg_half_hinv,INSERT_VALUES);
790:     MatSetValues(user->Gradxy[0],1,&i,1,&i,&zero,INSERT_VALUES);
791:   }
792:   MatAssemblyBegin(user->Gradxy[0],MAT_FINAL_ASSEMBLY);
793:   MatAssemblyEnd(user->Gradxy[0],MAT_FINAL_ASSEMBLY);

795:   MatCreate(PETSC_COMM_WORLD,&user->Gradxy[1]);
796:   MatSetSizes(user->Gradxy[1],PETSC_DECIDE,PETSC_DECIDE,n,n);
797:   MatSetFromOptions(user->Gradxy[1]);
798:   MatMPIAIJSetPreallocation(user->Gradxy[1],3,NULL,3,NULL);
799:   MatSeqAIJSetPreallocation(user->Gradxy[1],3,NULL);
800:   MatGetOwnershipRange(user->Gradxy[1],&istart,&iend);

802:   for (i=istart; i<iend; i++){
803:     j = (i + n - user->mx) % n;
804:     MatSetValues(user->Gradxy[1],1,&i,1,&j,&half_hinv,INSERT_VALUES);
805:     j = (j + 2*user->mx) % n;
806:     MatSetValues(user->Gradxy[1],1,&i,1,&j,&neg_half_hinv,INSERT_VALUES);
807:     MatSetValues(user->Gradxy[1],1,&i,1,&i,&zero,INSERT_VALUES);
808:   }
809:   MatAssemblyBegin(user->Gradxy[1],MAT_FINAL_ASSEMBLY);
810:   MatAssemblyEnd(user->Gradxy[1],MAT_FINAL_ASSEMBLY);

812:   /* Generate Div matrix */
813:   MatTranspose(user->Grad,MAT_INITIAL_MATRIX,&user->Div);
814:   MatTranspose(user->Gradxy[0],MAT_INITIAL_MATRIX,&user->Divxy[0]);
815:   MatTranspose(user->Gradxy[1],MAT_INITIAL_MATRIX,&user->Divxy[1]);

817:   /* Off-diagonal averaging matrix */
818:   MatCreate(PETSC_COMM_WORLD,&user->M);
819:   MatSetSizes(user->M,PETSC_DECIDE,PETSC_DECIDE,n,n);
820:   MatSetFromOptions(user->M);
821:   MatMPIAIJSetPreallocation(user->M,4,NULL,4,NULL);
822:   MatSeqAIJSetPreallocation(user->M,4,NULL);
823:   MatGetOwnershipRange(user->M,&istart,&iend);

825:   for (i=istart; i<iend; i++){
826:     /* kron(Id,Av) */
827:     iblock = i / user->mx;
828:     j = iblock*user->mx + ((i+user->mx-1) % user->mx);
829:     MatSetValues(user->M,1,&i,1,&j,&quarter,INSERT_VALUES);
830:     j = iblock*user->mx + ((i+1) % user->mx);
831:     MatSetValues(user->M,1,&i,1,&j,&quarter,INSERT_VALUES);

833:     /* kron(Av,Id) */
834:     j = (i + user->mx) % n;
835:     MatSetValues(user->M,1,&i,1,&j,&quarter,INSERT_VALUES);
836:     j = (i + n - user->mx) % n;
837:     MatSetValues(user->M,1,&i,1,&j,&quarter,INSERT_VALUES);
838:   }
839:   MatAssemblyBegin(user->M,MAT_FINAL_ASSEMBLY);
840:   MatAssemblyEnd(user->M,MAT_FINAL_ASSEMBLY);

842:   /* Generate 2D grid */
843:   VecCreate(PETSC_COMM_WORLD,&XX);
844:   VecCreate(PETSC_COMM_WORLD,&user->q);
845:   VecSetSizes(XX,PETSC_DECIDE,n);
846:   VecSetSizes(user->q,PETSC_DECIDE,n*user->nt);
847:   VecSetFromOptions(XX);
848:   VecSetFromOptions(user->q);

850:   VecDuplicate(XX,&YY);
851:   VecDuplicate(XX,&XXwork);
852:   VecDuplicate(XX,&YYwork);
853:   VecDuplicate(XX,&user->d);
854:   VecDuplicate(XX,&user->dwork);

856:   VecGetOwnershipRange(XX,&istart,&iend);
857:   for (linear_index=istart; linear_index<iend; linear_index++){
858:     i = linear_index % user->mx;
859:     j = (linear_index-i)/user->mx;
860:     vx = h*(i+0.5);
861:     vy = h*(j+0.5);
862:     VecSetValues(XX,1,&linear_index,&vx,INSERT_VALUES);
863:     VecSetValues(YY,1,&linear_index,&vy,INSERT_VALUES);
864:   }

866:   VecAssemblyBegin(XX);
867:   VecAssemblyEnd(XX);
868:   VecAssemblyBegin(YY);
869:   VecAssemblyEnd(YY);

871:   /* Compute final density function yT
872:      yT = 1.0 + exp(-30*((x-0.25)^2+(y-0.25)^2)) + exp(-30*((x-0.75)^2+(y-0.75)^2))
873:      yT = yT / (h^2*sum(yT)) */
874:   VecCopy(XX,XXwork);
875:   VecCopy(YY,YYwork);

877:   VecShift(XXwork,-0.25);
878:   VecShift(YYwork,-0.25);

880:   VecPointwiseMult(XXwork,XXwork,XXwork);
881:   VecPointwiseMult(YYwork,YYwork,YYwork);

883:   VecCopy(XXwork,user->dwork);
884:   VecAXPY(user->dwork,1.0,YYwork);
885:   VecScale(user->dwork,-30.0);
886:   VecExp(user->dwork);
887:   VecCopy(user->dwork,user->d);

889:   VecCopy(XX,XXwork);
890:   VecCopy(YY,YYwork);

892:   VecShift(XXwork,-0.75);
893:   VecShift(YYwork,-0.75);

895:   VecPointwiseMult(XXwork,XXwork,XXwork);
896:   VecPointwiseMult(YYwork,YYwork,YYwork);

898:   VecCopy(XXwork,user->dwork);
899:   VecAXPY(user->dwork,1.0,YYwork);
900:   VecScale(user->dwork,-30.0);
901:   VecExp(user->dwork);

903:   VecAXPY(user->d,1.0,user->dwork);
904:   VecShift(user->d,1.0);
905:   VecSum(user->d,&sum);
906:   VecScale(user->d,1.0/(h*h*sum));

908:   /* Initial conditions of forward problem */
909:   VecDuplicate(XX,&bc);
910:   VecCopy(XX,XXwork);
911:   VecCopy(YY,YYwork);

913:   VecShift(XXwork,-0.5);
914:   VecShift(YYwork,-0.5);

916:   VecPointwiseMult(XXwork,XXwork,XXwork);
917:   VecPointwiseMult(YYwork,YYwork,YYwork);

919:   VecWAXPY(bc,1.0,XXwork,YYwork);
920:   VecScale(bc,-50.0);
921:   VecExp(bc);
922:   VecShift(bc,1.0);
923:   VecSum(bc,&sum);
924:   VecScale(bc,1.0/(h*h*sum));

926:   /* Create scatter from y to y_1,y_2,...,y_nt */
927:   /*  TODO: Reorder for better parallelism. (This will require reordering Q and L as well.) */
928:   PetscMalloc1(user->nt*user->mx*user->mx,&user->yi_scatter);
929:   VecCreate(PETSC_COMM_WORLD,&yi);
930:   VecSetSizes(yi,PETSC_DECIDE,user->mx*user->mx);
931:   VecSetFromOptions(yi);
932:   VecDuplicateVecs(yi,user->nt,&user->yi);
933:   VecDuplicateVecs(yi,user->nt,&user->yiwork);
934:   VecDuplicateVecs(yi,user->nt,&user->ziwork);
935:   for (i=0; i<user->nt; i++){
936:     VecGetOwnershipRange(user->yi[i],&lo,&hi);
937:     ISCreateStride(PETSC_COMM_SELF,hi-lo,lo,1,&is_to_yi);
938:     ISCreateStride(PETSC_COMM_SELF,hi-lo,lo+i*user->mx*user->mx,1,&is_from_y);
939:     VecScatterCreate(user->y,is_from_y,user->yi[i],is_to_yi,&user->yi_scatter[i]);
940:     ISDestroy(&is_to_yi);
941:     ISDestroy(&is_from_y);
942:   }

944:   /* Create scatter from u to ux_1,uy_1,ux_2,uy_2,...,ux_nt,uy_nt */
945:   /*  TODO: reorder for better parallelism */
946:   PetscMalloc1(user->nt*user->mx*user->mx,&user->uxi_scatter);
947:   PetscMalloc1(user->nt*user->mx*user->mx,&user->uyi_scatter);
948:   PetscMalloc1(user->nt*user->mx*user->mx,&user->ux_scatter);
949:   PetscMalloc1(user->nt*user->mx*user->mx,&user->uy_scatter);
950:   PetscMalloc1(2*user->nt*user->mx*user->mx,&user->ui_scatter);
951:   VecCreate(PETSC_COMM_WORLD,&uxi);
952:   VecCreate(PETSC_COMM_WORLD,&ui);
953:   VecSetSizes(uxi,PETSC_DECIDE,user->mx*user->mx);
954:   VecSetSizes(ui,PETSC_DECIDE,2*user->mx*user->mx);
955:   VecSetFromOptions(uxi);
956:   VecSetFromOptions(ui);
957:   VecDuplicateVecs(uxi,user->nt,&user->uxi);
958:   VecDuplicateVecs(uxi,user->nt,&user->uyi);
959:   VecDuplicateVecs(uxi,user->nt,&user->uxiwork);
960:   VecDuplicateVecs(uxi,user->nt,&user->uyiwork);
961:   VecDuplicateVecs(ui,user->nt,&user->ui);
962:   VecDuplicateVecs(ui,user->nt,&user->uiwork);
963:   for (i=0; i<user->nt; i++){
964:     VecGetOwnershipRange(user->uxi[i],&lo,&hi);
965:     ISCreateStride(PETSC_COMM_SELF,hi-lo,lo,1,&is_to_uxi);
966:     ISCreateStride(PETSC_COMM_SELF,hi-lo,lo+2*i*user->mx*user->mx,1,&is_from_u);
967:     VecScatterCreate(user->u,is_from_u,user->uxi[i],is_to_uxi,&user->uxi_scatter[i]);

969:     ISDestroy(&is_to_uxi);
970:     ISDestroy(&is_from_u);

972:     VecGetOwnershipRange(user->uyi[i],&lo,&hi);
973:     ISCreateStride(PETSC_COMM_SELF,hi-lo,lo,1,&is_to_uyi);
974:     ISCreateStride(PETSC_COMM_SELF,hi-lo,lo+(2*i+1)*user->mx*user->mx,1,&is_from_u);
975:     VecScatterCreate(user->u,is_from_u,user->uyi[i],is_to_uyi,&user->uyi_scatter[i]);

977:     ISDestroy(&is_to_uyi);
978:     ISDestroy(&is_from_u);

980:     VecGetOwnershipRange(user->uxi[i],&lo,&hi);
981:     ISCreateStride(PETSC_COMM_SELF,hi-lo,lo,1,&is_to_uxi);
982:     ISCreateStride(PETSC_COMM_SELF,hi-lo,lo,1,&is_from_u);
983:     VecScatterCreate(user->ui[i],is_from_u,user->uxi[i],is_to_uxi,&user->ux_scatter[i]);

985:     ISDestroy(&is_to_uxi);
986:     ISDestroy(&is_from_u);

988:     VecGetOwnershipRange(user->uyi[i],&lo,&hi);
989:     ISCreateStride(PETSC_COMM_SELF,hi-lo,lo,1,&is_to_uyi);
990:     ISCreateStride(PETSC_COMM_SELF,hi-lo,lo+user->mx*user->mx,1,&is_from_u);
991:     VecScatterCreate(user->ui[i],is_from_u,user->uyi[i],is_to_uyi,&user->uy_scatter[i]);

993:     ISDestroy(&is_to_uyi);
994:     ISDestroy(&is_from_u);

996:     VecGetOwnershipRange(user->ui[i],&lo,&hi);
997:     ISCreateStride(PETSC_COMM_SELF,hi-lo,lo,1,&is_to_uxi);
998:     ISCreateStride(PETSC_COMM_SELF,hi-lo,lo+2*i*user->mx*user->mx,1,&is_from_u);
999:     VecScatterCreate(user->u,is_from_u,user->ui[i],is_to_uxi,&user->ui_scatter[i]);

1001:     ISDestroy(&is_to_uxi);
1002:     ISDestroy(&is_from_u);
1003:   }

1005:   /* RHS of forward problem */
1006:   MatMult(user->M,bc,user->yiwork[0]);
1007:   for (i=1; i<user->nt; i++){
1008:     VecSet(user->yiwork[i],0.0);
1009:   }
1010:   Gather_yi(user->q,user->yiwork,user->yi_scatter,user->nt);

1012:   /* Compute true velocity field utrue */
1013:   VecDuplicate(user->u,&user->utrue);
1014:   for (i=0; i<user->nt; i++){
1015:     VecCopy(YY,user->uxi[i]);
1016:     VecScale(user->uxi[i],150.0*i*user->ht);
1017:     VecCopy(XX,user->uyi[i]);
1018:     VecShift(user->uyi[i],-10.0);
1019:     VecScale(user->uyi[i],15.0*i*user->ht);
1020:   }
1021:   Gather_uxi_uyi(user->utrue,user->uxi,user->uxi_scatter,user->uyi,user->uyi_scatter,user->nt);

1023:   /* Initial guess and reference model */
1024:   VecDuplicate(user->utrue,&user->ur);
1025:   for (i=0; i<user->nt; i++){
1026:     VecCopy(XX,user->uxi[i]);
1027:     VecShift(user->uxi[i],i*user->ht);
1028:     VecCopy(YY,user->uyi[i]);
1029:     VecShift(user->uyi[i],-i*user->ht);
1030:   }
1031:   Gather_uxi_uyi(user->ur,user->uxi,user->uxi_scatter,user->uyi,user->uyi_scatter,user->nt);

1033:   /* Generate regularization matrix L */
1034:   MatCreate(PETSC_COMM_WORLD,&user->LT);
1035:   MatSetSizes(user->LT,PETSC_DECIDE,PETSC_DECIDE,2*n*user->nt,n*user->nt);
1036:   MatSetFromOptions(user->LT);
1037:   MatMPIAIJSetPreallocation(user->LT,1,NULL,1,NULL);
1038:   MatSeqAIJSetPreallocation(user->LT,1,NULL);
1039:   MatGetOwnershipRange(user->LT,&istart,&iend);

1041:   for (i=istart; i<iend; i++){
1042:     iblock = (i+n) / (2*n);
1043:     j = i - iblock*n;
1044:     MatSetValues(user->LT,1,&i,1,&j,&user->gamma,INSERT_VALUES);
1045:   }

1047:   MatAssemblyBegin(user->LT,MAT_FINAL_ASSEMBLY);
1048:   MatAssemblyEnd(user->LT,MAT_FINAL_ASSEMBLY);

1050:   MatTranspose(user->LT,MAT_INITIAL_MATRIX,&user->L);

1052:   /* Build work vectors and matrices */
1053:   VecCreate(PETSC_COMM_WORLD,&user->lwork);
1054:   VecSetType(user->lwork,VECMPI);
1055:   VecSetSizes(user->lwork,PETSC_DECIDE,user->m);
1056:   VecSetFromOptions(user->lwork);

1058:   MatDuplicate(user->Div,MAT_SHARE_NONZERO_PATTERN,&user->Divwork);

1060:   VecDuplicate(user->y,&user->ywork);
1061:   VecDuplicate(user->u,&user->uwork);
1062:   VecDuplicate(user->u,&user->vwork);
1063:   VecDuplicate(user->u,&user->js_diag);
1064:   VecDuplicate(user->c,&user->cwork);

1066:   /* Create matrix-free shell user->Js for computing A*x */
1067:   MatCreateShell(PETSC_COMM_WORLD,PETSC_DETERMINE,PETSC_DETERMINE,user->m,user->m,user,&user->Js);
1068:   MatShellSetOperation(user->Js,MATOP_MULT,(void(*)(void))StateMatMult);
1069:   MatShellSetOperation(user->Js,MATOP_DUPLICATE,(void(*)(void))StateMatDuplicate);
1070:   MatShellSetOperation(user->Js,MATOP_MULT_TRANSPOSE,(void(*)(void))StateMatMultTranspose);
1071:   MatShellSetOperation(user->Js,MATOP_GET_DIAGONAL,(void(*)(void))StateMatGetDiagonal);

1073:   /* Diagonal blocks of user->Js */
1074:   MatCreateShell(PETSC_COMM_WORLD,PETSC_DETERMINE,PETSC_DETERMINE,n,n,user,&user->JsBlock);
1075:   MatShellSetOperation(user->JsBlock,MATOP_MULT,(void(*)(void))StateMatBlockMult);
1076:   MatShellSetOperation(user->JsBlock,MATOP_MULT_TRANSPOSE,(void(*)(void))StateMatBlockMultTranspose);

1078:   /* Create a matrix-free shell user->JsBlockPrec for computing (U+D)\D*(L+D)\x, where JsBlock = L+D+U,
1079:      D is diagonal, L is strictly lower triangular, and U is strictly upper triangular.
1080:      This is an SOR preconditioner for user->JsBlock. */
1081:   MatCreateShell(PETSC_COMM_WORLD,PETSC_DETERMINE,PETSC_DETERMINE,n,n,user,&user->JsBlockPrec);
1082:   MatShellSetOperation(user->JsBlockPrec,MATOP_MULT,(void(*)(void))StateMatBlockPrecMult);
1083:   MatShellSetOperation(user->JsBlockPrec,MATOP_MULT_TRANSPOSE,(void(*)(void))StateMatBlockPrecMultTranspose);

1085:   /* Create a matrix-free shell user->Jd for computing B*x */
1086:   MatCreateShell(PETSC_COMM_WORLD,PETSC_DETERMINE,PETSC_DETERMINE,user->m,user->n-user->m,user,&user->Jd);
1087:   MatShellSetOperation(user->Jd,MATOP_MULT,(void(*)(void))DesignMatMult);
1088:   MatShellSetOperation(user->Jd,MATOP_MULT_TRANSPOSE,(void(*)(void))DesignMatMultTranspose);

1090:   /* User-defined routines for computing user->Js\x and user->Js^T\x*/
1091:   MatCreateShell(PETSC_COMM_WORLD,PETSC_DETERMINE,PETSC_DETERMINE,user->m,user->m,user,&user->JsInv);
1092:   MatShellSetOperation(user->JsInv,MATOP_MULT,(void(*)(void))StateMatInvMult);
1093:   MatShellSetOperation(user->JsInv,MATOP_MULT_TRANSPOSE,(void(*)(void))StateMatInvTransposeMult);

1095:   /* Build matrices for SOR preconditioner */
1096:   Scatter_uxi_uyi(user->u,user->uxi,user->uxi_scatter,user->uyi,user->uyi_scatter,user->nt);
1097:   MatShift(user->Divxy[0],0.0); /*  Force C[i] and Divxy[0] to share same nonzero pattern */
1098:   MatAXPY(user->Divxy[0],0.0,user->Divxy[1],DIFFERENT_NONZERO_PATTERN);
1099:   PetscMalloc1(5*n,&user->C);
1100:   PetscMalloc1(2*n,&user->Cwork);
1101:   for (i=0; i<user->nt; i++){
1102:     MatDuplicate(user->Divxy[0],MAT_COPY_VALUES,&user->C[i]);
1103:     MatDuplicate(user->Divxy[1],MAT_COPY_VALUES,&user->Cwork[i]);

1105:     MatDiagonalScale(user->C[i],NULL,user->uxi[i]);
1106:     MatDiagonalScale(user->Cwork[i],NULL,user->uyi[i]);
1107:     MatAXPY(user->C[i],1.0,user->Cwork[i],SUBSET_NONZERO_PATTERN);
1108:     MatScale(user->C[i],user->ht);
1109:     MatShift(user->C[i],1.0);
1110:   }

1112:   /* Solver options and tolerances */
1113:   KSPCreate(PETSC_COMM_WORLD,&user->solver);
1114:   KSPSetType(user->solver,KSPGMRES);
1115:   KSPSetOperators(user->solver,user->JsBlock,user->JsBlockPrec);
1116:   KSPSetTolerances(user->solver,1e-4,1e-20,1e3,500);
1117:   /* KSPSetTolerances(user->solver,1e-8,1e-16,1e3,500); */
1118:   KSPGetPC(user->solver,&user->prec);
1119:   PCSetType(user->prec,PCSHELL);

1121:   PCShellSetApply(user->prec,StateMatBlockPrecMult);
1122:   PCShellSetApplyTranspose(user->prec,StateMatBlockPrecMultTranspose);
1123:   PCShellSetContext(user->prec,user);

1125:   /* Compute true state function yt given ut */
1126:   VecCreate(PETSC_COMM_WORLD,&user->ytrue);
1127:   VecSetSizes(user->ytrue,PETSC_DECIDE,n*user->nt);
1128:   VecSetFromOptions(user->ytrue);
1129:   user->c_formed = PETSC_TRUE;
1130:   VecCopy(user->utrue,user->u); /*  Set u=utrue temporarily for StateMatInv */
1131:   VecSet(user->ytrue,0.0); /*  Initial guess */
1132:   StateMatInvMult(user->Js,user->q,user->ytrue);
1133:   VecCopy(user->ur,user->u); /*  Reset u=ur */

1135:   /* Initial guess y0 for state given u0 */
1136:   StateMatInvMult(user->Js,user->q,user->y);

1138:   /* Data discretization */
1139:   MatCreate(PETSC_COMM_WORLD,&user->Q);
1140:   MatSetSizes(user->Q,PETSC_DECIDE,PETSC_DECIDE,user->mx*user->mx,user->m);
1141:   MatSetFromOptions(user->Q);
1142:   MatMPIAIJSetPreallocation(user->Q,0,NULL,1,NULL);
1143:   MatSeqAIJSetPreallocation(user->Q,1,NULL);

1145:   MatGetOwnershipRange(user->Q,&istart,&iend);

1147:   for (i=istart; i<iend; i++){
1148:     j = i + user->m - user->mx*user->mx;
1149:     MatSetValues(user->Q,1,&i,1,&j,&one,INSERT_VALUES);
1150:   }

1152:   MatAssemblyBegin(user->Q,MAT_FINAL_ASSEMBLY);
1153:   MatAssemblyEnd(user->Q,MAT_FINAL_ASSEMBLY);

1155:   MatTranspose(user->Q,MAT_INITIAL_MATRIX,&user->QT);

1157:   VecDestroy(&XX);
1158:   VecDestroy(&YY);
1159:   VecDestroy(&XXwork);
1160:   VecDestroy(&YYwork);
1161:   VecDestroy(&yi);
1162:   VecDestroy(&uxi);
1163:   VecDestroy(&ui);
1164:   VecDestroy(&bc);

1166:   /* Now that initial conditions have been set, let the user pass tolerance options to the KSP solver */
1167:   KSPSetFromOptions(user->solver);
1168:   return(0);
1169: }

1171: PetscErrorCode HyperbolicDestroy(AppCtx *user)
1172: {
1174:   PetscInt       i;

1177:   MatDestroy(&user->Q);
1178:   MatDestroy(&user->QT);
1179:   MatDestroy(&user->Div);
1180:   MatDestroy(&user->Divwork);
1181:   MatDestroy(&user->Grad);
1182:   MatDestroy(&user->L);
1183:   MatDestroy(&user->LT);
1184:   KSPDestroy(&user->solver);
1185:   MatDestroy(&user->Js);
1186:   MatDestroy(&user->Jd);
1187:   MatDestroy(&user->JsBlockPrec);
1188:   MatDestroy(&user->JsInv);
1189:   MatDestroy(&user->JsBlock);
1190:   MatDestroy(&user->Divxy[0]);
1191:   MatDestroy(&user->Divxy[1]);
1192:   MatDestroy(&user->Gradxy[0]);
1193:   MatDestroy(&user->Gradxy[1]);
1194:   MatDestroy(&user->M);
1195:   for (i=0; i<user->nt; i++){
1196:     MatDestroy(&user->C[i]);
1197:     MatDestroy(&user->Cwork[i]);
1198:   }
1199:   PetscFree(user->C);
1200:   PetscFree(user->Cwork);
1201:   VecDestroy(&user->u);
1202:   VecDestroy(&user->uwork);
1203:   VecDestroy(&user->vwork);
1204:   VecDestroy(&user->utrue);
1205:   VecDestroy(&user->y);
1206:   VecDestroy(&user->ywork);
1207:   VecDestroy(&user->ytrue);
1208:   VecDestroyVecs(user->nt,&user->yi);
1209:   VecDestroyVecs(user->nt,&user->yiwork);
1210:   VecDestroyVecs(user->nt,&user->ziwork);
1211:   VecDestroyVecs(user->nt,&user->uxi);
1212:   VecDestroyVecs(user->nt,&user->uyi);
1213:   VecDestroyVecs(user->nt,&user->uxiwork);
1214:   VecDestroyVecs(user->nt,&user->uyiwork);
1215:   VecDestroyVecs(user->nt,&user->ui);
1216:   VecDestroyVecs(user->nt,&user->uiwork);
1217:   VecDestroy(&user->c);
1218:   VecDestroy(&user->cwork);
1219:   VecDestroy(&user->ur);
1220:   VecDestroy(&user->q);
1221:   VecDestroy(&user->d);
1222:   VecDestroy(&user->dwork);
1223:   VecDestroy(&user->lwork);
1224:   VecDestroy(&user->js_diag);
1225:   ISDestroy(&user->s_is);
1226:   ISDestroy(&user->d_is);
1227:   VecScatterDestroy(&user->state_scatter);
1228:   VecScatterDestroy(&user->design_scatter);
1229:   for (i=0; i<user->nt; i++){
1230:     VecScatterDestroy(&user->uxi_scatter[i]);
1231:     VecScatterDestroy(&user->uyi_scatter[i]);
1232:     VecScatterDestroy(&user->ux_scatter[i]);
1233:     VecScatterDestroy(&user->uy_scatter[i]);
1234:     VecScatterDestroy(&user->ui_scatter[i]);
1235:     VecScatterDestroy(&user->yi_scatter[i]);
1236:   }
1237:   PetscFree(user->uxi_scatter);
1238:   PetscFree(user->uyi_scatter);
1239:   PetscFree(user->ux_scatter);
1240:   PetscFree(user->uy_scatter);
1241:   PetscFree(user->ui_scatter);
1242:   PetscFree(user->yi_scatter);
1243:   return(0);
1244: }

1246: PetscErrorCode HyperbolicMonitor(Tao tao, void *ptr)
1247: {
1249:   Vec            X;
1250:   PetscReal      unorm,ynorm;
1251:   AppCtx         *user = (AppCtx*)ptr;

1254:   TaoGetSolutionVector(tao,&X);
1255:   Scatter(X,user->ywork,user->state_scatter,user->uwork,user->design_scatter);
1256:   VecAXPY(user->ywork,-1.0,user->ytrue);
1257:   VecAXPY(user->uwork,-1.0,user->utrue);
1258:   VecNorm(user->uwork,NORM_2,&unorm);
1259:   VecNorm(user->ywork,NORM_2,&ynorm);
1260:   PetscPrintf(MPI_COMM_WORLD, "||u-ut||=%g ||y-yt||=%g\n",(double)unorm,(double)ynorm);
1261:   return(0);
1262: }


1265: /*TEST

1267:    build:
1268:       requires: !complex

1270:    test:
1271:       requires: !single
1272:       args: -tao_cmonitor -tao_max_funcs 10 -tao_type lcl -tao_gatol 1.e-5

1274:    test:
1275:       suffix: guess_pod
1276:       requires: !single
1277:       args: -tao_cmonitor -tao_max_funcs 10 -tao_type lcl -ksp_guess_type pod -tao_gatol 1.e-5

1279: TEST*/