Actual source code: bcgsl.c

  1: /*
  2:  * Implementation of BiCGstab(L) the paper by D.R. Fokkema,
  3:  * "Enhanced implementation of BiCGStab(L) for solving linear systems
  4:  * of equations". This uses tricky delayed updating ideas to prevent
  5:  * round-off buildup.
  6:  */
 7:  #include petscblaslapack.h
 8:  #include src/ksp/ksp/kspimpl.h
 9:  #include bcgsl.h


 12: /****************************************************
 13:  *
 14:  * Some memory allocation functions
 15:  *
 16:  ****************************************************/

 20: PetscErrorCode bcgsl_cleanup_i(KSP ksp)
 21: {
 22:   PetscErrorCode  ierr;

 25:   /* free all workspace */
 26:   PetscFree(ksp->work);
 27:   return(0);
 28: }

 32: PetscErrorCode bcgsl_setup_i(KSP ksp)
 33: {
 34:   KSP_BiCGStabL  *bcgsl = (KSP_BiCGStabL *)ksp->data;
 35:   PetscInt            ell = bcgsl->ell;

 39:   KSPDefaultGetWork(ksp, 6+2*ell);
 40:   return(0);
 41: }

 45: static PetscErrorCode  KSPSolve_BCGSL(KSP ksp)
 46: {
 47:   KSP_BiCGStabL  *bcgsl = (KSP_BiCGStabL *) ksp->data;
 48:   PetscScalar    alpha, beta, nu, omega, sigma;
 49:   PetscScalar    zero = 0;
 50:   PetscScalar    rho0, rho1;
 51:   PetscReal      kappa0, kappaA, kappa1;
 52:   PetscReal      ghat, epsilon, abstol;
 53:   PetscReal      zeta, zeta0, rnmax_computed, rnmax_true, nrm0;
 54:   PetscTruth     bUpdateX;
 55:   PetscTruth     bBombed = PETSC_FALSE;

 57:   PetscInt       maxit;
 58:   PetscInt       h, i, j, k, vi, ell;
 59:   PetscMPIInt    rank;
 60:   PetscBLASInt   ldMZ,bierr;


 65:   /* set up temporary vectors */
 66:   vi = 0;
 67:   ell = bcgsl->ell;
 68:   bcgsl->vB    = ksp->work[vi]; vi++;
 69:   bcgsl->vRt   = ksp->work[vi]; vi++;
 70:   bcgsl->vTm   = ksp->work[vi]; vi++;
 71:   bcgsl->vvR   = ksp->work+vi; vi += ell+1;
 72:   bcgsl->vvU   = ksp->work+vi; vi += ell+1;
 73:   bcgsl->vXr   = ksp->work[vi]; vi++;
 74:   ldMZ = ell+1;
 75:   {
 76:     PetscMalloc(ldMZ*sizeof(PetscScalar), &AY0c);
 77:     PetscMalloc(ldMZ*sizeof(PetscScalar), &AYlc);
 78:     PetscMalloc(ldMZ*sizeof(PetscScalar), &AYtc);
 79:     PetscMalloc(ldMZ*ldMZ*sizeof(PetscScalar), &MZa);
 80:     PetscMalloc(ldMZ*ldMZ*sizeof(PetscScalar), &MZb);
 81:   }

 83:   /* Prime the iterative solver */
 84:   MPI_Comm_rank(PETSC_COMM_WORLD, &rank);

 86:   KSPInitialResidual(ksp, VX, VTM, VB, VVR[0], ksp->vec_rhs);
 87:   VecNorm(VVR[0], NORM_2, &zeta0);
 88:   rnmax_computed = zeta0;
 89:   rnmax_true = zeta0;

 91:   (*ksp->converged)(ksp, 0, zeta0, &ksp->reason, ksp->cnvP);
 92:   if (ksp->reason) {
 93:     PetscFree(AY0c);
 94:     PetscFree(AYlc);
 95:     PetscFree(AYtc);
 96:     PetscFree(MZa);
 97:     PetscFree(MZb);

 99:     return(0);
100:   }

102:   VecSet(&zero, VVU[0]);
103:   alpha = 0;
104:   rho0 = omega = 1;

106:   if (bcgsl->delta>0.0) {
107:     VecCopy(VX, VXR);
108:     VecSet(&zero, VX);
109:     VecCopy(VVR[0], VB);
110:   } else {
111:     VecCopy(ksp->vec_rhs, VB);
112:   }

114:   /* Life goes on */
115:   VecCopy(VVR[0], VRT);
116:   zeta = zeta0;

118:   KSPGetTolerances(ksp, &epsilon, &abstol, PETSC_NULL, &maxit);

120:   for (k=0; k<maxit; k += bcgsl->ell) {
121:     PetscObjectTakeAccess(ksp);
122:     ksp->its   = k;
123:     ksp->rnorm = zeta;
124:     PetscObjectGrantAccess(ksp);

126:     KSPLogResidualHistory(ksp, zeta);
127:     KSPMonitor(ksp, ksp->its, zeta);

129:     (*ksp->converged)(ksp, k, zeta, &ksp->reason, ksp->cnvP);
130:     if (ksp->reason) break;

132:     /* BiCG part */
133:     rho0 = -omega*rho0;
134:     nrm0 = zeta;
135:     for (j=0; j<bcgsl->ell; j++) {
136:       /* rho1 <- r_j' * r_tilde */
137:       VecDot(VVR[j], VRT, &rho1);
138:       if (rho1 == 0.0) {
139:         ksp->reason = KSP_DIVERGED_BREAKDOWN_BICG;
140:         bBombed = PETSC_TRUE;
141:         break;
142:       }
143:       beta = alpha*(rho1/rho0);
144:       rho0 = rho1;
145:       nu = -beta;
146:       for (i=0; i<=j; i++) {
147:         /* u_i <- r_i - beta*u_i */
148:         VecAYPX(&nu, VVR[i], VVU[i]);
149:       }
150:       /* u_{j+1} <- inv(K)*A*u_j */
151:       KSP_PCApplyBAorAB(ksp, VVU[j], VVU[j+1], VTM);

153:       VecDot(VVU[j+1], VRT, &sigma);
154:       if (sigma == 0.0) {
155:         ksp->reason = KSP_DIVERGED_BREAKDOWN_BICG;
156:         bBombed = PETSC_TRUE;
157:         break;
158:       }
159:       alpha = rho1/sigma;

161:       /* x <- x + alpha*u_0 */
162:       VecAXPY(&alpha, VVU[0], VX);

164:       nu = -alpha;
165:       for (i=0; i<=j; i++) {
166:         /* r_i <- r_i - alpha*u_{i+1} */
167:         VecAXPY(&nu, VVU[i+1], VVR[i]);
168:       }

170:       /* r_{j+1} <- inv(K)*A*r_j */
171:       KSP_PCApplyBAorAB(ksp, VVR[j], VVR[j+1], VTM);

173:       VecNorm(VVR[0], NORM_2, &nrm0);
174:       if (bcgsl->delta>0.0) {
175:         if (rnmax_computed<nrm0) rnmax_computed = nrm0;
176:         if (rnmax_true<nrm0) rnmax_true = nrm0;
177:       }

179:       /* NEW: check for early exit */
180:       (*ksp->converged)(ksp, k+j, nrm0, &ksp->reason, ksp->cnvP);
181:       if (ksp->reason) {
182:         PetscObjectTakeAccess(ksp);
183:         ksp->its   = k+j;
184:         ksp->rnorm = nrm0;
185:         PetscObjectGrantAccess(ksp);
186:         break;
187:       }
188:     }

190:     if (bBombed==PETSC_TRUE) break;

192:     /* Polynomial part */

194:     for (i=0; i<=bcgsl->ell; i++) {
195:       for (j=0; j<i; j++) {
196:         VecDot(VVR[j], VVR[i], &nu);
197:         MZa[i+ldMZ*j] = nu;
198:         MZa[j+ldMZ*i] = nu;
199:         MZb[i+ldMZ*j] = nu;
200:         MZb[j+ldMZ*i] = nu;
201:       }

203:       VecDot(VVR[i], VVR[i], &nu);
204:       MZa[i+ldMZ*i] = nu;
205:       MZb[i+ldMZ*i] = nu;
206:     }

208:     if (!bcgsl->bConvex || bcgsl->ell==1) {
209:       PetscBLASInt ione = 1,bell = bcgsl->ell;

211:       AY0c[0] = -1;
212:       LApotrf_("Lower", &bell, &MZa[1+ldMZ], &ldMZ, &bierr);
213:       if (ierr!=0) {
214:         ksp->reason = KSP_DIVERGED_BREAKDOWN;
215:         bBombed = PETSC_TRUE;
216:         break;
217:       }
218:       BLcopy_(&bell, &MZb[1], &ione, &AY0c[1], &ione);
219:       LApotrs_("Lower", &bell, &ione, &MZa[1+ldMZ], &ldMZ, &AY0c[1], &ldMZ, &bierr);
220:     } else {
221:       PetscBLASInt neqs = bcgsl->ell-1;
222:       PetscBLASInt ione = 1;
223:       PetscScalar aone = 1.0, azero = 0.0;

225:       LApotrf_("Lower", &neqs, &MZa[1+ldMZ], &ldMZ, &bierr);
226:       if (ierr!=0) {
227:         ksp->reason = KSP_DIVERGED_BREAKDOWN;
228:         bBombed = PETSC_TRUE;
229:         break;
230:       }
231:       BLcopy_(&neqs, &MZb[1], &ione, &AY0c[1], &ione);
232:       LApotrs_("Lower", &neqs, &ione, &MZa[1+ldMZ], &ldMZ, &AY0c[1], &ldMZ, &bierr);
233:       AY0c[0] = -1;
234:       AY0c[bcgsl->ell] = 0;

236:       BLcopy_(&neqs, &MZb[1+ldMZ*(bcgsl->ell)], &ione, &AYlc[1], &ione);
237:       LApotrs_("Lower", &neqs, &ione, &MZa[1+ldMZ], &ldMZ, &AYlc[1], &ldMZ, &bierr);

239:       AYlc[0] = 0;
240:       AYlc[bcgsl->ell] = -1;

242:       LAgemv_("NoTr", &ldMZ, &ldMZ, &aone, MZb, &ldMZ, AY0c, &ione, &azero, AYtc, &ione);

244:       kappa0 = BLdot_(&ldMZ, AY0c, &ione, AYtc, &ione);

246:       /* round-off can cause negative kappa's */
247:       if (kappa0<0) kappa0 = -kappa0;
248:       kappa0 = sqrt(kappa0);

250:       kappaA = BLdot_(&ldMZ, AYlc, &ione, AYtc, &ione);

252:       LAgemv_("noTr", &ldMZ, &ldMZ, &aone, MZb, &ldMZ, AYlc, &ione, &azero, AYtc, &ione);

254:       kappa1 = BLdot_(&ldMZ, AYlc, &ione, AYtc, &ione);

256:       if (kappa1<0) kappa1 = -kappa1;
257:       kappa1 = sqrt(kappa1);

259:       if (kappa0!=0.0 && kappa1!=0.0) {
260:         if (kappaA<0.7*kappa0*kappa1) {
261:           ghat = (kappaA<0.0) ?  -0.7*kappa0/kappa1 : 0.7*kappa0/kappa1;
262:         } else {
263:           ghat = kappaA/(kappa1*kappa1);
264:         }
265:         for (i=0; i<=bcgsl->ell; i++) {
266:           AY0c[i] = AY0c[i] - ghat* AYlc[i];
267:         }
268:       }
269:     }

271:     omega = AY0c[bcgsl->ell];
272:     for (h=bcgsl->ell; h>0 && omega==0.0; h--) {
273:       omega = AY0c[h];
274:     }
275:     if (omega==0.0) {
276:       ksp->reason = KSP_DIVERGED_BREAKDOWN;
277:       break;
278:     }

280:     for (i=1; i<=bcgsl->ell; i++) {
281:       nu = -AY0c[i];
282:       VecAXPY(&nu, VVU[i], VVU[0]);
283:       nu = AY0c[i];
284:       VecAXPY(&nu, VVR[i-1], VX);
285:       nu = -AY0c[i];
286:       VecAXPY(&nu, VVR[i], VVR[0]);
287:     }

289:     VecNorm(VVR[0], NORM_2, &zeta);

291:     /* Accurate Update */
292:     if (bcgsl->delta>0.0) {
293:       if (rnmax_computed<zeta) rnmax_computed = zeta;
294:       if (rnmax_true<zeta) rnmax_true = zeta;

296:       bUpdateX = (PetscTruth) (zeta<bcgsl->delta*zeta0 && zeta0<=rnmax_computed);
297:       if ((zeta<bcgsl->delta*rnmax_true && zeta0<=rnmax_true) || bUpdateX) {
298:         /* r0 <- b-inv(K)*A*X */
299:         KSP_PCApplyBAorAB(ksp, VX, VVR[0], VTM);
300:         nu = -1;
301:         VecAYPX(&nu, VB, VVR[0]);
302:         rnmax_true = zeta;

304:         if (bUpdateX) {
305:           nu = 1;
306:           VecAXPY(&nu, VX, VXR);
307:           VecSet(&zero, VX);
308:           VecCopy(VVR[0], VB);
309:           rnmax_computed = zeta;
310:         }
311:       }
312:     }
313:   }

315:   KSPMonitor(ksp, ksp->its, zeta);

317:   if (bcgsl->delta>0.0) {
318:     nu   = 1;
319:     VecAXPY(&nu, VXR, VX);
320:   }

322:   (*ksp->converged)(ksp, k, zeta, &ksp->reason, ksp->cnvP);
323:   if (!ksp->reason) ksp->reason = KSP_DIVERGED_ITS;

325:   PetscFree(AY0c);
326:   PetscFree(AYlc);
327:   PetscFree(AYtc);
328:   PetscFree(MZa);
329:   PetscFree(MZb);

331:   return(0);

333: }
337: /*@C
338:    KSPBCGSLSetXRes - Sets the parameter governing when
339:    exact residuals will be used instead of computed residuals.

341:    Collective on KSP

343:    Input Parameters:
344: +  ksp - iterative context obtained from KSPCreate
345: -  delta - computed residuals are used alone when delta is not positive

347:    Options Database Keys:

349: .  -ksp_bcgsl_xres delta

351:    Level: intermediate

353: .keywords: KSP, BiCGStab(L), set, exact residuals

355: .seealso: KSPBCGSLSetEll(), KSPBCGSLSetPol()
356: @*/
357: PetscErrorCode KSPBCGSLSetXRes(KSP ksp, PetscReal delta)
358: {
359:   KSP_BiCGStabL  *bcgsl = (KSP_BiCGStabL *)ksp->data;

363:   if (ksp->setupcalled) {
364:     if ((delta<=0 && bcgsl->delta>0) || (delta>0 && bcgsl->delta<=0)) {
365:       bcgsl_cleanup_i(ksp);
366:       ksp->setupcalled = 0;
367:     }
368:   }
369:   bcgsl->delta = delta;
370:   return(0);
371: }
376: /*@C
377:    KSPBCGSLSetPol - Sets the type of polynomial part will
378:    be used in the BiCGSTab(L) solver.

380:    Collective on KSP

382:    Input Parameters:
383: +  ksp - iterative context obtained from KSPCreate
384: -  uMROR - set to PETSC_TRUE when the polynomial is a convex
385:    combination of an MR and an OR step.

387:    Options Database Keys:

389: +  -ksp_bcgsl_cxpoly - use enhanced polynomial
390: .  -ksp_bcgsl_mrpoly - use standard polynomial

392:    Level: intermediate

394: .keywords: KSP, BiCGStab(L), set, polynomial

396: .seealso: @()
397: @*/
398: PetscErrorCode KSPBCGSLSetPol(KSP ksp, PetscTruth uMROR)
399: {
400:   KSP_BiCGStabL  *bcgsl = (KSP_BiCGStabL *)ksp->data;

404:   if (!ksp->setupcalled) {
405:     bcgsl->bConvex = uMROR;
406:   } else if (bcgsl->bConvex != uMROR) {
407:     /* free the data structures,
408:        then create them again
409:      */
410:     bcgsl_cleanup_i(ksp);
411:     bcgsl->bConvex = uMROR;
412:     ksp->setupcalled = 0;
413:   }
414:   return(0);
415: }
420: /*@C
421:    KSPBCGSLSetEll - Sets the number of search directions in BiCGStab(L).

423:    Collective on KSP

425:    Input Parameters:
426: +  ksp - iterative context obtained from KSPCreate
427: -  ell - number of search directions

429:    Options Database Keys:

431: .  -ksp_bcgsl_ell ell

433:    Level: intermediate

435: .keywords: KSP, BiCGStab(L), set, exact residuals,

437: .seealso: @()
438: @*/
439: PetscErrorCode KSPBCGSLSetEll(KSP ksp, int ell)
440: {
441:   KSP_BiCGStabL  *bcgsl = (KSP_BiCGStabL *)ksp->data;

445:   if (ell < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE, "KSPBCGSLSetEll: second argument must be positive");

447:   if (!ksp->setupcalled) {
448:     bcgsl->ell = ell;
449:   } else if (bcgsl->ell != ell) {
450:     /* free the data structures, then create them again */
451:     bcgsl_cleanup_i(ksp);
452:     bcgsl->ell = ell;
453:     ksp->setupcalled = 0;
454:   }
455:   return(0);
456: }
460: PetscErrorCode KSPView_BCGSL(KSP ksp, PetscViewer viewer)
461: {
462:   KSP_BiCGStabL       *bcgsl = (KSP_BiCGStabL *)ksp->data;
463:   PetscErrorCode      ierr;
464:   PetscTruth isascii, isstring;

467:   PetscTypeCompare((PetscObject)viewer, PETSC_VIEWER_ASCII, &isascii);

469:   PetscTypeCompare((PetscObject)viewer, PETSC_VIEWER_STRING, &isstring);

471:   if (isascii) {
472:     PetscViewerASCIIPrintf(viewer, "  BCGSL: Ell = %D\n", bcgsl->ell);
473:     PetscViewerASCIIPrintf(viewer, "  BCGSL: Delta = %lg\n", bcgsl->delta);
474:   } else {
475:     SETERRQ1(PETSC_ERR_SUP, "Viewer type %s not supported for KSP BCGSL", ((PetscObject)viewer)->type_name);
476:   }
477:   return(0);
478: }
481: PetscErrorCode KSPSetFromOptions_BCGSL(KSP ksp)
482: {
483:   KSP_BiCGStabL  *bcgsl = (KSP_BiCGStabL *)ksp->data;
485:   PetscInt       this_ell;
486:   PetscReal      delta;
487:   PetscTruth     flga, flg;

490:   /* PetscOptionsBegin/End are called in KSPSetFromOptions. They
491:      don't need to be called here.
492:   */
493:   PetscOptionsHead("KSP BiCGStab(L) Options");

495:   /* Set number of search directions */
496:   PetscOptionsInt("-ksp_bcgsl_ell", "Number of Krylov search directions", "KSPBCGSLSetEll", bcgsl->ell, &this_ell, &flg);
497:   if (flg) {
498:     KSPBCGSLSetEll(ksp, this_ell);
499:   }

501:   /* Set polynomial type */
502:   PetscOptionsName("-ksp_bcgsl_cxpoly", "Polynomial part of BiCGStabL is MinRes + OR", "KSPBCGSLSetPol", &flga);
503:   if (flga) {
504:     KSPBCGSLSetPol(ksp, PETSC_TRUE);
505:   } else {
506:     PetscOptionsName("-ksp_bcgsl_mrpoly", "Polynomial part of BiCGStabL is MinRes", "KSPBCGSLSetPol", &flg);
507:     KSPBCGSLSetPol(ksp, PETSC_FALSE);
508:   }

510:   /* Will computed residual be refreshed? */
511:   PetscOptionsReal("-ksp_bcgsl_xres", "Threshold used to decide when to refresh computed residuals", "KSPBCGSLSetXRes", bcgsl->delta, &delta, &flg);
512:   if (flg) {
513:     KSPBCGSLSetXRes(ksp, delta);
514:   }
515:   PetscOptionsTail();
516:   return(0);
517: }
520: PetscErrorCode KSPSetUp_BCGSL(KSP ksp)
521: {

525:   /* Support left preconditioners only */
526:   if (ksp->pc_side == PC_SYMMETRIC) {
527:     SETERRQ(PETSC_ERR_SUP, "no symmetric preconditioning for KSPBCGSL");
528:   } else if (ksp->pc_side == PC_RIGHT) {
529:     SETERRQ(PETSC_ERR_SUP, "no right preconditioning for KSPBCGSL");
530:   }

532:   bcgsl_setup_i(ksp);
533:   return(0);
534: }
535: /*MC
536:      KSPBCGSL - Implements a slight variant of the Enhanced
537:                 BiCGStab(L) algorithm in (3) and (2).  The variation
538:                 concerns cases when either kappa0**2 or kappa1**2 is
539:                 negative due to round-off. Kappa0 has also been pulled
540:                 out of the denominator in the formula for ghat.

542:     References:
543:       1. G.L.G. Sleijpen, H.A. van der Vorst, "An overview of
544:          approaches for the stable computation of hybrid BiCG
545:          methods", Applied Numerical Mathematics: Transactions
546:          f IMACS, 19(3), pp 235-54, 1996.
547:       2. G.L.G. Sleijpen, H.A. van der Vorst, D.R. Fokkema,
548:          "BiCGStab(L) and other hybrid Bi-CG methods",
549:           Numerical Algorithms, 7, pp 75-109, 1994.
550:       3. D.R. Fokkema, "Enhanced implementation of BiCGStab(L)
551:          for solving linear systems of equations", preprint
552:          from www.citeseer.com.

554:    Contributed by: Joel M. Malard, email jm.malard@pnl.gov

556:    Options Database Keys:
557: +  -ksp_bcgsl_ell <ell> Number of Krylov search directions
558: -  -ksp_bcgsl_cxpol Use a convex function of the MR and OR polynomials after the BiCG step
559: -  -ksp_bcgsl_xres <res> Threshold used to decide when to refresh computed residuals

561:    Level: beginner

563: .seealso:  KSPCreate(), KSPSetType(), KSPType (for list of available types), KSP, KSPFGMRES, KSPBCGS

565: M*/
569: PetscErrorCode KSPCreate_BCGSL(KSP ksp)
570: {
572:   KSP_BiCGStabL *bcgsl;

575:   /* allocate BiCGStab(L) context */
576:   PetscNew(KSP_BiCGStabL, &bcgsl);
577:   PetscMemzero(bcgsl, sizeof(KSP_BiCGStabL));
578:   ksp->data = (void*)bcgsl;

580:   ksp->pc_side              = PC_LEFT;
581:   ksp->ops->setup           = KSPSetUp_BCGSL;
582:   ksp->ops->solve           = KSPSolve_BCGSL;
583:   ksp->ops->destroy         = KSPDefaultDestroy;
584:   ksp->ops->buildsolution   = KSPDefaultBuildSolution;
585:   ksp->ops->buildresidual   = KSPDefaultBuildResidual;
586:   ksp->ops->setfromoptions  = KSPSetFromOptions_BCGSL;
587:   ksp->ops->view            = KSPView_BCGSL;

589:   /* Let the user redefine the number of directions vectors */
590:   bcgsl->ell = 2;
591:   PetscObjectComposeFunctionDynamic((PetscObject)ksp, "KSPBCGSLSetEll_C", "KSP_BCGS_SetEll", KSPBCGSLSetEll);

593:   /*Choose between a single MR step or an averaged MR/OR */
594:   bcgsl->bConvex = PETSC_FALSE;
595:   PetscObjectComposeFunctionDynamic((PetscObject)ksp, "KSPBCGSLSetPol_C", "KSP_BCGS_SetPol", KSPBCGSLSetPol);

597:   /* Set the threshold for when exact residuals will be used */
598:   bcgsl->delta = 0.0;
599:   PetscObjectComposeFunctionDynamic((PetscObject)ksp, "KSPBCGSLSetXRes_C", "KSP_BCGS_SetXRes", KSPBCGSLSetXRes);
600:   return(0);
601: }