Actual source code: bcgsl.c
petsc-3.3-p7 2013-05-11
1: /*
2: * Implementation of BiCGstab(L) the paper by D.R. Fokkema,
3: * "Enhanced implementation of BiCGStab(L) for solving linear systems
4: * of equations". This uses tricky delayed updating ideas to prevent
5: * round-off buildup.
6: *
7: * This has not been completely cleaned up into PETSc style.
8: *
9: * All the BLAS and LAPACK calls below should be removed and replaced with
10: * loops and the macros for block solvers converted from LINPACK; there is no way
11: * calls to BLAS/LAPACK make sense for size 2, 3, 4, etc.
12: */
13: #include <petsc-private/kspimpl.h> /*I "petscksp.h" I*/
14: #include <../src/ksp/ksp/impls/bcgsl/bcgslimpl.h>
15: #include <petscblaslapack.h>
20: static PetscErrorCode KSPSolve_BCGSL(KSP ksp)
21: {
22: KSP_BCGSL *bcgsl = (KSP_BCGSL *) ksp->data;
23: PetscScalar alpha, beta, omega, sigma;
24: PetscScalar rho0, rho1;
25: PetscReal kappa0, kappaA, kappa1;
26: PetscReal ghat;
27: PetscReal zeta, zeta0, rnmax_computed, rnmax_true, nrm0;
28: PetscBool bUpdateX;
29: PetscInt maxit;
30: PetscInt h, i, j, k, vi, ell;
31: PetscBLASInt ldMZ,bierr;
36: /* set up temporary vectors */
37: vi = 0;
38: ell = bcgsl->ell;
39: bcgsl->vB = ksp->work[vi]; vi++;
40: bcgsl->vRt = ksp->work[vi]; vi++;
41: bcgsl->vTm = ksp->work[vi]; vi++;
42: bcgsl->vvR = ksp->work+vi; vi += ell+1;
43: bcgsl->vvU = ksp->work+vi; vi += ell+1;
44: bcgsl->vXr = ksp->work[vi]; vi++;
45: ldMZ = PetscBLASIntCast(ell+1);
47: /* Prime the iterative solver */
48: KSPInitialResidual(ksp, VX, VTM, VB, VVR[0], ksp->vec_rhs);
49: VecNorm(VVR[0], NORM_2, &zeta0);
50: rnmax_computed = zeta0;
51: rnmax_true = zeta0;
53: (*ksp->converged)(ksp, 0, zeta0, &ksp->reason, ksp->cnvP);
54: if (ksp->reason) {
55: PetscObjectTakeAccess(ksp);
56: ksp->its = 0;
57: ksp->rnorm = zeta0;
58: PetscObjectGrantAccess(ksp);
59: return(0);
60: }
62: VecSet(VVU[0],0.0);
63: alpha = 0.;
64: rho0 = omega = 1;
66: if (bcgsl->delta>0.0) {
67: VecCopy(VX, VXR);
68: VecSet(VX,0.0);
69: VecCopy(VVR[0], VB);
70: } else {
71: VecCopy(ksp->vec_rhs, VB);
72: }
74: /* Life goes on */
75: VecCopy(VVR[0], VRT);
76: zeta = zeta0;
78: KSPGetTolerances(ksp, PETSC_NULL, PETSC_NULL, PETSC_NULL, &maxit);
80: for (k=0; k<maxit; k += bcgsl->ell) {
81: ksp->its = k;
82: ksp->rnorm = zeta;
84: KSPLogResidualHistory(ksp, zeta);
85: KSPMonitor(ksp, ksp->its, zeta);
87: (*ksp->converged)(ksp, k, zeta, &ksp->reason, ksp->cnvP);
88: if (ksp->reason < 0) return(0);
89: else if (ksp->reason) break;
91: /* BiCG part */
92: rho0 = -omega*rho0;
93: nrm0 = zeta;
94: for (j=0; j<bcgsl->ell; j++) {
95: /* rho1 <- r_j' * r_tilde */
96: VecDot(VVR[j], VRT, &rho1);
97: if (rho1 == 0.0) {
98: ksp->reason = KSP_DIVERGED_BREAKDOWN_BICG;
99: return(0);
100: }
101: beta = alpha*(rho1/rho0);
102: rho0 = rho1;
103: for (i=0; i<=j; i++) {
104: /* u_i <- r_i - beta*u_i */
105: VecAYPX(VVU[i], -beta, VVR[i]);
106: }
107: /* u_{j+1} <- inv(K)*A*u_j */
108: KSP_PCApplyBAorAB(ksp, VVU[j], VVU[j+1], VTM);
110: VecDot(VVU[j+1], VRT, &sigma);
111: if (sigma == 0.0) {
112: ksp->reason = KSP_DIVERGED_BREAKDOWN_BICG;
113: return(0);
114: }
115: alpha = rho1/sigma;
117: /* x <- x + alpha*u_0 */
118: VecAXPY(VX, alpha, VVU[0]);
120: for (i=0; i<=j; i++) {
121: /* r_i <- r_i - alpha*u_{i+1} */
122: VecAXPY(VVR[i], -alpha, VVU[i+1]);
123: }
125: /* r_{j+1} <- inv(K)*A*r_j */
126: KSP_PCApplyBAorAB(ksp, VVR[j], VVR[j+1], VTM);
128: VecNorm(VVR[0], NORM_2, &nrm0);
129: if (bcgsl->delta>0.0) {
130: if (rnmax_computed<nrm0) rnmax_computed = nrm0;
131: if (rnmax_true<nrm0) rnmax_true = nrm0;
132: }
134: /* NEW: check for early exit */
135: (*ksp->converged)(ksp, k+j, nrm0, &ksp->reason, ksp->cnvP);
136: if (ksp->reason) {
137: PetscObjectTakeAccess(ksp);
138: ksp->its = k+j;
139: ksp->rnorm = nrm0;
140: PetscObjectGrantAccess(ksp);
141: if (ksp->reason < 0) return(0);
142: }
143: }
145: /* Polynomial part */
146: for(i = 0; i <= bcgsl->ell; ++i) {
147: VecMDot(VVR[i], i+1, VVR, &MZa[i*ldMZ]);
148: }
149: /* Symmetrize MZa */
150: for(i = 0; i <= bcgsl->ell; ++i) {
151: for(j = i+1; j <= bcgsl->ell; ++j) {
152: MZa[i*ldMZ+j] = MZa[j*ldMZ+i] = PetscConj(MZa[j*ldMZ+i]);
153: }
154: }
155: /* Copy MZa to MZb */
156: PetscMemcpy(MZb,MZa,ldMZ*ldMZ*sizeof(PetscScalar));
158: if (!bcgsl->bConvex || bcgsl->ell==1) {
159: PetscBLASInt ione = 1,bell = PetscBLASIntCast(bcgsl->ell);
161: AY0c[0] = -1;
162: #if defined(PETSC_MISSING_LAPACK_POTRF)
163: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"POTRF - Lapack routine is unavailable.");
164: #else
165: LAPACKpotrf_("Lower", &bell, &MZa[1+ldMZ], &ldMZ, &bierr);
166: #endif
167: if (ierr!=0) {
168: ksp->reason = KSP_DIVERGED_BREAKDOWN;
169: return(0);
170: }
171: PetscMemcpy(&AY0c[1],&MZb[1],bcgsl->ell*sizeof(PetscScalar));
172: LAPACKpotrs_("Lower", &bell, &ione, &MZa[1+ldMZ], &ldMZ, &AY0c[1], &ldMZ, &bierr);
173: } else {
174: PetscBLASInt ione = 1;
175: PetscScalar aone = 1.0, azero = 0.0;
176: PetscBLASInt neqs = PetscBLASIntCast(bcgsl->ell-1);
178: #if defined(PETSC_MISSING_LAPACK_POTRF)
179: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"POTRF - Lapack routine is unavailable.");
180: #else
181: LAPACKpotrf_("Lower", &neqs, &MZa[1+ldMZ], &ldMZ, &bierr);
182: #endif
183: if (ierr!=0) {
184: ksp->reason = KSP_DIVERGED_BREAKDOWN;
185: return(0);
186: }
187: PetscMemcpy(&AY0c[1],&MZb[1],(bcgsl->ell-1)*sizeof(PetscScalar));
188: LAPACKpotrs_("Lower", &neqs, &ione, &MZa[1+ldMZ], &ldMZ, &AY0c[1], &ldMZ, &bierr);
189: AY0c[0] = -1;
190: AY0c[bcgsl->ell] = 0.;
192: PetscMemcpy(&AYlc[1],&MZb[1+ldMZ*(bcgsl->ell)],(bcgsl->ell-1)*sizeof(PetscScalar));
193: LAPACKpotrs_("Lower", &neqs, &ione, &MZa[1+ldMZ], &ldMZ, &AYlc[1], &ldMZ, &bierr);
195: AYlc[0] = 0.;
196: AYlc[bcgsl->ell] = -1;
198: BLASgemv_("NoTr", &ldMZ, &ldMZ, &aone, MZb, &ldMZ, AY0c, &ione, &azero, AYtc, &ione);
200: kappa0 = PetscRealPart(BLASdot_(&ldMZ, AY0c, &ione, AYtc, &ione));
202: /* round-off can cause negative kappa's */
203: if (kappa0<0) kappa0 = -kappa0;
204: kappa0 = PetscSqrtReal(kappa0);
206: kappaA = PetscRealPart(BLASdot_(&ldMZ, AYlc, &ione, AYtc, &ione));
208: BLASgemv_("noTr", &ldMZ, &ldMZ, &aone, MZb, &ldMZ, AYlc, &ione, &azero, AYtc, &ione);
210: kappa1 = PetscRealPart(BLASdot_(&ldMZ, AYlc, &ione, AYtc, &ione));
212: if (kappa1<0) kappa1 = -kappa1;
213: kappa1 = PetscSqrtReal(kappa1);
215: if (kappa0!=0.0 && kappa1!=0.0) {
216: if (kappaA<0.7*kappa0*kappa1) {
217: ghat = (kappaA<0.0) ? -0.7*kappa0/kappa1 : 0.7*kappa0/kappa1;
218: } else {
219: ghat = kappaA/(kappa1*kappa1);
220: }
221: for (i=0; i<=bcgsl->ell; i++) {
222: AY0c[i] = AY0c[i] - ghat* AYlc[i];
223: }
224: }
225: }
227: omega = AY0c[bcgsl->ell];
228: for (h=bcgsl->ell; h>0 && omega==0.0; h--) {
229: omega = AY0c[h];
230: }
231: if (omega==0.0) {
232: ksp->reason = KSP_DIVERGED_BREAKDOWN;
233: return(0);
234: }
237: VecMAXPY(VX, bcgsl->ell,AY0c+1, VVR);
238: for (i=1; i<=bcgsl->ell; i++) {
239: AY0c[i] *= -1.0;
240: }
241: VecMAXPY(VVU[0], bcgsl->ell,AY0c+1, VVU+1);
242: VecMAXPY(VVR[0], bcgsl->ell,AY0c+1, VVR+1);
243: for (i=1; i<=bcgsl->ell; i++) {
244: AY0c[i] *= -1.0;
245: }
246: VecNorm(VVR[0], NORM_2, &zeta);
248: /* Accurate Update */
249: if (bcgsl->delta>0.0) {
250: if (rnmax_computed<zeta) rnmax_computed = zeta;
251: if (rnmax_true<zeta) rnmax_true = zeta;
253: bUpdateX = (PetscBool) (zeta<bcgsl->delta*zeta0 && zeta0<=rnmax_computed);
254: if ((zeta<bcgsl->delta*rnmax_true && zeta0<=rnmax_true) || bUpdateX) {
255: /* r0 <- b-inv(K)*A*X */
256: KSP_PCApplyBAorAB(ksp, VX, VVR[0], VTM);
257: VecAYPX(VVR[0], -1.0, VB);
258: rnmax_true = zeta;
260: if (bUpdateX) {
261: VecAXPY(VXR,1.0,VX);
262: VecSet(VX,0.0);
263: VecCopy(VVR[0], VB);
264: rnmax_computed = zeta;
265: }
266: }
267: }
268: }
269: if (bcgsl->delta>0.0) {
270: VecAXPY(VX,1.0,VXR);
271: }
273: (*ksp->converged)(ksp, k, zeta, &ksp->reason, ksp->cnvP);
274: if (!ksp->reason) ksp->reason = KSP_DIVERGED_ITS;
275: return(0);
276: }
280: /*@
281: KSPBCGSLSetXRes - Sets the parameter governing when
282: exact residuals will be used instead of computed residuals.
284: Logically Collective on KSP
286: Input Parameters:
287: + ksp - iterative context obtained from KSPCreate
288: - delta - computed residuals are used alone when delta is not positive
290: Options Database Keys:
292: . -ksp_bcgsl_xres delta
294: Level: intermediate
296: .keywords: KSP, BiCGStab(L), set, exact residuals
298: .seealso: KSPBCGSLSetEll(), KSPBCGSLSetPol()
299: @*/
300: PetscErrorCode KSPBCGSLSetXRes(KSP ksp, PetscReal delta)
301: {
302: KSP_BCGSL *bcgsl = (KSP_BCGSL *)ksp->data;
307: if (ksp->setupstage) {
308: if ((delta<=0 && bcgsl->delta>0) || (delta>0 && bcgsl->delta<=0)) {
309: VecDestroyVecs(ksp->nwork,&ksp->work);
310: PetscFree5(AY0c,AYlc,AYtc,MZa,MZb);
311: ksp->setupstage = KSP_SETUP_NEW;
312: }
313: }
314: bcgsl->delta = delta;
315: return(0);
316: }
320: /*@
321: KSPBCGSLSetPol - Sets the type of polynomial part will
322: be used in the BiCGSTab(L) solver.
324: Logically Collective on KSP
326: Input Parameters:
327: + ksp - iterative context obtained from KSPCreate
328: - uMROR - set to PETSC_TRUE when the polynomial is a convex combination of an MR and an OR step.
330: Options Database Keys:
332: + -ksp_bcgsl_cxpoly - use enhanced polynomial
333: . -ksp_bcgsl_mrpoly - use standard polynomial
335: Level: intermediate
337: .keywords: KSP, BiCGStab(L), set, polynomial
339: .seealso: @()
340: @*/
341: PetscErrorCode KSPBCGSLSetPol(KSP ksp, PetscBool uMROR)
342: {
343: KSP_BCGSL *bcgsl = (KSP_BCGSL *)ksp->data;
349: if (!ksp->setupstage) {
350: bcgsl->bConvex = uMROR;
351: } else if (bcgsl->bConvex != uMROR) {
352: /* free the data structures,
353: then create them again
354: */
355: VecDestroyVecs(ksp->nwork,&ksp->work);
356: PetscFree5(AY0c,AYlc,AYtc,MZa,MZb);
357: bcgsl->bConvex = uMROR;
358: ksp->setupstage = KSP_SETUP_NEW;
359: }
360: return(0);
361: }
365: /*@
366: KSPBCGSLSetEll - Sets the number of search directions in BiCGStab(L).
368: Logically Collective on KSP
370: Input Parameters:
371: + ksp - iterative context obtained from KSPCreate
372: - ell - number of search directions
374: Options Database Keys:
376: . -ksp_bcgsl_ell ell
378: Level: intermediate
380: .keywords: KSP, BiCGStab(L), set, exact residuals,
382: .seealso: @()
383: @*/
384: PetscErrorCode KSPBCGSLSetEll(KSP ksp, PetscInt ell)
385: {
386: KSP_BCGSL *bcgsl = (KSP_BCGSL *)ksp->data;
390: if (ell < 1) SETERRQ(((PetscObject)ksp)->comm,PETSC_ERR_ARG_OUTOFRANGE, "KSPBCGSLSetEll: second argument must be positive");
393: if (!ksp->setupstage) {
394: bcgsl->ell = ell;
395: } else if (bcgsl->ell != ell) {
396: /* free the data structures, then create them again */
397: VecDestroyVecs(ksp->nwork,&ksp->work);
398: PetscFree5(AY0c,AYlc,AYtc,MZa,MZb);
399: bcgsl->ell = ell;
400: ksp->setupstage = KSP_SETUP_NEW;
401: }
402: return(0);
403: }
407: PetscErrorCode KSPView_BCGSL(KSP ksp, PetscViewer viewer)
408: {
409: KSP_BCGSL *bcgsl = (KSP_BCGSL *)ksp->data;
410: PetscErrorCode ierr;
411: PetscBool isascii, isstring;
414: PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii);
415: PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSTRING, &isstring);
417: if (isascii) {
418: PetscViewerASCIIPrintf(viewer, " BCGSL: Ell = %D\n", bcgsl->ell);
419: PetscViewerASCIIPrintf(viewer, " BCGSL: Delta = %lg\n", bcgsl->delta);
420: } else {
421: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP, "Viewer type %s not supported for KSP BCGSL", ((PetscObject)viewer)->type_name);
422: }
423: return(0);
424: }
428: PetscErrorCode KSPSetFromOptions_BCGSL(KSP ksp)
429: {
430: KSP_BCGSL *bcgsl = (KSP_BCGSL *)ksp->data;
432: PetscInt this_ell;
433: PetscReal delta;
434: PetscBool flga = PETSC_FALSE, flg;
437: /* PetscOptionsBegin/End are called in KSPSetFromOptions. They
438: don't need to be called here.
439: */
440: PetscOptionsHead("KSP BiCGStab(L) Options");
442: /* Set number of search directions */
443: PetscOptionsInt("-ksp_bcgsl_ell","Number of Krylov search directions","KSPBCGSLSetEll",bcgsl->ell,&this_ell,&flg);
444: if (flg) {
445: KSPBCGSLSetEll(ksp, this_ell);
446: }
448: /* Set polynomial type */
449: PetscOptionsBool("-ksp_bcgsl_cxpoly", "Polynomial part of BiCGStabL is MinRes + OR", "KSPBCGSLSetPol", flga,&flga,PETSC_NULL);
450: if (flga) {
451: KSPBCGSLSetPol(ksp, PETSC_TRUE);
452: } else {
453: flg = PETSC_FALSE;
454: PetscOptionsBool("-ksp_bcgsl_mrpoly", "Polynomial part of BiCGStabL is MinRes", "KSPBCGSLSetPol", flg,&flg,PETSC_NULL);
455: KSPBCGSLSetPol(ksp, PETSC_FALSE);
456: }
458: /* Will computed residual be refreshed? */
459: PetscOptionsReal("-ksp_bcgsl_xres", "Threshold used to decide when to refresh computed residuals", "KSPBCGSLSetXRes", bcgsl->delta, &delta, &flg);
460: if (flg) {
461: KSPBCGSLSetXRes(ksp, delta);
462: }
463: PetscOptionsTail();
464: return(0);
465: }
469: PetscErrorCode KSPSetUp_BCGSL(KSP ksp)
470: {
471: KSP_BCGSL *bcgsl = (KSP_BCGSL *)ksp->data;
472: PetscInt ell = bcgsl->ell,ldMZ = ell+1;
476: if (ksp->pc_side == PC_SYMMETRIC) SETERRQ(((PetscObject)ksp)->comm,PETSC_ERR_SUP, "no symmetric preconditioning for KSPBCGSL");
477: else if (ksp->pc_side == PC_RIGHT) SETERRQ(((PetscObject)ksp)->comm,PETSC_ERR_SUP, "no right preconditioning for KSPBCGSL");
478: KSPDefaultGetWork(ksp, 6+2*ell);
479: PetscMalloc5(ldMZ,PetscScalar,&AY0c,ldMZ,PetscScalar,&AYlc,ldMZ,PetscScalar,&AYtc,ldMZ*ldMZ,PetscScalar,&MZa,ldMZ*ldMZ,PetscScalar,&MZb);
480: return(0);
481: }
485: PetscErrorCode KSPReset_BCGSL(KSP ksp)
486: {
487: KSP_BCGSL *bcgsl = (KSP_BCGSL *)ksp->data;
490: VecDestroyVecs(ksp->nwork,&ksp->work);
491: PetscFree5(AY0c,AYlc,AYtc,MZa,MZb);
492: return(0);
493: }
497: PetscErrorCode KSPDestroy_BCGSL(KSP ksp)
498: {
502: KSPReset_BCGSL(ksp);
503: KSPDefaultDestroy(ksp);
504: return(0);
505: }
507: /*MC
508: KSPBCGSL - Implements a slight variant of the Enhanced
509: BiCGStab(L) algorithm in (3) and (2). The variation
510: concerns cases when either kappa0**2 or kappa1**2 is
511: negative due to round-off. Kappa0 has also been pulled
512: out of the denominator in the formula for ghat.
514: References:
515: 1. G.L.G. Sleijpen, H.A. van der Vorst, "An overview of
516: approaches for the stable computation of hybrid BiCG
517: methods", Applied Numerical Mathematics: Transactions
518: f IMACS, 19(3), pp 235-54, 1996.
519: 2. G.L.G. Sleijpen, H.A. van der Vorst, D.R. Fokkema,
520: "BiCGStab(L) and other hybrid Bi-CG methods",
521: Numerical Algorithms, 7, pp 75-109, 1994.
522: 3. D.R. Fokkema, "Enhanced implementation of BiCGStab(L)
523: for solving linear systems of equations", preprint
524: from www.citeseer.com.
526: Contributed by: Joel M. Malard, email jm.malard@pnl.gov
528: Options Database Keys:
529: + -ksp_bcgsl_ell <ell> Number of Krylov search directions, defaults to 2 -- KSPBCGSLSetEll()
530: . -ksp_bcgsl_cxpol - Use a convex function of the MinRes and OR polynomials after the BiCG step instead of default MinRes -- KSPBCGSLSetPol()
531: . -ksp_bcgsl_mrpoly - Use the default MinRes polynomial after the BiCG step -- KSPBCGSLSetPol()
532: - -ksp_bcgsl_xres <res> Threshold used to decide when to refresh computed residuals -- KSPBCGSLSetXRes()
534: Notes: Supports left preconditioning only
536: Level: beginner
538: .seealso: KSPCreate(), KSPSetType(), KSPType (for list of available types), KSP, KSPFGMRES, KSPBCGS, KSPSetPCSide(), KSPBCGSLSetEll(), KSPBCGSLSetXRes()
540: M*/
541: EXTERN_C_BEGIN
544: PetscErrorCode KSPCreate_BCGSL(KSP ksp)
545: {
547: KSP_BCGSL *bcgsl;
550: /* allocate BiCGStab(L) context */
551: PetscNewLog(ksp, KSP_BCGSL, &bcgsl);
552: ksp->data = (void*)bcgsl;
554: KSPSetSupportedNorm(ksp,KSP_NORM_PRECONDITIONED,PC_LEFT,2);
555: KSPSetSupportedNorm(ksp,KSP_NORM_UNPRECONDITIONED,PC_RIGHT,1);
557: ksp->ops->setup = KSPSetUp_BCGSL;
558: ksp->ops->solve = KSPSolve_BCGSL;
559: ksp->ops->reset = KSPReset_BCGSL;
560: ksp->ops->destroy = KSPDestroy_BCGSL;
561: ksp->ops->buildsolution = KSPDefaultBuildSolution;
562: ksp->ops->buildresidual = KSPDefaultBuildResidual;
563: ksp->ops->setfromoptions = KSPSetFromOptions_BCGSL;
564: ksp->ops->view = KSPView_BCGSL;
566: /* Let the user redefine the number of directions vectors */
567: bcgsl->ell = 2;
569: /*Choose between a single MR step or an averaged MR/OR */
570: bcgsl->bConvex = PETSC_FALSE;
572: /* Set the threshold for when exact residuals will be used */
573: bcgsl->delta = 0.0;
574: return(0);
575: }
576: EXTERN_C_END