Actual source code: pipebcgs.c
2: /*
3: This file implements pipelined BiCGStab (pipe-BiCGStab).
4: Only allow right preconditioning.
5: */
6: #include <../src/ksp/ksp/impls/bcgs/bcgsimpl.h>
8: static PetscErrorCode KSPSetUp_PIPEBCGS(KSP ksp)
9: {
10: KSPSetWorkVecs(ksp, 15);
11: return 0;
12: }
14: /* Only need a few hacks from KSPSolve_BCGS */
15: #include <petsc/private/pcimpl.h>
16: static PetscErrorCode KSPSolve_PIPEBCGS(KSP ksp)
17: {
18: PetscInt i;
19: PetscScalar rho, rhoold, alpha, beta, omega = 0.0, d1, d2, d3;
20: Vec X, B, S, R, RP, Y, Q, P2, Q2, R2, S2, W, Z, W2, Z2, T, V;
21: PetscReal dp = 0.0;
22: KSP_BCGS *bcgs = (KSP_BCGS *)ksp->data;
23: PC pc;
25: X = ksp->vec_sol;
26: B = ksp->vec_rhs;
27: R = ksp->work[0];
28: RP = ksp->work[1];
29: S = ksp->work[2];
30: Y = ksp->work[3];
31: Q = ksp->work[4];
32: Q2 = ksp->work[5];
33: P2 = ksp->work[6];
34: R2 = ksp->work[7];
35: S2 = ksp->work[8];
36: W = ksp->work[9];
37: Z = ksp->work[10];
38: W2 = ksp->work[11];
39: Z2 = ksp->work[12];
40: T = ksp->work[13];
41: V = ksp->work[14];
43: /* Only supports right preconditioning */
45: if (!ksp->guess_zero) {
46: if (!bcgs->guess) VecDuplicate(X, &bcgs->guess);
47: VecCopy(X, bcgs->guess);
48: } else {
49: VecSet(X, 0.0);
50: }
52: /* Compute initial residual */
53: KSPGetPC(ksp, &pc);
54: PCSetUp(pc);
55: if (!ksp->guess_zero) {
56: KSP_MatMult(ksp, pc->mat, X, Q2);
57: VecCopy(B, R);
58: VecAXPY(R, -1.0, Q2);
59: } else {
60: VecCopy(B, R);
61: }
63: /* Test for nothing to do */
64: if (ksp->normtype != KSP_NORM_NONE) {
65: VecNorm(R, NORM_2, &dp);
66: } else dp = 0.0;
67: PetscObjectSAWsTakeAccess((PetscObject)ksp);
68: ksp->its = 0;
69: ksp->rnorm = dp;
70: PetscObjectSAWsGrantAccess((PetscObject)ksp);
71: KSPLogResidualHistory(ksp, dp);
72: KSPMonitor(ksp, 0, dp);
73: (*ksp->converged)(ksp, 0, dp, &ksp->reason, ksp->cnvP);
74: if (ksp->reason) return 0;
76: /* Initialize */
77: VecCopy(R, RP); /* rp <- r */
79: VecDotBegin(R, RP, &rho); /* rho <- (r,rp) */
80: PetscCommSplitReductionBegin(PetscObjectComm((PetscObject)R));
81: KSP_PCApply(ksp, R, R2); /* r2 <- K r */
82: KSP_MatMult(ksp, pc->mat, R2, W); /* w <- A r2 */
83: VecDotEnd(R, RP, &rho);
85: VecDotBegin(W, RP, &d2); /* d2 <- (w,rp) */
86: PetscCommSplitReductionBegin(PetscObjectComm((PetscObject)W));
87: KSP_PCApply(ksp, W, W2); /* w2 <- K w */
88: KSP_MatMult(ksp, pc->mat, W2, T); /* t <- A w2 */
89: VecDotEnd(W, RP, &d2);
91: alpha = rho / d2;
92: beta = 0.0;
94: /* Main loop */
95: i = 0;
96: do {
97: if (i == 0) {
98: VecCopy(R2, P2); /* p2 <- r2 */
99: VecCopy(W, S); /* s <- w */
100: VecCopy(W2, S2); /* s2 <- w2 */
101: VecCopy(T, Z); /* z <- t */
102: } else {
103: VecAXPBYPCZ(P2, 1.0, -beta * omega, beta, R2, S2); /* p2 <- beta * p2 + r2 - beta * omega * s2 */
104: VecAXPBYPCZ(S, 1.0, -beta * omega, beta, W, Z); /* s <- beta * s + w - beta * omega * z */
105: VecAXPBYPCZ(S2, 1.0, -beta * omega, beta, W2, Z2); /* s2 <- beta * s2 + w2 - beta * omega * z2 */
106: VecAXPBYPCZ(Z, 1.0, -beta * omega, beta, T, V); /* z <- beta * z + t - beta * omega * v */
107: }
108: VecWAXPY(Q, -alpha, S, R); /* q <- r - alpha s */
109: VecWAXPY(Q2, -alpha, S2, R2); /* q2 <- r2 - alpha s2 */
110: VecWAXPY(Y, -alpha, Z, W); /* y <- w - alpha z */
112: VecDotBegin(Q, Y, &d1); /* d1 <- (q,y) */
113: VecDotBegin(Y, Y, &d2); /* d2 <- (y,y) */
115: PetscCommSplitReductionBegin(PetscObjectComm((PetscObject)Q));
116: KSP_PCApply(ksp, Z, Z2); /* z2 <- K z */
117: KSP_MatMult(ksp, pc->mat, Z2, V); /* v <- A z2 */
119: VecDotEnd(Q, Y, &d1);
120: VecDotEnd(Y, Y, &d2);
122: if (d2 == 0.0) {
123: /* y is 0. if q is 0, then alpha s == r, and hence alpha p may be our solution. Give it a try? */
124: VecDot(Q, Q, &d1);
125: if (d1 != 0.0) {
126: ksp->reason = KSP_DIVERGED_BREAKDOWN;
127: break;
128: }
129: VecAXPY(X, alpha, P2); /* x <- x + alpha p2 */
130: PetscObjectSAWsTakeAccess((PetscObject)ksp);
131: ksp->its++;
132: ksp->rnorm = 0.0;
133: ksp->reason = KSP_CONVERGED_RTOL;
134: PetscObjectSAWsGrantAccess((PetscObject)ksp);
135: KSPLogResidualHistory(ksp, dp);
136: KSPMonitor(ksp, i + 1, 0.0);
137: break;
138: }
139: omega = d1 / d2; /* omega <- (y'q) / (y'y) */
140: VecAXPBYPCZ(X, alpha, omega, 1.0, P2, Q2); /* x <- alpha * p2 + omega * q2 + x */
141: VecWAXPY(R, -omega, Y, Q); /* r <- q - omega y */
142: VecWAXPY(R2, -alpha, Z2, W2); /* r2 <- w2 - alpha z2 */
143: VecAYPX(R2, -omega, Q2); /* r2 <- q2 - omega r2 */
144: VecWAXPY(W, -alpha, V, T); /* w <- t - alpha v */
145: VecAYPX(W, -omega, Y); /* w <- y - omega w */
146: rhoold = rho;
148: if (ksp->normtype != KSP_NORM_NONE && ksp->chknorm < i + 2) { VecNormBegin(R, NORM_2, &dp); /* dp <- norm(r) */ }
149: VecDotBegin(R, RP, &rho); /* rho <- (r,rp) */
150: VecDotBegin(S, RP, &d1); /* d1 <- (s,rp) */
151: VecDotBegin(W, RP, &d2); /* d2 <- (w,rp) */
152: VecDotBegin(Z, RP, &d3); /* d3 <- (z,rp) */
154: PetscCommSplitReductionBegin(PetscObjectComm((PetscObject)R));
155: KSP_PCApply(ksp, W, W2); /* w2 <- K w */
156: KSP_MatMult(ksp, pc->mat, W2, T); /* t <- A w2 */
158: if (ksp->normtype != KSP_NORM_NONE && ksp->chknorm < i + 2) VecNormEnd(R, NORM_2, &dp);
159: VecDotEnd(R, RP, &rho);
160: VecDotEnd(S, RP, &d1);
161: VecDotEnd(W, RP, &d2);
162: VecDotEnd(Z, RP, &d3);
166: beta = (rho / rhoold) * (alpha / omega);
167: alpha = rho / (d2 + beta * d1 - beta * omega * d3); /* alpha <- rho / (d2 + beta * d1 - beta * omega * d3) */
169: PetscObjectSAWsTakeAccess((PetscObject)ksp);
170: ksp->its++;
172: /* Residual replacement step */
173: if (i > 0 && i % 100 == 0 && i < 1001) {
174: KSP_MatMult(ksp, pc->mat, X, R);
175: VecAYPX(R, -1.0, B); /* r <- b - Ax */
176: KSP_PCApply(ksp, R, R2); /* r2 <- K r */
177: KSP_MatMult(ksp, pc->mat, R2, W); /* w <- A r2 */
178: KSP_PCApply(ksp, W, W2); /* w2 <- K w */
179: KSP_MatMult(ksp, pc->mat, W2, T); /* t <- A w2 */
180: KSP_MatMult(ksp, pc->mat, P2, S); /* s <- A p2 */
181: KSP_PCApply(ksp, S, S2); /* s2 <- K s */
182: KSP_MatMult(ksp, pc->mat, S2, Z); /* z <- A s2 */
183: KSP_PCApply(ksp, Z, Z2); /* z2 <- K z */
184: KSP_MatMult(ksp, pc->mat, Z2, V); /* v <- A z2 */
185: }
187: ksp->rnorm = dp;
188: PetscObjectSAWsGrantAccess((PetscObject)ksp);
189: KSPLogResidualHistory(ksp, dp);
190: KSPMonitor(ksp, i + 1, dp);
191: (*ksp->converged)(ksp, i + 1, dp, &ksp->reason, ksp->cnvP);
192: if (ksp->reason) break;
193: if (rho == 0.0) {
194: ksp->reason = KSP_DIVERGED_BREAKDOWN;
195: break;
196: }
197: i++;
198: } while (i < ksp->max_it);
200: if (i >= ksp->max_it) ksp->reason = KSP_DIVERGED_ITS;
201: return 0;
202: }
204: /*MC
205: KSPPIPEBCGS - Implements the pipelined BiCGStab method.
207: This method has only two non-blocking reductions per iteration, compared to 3 blocking for standard FBCGS. The
208: non-blocking reductions are overlapped by matrix-vector products and preconditioner applications.
210: Periodic residual replacement may be used to increase robustness and maximal attainable accuracy.
212: Options Database Keys:
213: see KSPSolve()
215: Level: intermediate
217: Notes:
218: Like KSPFBCGS, the KSPPIPEBCGS implementation only allows for right preconditioning.
219: MPI configuration may be necessary for reductions to make asynchronous progress, which is important for
220: performance of pipelined methods. See the FAQ on the PETSc website for details.
222: Contributed by:
223: Siegfried Cools, Universiteit Antwerpen,
224: EXA2CT European Project on EXascale Algorithms and Advanced Computational Techniques, 2016.
226: Reference:
227: S. Cools and W. Vanroose,
228: "The communication-hiding pipelined BiCGStab method for the parallel solution of large unsymmetric linear systems",
229: Parallel Computing, 65:1-20, 2017.
231: .seealso: `KSPCreate()`, `KSPSetType()`, `KSPType`, `KSP`, `KSPBICG`, `KSPFBCGS`, `KSPFBCGSL`, `KSPSetPCSide()`
232: M*/
233: PETSC_EXTERN PetscErrorCode KSPCreate_PIPEBCGS(KSP ksp)
234: {
235: KSP_BCGS *bcgs;
237: PetscNew(&bcgs);
239: ksp->data = bcgs;
240: ksp->ops->setup = KSPSetUp_PIPEBCGS;
241: ksp->ops->solve = KSPSolve_PIPEBCGS;
242: ksp->ops->destroy = KSPDestroy_BCGS;
243: ksp->ops->reset = KSPReset_BCGS;
244: ksp->ops->buildresidual = KSPBuildResidualDefault;
245: ksp->ops->setfromoptions = KSPSetFromOptions_BCGS;
247: KSPSetSupportedNorm(ksp, KSP_NORM_UNPRECONDITIONED, PC_RIGHT, 2);
248: KSPSetSupportedNorm(ksp, KSP_NORM_NONE, PC_RIGHT, 1);
249: return 0;
250: }