Actual source code: pipecgrr.c
1: #include <petsc/private/kspimpl.h>
3: /*
4: KSPSetUp_PIPECGRR - Sets up the workspace needed by the PIPECGRR method.
6: This is called once, usually automatically by KSPSolve() or KSPSetUp()
7: but can be called directly by KSPSetUp()
8: */
9: static PetscErrorCode KSPSetUp_PIPECGRR(KSP ksp)
10: {
11: PetscFunctionBegin;
12: /* get work vectors needed by PIPECGRR */
13: PetscCall(KSPSetWorkVecs(ksp, 9));
14: PetscFunctionReturn(PETSC_SUCCESS);
15: }
17: /*
18: KSPSolve_PIPECGRR - This routine actually applies the pipelined conjugate gradient method with automated residual replacement
19: */
20: static PetscErrorCode KSPSolve_PIPECGRR(KSP ksp)
21: {
22: PetscInt i = 0, replace = 0, nsize;
23: PetscScalar alpha = 0.0, beta = 0.0, gamma = 0.0, gammaold = 0.0, delta = 0.0, alphap = 0.0, betap = 0.0;
24: PetscReal dp = 0.0, nsi = 0.0, sqn = 0.0, Anorm = 0.0, rnp = 0.0, pnp = 0.0, snp = 0.0, unp = 0.0, wnp = 0.0, xnp = 0.0, qnp = 0.0, znp = 0.0, mnz = 5.0, tol = PETSC_SQRT_MACHINE_EPSILON, eps = PETSC_MACHINE_EPSILON;
25: PetscReal ds = 0.0, dz = 0.0, dx = 0.0, dpp = 0.0, dq = 0.0, dm = 0.0, du = 0.0, dw = 0.0, db = 0.0, errr = 0.0, errrprev = 0.0, errs = 0.0, errw = 0.0, errz = 0.0, errncr = 0.0, errncs = 0.0, errncw = 0.0, errncz = 0.0;
26: Vec X, B, Z, P, W, Q, U, M, N, R, S;
27: Mat Amat, Pmat;
28: PetscBool diagonalscale;
30: PetscFunctionBegin;
31: PetscCall(PCGetDiagonalScale(ksp->pc, &diagonalscale));
32: PetscCheck(!diagonalscale, PetscObjectComm((PetscObject)ksp), PETSC_ERR_SUP, "Krylov method %s does not support diagonal scaling", ((PetscObject)ksp)->type_name);
34: X = ksp->vec_sol;
35: B = ksp->vec_rhs;
36: M = ksp->work[0];
37: Z = ksp->work[1];
38: P = ksp->work[2];
39: N = ksp->work[3];
40: W = ksp->work[4];
41: Q = ksp->work[5];
42: U = ksp->work[6];
43: R = ksp->work[7];
44: S = ksp->work[8];
46: PetscCall(PCGetOperators(ksp->pc, &Amat, &Pmat));
48: ksp->its = 0;
49: if (!ksp->guess_zero) {
50: PetscCall(KSP_MatMult(ksp, Amat, X, R)); /* r <- b - Ax */
51: PetscCall(VecAYPX(R, -1.0, B));
52: } else {
53: PetscCall(VecCopy(B, R)); /* r <- b (x is 0) */
54: }
56: PetscCall(KSP_PCApply(ksp, R, U)); /* u <- Br */
58: switch (ksp->normtype) {
59: case KSP_NORM_PRECONDITIONED:
60: PetscCall(VecNormBegin(U, NORM_2, &dp)); /* dp <- u'*u = e'*A'*B'*B*A'*e' */
61: PetscCall(VecNormBegin(B, NORM_2, &db));
62: PetscCall(PetscCommSplitReductionBegin(PetscObjectComm((PetscObject)U)));
63: PetscCall(KSP_MatMult(ksp, Amat, U, W)); /* w <- Au */
64: PetscCall(VecNormEnd(U, NORM_2, &dp));
65: PetscCall(VecNormEnd(B, NORM_2, &db));
66: break;
67: case KSP_NORM_UNPRECONDITIONED:
68: PetscCall(VecNormBegin(R, NORM_2, &dp)); /* dp <- r'*r = e'*A'*A*e */
69: PetscCall(VecNormBegin(B, NORM_2, &db));
70: PetscCall(PetscCommSplitReductionBegin(PetscObjectComm((PetscObject)R)));
71: PetscCall(KSP_MatMult(ksp, Amat, U, W)); /* w <- Au */
72: PetscCall(VecNormEnd(R, NORM_2, &dp));
73: PetscCall(VecNormEnd(B, NORM_2, &db));
74: break;
75: case KSP_NORM_NATURAL:
76: PetscCall(VecDotBegin(R, U, &gamma)); /* gamma <- u'*r */
77: PetscCall(VecNormBegin(B, NORM_2, &db));
78: PetscCall(PetscCommSplitReductionBegin(PetscObjectComm((PetscObject)R)));
79: PetscCall(KSP_MatMult(ksp, Amat, U, W)); /* w <- Au */
80: PetscCall(VecDotEnd(R, U, &gamma));
81: PetscCall(VecNormEnd(B, NORM_2, &db));
82: KSPCheckDot(ksp, gamma);
83: dp = PetscSqrtReal(PetscAbsScalar(gamma)); /* dp <- r'*u = r'*B*r = e'*A'*B*A*e */
84: break;
85: case KSP_NORM_NONE:
86: PetscCall(KSP_MatMult(ksp, Amat, U, W));
87: dp = 0.0;
88: break;
89: default:
90: SETERRQ(PetscObjectComm((PetscObject)ksp), PETSC_ERR_SUP, "%s", KSPNormTypes[ksp->normtype]);
91: }
92: PetscCall(KSPLogResidualHistory(ksp, dp));
93: PetscCall(KSPMonitor(ksp, 0, dp));
94: ksp->rnorm = dp;
95: PetscCall((*ksp->converged)(ksp, 0, dp, &ksp->reason, ksp->cnvP)); /* test for convergence */
96: if (ksp->reason) PetscFunctionReturn(PETSC_SUCCESS);
98: PetscCall(MatNorm(Amat, NORM_INFINITY, &Anorm));
99: PetscCall(VecGetSize(B, &nsize));
100: nsi = (PetscReal)nsize;
101: sqn = PetscSqrtReal(nsi);
103: do {
104: if (i > 1) {
105: pnp = dpp;
106: snp = ds;
107: qnp = dq;
108: znp = dz;
109: }
110: if (i > 0) {
111: rnp = dp;
112: unp = du;
113: wnp = dw;
114: xnp = dx;
115: alphap = alpha;
116: betap = beta;
117: }
119: if (i > 0 && ksp->normtype == KSP_NORM_UNPRECONDITIONED) {
120: PetscCall(VecNormBegin(R, NORM_2, &dp));
121: } else if (i > 0 && ksp->normtype == KSP_NORM_PRECONDITIONED) {
122: PetscCall(VecNormBegin(U, NORM_2, &dp));
123: }
124: if (!(i == 0 && ksp->normtype == KSP_NORM_NATURAL)) PetscCall(VecDotBegin(R, U, &gamma));
125: PetscCall(VecDotBegin(W, U, &delta));
127: if (i > 0) {
128: PetscCall(VecNormBegin(S, NORM_2, &ds));
129: PetscCall(VecNormBegin(Z, NORM_2, &dz));
130: PetscCall(VecNormBegin(P, NORM_2, &dpp));
131: PetscCall(VecNormBegin(Q, NORM_2, &dq));
132: PetscCall(VecNormBegin(M, NORM_2, &dm));
133: }
134: PetscCall(VecNormBegin(X, NORM_2, &dx));
135: PetscCall(VecNormBegin(U, NORM_2, &du));
136: PetscCall(VecNormBegin(W, NORM_2, &dw));
138: PetscCall(PetscCommSplitReductionBegin(PetscObjectComm((PetscObject)R)));
139: PetscCall(KSP_PCApply(ksp, W, M)); /* m <- Bw */
140: PetscCall(KSP_MatMult(ksp, Amat, M, N)); /* n <- Am */
142: if (i > 0 && ksp->normtype == KSP_NORM_UNPRECONDITIONED) {
143: PetscCall(VecNormEnd(R, NORM_2, &dp));
144: } else if (i > 0 && ksp->normtype == KSP_NORM_PRECONDITIONED) {
145: PetscCall(VecNormEnd(U, NORM_2, &dp));
146: }
147: if (!(i == 0 && ksp->normtype == KSP_NORM_NATURAL)) PetscCall(VecDotEnd(R, U, &gamma));
148: PetscCall(VecDotEnd(W, U, &delta));
150: if (i > 0) {
151: PetscCall(VecNormEnd(S, NORM_2, &ds));
152: PetscCall(VecNormEnd(Z, NORM_2, &dz));
153: PetscCall(VecNormEnd(P, NORM_2, &dpp));
154: PetscCall(VecNormEnd(Q, NORM_2, &dq));
155: PetscCall(VecNormEnd(M, NORM_2, &dm));
156: }
157: PetscCall(VecNormEnd(X, NORM_2, &dx));
158: PetscCall(VecNormEnd(U, NORM_2, &du));
159: PetscCall(VecNormEnd(W, NORM_2, &dw));
161: if (i > 0) {
162: if (ksp->normtype == KSP_NORM_NATURAL) dp = PetscSqrtReal(PetscAbsScalar(gamma));
163: else if (ksp->normtype == KSP_NORM_NONE) dp = 0.0;
165: ksp->rnorm = dp;
166: PetscCall(KSPLogResidualHistory(ksp, dp));
167: PetscCall(KSPMonitor(ksp, i, dp));
168: PetscCall((*ksp->converged)(ksp, i, dp, &ksp->reason, ksp->cnvP));
169: if (ksp->reason) PetscFunctionReturn(PETSC_SUCCESS);
170: }
172: if (i == 0) {
173: alpha = gamma / delta;
174: PetscCall(VecCopy(N, Z)); /* z <- n */
175: PetscCall(VecCopy(M, Q)); /* q <- m */
176: PetscCall(VecCopy(U, P)); /* p <- u */
177: PetscCall(VecCopy(W, S)); /* s <- w */
178: } else {
179: beta = gamma / gammaold;
180: alpha = gamma / (delta - beta / alpha * gamma);
181: PetscCall(VecAYPX(Z, beta, N)); /* z <- n + beta * z */
182: PetscCall(VecAYPX(Q, beta, M)); /* q <- m + beta * q */
183: PetscCall(VecAYPX(P, beta, U)); /* p <- u + beta * p */
184: PetscCall(VecAYPX(S, beta, W)); /* s <- w + beta * s */
185: }
186: PetscCall(VecAXPY(X, alpha, P)); /* x <- x + alpha * p */
187: PetscCall(VecAXPY(U, -alpha, Q)); /* u <- u - alpha * q */
188: PetscCall(VecAXPY(W, -alpha, Z)); /* w <- w - alpha * z */
189: PetscCall(VecAXPY(R, -alpha, S)); /* r <- r - alpha * s */
190: gammaold = gamma;
192: if (i > 0) {
193: errncr = PetscSqrtReal(Anorm * xnp + 2.0 * Anorm * PetscAbsScalar(alphap) * dpp + rnp + 2.0 * PetscAbsScalar(alphap) * ds) * eps;
194: errncw = PetscSqrtReal(Anorm * unp + 2.0 * Anorm * PetscAbsScalar(alphap) * dq + wnp + 2.0 * PetscAbsScalar(alphap) * dz) * eps;
195: }
196: if (i > 1) {
197: errncs = PetscSqrtReal(Anorm * unp + 2.0 * Anorm * PetscAbsScalar(betap) * pnp + wnp + 2.0 * PetscAbsScalar(betap) * snp) * eps;
198: errncz = PetscSqrtReal((mnz * sqn + 2) * Anorm * dm + 2.0 * Anorm * PetscAbsScalar(betap) * qnp + 2.0 * PetscAbsScalar(betap) * znp) * eps;
199: }
201: if (i > 0) {
202: if (i == 1) {
203: errr = PetscSqrtReal((mnz * sqn + 1) * Anorm * xnp + db) * eps + PetscSqrtReal(PetscAbsScalar(alphap) * mnz * sqn * Anorm * dpp) * eps + errncr;
204: errs = PetscSqrtReal(mnz * sqn * Anorm * dpp) * eps;
205: errw = PetscSqrtReal(mnz * sqn * Anorm * unp) * eps + PetscSqrtReal(PetscAbsScalar(alphap) * mnz * sqn * Anorm * dq) * eps + errncw;
206: errz = PetscSqrtReal(mnz * sqn * Anorm * dq) * eps;
207: } else if (replace == 1) {
208: errrprev = errr;
209: errr = PetscSqrtReal((mnz * sqn + 1) * Anorm * dx + db) * eps;
210: errs = PetscSqrtReal(mnz * sqn * Anorm * dpp) * eps;
211: errw = PetscSqrtReal(mnz * sqn * Anorm * du) * eps;
212: errz = PetscSqrtReal(mnz * sqn * Anorm * dq) * eps;
213: replace = 0;
214: } else {
215: errrprev = errr;
216: errr = errr + PetscAbsScalar(alphap) * PetscAbsScalar(betap) * errs + PetscAbsScalar(alphap) * errw + errncr + PetscAbsScalar(alphap) * errncs;
217: errs = errw + PetscAbsScalar(betap) * errs + errncs;
218: errw = errw + PetscAbsScalar(alphap) * PetscAbsScalar(betap) * errz + errncw + PetscAbsScalar(alphap) * errncz;
219: errz = PetscAbsScalar(betap) * errz + errncz;
220: }
221: if (i > 1 && errrprev <= (tol * rnp) && errr > (tol * dp)) {
222: PetscCall(KSP_MatMult(ksp, Amat, X, R)); /* r <- Ax - b */
223: PetscCall(VecAYPX(R, -1.0, B));
224: PetscCall(KSP_PCApply(ksp, R, U)); /* u <- Br */
225: PetscCall(KSP_MatMult(ksp, Amat, U, W)); /* w <- Au */
226: PetscCall(KSP_MatMult(ksp, Amat, P, S)); /* s <- Ap */
227: PetscCall(KSP_PCApply(ksp, S, Q)); /* q <- Bs */
228: PetscCall(KSP_MatMult(ksp, Amat, Q, Z)); /* z <- Aq */
229: replace = 1;
230: }
231: }
233: i++;
234: ksp->its = i;
236: } while (i <= ksp->max_it);
237: if (!ksp->reason) ksp->reason = KSP_DIVERGED_ITS;
238: PetscFunctionReturn(PETSC_SUCCESS);
239: }
241: /*MC
242: KSPPIPECGRR - Pipelined conjugate gradient method with automated residual replacements {cite}`cools2018analyzing`. [](sec_pipelineksp)
244: Level: intermediate
246: Notes:
247: This method has only a single non-blocking reduction per iteration, compared to 2 blocking for standard `KSPCG`. The
248: non-blocking reduction is overlapped by the matrix-vector product and preconditioner application.
250: `KSPPIPECGRR` improves the robustness of `KSPPIPECG` by adding an automated residual replacement strategy.
251: True residual and other auxiliary variables are computed explicitly in a number of dynamically determined
252: iterations to counteract the accumulation of rounding errors and thus attain a higher maximal final accuracy.
254: See also `KSPPIPECG`, which is identical to `KSPPIPECGRR` without residual replacements.
255: See also `KSPPIPECR`, where the reduction is only overlapped with the matrix-vector product.
257: MPI configuration may be necessary for reductions to make asynchronous progress, which is important for
258: performance of pipelined methods. See [](doc_faq_pipelined)
260: Contributed by:
261: Siegfried Cools, Universiteit Antwerpen, Dept. Mathematics & Computer Science,
262: European FP7 Project on EXascale Algorithms and Advanced Computational Techniques (EXA2CT) / Research Foundation Flanders (FWO)
264: .seealso: [](ch_ksp), [](doc_faq_pipelined), [](sec_pipelineksp), `KSPCreate()`, `KSPSetType()`, `KSPPIPECR`, `KSPGROPPCG`, `KSPPIPECG`, `KSPPGMRES`, `KSPCG`, `KSPPIPEBCGS`, `KSPCGUseSingleReduction()`
265: M*/
266: PETSC_EXTERN PetscErrorCode KSPCreate_PIPECGRR(KSP ksp)
267: {
268: PetscFunctionBegin;
269: PetscCall(KSPSetSupportedNorm(ksp, KSP_NORM_UNPRECONDITIONED, PC_LEFT, 2));
270: PetscCall(KSPSetSupportedNorm(ksp, KSP_NORM_PRECONDITIONED, PC_LEFT, 2));
271: PetscCall(KSPSetSupportedNorm(ksp, KSP_NORM_NATURAL, PC_LEFT, 2));
272: PetscCall(KSPSetSupportedNorm(ksp, KSP_NORM_NONE, PC_LEFT, 1));
274: ksp->ops->setup = KSPSetUp_PIPECGRR;
275: ksp->ops->solve = KSPSolve_PIPECGRR;
276: ksp->ops->destroy = KSPDestroyDefault;
277: ksp->ops->view = NULL;
278: ksp->ops->setfromoptions = NULL;
279: ksp->ops->buildsolution = KSPBuildSolutionDefault;
280: ksp->ops->buildresidual = KSPBuildResidualDefault;
281: PetscFunctionReturn(PETSC_SUCCESS);
282: }