Actual source code: pipecgrr.c
  1: #include <petsc/private/kspimpl.h>

  3: /*
  4:      KSPSetUp_PIPECGRR - Sets up the workspace needed by the PIPECGRR method.

  6:       This is called once, usually automatically by KSPSolve() or KSPSetUp()
  7:      but can be called directly by KSPSetUp()
  8: */
  9: static PetscErrorCode KSPSetUp_PIPECGRR(KSP ksp)
 10: {
 11:   PetscFunctionBegin;
 12:   /* get work vectors needed by PIPECGRR */
 13:   PetscCall(KSPSetWorkVecs(ksp, 9));
 14:   PetscFunctionReturn(PETSC_SUCCESS);
 15: }

 17: /*
 18:  KSPSolve_PIPECGRR - This routine actually applies the pipelined conjugate gradient method with automated residual replacement
 19: */
 20: static PetscErrorCode KSPSolve_PIPECGRR(KSP ksp)
 21: {
 22:   PetscInt    i = 0, replace = 0, nsize;
 23:   PetscScalar alpha = 0.0, beta = 0.0, gamma = 0.0, gammaold = 0.0, delta = 0.0, alphap = 0.0, betap = 0.0;
 24:   PetscReal   dp = 0.0, nsi = 0.0, sqn = 0.0, Anorm = 0.0, rnp = 0.0, pnp = 0.0, snp = 0.0, unp = 0.0, wnp = 0.0, xnp = 0.0, qnp = 0.0, znp = 0.0, mnz = 5.0, tol = PETSC_SQRT_MACHINE_EPSILON, eps = PETSC_MACHINE_EPSILON;
 25:   PetscReal   ds = 0.0, dz = 0.0, dx = 0.0, dpp = 0.0, dq = 0.0, dm = 0.0, du = 0.0, dw = 0.0, db = 0.0, errr = 0.0, errrprev = 0.0, errs = 0.0, errw = 0.0, errz = 0.0, errncr = 0.0, errncs = 0.0, errncw = 0.0, errncz = 0.0;
 26:   Vec         X, B, Z, P, W, Q, U, M, N, R, S;
 27:   Mat         Amat, Pmat;
 28:   PetscBool   diagonalscale;

 30:   PetscFunctionBegin;
 31:   PetscCall(PCGetDiagonalScale(ksp->pc, &diagonalscale));
 32:   PetscCheck(!diagonalscale, PetscObjectComm((PetscObject)ksp), PETSC_ERR_SUP, "Krylov method %s does not support diagonal scaling", ((PetscObject)ksp)->type_name);

 34:   X = ksp->vec_sol;
 35:   B = ksp->vec_rhs;
 36:   M = ksp->work[0];
 37:   Z = ksp->work[1];
 38:   P = ksp->work[2];
 39:   N = ksp->work[3];
 40:   W = ksp->work[4];
 41:   Q = ksp->work[5];
 42:   U = ksp->work[6];
 43:   R = ksp->work[7];
 44:   S = ksp->work[8];

 46:   PetscCall(PCGetOperators(ksp->pc, &Amat, &Pmat));

 48:   ksp->its = 0;
 49:   if (!ksp->guess_zero) {
 50:     PetscCall(KSP_MatMult(ksp, Amat, X, R)); /*  r <- b - Ax  */
 51:     PetscCall(VecAYPX(R, -1.0, B));
 52:   } else {
 53:     PetscCall(VecCopy(B, R)); /*  r <- b (x is 0)  */
 54:   }

 56:   PetscCall(KSP_PCApply(ksp, R, U)); /*  u <- Br  */

 58:   switch (ksp->normtype) {
 59:   case KSP_NORM_PRECONDITIONED:
 60:     PetscCall(VecNormBegin(U, NORM_2, &dp)); /*  dp <- u'*u = e'*A'*B'*B*A'*e'  */
 61:     PetscCall(VecNormBegin(B, NORM_2, &db));
 62:     PetscCall(PetscCommSplitReductionBegin(PetscObjectComm((PetscObject)U)));
 63:     PetscCall(KSP_MatMult(ksp, Amat, U, W)); /*  w <- Au  */
 64:     PetscCall(VecNormEnd(U, NORM_2, &dp));
 65:     PetscCall(VecNormEnd(B, NORM_2, &db));
 66:     break;
 67:   case KSP_NORM_UNPRECONDITIONED:
 68:     PetscCall(VecNormBegin(R, NORM_2, &dp)); /*  dp <- r'*r = e'*A'*A*e  */
 69:     PetscCall(VecNormBegin(B, NORM_2, &db));
 70:     PetscCall(PetscCommSplitReductionBegin(PetscObjectComm((PetscObject)R)));
 71:     PetscCall(KSP_MatMult(ksp, Amat, U, W)); /*  w <- Au  */
 72:     PetscCall(VecNormEnd(R, NORM_2, &dp));
 73:     PetscCall(VecNormEnd(B, NORM_2, &db));
 74:     break;
 75:   case KSP_NORM_NATURAL:
 76:     PetscCall(VecDotBegin(R, U, &gamma)); /*  gamma <- u'*r  */
 77:     PetscCall(VecNormBegin(B, NORM_2, &db));
 78:     PetscCall(PetscCommSplitReductionBegin(PetscObjectComm((PetscObject)R)));
 79:     PetscCall(KSP_MatMult(ksp, Amat, U, W)); /*  w <- Au  */
 80:     PetscCall(VecDotEnd(R, U, &gamma));
 81:     PetscCall(VecNormEnd(B, NORM_2, &db));
 82:     KSPCheckDot(ksp, gamma);
 83:     dp = PetscSqrtReal(PetscAbsScalar(gamma)); /*  dp <- r'*u = r'*B*r = e'*A'*B*A*e  */
 84:     break;
 85:   case KSP_NORM_NONE:
 86:     PetscCall(KSP_MatMult(ksp, Amat, U, W));
 87:     dp = 0.0;
 88:     break;
 89:   default:
 90:     SETERRQ(PetscObjectComm((PetscObject)ksp), PETSC_ERR_SUP, "%s", KSPNormTypes[ksp->normtype]);
 91:   }
 92:   PetscCall(KSPLogResidualHistory(ksp, dp));
 93:   PetscCall(KSPMonitor(ksp, 0, dp));
 94:   ksp->rnorm = dp;
 95:   PetscCall((*ksp->converged)(ksp, 0, dp, &ksp->reason, ksp->cnvP)); /*  test for convergence  */
 96:   if (ksp->reason) PetscFunctionReturn(PETSC_SUCCESS);

 98:   PetscCall(MatNorm(Amat, NORM_INFINITY, &Anorm));
 99:   PetscCall(VecGetSize(B, &nsize));
100:   nsi = (PetscReal)nsize;
101:   sqn = PetscSqrtReal(nsi);

103:   do {
104:     if (i > 1) {
105:       pnp = dpp;
106:       snp = ds;
107:       qnp = dq;
108:       znp = dz;
109:     }
110:     if (i > 0) {
111:       rnp    = dp;
112:       unp    = du;
113:       wnp    = dw;
114:       xnp    = dx;
115:       alphap = alpha;
116:       betap  = beta;
117:     }

119:     if (i > 0 && ksp->normtype == KSP_NORM_UNPRECONDITIONED) {
120:       PetscCall(VecNormBegin(R, NORM_2, &dp));
121:     } else if (i > 0 && ksp->normtype == KSP_NORM_PRECONDITIONED) {
122:       PetscCall(VecNormBegin(U, NORM_2, &dp));
123:     }
124:     if (!(i == 0 && ksp->normtype == KSP_NORM_NATURAL)) PetscCall(VecDotBegin(R, U, &gamma));
125:     PetscCall(VecDotBegin(W, U, &delta));

127:     if (i > 0) {
128:       PetscCall(VecNormBegin(S, NORM_2, &ds));
129:       PetscCall(VecNormBegin(Z, NORM_2, &dz));
130:       PetscCall(VecNormBegin(P, NORM_2, &dpp));
131:       PetscCall(VecNormBegin(Q, NORM_2, &dq));
132:       PetscCall(VecNormBegin(M, NORM_2, &dm));
133:     }
134:     PetscCall(VecNormBegin(X, NORM_2, &dx));
135:     PetscCall(VecNormBegin(U, NORM_2, &du));
136:     PetscCall(VecNormBegin(W, NORM_2, &dw));

138:     PetscCall(PetscCommSplitReductionBegin(PetscObjectComm((PetscObject)R)));
139:     PetscCall(KSP_PCApply(ksp, W, M));       /*   m <- Bw       */
140:     PetscCall(KSP_MatMult(ksp, Amat, M, N)); /*   n <- Am       */

142:     if (i > 0 && ksp->normtype == KSP_NORM_UNPRECONDITIONED) {
143:       PetscCall(VecNormEnd(R, NORM_2, &dp));
144:     } else if (i > 0 && ksp->normtype == KSP_NORM_PRECONDITIONED) {
145:       PetscCall(VecNormEnd(U, NORM_2, &dp));
146:     }
147:     if (!(i == 0 && ksp->normtype == KSP_NORM_NATURAL)) PetscCall(VecDotEnd(R, U, &gamma));
148:     PetscCall(VecDotEnd(W, U, &delta));

150:     if (i > 0) {
151:       PetscCall(VecNormEnd(S, NORM_2, &ds));
152:       PetscCall(VecNormEnd(Z, NORM_2, &dz));
153:       PetscCall(VecNormEnd(P, NORM_2, &dpp));
154:       PetscCall(VecNormEnd(Q, NORM_2, &dq));
155:       PetscCall(VecNormEnd(M, NORM_2, &dm));
156:     }
157:     PetscCall(VecNormEnd(X, NORM_2, &dx));
158:     PetscCall(VecNormEnd(U, NORM_2, &du));
159:     PetscCall(VecNormEnd(W, NORM_2, &dw));

161:     if (i > 0) {
162:       if (ksp->normtype == KSP_NORM_NATURAL) dp = PetscSqrtReal(PetscAbsScalar(gamma));
163:       else if (ksp->normtype == KSP_NORM_NONE) dp = 0.0;

165:       ksp->rnorm = dp;
166:       PetscCall(KSPLogResidualHistory(ksp, dp));
167:       PetscCall(KSPMonitor(ksp, i, dp));
168:       PetscCall((*ksp->converged)(ksp, i, dp, &ksp->reason, ksp->cnvP));
169:       if (ksp->reason) PetscFunctionReturn(PETSC_SUCCESS);
170:     }

172:     if (i == 0) {
173:       alpha = gamma / delta;
174:       PetscCall(VecCopy(N, Z)); /*  z <- n  */
175:       PetscCall(VecCopy(M, Q)); /*  q <- m  */
176:       PetscCall(VecCopy(U, P)); /*  p <- u  */
177:       PetscCall(VecCopy(W, S)); /*  s <- w  */
178:     } else {
179:       beta  = gamma / gammaold;
180:       alpha = gamma / (delta - beta / alpha * gamma);
181:       PetscCall(VecAYPX(Z, beta, N)); /*  z <- n + beta * z  */
182:       PetscCall(VecAYPX(Q, beta, M)); /*  q <- m + beta * q  */
183:       PetscCall(VecAYPX(P, beta, U)); /*  p <- u + beta * p  */
184:       PetscCall(VecAYPX(S, beta, W)); /*  s <- w + beta * s  */
185:     }
186:     PetscCall(VecAXPY(X, alpha, P));  /*  x <- x + alpha * p  */
187:     PetscCall(VecAXPY(U, -alpha, Q)); /*  u <- u - alpha * q  */
188:     PetscCall(VecAXPY(W, -alpha, Z)); /*  w <- w - alpha * z  */
189:     PetscCall(VecAXPY(R, -alpha, S)); /*  r <- r - alpha * s  */
190:     gammaold = gamma;

192:     if (i > 0) {
193:       errncr = PetscSqrtReal(Anorm * xnp + 2.0 * Anorm * PetscAbsScalar(alphap) * dpp + rnp + 2.0 * PetscAbsScalar(alphap) * ds) * eps;
194:       errncw = PetscSqrtReal(Anorm * unp + 2.0 * Anorm * PetscAbsScalar(alphap) * dq + wnp + 2.0 * PetscAbsScalar(alphap) * dz) * eps;
195:     }
196:     if (i > 1) {
197:       errncs = PetscSqrtReal(Anorm * unp + 2.0 * Anorm * PetscAbsScalar(betap) * pnp + wnp + 2.0 * PetscAbsScalar(betap) * snp) * eps;
198:       errncz = PetscSqrtReal((mnz * sqn + 2) * Anorm * dm + 2.0 * Anorm * PetscAbsScalar(betap) * qnp + 2.0 * PetscAbsScalar(betap) * znp) * eps;
199:     }

201:     if (i > 0) {
202:       if (i == 1) {
203:         errr = PetscSqrtReal((mnz * sqn + 1) * Anorm * xnp + db) * eps + PetscSqrtReal(PetscAbsScalar(alphap) * mnz * sqn * Anorm * dpp) * eps + errncr;
204:         errs = PetscSqrtReal(mnz * sqn * Anorm * dpp) * eps;
205:         errw = PetscSqrtReal(mnz * sqn * Anorm * unp) * eps + PetscSqrtReal(PetscAbsScalar(alphap) * mnz * sqn * Anorm * dq) * eps + errncw;
206:         errz = PetscSqrtReal(mnz * sqn * Anorm * dq) * eps;
207:       } else if (replace == 1) {
208:         errrprev = errr;
209:         errr     = PetscSqrtReal((mnz * sqn + 1) * Anorm * dx + db) * eps;
210:         errs     = PetscSqrtReal(mnz * sqn * Anorm * dpp) * eps;
211:         errw     = PetscSqrtReal(mnz * sqn * Anorm * du) * eps;
212:         errz     = PetscSqrtReal(mnz * sqn * Anorm * dq) * eps;
213:         replace  = 0;
214:       } else {
215:         errrprev = errr;
216:         errr     = errr + PetscAbsScalar(alphap) * PetscAbsScalar(betap) * errs + PetscAbsScalar(alphap) * errw + errncr + PetscAbsScalar(alphap) * errncs;
217:         errs     = errw + PetscAbsScalar(betap) * errs + errncs;
218:         errw     = errw + PetscAbsScalar(alphap) * PetscAbsScalar(betap) * errz + errncw + PetscAbsScalar(alphap) * errncz;
219:         errz     = PetscAbsScalar(betap) * errz + errncz;
220:       }
221:       if (i > 1 && errrprev <= (tol * rnp) && errr > (tol * dp)) {
222:         PetscCall(KSP_MatMult(ksp, Amat, X, R)); /*  r <- Ax - b  */
223:         PetscCall(VecAYPX(R, -1.0, B));
224:         PetscCall(KSP_PCApply(ksp, R, U));       /*  u <- Br  */
225:         PetscCall(KSP_MatMult(ksp, Amat, U, W)); /*  w <- Au  */
226:         PetscCall(KSP_MatMult(ksp, Amat, P, S)); /*  s <- Ap  */
227:         PetscCall(KSP_PCApply(ksp, S, Q));       /*  q <- Bs  */
228:         PetscCall(KSP_MatMult(ksp, Amat, Q, Z)); /*  z <- Aq  */
229:         replace = 1;
230:       }
231:     }

233:     i++;
234:     ksp->its = i;

236:   } while (i <= ksp->max_it);
237:   if (!ksp->reason) ksp->reason = KSP_DIVERGED_ITS;
238:   PetscFunctionReturn(PETSC_SUCCESS);
239: }

241: /*MC
242:    KSPPIPECGRR - Pipelined conjugate gradient method with automated residual replacements {cite}`cools2018analyzing`. [](sec_pipelineksp)

244:    Level: intermediate

246:    Notes:
247:    This method has only a single non-blocking reduction per iteration, compared to 2 blocking for standard `KSPCG`.  The
248:    non-blocking reduction is overlapped by the matrix-vector product and preconditioner application.

250:    `KSPPIPECGRR` improves the robustness of `KSPPIPECG` by adding an automated residual replacement strategy.
251:    True residual and other auxiliary variables are computed explicitly in a number of dynamically determined
252:    iterations to counteract the accumulation of rounding errors and thus attain a higher maximal final accuracy.

254:    See also `KSPPIPECG`, which is identical to `KSPPIPECGRR` without residual replacements.
255:    See also `KSPPIPECR`, where the reduction is only overlapped with the matrix-vector product.

257:    MPI configuration may be necessary for reductions to make asynchronous progress, which is important for
258:    performance of pipelined methods. See [](doc_faq_pipelined)

260:    Contributed by:
261:    Siegfried Cools, Universiteit Antwerpen, Dept. Mathematics & Computer Science,
262:    European FP7 Project on EXascale Algorithms and Advanced Computational Techniques (EXA2CT) / Research Foundation Flanders (FWO)

264: .seealso: [](ch_ksp), [](doc_faq_pipelined), [](sec_pipelineksp), `KSPCreate()`, `KSPSetType()`, `KSPPIPECR`, `KSPGROPPCG`, `KSPPIPECG`, `KSPPGMRES`, `KSPCG`, `KSPPIPEBCGS`, `KSPCGUseSingleReduction()`
265: M*/
266: PETSC_EXTERN PetscErrorCode KSPCreate_PIPECGRR(KSP ksp)
267: {
268:   PetscFunctionBegin;
269:   PetscCall(KSPSetSupportedNorm(ksp, KSP_NORM_UNPRECONDITIONED, PC_LEFT, 2));
270:   PetscCall(KSPSetSupportedNorm(ksp, KSP_NORM_PRECONDITIONED, PC_LEFT, 2));
271:   PetscCall(KSPSetSupportedNorm(ksp, KSP_NORM_NATURAL, PC_LEFT, 2));
272:   PetscCall(KSPSetSupportedNorm(ksp, KSP_NORM_NONE, PC_LEFT, 1));

274:   ksp->ops->setup          = KSPSetUp_PIPECGRR;
275:   ksp->ops->solve          = KSPSolve_PIPECGRR;
276:   ksp->ops->destroy        = KSPDestroyDefault;
277:   ksp->ops->view           = NULL;
278:   ksp->ops->setfromoptions = NULL;
279:   ksp->ops->buildsolution  = KSPBuildSolutionDefault;
280:   ksp->ops->buildresidual  = KSPBuildResidualDefault;
281:   PetscFunctionReturn(PETSC_SUCCESS);
282: }