Actual source code: pipeprcg.c
  1: #include <petsc/private/kspimpl.h>

  3: typedef struct KSP_CG_PIPE_PR_s KSP_CG_PIPE_PR;
  4: struct KSP_CG_PIPE_PR_s {
  5:   PetscBool rc_w_q; /* flag to determine whether w_k should be recomputed with A r_k */
  6: };

  8: /*
  9:      KSPSetUp_PIPEPRCG - Sets up the workspace needed by the PIPEPRCG method.

 11:       This is called once, usually automatically by KSPSolve() or KSPSetUp()
 12:      but can be called directly by KSPSetUp()
 13: */
 14: static PetscErrorCode KSPSetUp_PIPEPRCG(KSP ksp)
 15: {
 16:   PetscFunctionBegin;
 17:   /* get work vectors needed by PIPEPRCG */
 18:   PetscCall(KSPSetWorkVecs(ksp, 9));

 20:   PetscFunctionReturn(PETSC_SUCCESS);
 21: }

 23: static PetscErrorCode KSPSetFromOptions_PIPEPRCG(KSP ksp, PetscOptionItems *PetscOptionsObject)
 24: {
 25:   KSP_CG_PIPE_PR *prcg = (KSP_CG_PIPE_PR *)ksp->data;
 26:   PetscBool       flag = PETSC_FALSE;

 28:   PetscFunctionBegin;
 29:   PetscOptionsHeadBegin(PetscOptionsObject, "KSP PIPEPRCG options");
 30:   PetscCall(PetscOptionsBool("-recompute_w", "-recompute w_k with Ar_k? (default = True)", "", prcg->rc_w_q, &prcg->rc_w_q, &flag));
 31:   if (!flag) prcg->rc_w_q = PETSC_TRUE;
 32:   PetscOptionsHeadEnd();
 33:   PetscFunctionReturn(PETSC_SUCCESS);
 34: }

 36: /*
 37:  KSPSolve_PIPEPRCG - This routine actually applies the pipelined predict and recompute conjugate gradient method
 38: */
 39: static PetscErrorCode KSPSolve_PIPEPRCG(KSP ksp)
 40: {
 41:   PetscInt        i;
 42:   KSP_CG_PIPE_PR *prcg  = (KSP_CG_PIPE_PR *)ksp->data;
 43:   PetscScalar     alpha = 0.0, beta = 0.0, nu = 0.0, nu_old = 0.0, mudelgam[3], *mu_p, *delta_p, *gamma_p;
 44:   PetscReal       dp = 0.0;
 45:   Vec             X, B, R, RT, W, WT, P, S, ST, U, UT, PRTST[3];
 46:   Mat             Amat, Pmat;
 47:   PetscBool       diagonalscale, rc_w_q = prcg->rc_w_q;

 49:   /* note that these are pointers to entries of muldelgam, different than nu */
 50:   mu_p    = &mudelgam[0];
 51:   delta_p = &mudelgam[1];
 52:   gamma_p = &mudelgam[2];

 54:   PetscFunctionBegin;

 56:   PetscCall(PCGetDiagonalScale(ksp->pc, &diagonalscale));
 57:   PetscCheck(!diagonalscale, PetscObjectComm((PetscObject)ksp), PETSC_ERR_SUP, "Krylov method %s does not support diagonal scaling", ((PetscObject)ksp)->type_name);

 59:   X  = ksp->vec_sol;
 60:   B  = ksp->vec_rhs;
 61:   R  = ksp->work[0];
 62:   RT = ksp->work[1];
 63:   W  = ksp->work[2];
 64:   WT = ksp->work[3];
 65:   P  = ksp->work[4];
 66:   S  = ksp->work[5];
 67:   ST = ksp->work[6];
 68:   U  = ksp->work[7];
 69:   UT = ksp->work[8];

 71:   PetscCall(PCGetOperators(ksp->pc, &Amat, &Pmat));

 73:   /* initialize */
 74:   ksp->its = 0;
 75:   if (!ksp->guess_zero) {
 76:     PetscCall(KSP_MatMult(ksp, Amat, X, R)); /*   r <- b - Ax  */
 77:     PetscCall(VecAYPX(R, -1.0, B));
 78:   } else {
 79:     PetscCall(VecCopy(B, R)); /*   r <- b       */
 80:   }

 82:   PetscCall(KSP_PCApply(ksp, R, RT));       /*   rt <- Br     */
 83:   PetscCall(KSP_MatMult(ksp, Amat, RT, W)); /*   w <- A rt    */
 84:   PetscCall(KSP_PCApply(ksp, W, WT));       /*   wt <- B w    */

 86:   PetscCall(VecCopy(RT, P));  /*   p <- rt      */
 87:   PetscCall(VecCopy(W, S));   /*   p <- rt      */
 88:   PetscCall(VecCopy(WT, ST)); /*   p <- rt      */

 90:   PetscCall(KSP_MatMult(ksp, Amat, ST, U)); /*   u <- Ast     */
 91:   PetscCall(KSP_PCApply(ksp, U, UT));       /*   ut <- Bu     */

 93:   PetscCall(VecDotBegin(RT, R, &nu));
 94:   PetscCall(VecDotBegin(P, S, mu_p));
 95:   PetscCall(VecDotBegin(ST, S, gamma_p));

 97:   PetscCall(VecDotEnd(RT, R, &nu));     /*   nu    <- (rt,r)  */
 98:   PetscCall(VecDotEnd(P, S, mu_p));     /*   mu    <- (p,s)   */
 99:   PetscCall(VecDotEnd(ST, S, gamma_p)); /*   gamma <- (st,s)  */
100:   *delta_p = *mu_p;

102:   i = 0;
103:   do {
104:     /* Compute appropriate norm */
105:     switch (ksp->normtype) {
106:     case KSP_NORM_PRECONDITIONED:
107:       PetscCall(VecNormBegin(RT, NORM_2, &dp));
108:       PetscCall(PetscCommSplitReductionBegin(PetscObjectComm((PetscObject)RT)));
109:       PetscCall(VecNormEnd(RT, NORM_2, &dp));
110:       break;
111:     case KSP_NORM_UNPRECONDITIONED:
112:       PetscCall(VecNormBegin(R, NORM_2, &dp));
113:       PetscCall(PetscCommSplitReductionBegin(PetscObjectComm((PetscObject)R)));
114:       PetscCall(VecNormEnd(R, NORM_2, &dp));
115:       break;
116:     case KSP_NORM_NATURAL:
117:       dp = PetscSqrtReal(PetscAbsScalar(nu));
118:       break;
119:     case KSP_NORM_NONE:
120:       dp = 0.0;
121:       break;
122:     default:
123:       SETERRQ(PetscObjectComm((PetscObject)ksp), PETSC_ERR_SUP, "%s", KSPNormTypes[ksp->normtype]);
124:     }

126:     ksp->rnorm = dp;
127:     PetscCall(KSPLogResidualHistory(ksp, dp));
128:     PetscCall(KSPMonitor(ksp, i, dp));
129:     PetscCall((*ksp->converged)(ksp, i, dp, &ksp->reason, ksp->cnvP));
130:     if (ksp->reason) PetscFunctionReturn(PETSC_SUCCESS);

132:     /* update scalars */
133:     alpha  = nu / *mu_p;
134:     nu_old = nu;
135:     nu     = nu_old - 2. * alpha * (*delta_p) + (alpha * alpha) * (*gamma_p);
136:     beta   = nu / nu_old;

138:     /* update vectors */
139:     PetscCall(VecAXPY(X, alpha, P));    /*   x  <- x  + alpha * p   */
140:     PetscCall(VecAXPY(R, -alpha, S));   /*   r  <- r  - alpha * s   */
141:     PetscCall(VecAXPY(RT, -alpha, ST)); /*   rt <- rt - alpha * st  */
142:     PetscCall(VecAXPY(W, -alpha, U));   /*   w  <- w  - alpha * u   */
143:     PetscCall(VecAXPY(WT, -alpha, UT)); /*   wt <- wt - alpha * ut  */
144:     PetscCall(VecAYPX(P, beta, RT));    /*   p  <- rt + beta  * p   */
145:     PetscCall(VecAYPX(S, beta, W));     /*   s  <- w  + beta  * s   */
146:     PetscCall(VecAYPX(ST, beta, WT));   /*   st <- wt + beta  * st  */

148:     PetscCall(VecDotBegin(RT, R, &nu));

150:     PRTST[0] = P;
151:     PRTST[1] = RT;
152:     PRTST[2] = ST;

154:     PetscCall(VecMDotBegin(S, 3, PRTST, mudelgam));

156:     PetscCall(PetscCommSplitReductionBegin(PetscObjectComm((PetscObject)R)));

158:     PetscCall(KSP_MatMult(ksp, Amat, ST, U)); /*   u  <- A st             */
159:     PetscCall(KSP_PCApply(ksp, U, UT));       /*   ut <- B u              */

161:     /* predict-and-recompute */
162:     /* ideally this is combined with the previous matvec; i.e. equivalent of MDot */
163:     if (rc_w_q) {
164:       PetscCall(KSP_MatMult(ksp, Amat, RT, W)); /*   w  <- A rt             */
165:       PetscCall(KSP_PCApply(ksp, W, WT));       /*   wt <- B w              */
166:     }

168:     PetscCall(VecDotEnd(RT, R, &nu));
169:     PetscCall(VecMDotEnd(S, 3, PRTST, mudelgam));

171:     i++;
172:     ksp->its = i;

174:   } while (i <= ksp->max_it);
175:   if (!ksp->reason) ksp->reason = KSP_DIVERGED_ITS;
176:   PetscFunctionReturn(PETSC_SUCCESS);
177: }

179: /*MC
180:    KSPPIPEPRCG - Pipelined predict-and-recompute conjugate gradient method {cite}`chen2020predict`. [](sec_pipelineksp)

182:    Options Database Key:
183: .  -ksp_pipeprcg_recompute_w - recompute the $w_k$ with $Ar_k$, default is true

185:    Level: intermediate

187:    Notes:
188:    This method has only a single non-blocking reduction per iteration, compared to 2 blocking for standard `KSPCG`.
189:    The non-blocking reduction is overlapped by the matrix-vector product and preconditioner application.

191:    MPI configuration may be necessary for reductions to make asynchronous progress, which is important for performance of pipelined methods.
192:    See [](doc_faq_pipelined)

194:    Contributed by:
195:    Tyler Chen, University of Washington, Applied Mathematics Department

197:    Acknowledgments:
198:    This material is based upon work supported by the National Science Foundation Graduate Research Fellowship Program under Grant No. DGE-1762114.
199:    Any opinions, findings, and conclusions or recommendations expressed in this material are those of the author and do not necessarily
200:    reflect the views of the National Science Foundation.

202: .seealso: [](ch_ksp), [](doc_faq_pipelined), [](sec_pipelineksp), `KSPCreate()`, `KSPSetType()`, `KSPCG`, `KSPPIPECG`, `KSPPIPECR`, `KSPGROPPCG`, `KSPPGMRES`, `KSPCG`, `KSPCGUseSingleReduction()`
203: M*/
204: PETSC_EXTERN PetscErrorCode KSPCreate_PIPEPRCG(KSP ksp)
205: {
206:   KSP_CG_PIPE_PR *prcg = NULL;
207:   PetscBool       cite = PETSC_FALSE;

209:   PetscFunctionBegin;

211:   PetscCall(PetscCitationsRegister("@article{predict_and_recompute_cg,\n  author = {Tyler Chen and Erin C. Carson},\n  title = {Predict-and-recompute conjugate gradient variants},\n  journal = {},\n  year = {2020},\n  eprint = {1905.01549},\n  "
212:                                    "archivePrefix = {arXiv},\n  primaryClass = {cs.NA}\n}",
213:                                    &cite));

215:   PetscCall(PetscNew(&prcg));
216:   ksp->data = (void *)prcg;

218:   PetscCall(KSPSetSupportedNorm(ksp, KSP_NORM_UNPRECONDITIONED, PC_LEFT, 2));
219:   PetscCall(KSPSetSupportedNorm(ksp, KSP_NORM_PRECONDITIONED, PC_LEFT, 2));
220:   PetscCall(KSPSetSupportedNorm(ksp, KSP_NORM_NATURAL, PC_LEFT, 2));
221:   PetscCall(KSPSetSupportedNorm(ksp, KSP_NORM_NONE, PC_LEFT, 1));

223:   ksp->ops->setup          = KSPSetUp_PIPEPRCG;
224:   ksp->ops->solve          = KSPSolve_PIPEPRCG;
225:   ksp->ops->destroy        = KSPDestroyDefault;
226:   ksp->ops->view           = NULL;
227:   ksp->ops->setfromoptions = KSPSetFromOptions_PIPEPRCG;
228:   ksp->ops->buildsolution  = KSPBuildSolutionDefault;
229:   ksp->ops->buildresidual  = KSPBuildResidualDefault;
230:   PetscFunctionReturn(PETSC_SUCCESS);
231: }