Actual source code: pipebcgs.c


  2: /*
  3:     This file implements pipelined BiCGStab (pipe-BiCGStab).
  4:     Only allow right preconditioning.
  5: */
  6: #include <../src/ksp/ksp/impls/bcgs/bcgsimpl.h>

  8: static PetscErrorCode KSPSetUp_PIPEBCGS(KSP ksp)
  9: {
 10:   KSPSetWorkVecs(ksp, 15);
 11:   return 0;
 12: }

 14: /* Only need a few hacks from KSPSolve_BCGS */
 15: #include <petsc/private/pcimpl.h>
 16: static PetscErrorCode KSPSolve_PIPEBCGS(KSP ksp)
 17: {
 18:   PetscInt    i;
 19:   PetscScalar rho, rhoold, alpha, beta, omega = 0.0, d1, d2, d3;
 20:   Vec         X, B, S, R, RP, Y, Q, P2, Q2, R2, S2, W, Z, W2, Z2, T, V;
 21:   PetscReal   dp   = 0.0;
 22:   KSP_BCGS   *bcgs = (KSP_BCGS *)ksp->data;
 23:   PC          pc;

 25:   X  = ksp->vec_sol;
 26:   B  = ksp->vec_rhs;
 27:   R  = ksp->work[0];
 28:   RP = ksp->work[1];
 29:   S  = ksp->work[2];
 30:   Y  = ksp->work[3];
 31:   Q  = ksp->work[4];
 32:   Q2 = ksp->work[5];
 33:   P2 = ksp->work[6];
 34:   R2 = ksp->work[7];
 35:   S2 = ksp->work[8];
 36:   W  = ksp->work[9];
 37:   Z  = ksp->work[10];
 38:   W2 = ksp->work[11];
 39:   Z2 = ksp->work[12];
 40:   T  = ksp->work[13];
 41:   V  = ksp->work[14];

 43:   /* Only supports right preconditioning */
 45:   if (!ksp->guess_zero) {
 46:     if (!bcgs->guess) VecDuplicate(X, &bcgs->guess);
 47:     VecCopy(X, bcgs->guess);
 48:   } else {
 49:     VecSet(X, 0.0);
 50:   }

 52:   /* Compute initial residual */
 53:   KSPGetPC(ksp, &pc);
 54:   PCSetUp(pc);
 55:   if (!ksp->guess_zero) {
 56:     KSP_MatMult(ksp, pc->mat, X, Q2);
 57:     VecCopy(B, R);
 58:     VecAXPY(R, -1.0, Q2);
 59:   } else {
 60:     VecCopy(B, R);
 61:   }

 63:   /* Test for nothing to do */
 64:   if (ksp->normtype != KSP_NORM_NONE) {
 65:     VecNorm(R, NORM_2, &dp);
 66:   } else dp = 0.0;
 67:   PetscObjectSAWsTakeAccess((PetscObject)ksp);
 68:   ksp->its   = 0;
 69:   ksp->rnorm = dp;
 70:   PetscObjectSAWsGrantAccess((PetscObject)ksp);
 71:   KSPLogResidualHistory(ksp, dp);
 72:   KSPMonitor(ksp, 0, dp);
 73:   (*ksp->converged)(ksp, 0, dp, &ksp->reason, ksp->cnvP);
 74:   if (ksp->reason) return 0;

 76:   /* Initialize */
 77:   VecCopy(R, RP); /* rp <- r */

 79:   VecDotBegin(R, RP, &rho); /* rho <- (r,rp) */
 80:   PetscCommSplitReductionBegin(PetscObjectComm((PetscObject)R));
 81:   KSP_PCApply(ksp, R, R2);          /* r2 <- K r */
 82:   KSP_MatMult(ksp, pc->mat, R2, W); /* w <- A r2 */
 83:   VecDotEnd(R, RP, &rho);

 85:   VecDotBegin(W, RP, &d2); /* d2 <- (w,rp) */
 86:   PetscCommSplitReductionBegin(PetscObjectComm((PetscObject)W));
 87:   KSP_PCApply(ksp, W, W2);          /* w2 <- K w */
 88:   KSP_MatMult(ksp, pc->mat, W2, T); /* t <- A w2 */
 89:   VecDotEnd(W, RP, &d2);

 91:   alpha = rho / d2;
 92:   beta  = 0.0;

 94:   /* Main loop */
 95:   i = 0;
 96:   do {
 97:     if (i == 0) {
 98:       VecCopy(R2, P2); /* p2 <- r2 */
 99:       VecCopy(W, S);   /* s  <- w  */
100:       VecCopy(W2, S2); /* s2 <- w2 */
101:       VecCopy(T, Z);   /* z  <- t  */
102:     } else {
103:       VecAXPBYPCZ(P2, 1.0, -beta * omega, beta, R2, S2); /* p2 <- beta * p2 + r2 - beta * omega * s2 */
104:       VecAXPBYPCZ(S, 1.0, -beta * omega, beta, W, Z);    /* s  <- beta * s  + w  - beta * omega * z  */
105:       VecAXPBYPCZ(S2, 1.0, -beta * omega, beta, W2, Z2); /* s2 <- beta * s2 + w2 - beta * omega * z2 */
106:       VecAXPBYPCZ(Z, 1.0, -beta * omega, beta, T, V);    /* z  <- beta * z  + t  - beta * omega * v  */
107:     }
108:     VecWAXPY(Q, -alpha, S, R);    /* q  <- r  - alpha s  */
109:     VecWAXPY(Q2, -alpha, S2, R2); /* q2 <- r2 - alpha s2 */
110:     VecWAXPY(Y, -alpha, Z, W);    /* y  <- w  - alpha z  */

112:     VecDotBegin(Q, Y, &d1); /* d1 <- (q,y) */
113:     VecDotBegin(Y, Y, &d2); /* d2 <- (y,y) */

115:     PetscCommSplitReductionBegin(PetscObjectComm((PetscObject)Q));
116:     KSP_PCApply(ksp, Z, Z2);          /* z2 <- K z */
117:     KSP_MatMult(ksp, pc->mat, Z2, V); /* v <- A z2 */

119:     VecDotEnd(Q, Y, &d1);
120:     VecDotEnd(Y, Y, &d2);

122:     if (d2 == 0.0) {
123:       /* y is 0. if q is 0, then alpha s == r, and hence alpha p may be our solution. Give it a try? */
124:       VecDot(Q, Q, &d1);
125:       if (d1 != 0.0) {
126:         ksp->reason = KSP_DIVERGED_BREAKDOWN;
127:         break;
128:       }
129:       VecAXPY(X, alpha, P2); /* x <- x + alpha p2 */
130:       PetscObjectSAWsTakeAccess((PetscObject)ksp);
131:       ksp->its++;
132:       ksp->rnorm  = 0.0;
133:       ksp->reason = KSP_CONVERGED_RTOL;
134:       PetscObjectSAWsGrantAccess((PetscObject)ksp);
135:       KSPLogResidualHistory(ksp, dp);
136:       KSPMonitor(ksp, i + 1, 0.0);
137:       break;
138:     }
139:     omega = d1 / d2;                                      /* omega <- (y'q) / (y'y) */
140:     VecAXPBYPCZ(X, alpha, omega, 1.0, P2, Q2); /* x <- alpha * p2 + omega * q2 + x */
141:     VecWAXPY(R, -omega, Y, Q);                 /* r <- q - omega y */
142:     VecWAXPY(R2, -alpha, Z2, W2);              /* r2 <- w2 - alpha z2 */
143:     VecAYPX(R2, -omega, Q2);                   /* r2 <- q2 - omega r2 */
144:     VecWAXPY(W, -alpha, V, T);                 /* w <- t - alpha v */
145:     VecAYPX(W, -omega, Y);                     /* w <- y - omega w */
146:     rhoold = rho;

148:     if (ksp->normtype != KSP_NORM_NONE && ksp->chknorm < i + 2) { VecNormBegin(R, NORM_2, &dp); /* dp <- norm(r) */ }
149:     VecDotBegin(R, RP, &rho); /* rho <- (r,rp) */
150:     VecDotBegin(S, RP, &d1);  /* d1 <- (s,rp) */
151:     VecDotBegin(W, RP, &d2);  /* d2 <- (w,rp) */
152:     VecDotBegin(Z, RP, &d3);  /* d3 <- (z,rp) */

154:     PetscCommSplitReductionBegin(PetscObjectComm((PetscObject)R));
155:     KSP_PCApply(ksp, W, W2);          /* w2 <- K w */
156:     KSP_MatMult(ksp, pc->mat, W2, T); /* t <- A w2 */

158:     if (ksp->normtype != KSP_NORM_NONE && ksp->chknorm < i + 2) VecNormEnd(R, NORM_2, &dp);
159:     VecDotEnd(R, RP, &rho);
160:     VecDotEnd(S, RP, &d1);
161:     VecDotEnd(W, RP, &d2);
162:     VecDotEnd(Z, RP, &d3);


166:     beta  = (rho / rhoold) * (alpha / omega);
167:     alpha = rho / (d2 + beta * d1 - beta * omega * d3); /* alpha <- rho / (d2 + beta * d1 - beta * omega * d3) */

169:     PetscObjectSAWsTakeAccess((PetscObject)ksp);
170:     ksp->its++;

172:     /* Residual replacement step  */
173:     if (i > 0 && i % 100 == 0 && i < 1001) {
174:       KSP_MatMult(ksp, pc->mat, X, R);
175:       VecAYPX(R, -1.0, B);              /* r  <- b - Ax */
176:       KSP_PCApply(ksp, R, R2);          /* r2 <- K r */
177:       KSP_MatMult(ksp, pc->mat, R2, W); /* w  <- A r2 */
178:       KSP_PCApply(ksp, W, W2);          /* w2 <- K w */
179:       KSP_MatMult(ksp, pc->mat, W2, T); /* t  <- A w2 */
180:       KSP_MatMult(ksp, pc->mat, P2, S); /* s  <- A p2 */
181:       KSP_PCApply(ksp, S, S2);          /* s2 <- K s */
182:       KSP_MatMult(ksp, pc->mat, S2, Z); /* z  <- A s2 */
183:       KSP_PCApply(ksp, Z, Z2);          /* z2 <- K z */
184:       KSP_MatMult(ksp, pc->mat, Z2, V); /* v  <- A z2 */
185:     }

187:     ksp->rnorm = dp;
188:     PetscObjectSAWsGrantAccess((PetscObject)ksp);
189:     KSPLogResidualHistory(ksp, dp);
190:     KSPMonitor(ksp, i + 1, dp);
191:     (*ksp->converged)(ksp, i + 1, dp, &ksp->reason, ksp->cnvP);
192:     if (ksp->reason) break;
193:     if (rho == 0.0) {
194:       ksp->reason = KSP_DIVERGED_BREAKDOWN;
195:       break;
196:     }
197:     i++;
198:   } while (i < ksp->max_it);

200:   if (i >= ksp->max_it) ksp->reason = KSP_DIVERGED_ITS;
201:   return 0;
202: }

204: /*MC
205:     KSPPIPEBCGS - Implements the pipelined BiCGStab method.

207:     This method has only two non-blocking reductions per iteration, compared to 3 blocking for standard FBCGS.  The
208:     non-blocking reductions are overlapped by matrix-vector products and preconditioner applications.

210:     Periodic residual replacement may be used to increase robustness and maximal attainable accuracy.

212:     Options Database Keys:
213:     see KSPSolve()

215:     Level: intermediate

217:     Notes:
218:     Like KSPFBCGS, the KSPPIPEBCGS implementation only allows for right preconditioning.
219:     MPI configuration may be necessary for reductions to make asynchronous progress, which is important for
220:     performance of pipelined methods. See the FAQ on the PETSc website for details.

222:     Contributed by:
223:     Siegfried Cools, Universiteit Antwerpen,
224:     EXA2CT European Project on EXascale Algorithms and Advanced Computational Techniques, 2016.

226:     Reference:
227:     S. Cools and W. Vanroose,
228:     "The communication-hiding pipelined BiCGStab method for the parallel solution of large unsymmetric linear systems",
229:     Parallel Computing, 65:1-20, 2017.

231: .seealso: `KSPCreate()`, `KSPSetType()`, `KSPType`, `KSP`, `KSPBICG`, `KSPFBCGS`, `KSPFBCGSL`, `KSPSetPCSide()`
232: M*/
233: PETSC_EXTERN PetscErrorCode KSPCreate_PIPEBCGS(KSP ksp)
234: {
235:   KSP_BCGS *bcgs;

237:   PetscNew(&bcgs);

239:   ksp->data                = bcgs;
240:   ksp->ops->setup          = KSPSetUp_PIPEBCGS;
241:   ksp->ops->solve          = KSPSolve_PIPEBCGS;
242:   ksp->ops->destroy        = KSPDestroy_BCGS;
243:   ksp->ops->reset          = KSPReset_BCGS;
244:   ksp->ops->buildresidual  = KSPBuildResidualDefault;
245:   ksp->ops->setfromoptions = KSPSetFromOptions_BCGS;

247:   KSPSetSupportedNorm(ksp, KSP_NORM_UNPRECONDITIONED, PC_RIGHT, 2);
248:   KSPSetSupportedNorm(ksp, KSP_NORM_NONE, PC_RIGHT, 1);
249:   return 0;
250: }