Actual source code: pipecr.c
petsc-3.14.6 2021-03-30
1: #include <petsc/private/kspimpl.h>
3: /*
4: KSPSetUp_PIPECR - Sets up the workspace needed by the PIPECR method.
6: This is called once, usually automatically by KSPSolve() or KSPSetUp()
7: but can be called directly by KSPSetUp()
8: */
9: static PetscErrorCode KSPSetUp_PIPECR(KSP ksp)
10: {
14: /* get work vectors needed by PIPECR */
15: KSPSetWorkVecs(ksp,7);
16: return(0);
17: }
19: /*
20: KSPSolve_PIPECR - This routine actually applies the pipelined conjugate residual method
21: */
22: static PetscErrorCode KSPSolve_PIPECR(KSP ksp)
23: {
25: PetscInt i;
26: PetscScalar alpha=0.0,beta=0.0,gamma,gammaold=0.0,delta;
27: PetscReal dp = 0.0;
28: Vec X,B,Z,P,W,Q,U,M,N;
29: Mat Amat,Pmat;
30: PetscBool diagonalscale;
33: PCGetDiagonalScale(ksp->pc,&diagonalscale);
34: if (diagonalscale) SETERRQ1(PetscObjectComm((PetscObject)ksp),PETSC_ERR_SUP,"Krylov method %s does not support diagonal scaling",((PetscObject)ksp)->type_name);
36: X = ksp->vec_sol;
37: B = ksp->vec_rhs;
38: M = ksp->work[0];
39: Z = ksp->work[1];
40: P = ksp->work[2];
41: N = ksp->work[3];
42: W = ksp->work[4];
43: Q = ksp->work[5];
44: U = ksp->work[6];
46: PCGetOperators(ksp->pc,&Amat,&Pmat);
48: ksp->its = 0;
49: /* we don't have an R vector, so put the (unpreconditioned) residual in w for now */
50: if (!ksp->guess_zero) {
51: KSP_MatMult(ksp,Amat,X,W); /* w <- b - Ax */
52: VecAYPX(W,-1.0,B);
53: } else {
54: VecCopy(B,W); /* w <- b (x is 0) */
55: }
56: KSP_PCApply(ksp,W,U); /* u <- Bw */
58: switch (ksp->normtype) {
59: case KSP_NORM_PRECONDITIONED:
60: VecNormBegin(U,NORM_2,&dp); /* dp <- u'*u = e'*A'*B'*B*A'*e' */
61: PetscCommSplitReductionBegin(PetscObjectComm((PetscObject)U));
62: KSP_MatMult(ksp,Amat,U,W); /* w <- Au */
63: VecNormEnd(U,NORM_2,&dp);
64: break;
65: case KSP_NORM_NONE:
66: KSP_MatMult(ksp,Amat,U,W);
67: dp = 0.0;
68: break;
69: default: SETERRQ1(PetscObjectComm((PetscObject)ksp),PETSC_ERR_SUP,"%s",KSPNormTypes[ksp->normtype]);
70: }
71: KSPLogResidualHistory(ksp,dp);
72: KSPMonitor(ksp,0,dp);
73: ksp->rnorm = dp;
74: (*ksp->converged)(ksp,0,dp,&ksp->reason,ksp->cnvP); /* test for convergence */
75: if (ksp->reason) return(0);
77: i = 0;
78: do {
79: KSP_PCApply(ksp,W,M); /* m <- Bw */
81: if (i > 0 && ksp->normtype == KSP_NORM_PRECONDITIONED) {
82: VecNormBegin(U,NORM_2,&dp);
83: }
84: VecDotBegin(W,U,&gamma);
85: VecDotBegin(M,W,&delta);
86: PetscCommSplitReductionBegin(PetscObjectComm((PetscObject)U));
88: KSP_MatMult(ksp,Amat,M,N); /* n <- Am */
90: if (i > 0 && ksp->normtype == KSP_NORM_PRECONDITIONED) {
91: VecNormEnd(U,NORM_2,&dp);
92: }
93: VecDotEnd(W,U,&gamma);
94: VecDotEnd(M,W,&delta);
96: if (i > 0) {
97: if (ksp->normtype == KSP_NORM_NONE) dp = 0.0;
98: ksp->rnorm = dp;
99: KSPLogResidualHistory(ksp,dp);
100: KSPMonitor(ksp,i,dp);
101: (*ksp->converged)(ksp,i,dp,&ksp->reason,ksp->cnvP);
102: if (ksp->reason) return(0);
103: }
105: if (i == 0) {
106: alpha = gamma / delta;
107: VecCopy(N,Z); /* z <- n */
108: VecCopy(M,Q); /* q <- m */
109: VecCopy(U,P); /* p <- u */
110: } else {
111: beta = gamma / gammaold;
112: alpha = gamma / (delta - beta / alpha * gamma);
113: VecAYPX(Z,beta,N); /* z <- n + beta * z */
114: VecAYPX(Q,beta,M); /* q <- m + beta * q */
115: VecAYPX(P,beta,U); /* p <- u + beta * p */
116: }
117: VecAXPY(X, alpha,P); /* x <- x + alpha * p */
118: VecAXPY(U,-alpha,Q); /* u <- u - alpha * q */
119: VecAXPY(W,-alpha,Z); /* w <- w - alpha * z */
120: gammaold = gamma;
121: i++;
122: ksp->its = i;
124: /* if (i%50 == 0) { */
125: /* KSP_MatMult(ksp,Amat,X,W); /\* w <- b - Ax *\/ */
126: /* VecAYPX(W,-1.0,B); */
127: /* KSP_PCApply(ksp,W,U); */
128: /* KSP_MatMult(ksp,Amat,U,W); */
129: /* } */
131: } while (i<=ksp->max_it);
132: if (i >= ksp->max_it) ksp->reason = KSP_DIVERGED_ITS;
133: return(0);
134: }
136: /*MC
137: KSPPIPECR - Pipelined conjugate residual method
139: This method has only a single non-blocking reduction per iteration, compared to 2 blocking for standard CR. The
140: non-blocking reduction is overlapped by the matrix-vector product, but not the preconditioner application.
142: See also KSPPIPECG, where the reduction is only overlapped with the matrix-vector product.
144: Level: intermediate
146: Notes:
147: MPI configuration may be necessary for reductions to make asynchronous progress, which is important for performance of pipelined methods.
148: See the FAQ on the PETSc website for details.
150: Contributed by:
151: Pieter Ghysels, Universiteit Antwerpen, Intel Exascience lab Flanders
153: Reference:
154: P. Ghysels and W. Vanroose, "Hiding global synchronization latency in the preconditioned Conjugate Gradient algorithm",
155: Submitted to Parallel Computing, 2012.
157: .seealso: KSPCreate(), KSPSetType(), KSPPIPECG, KSPGROPPCG, KSPPGMRES, KSPCG, KSPCGUseSingleReduction()
158: M*/
160: PETSC_EXTERN PetscErrorCode KSPCreate_PIPECR(KSP ksp)
161: {
165: KSPSetSupportedNorm(ksp,KSP_NORM_PRECONDITIONED,PC_LEFT,2);
166: KSPSetSupportedNorm(ksp,KSP_NORM_NONE,PC_LEFT,1);
168: ksp->ops->setup = KSPSetUp_PIPECR;
169: ksp->ops->solve = KSPSolve_PIPECR;
170: ksp->ops->destroy = KSPDestroyDefault;
171: ksp->ops->view = NULL;
172: ksp->ops->setfromoptions = NULL;
173: ksp->ops->buildsolution = KSPBuildSolutionDefault;
174: ksp->ops->buildresidual = KSPBuildResidualDefault;
175: return(0);
176: }