Actual source code: pipecr.c
petsc-3.9.4 2018-09-11
1: #include <petsc/private/kspimpl.h>
3: /*
4: KSPSetUp_PIPECR - Sets up the workspace needed by the PIPECR method.
6: This is called once, usually automatically by KSPSolve() or KSPSetUp()
7: but can be called directly by KSPSetUp()
8: */
9: static PetscErrorCode KSPSetUp_PIPECR(KSP ksp)
10: {
14: /* get work vectors needed by PIPECR */
15: KSPSetWorkVecs(ksp,7);
16: return(0);
17: }
19: /*
20: KSPSolve_PIPECR - This routine actually applies the pipelined conjugate residual method
22: Input Parameter:
23: . ksp - the Krylov space object that was set to use conjugate gradient, by, for
24: example, KSPCreate(MPI_Comm,KSP *ksp); KSPSetType(ksp,KSPCG);
25: */
26: static PetscErrorCode KSPSolve_PIPECR(KSP ksp)
27: {
29: PetscInt i;
30: PetscScalar alpha=0.0,beta=0.0,gamma,gammaold=0.0,delta;
31: PetscReal dp = 0.0;
32: Vec X,B,Z,P,W,Q,U,M,N;
33: Mat Amat,Pmat;
34: PetscBool diagonalscale;
37: PCGetDiagonalScale(ksp->pc,&diagonalscale);
38: if (diagonalscale) SETERRQ1(PetscObjectComm((PetscObject)ksp),PETSC_ERR_SUP,"Krylov method %s does not support diagonal scaling",((PetscObject)ksp)->type_name);
40: X = ksp->vec_sol;
41: B = ksp->vec_rhs;
42: M = ksp->work[0];
43: Z = ksp->work[1];
44: P = ksp->work[2];
45: N = ksp->work[3];
46: W = ksp->work[4];
47: Q = ksp->work[5];
48: U = ksp->work[6];
50: PCGetOperators(ksp->pc,&Amat,&Pmat);
52: ksp->its = 0;
53: /* we don't have an R vector, so put the (unpreconditioned) residual in w for now */
54: if (!ksp->guess_zero) {
55: KSP_MatMult(ksp,Amat,X,W); /* w <- b - Ax */
56: VecAYPX(W,-1.0,B);
57: } else {
58: VecCopy(B,W); /* w <- b (x is 0) */
59: }
60: KSP_PCApply(ksp,W,U); /* u <- Bw */
62: switch (ksp->normtype) {
63: case KSP_NORM_PRECONDITIONED:
64: VecNormBegin(U,NORM_2,&dp); /* dp <- u'*u = e'*A'*B'*B*A'*e' */
65: PetscCommSplitReductionBegin(PetscObjectComm((PetscObject)U));
66: KSP_MatMult(ksp,Amat,U,W); /* w <- Au */
67: VecNormEnd(U,NORM_2,&dp);
68: break;
69: case KSP_NORM_NONE:
70: KSP_MatMult(ksp,Amat,U,W);
71: dp = 0.0;
72: break;
73: default: SETERRQ1(PetscObjectComm((PetscObject)ksp),PETSC_ERR_SUP,"%s",KSPNormTypes[ksp->normtype]);
74: }
75: KSPLogResidualHistory(ksp,dp);
76: KSPMonitor(ksp,0,dp);
77: ksp->rnorm = dp;
78: (*ksp->converged)(ksp,0,dp,&ksp->reason,ksp->cnvP); /* test for convergence */
79: if (ksp->reason) return(0);
81: i = 0;
82: do {
83: KSP_PCApply(ksp,W,M); /* m <- Bw */
85: if (i > 0 && ksp->normtype == KSP_NORM_PRECONDITIONED) {
86: VecNormBegin(U,NORM_2,&dp);
87: }
88: VecDotBegin(W,U,&gamma);
89: VecDotBegin(M,W,&delta);
90: PetscCommSplitReductionBegin(PetscObjectComm((PetscObject)U));
92: KSP_MatMult(ksp,Amat,M,N); /* n <- Am */
94: if (i > 0 && ksp->normtype == KSP_NORM_PRECONDITIONED) {
95: VecNormEnd(U,NORM_2,&dp);
96: }
97: VecDotEnd(W,U,&gamma);
98: VecDotEnd(M,W,&delta);
100: if (i > 0) {
101: if (ksp->normtype == KSP_NORM_NONE) dp = 0.0;
102: ksp->rnorm = dp;
103: KSPLogResidualHistory(ksp,dp);
104: KSPMonitor(ksp,i,dp);
105: (*ksp->converged)(ksp,i,dp,&ksp->reason,ksp->cnvP);
106: if (ksp->reason) break;
107: }
109: if (i == 0) {
110: alpha = gamma / delta;
111: VecCopy(N,Z); /* z <- n */
112: VecCopy(M,Q); /* q <- m */
113: VecCopy(U,P); /* p <- u */
114: } else {
115: beta = gamma / gammaold;
116: alpha = gamma / (delta - beta / alpha * gamma);
117: VecAYPX(Z,beta,N); /* z <- n + beta * z */
118: VecAYPX(Q,beta,M); /* q <- m + beta * q */
119: VecAYPX(P,beta,U); /* p <- u + beta * p */
120: }
121: VecAXPY(X, alpha,P); /* x <- x + alpha * p */
122: VecAXPY(U,-alpha,Q); /* u <- u - alpha * q */
123: VecAXPY(W,-alpha,Z); /* w <- w - alpha * z */
124: gammaold = gamma;
125: i++;
126: ksp->its = i;
128: /* if (i%50 == 0) { */
129: /* KSP_MatMult(ksp,Amat,X,W); /\* w <- b - Ax *\/ */
130: /* VecAYPX(W,-1.0,B); */
131: /* KSP_PCApply(ksp,W,U); */
132: /* KSP_MatMult(ksp,Amat,U,W); */
133: /* } */
135: } while (i<ksp->max_it);
136: if (i >= ksp->max_it) ksp->reason = KSP_DIVERGED_ITS;
137: return(0);
138: }
140: /*MC
141: KSPPIPECR - Pipelined conjugate residual method
143: This method has only a single non-blocking reduction per iteration, compared to 2 blocking for standard CR. The
144: non-blocking reduction is overlapped by the matrix-vector product, but not the preconditioner application.
146: See also KSPPIPECG, where the reduction is only overlapped with the matrix-vector product.
148: Level: intermediate
150: Notes:
151: MPI configuration may be necessary for reductions to make asynchronous progress, which is important for performance of pipelined methods.
152: See the FAQ on the PETSc website for details.
154: Contributed by:
155: Pieter Ghysels, Universiteit Antwerpen, Intel Exascience lab Flanders
157: Reference:
158: P. Ghysels and W. Vanroose, "Hiding global synchronization latency in the preconditioned Conjugate Gradient algorithm",
159: Submitted to Parallel Computing, 2012.
161: .seealso: KSPCreate(), KSPSetType(), KSPPIPECG, KSPGROPPCG, KSPPGMRES, KSPCG, KSPCGUseSingleReduction()
162: M*/
164: PETSC_EXTERN PetscErrorCode KSPCreate_PIPECR(KSP ksp)
165: {
169: KSPSetSupportedNorm(ksp,KSP_NORM_PRECONDITIONED,PC_LEFT,2);
170: KSPSetSupportedNorm(ksp,KSP_NORM_NONE,PC_LEFT,1);
172: ksp->ops->setup = KSPSetUp_PIPECR;
173: ksp->ops->solve = KSPSolve_PIPECR;
174: ksp->ops->destroy = KSPDestroyDefault;
175: ksp->ops->view = 0;
176: ksp->ops->setfromoptions = 0;
177: ksp->ops->buildsolution = KSPBuildSolutionDefault;
178: ksp->ops->buildresidual = KSPBuildResidualDefault;
179: return(0);
180: }