Actual source code: pipecg.c
petsc-3.4.0 2013-05-13
1: /*
2: author: Pieter Ghysels, Universiteit Antwerpen, Intel Exascience lab Flanders
4: This file implements a preconditioned pipelined CG. There is only a single
5: non-blocking reduction per iteration, compared to 2 blocking for standard CG.
6: The non-blocking reduction is overlapped by the matrix-vector product.
8: See "Hiding global synchronization latency in the
9: preconditioned Conjugate Gradient algorithm", P. Ghysels and W. Vanroose.
10: Submitted to Parallel Computing, 2012
12: See also pipecr.c, where the reduction is only overlapped with
13: the matrix-vector product.
15: */
17: #include <petsc-private/kspimpl.h>
19: /*
20: KSPSetUp_PIPECG - Sets up the workspace needed by the PIPECG method.
22: This is called once, usually automatically by KSPSolve() or KSPSetUp()
23: but can be called directly by KSPSetUp()
24: */
27: PetscErrorCode KSPSetUp_PIPECG(KSP ksp)
28: {
32: /* get work vectors needed by PIPECG */
33: KSPSetWorkVecs(ksp,9);
34: return(0);
35: }
37: /*
38: KSPSolve_PIPECG - This routine actually applies the pipelined conjugate gradient method
40: Input Parameter:
41: . ksp - the Krylov space object that was set to use conjugate gradient, by, for
42: example, KSPCreate(MPI_Comm,KSP *ksp); KSPSetType(ksp,KSPCG);
43: */
46: PetscErrorCode KSPSolve_PIPECG(KSP ksp)
47: {
49: PetscInt i;
50: PetscScalar alpha = 0.0,beta = 0.0,gamma = 0.0,gammaold = 0.0,delta = 0.0;
51: PetscReal dp = 0.0;
52: Vec X,B,Z,P,W,Q,U,M,N,R,S;
53: Mat Amat,Pmat;
54: MatStructure pflag;
55: PetscBool diagonalscale;
58: PCGetDiagonalScale(ksp->pc,&diagonalscale);
59: if (diagonalscale) SETERRQ1(PetscObjectComm((PetscObject)ksp),PETSC_ERR_SUP,"Krylov method %s does not support diagonal scaling",((PetscObject)ksp)->type_name);
61: X = ksp->vec_sol;
62: B = ksp->vec_rhs;
63: M = ksp->work[0];
64: Z = ksp->work[1];
65: P = ksp->work[2];
66: N = ksp->work[3];
67: W = ksp->work[4];
68: Q = ksp->work[5];
69: U = ksp->work[6];
70: R = ksp->work[7];
71: S = ksp->work[8];
73: PCGetOperators(ksp->pc,&Amat,&Pmat,&pflag);
75: ksp->its = 0;
76: if (!ksp->guess_zero) {
77: KSP_MatMult(ksp,Amat,X,R); /* r <- b - Ax */
78: VecAYPX(R,-1.0,B);
79: } else {
80: VecCopy(B,R); /* r <- b (x is 0) */
81: }
83: KSP_PCApply(ksp,R,U); /* u <- Br */
85: switch (ksp->normtype) {
86: case KSP_NORM_PRECONDITIONED:
87: VecNormBegin(U,NORM_2,&dp); /* dp <- u'*u = e'*A'*B'*B*A'*e' */
88: PetscCommSplitReductionBegin(PetscObjectComm((PetscObject)U));
89: KSP_MatMult(ksp,Amat,U,W); /* w <- Au */
90: VecNormEnd(U,NORM_2,&dp);
91: break;
92: case KSP_NORM_UNPRECONDITIONED:
93: VecNormBegin(R,NORM_2,&dp); /* dp <- r'*r = e'*A'*A*e */
94: PetscCommSplitReductionBegin(PetscObjectComm((PetscObject)R));
95: KSP_MatMult(ksp,Amat,U,W); /* w <- Au */
96: VecNormEnd(R,NORM_2,&dp);
97: break;
98: case KSP_NORM_NATURAL:
99: VecDotBegin(R,U,&gamma); /* gamma <- u'*r */
100: PetscCommSplitReductionBegin(PetscObjectComm((PetscObject)R));
101: KSP_MatMult(ksp,Amat,U,W); /* w <- Au */
102: VecDotEnd(R,U,&gamma);
103: if (PetscIsInfOrNanScalar(gamma)) SETERRQ(PetscObjectComm((PetscObject)ksp),PETSC_ERR_FP,"Infinite or not-a-number generated in dot product");
104: dp = PetscSqrtReal(PetscAbsScalar(gamma)); /* dp <- r'*u = r'*B*r = e'*A'*B*A*e */
105: break;
106: case KSP_NORM_NONE:
107: KSP_MatMult(ksp,Amat,U,W);
108: dp = 0.0;
109: break;
110: default: SETERRQ1(PetscObjectComm((PetscObject)ksp),PETSC_ERR_SUP,"%s",KSPNormTypes[ksp->normtype]);
111: }
112: KSPLogResidualHistory(ksp,dp);
113: KSPMonitor(ksp,0,dp);
114: ksp->rnorm = dp;
115: (*ksp->converged)(ksp,0,dp,&ksp->reason,ksp->cnvP); /* test for convergence */
116: if (ksp->reason) return(0);
118: i = 0;
119: do {
120: if (i > 0 && ksp->normtype == KSP_NORM_UNPRECONDITIONED) {
121: VecNormBegin(R,NORM_2,&dp);
122: } else if (i > 0 && ksp->normtype == KSP_NORM_PRECONDITIONED) {
123: VecNormBegin(U,NORM_2,&dp);
124: } else if (!(i == 0 && ksp->normtype == KSP_NORM_NATURAL)) {
125: VecDotBegin(R,U,&gamma);
126: }
127: VecDotBegin(W,U,&delta);
128: PetscCommSplitReductionBegin(PetscObjectComm((PetscObject)R));
130: KSP_PCApply(ksp,W,M); /* m <- Bw */
131: KSP_MatMult(ksp,Amat,M,N); /* n <- Am */
133: if (i > 0 && ksp->normtype == KSP_NORM_UNPRECONDITIONED) {
134: VecNormEnd(R,NORM_2,&dp);
135: } else if (i > 0 && ksp->normtype == KSP_NORM_PRECONDITIONED) {
136: VecNormEnd(U,NORM_2,&dp);
137: } else if (!(i == 0 && ksp->normtype == KSP_NORM_NATURAL)) {
138: VecDotEnd(R,U,&gamma);
139: }
140: VecDotEnd(W,U,&delta);
142: if (i > 0) {
143: if (ksp->normtype == KSP_NORM_NATURAL) dp = PetscSqrtReal(PetscAbsScalar(gamma));
144: else if (ksp->normtype == KSP_NORM_NONE) dp = 0.0;
146: ksp->rnorm = dp;
147: KSPLogResidualHistory(ksp,dp);
148: KSPMonitor(ksp,i,dp);
149: (*ksp->converged)(ksp,i,dp,&ksp->reason,ksp->cnvP);
150: if (ksp->reason) break;
151: }
153: if (i == 0) {
154: alpha = gamma / delta;
155: VecCopy(N,Z); /* z <- n */
156: VecCopy(M,Q); /* q <- m */
157: VecCopy(U,P); /* p <- u */
158: VecCopy(W,S); /* s <- w */
159: } else {
160: beta = gamma / gammaold;
161: alpha = gamma / (delta - beta / alpha * gamma);
162: VecAYPX(Z,beta,N); /* z <- n + beta * z */
163: VecAYPX(Q,beta,M); /* q <- m + beta * q */
164: VecAYPX(P,beta,U); /* p <- u + beta * p */
165: VecAYPX(S,beta,W); /* s <- w + beta * s */
166: }
167: VecAXPY(X, alpha,P); /* x <- x + alpha * p */
168: VecAXPY(U,-alpha,Q); /* u <- u - alpha * q */
169: VecAXPY(W,-alpha,Z); /* w <- w - alpha * z */
170: VecAXPY(R,-alpha,S); /* r <- r - alpha * s */
171: gammaold = gamma;
172: i++;
173: ksp->its = i;
175: /* if (i%50 == 0) { */
176: /* KSP_MatMult(ksp,Amat,X,R); /\* w <- b - Ax *\/ */
177: /* VecAYPX(R,-1.0,B); */
178: /* KSP_PCApply(ksp,R,U); */
179: /* KSP_MatMult(ksp,Amat,U,W); */
180: /* } */
182: } while (i<ksp->max_it);
183: if (i >= ksp->max_it) ksp->reason = KSP_DIVERGED_ITS;
184: return(0);
185: }
190: PETSC_EXTERN PetscErrorCode KSPCreate_PIPECG(KSP ksp)
191: {
195: KSPSetSupportedNorm(ksp,KSP_NORM_UNPRECONDITIONED,PC_LEFT,1);
196: KSPSetSupportedNorm(ksp,KSP_NORM_PRECONDITIONED,PC_LEFT,1);
197: KSPSetSupportedNorm(ksp,KSP_NORM_NATURAL,PC_LEFT,1);
198: KSPSetSupportedNorm(ksp,KSP_NORM_NONE,PC_LEFT,1);
200: ksp->ops->setup = KSPSetUp_PIPECG;
201: ksp->ops->solve = KSPSolve_PIPECG;
202: ksp->ops->destroy = KSPDestroyDefault;
203: ksp->ops->view = 0;
204: ksp->ops->setfromoptions = 0;
205: ksp->ops->buildsolution = KSPBuildSolutionDefault;
206: ksp->ops->buildresidual = KSPBuildResidualDefault;
207: return(0);
208: }