Actual source code: pipecr.c
petsc-3.4.0 2013-05-13
1: /*
2: author: Pieter Ghysels, Universiteit Antwerpen, Intel Exascience lab Flanders
4: This file implements a preconditioned pipelined CR. There is only a single
5: non-blocking reduction per iteration, compared to 2 blocking for standard CR.
6: The non-blocking reduction is overlapped by the matrix-vector product.
8: See "Hiding global synchronization latency in the
9: preconditioned Conjugate Gradient algorithm", P. Ghysels and W. Vanroose.
10: Submitted to Parallel Computing, 2012
12: See also pipecg.c, where the reduction can be overlapped with both the
13: matrix-vector product and the preconditioner.
14: */
16: #include <petsc-private/kspimpl.h>
18: /*
19: KSPSetUp_PIPECR - Sets up the workspace needed by the PIPECR method.
21: This is called once, usually automatically by KSPSolve() or KSPSetUp()
22: but can be called directly by KSPSetUp()
23: */
26: PetscErrorCode KSPSetUp_PIPECR(KSP ksp)
27: {
31: /* get work vectors needed by PIPECR */
32: KSPSetWorkVecs(ksp,7);
33: return(0);
34: }
36: /*
37: KSPSolve_PIPECR - This routine actually applies the pipelined conjugate residual method
39: Input Parameter:
40: . ksp - the Krylov space object that was set to use conjugate gradient, by, for
41: example, KSPCreate(MPI_Comm,KSP *ksp); KSPSetType(ksp,KSPCG);
42: */
45: PetscErrorCode KSPSolve_PIPECR(KSP ksp)
46: {
48: PetscInt i;
49: PetscScalar alpha=0.0,beta=0.0,gamma,gammaold=0.0,delta;
50: PetscReal dp = 0.0;
51: Vec X,B,Z,P,W,Q,U,M,N;
52: Mat Amat,Pmat;
53: MatStructure pflag;
54: PetscBool diagonalscale;
57: PCGetDiagonalScale(ksp->pc,&diagonalscale);
58: if (diagonalscale) SETERRQ1(PetscObjectComm((PetscObject)ksp),PETSC_ERR_SUP,"Krylov method %s does not support diagonal scaling",((PetscObject)ksp)->type_name);
60: X = ksp->vec_sol;
61: B = ksp->vec_rhs;
62: M = ksp->work[0];
63: Z = ksp->work[1];
64: P = ksp->work[2];
65: N = ksp->work[3];
66: W = ksp->work[4];
67: Q = ksp->work[5];
68: U = ksp->work[6];
70: PCGetOperators(ksp->pc,&Amat,&Pmat,&pflag);
72: ksp->its = 0;
73: /* we don't have an R vector, so put the (unpreconditioned) residual in w for now */
74: if (!ksp->guess_zero) {
75: KSP_MatMult(ksp,Amat,X,W); /* w <- b - Ax */
76: VecAYPX(W,-1.0,B);
77: } else {
78: VecCopy(B,W); /* w <- b (x is 0) */
79: }
80: KSP_PCApply(ksp,W,U); /* u <- Bw */
82: switch (ksp->normtype) {
83: case KSP_NORM_PRECONDITIONED:
84: VecNormBegin(U,NORM_2,&dp); /* dp <- u'*u = e'*A'*B'*B*A'*e' */
85: PetscCommSplitReductionBegin(PetscObjectComm((PetscObject)U));
86: KSP_MatMult(ksp,Amat,U,W); /* w <- Au */
87: VecNormEnd(U,NORM_2,&dp);
88: break;
89: case KSP_NORM_NONE:
90: KSP_MatMult(ksp,Amat,U,W);
91: dp = 0.0;
92: break;
93: default: SETERRQ1(PetscObjectComm((PetscObject)ksp),PETSC_ERR_SUP,"%s",KSPNormTypes[ksp->normtype]);
94: }
95: KSPLogResidualHistory(ksp,dp);
96: KSPMonitor(ksp,0,dp);
97: ksp->rnorm = dp;
98: (*ksp->converged)(ksp,0,dp,&ksp->reason,ksp->cnvP); /* test for convergence */
99: if (ksp->reason) return(0);
101: i = 0;
102: do {
103: KSP_PCApply(ksp,W,M); /* m <- Bw */
105: if (i > 0 && ksp->normtype == KSP_NORM_PRECONDITIONED) {
106: VecNormBegin(U,NORM_2,&dp);
107: }
108: VecDotBegin(W,U,&gamma);
109: VecDotBegin(M,W,&delta);
110: PetscCommSplitReductionBegin(PetscObjectComm((PetscObject)U));
112: KSP_MatMult(ksp,Amat,M,N); /* n <- Am */
114: if (i > 0 && ksp->normtype == KSP_NORM_PRECONDITIONED) {
115: VecNormEnd(U,NORM_2,&dp);
116: }
117: VecDotEnd(W,U,&gamma);
118: VecDotEnd(M,W,&delta);
120: if (i > 0) {
121: if (ksp->normtype == KSP_NORM_NONE) dp = 0.0;
122: ksp->rnorm = dp;
123: KSPLogResidualHistory(ksp,dp);
124: KSPMonitor(ksp,i,dp);
125: (*ksp->converged)(ksp,i,dp,&ksp->reason,ksp->cnvP);
126: if (ksp->reason) break;
127: }
129: if (i == 0) {
130: alpha = gamma / delta;
131: VecCopy(N,Z); /* z <- n */
132: VecCopy(M,Q); /* q <- m */
133: VecCopy(U,P); /* p <- u */
134: } else {
135: beta = gamma / gammaold;
136: alpha = gamma / (delta - beta / alpha * gamma);
137: VecAYPX(Z,beta,N); /* z <- n + beta * z */
138: VecAYPX(Q,beta,M); /* q <- m + beta * q */
139: VecAYPX(P,beta,U); /* p <- u + beta * p */
140: }
141: VecAXPY(X, alpha,P); /* x <- x + alpha * p */
142: VecAXPY(U,-alpha,Q); /* u <- u - alpha * q */
143: VecAXPY(W,-alpha,Z); /* w <- w - alpha * z */
144: gammaold = gamma;
145: i++;
146: ksp->its = i;
148: /* if (i%50 == 0) { */
149: /* KSP_MatMult(ksp,Amat,X,W); /\* w <- b - Ax *\/ */
150: /* VecAYPX(W,-1.0,B); */
151: /* KSP_PCApply(ksp,W,U); */
152: /* KSP_MatMult(ksp,Amat,U,W); */
153: /* } */
155: } while (i<ksp->max_it);
156: if (i >= ksp->max_it) ksp->reason = KSP_DIVERGED_ITS;
157: return(0);
158: }
162: PETSC_EXTERN PetscErrorCode KSPCreate_PIPECR(KSP ksp)
163: {
167: KSPSetSupportedNorm(ksp,KSP_NORM_PRECONDITIONED,PC_LEFT,1);
168: KSPSetSupportedNorm(ksp,KSP_NORM_NONE,PC_LEFT,1);
170: ksp->ops->setup = KSPSetUp_PIPECR;
171: ksp->ops->solve = KSPSolve_PIPECR;
172: ksp->ops->destroy = KSPDestroyDefault;
173: ksp->ops->view = 0;
174: ksp->ops->setfromoptions = 0;
175: ksp->ops->buildsolution = KSPBuildSolutionDefault;
176: ksp->ops->buildresidual = KSPBuildResidualDefault;
177: return(0);
178: }