Actual source code: pipecr.c

petsc-3.4.0 2013-05-13
  1: /*
  2:  author: Pieter Ghysels, Universiteit Antwerpen, Intel Exascience lab Flanders

  4:  This file implements a preconditioned pipelined CR. There is only a single
  5:  non-blocking reduction per iteration, compared to 2 blocking for standard CR.
  6:  The non-blocking reduction is overlapped by the matrix-vector product.

  8:  See "Hiding global synchronization latency in the
  9:  preconditioned Conjugate Gradient algorithm", P. Ghysels and W. Vanroose.
 10:  Submitted to Parallel Computing, 2012

 12:  See also pipecg.c, where the reduction can be overlapped with both the
 13:  matrix-vector product and the preconditioner.
 14:  */

 16: #include <petsc-private/kspimpl.h>

 18: /*
 19:      KSPSetUp_PIPECR - Sets up the workspace needed by the PIPECR method.

 21:       This is called once, usually automatically by KSPSolve() or KSPSetUp()
 22:      but can be called directly by KSPSetUp()
 23: */
 26: PetscErrorCode KSPSetUp_PIPECR(KSP ksp)
 27: {

 31:   /* get work vectors needed by PIPECR */
 32:   KSPSetWorkVecs(ksp,7);
 33:   return(0);
 34: }

 36: /*
 37:  KSPSolve_PIPECR - This routine actually applies the pipelined conjugate residual method

 39:  Input Parameter:
 40:  .     ksp - the Krylov space object that was set to use conjugate gradient, by, for
 41:              example, KSPCreate(MPI_Comm,KSP *ksp); KSPSetType(ksp,KSPCG);
 42: */
 45: PetscErrorCode  KSPSolve_PIPECR(KSP ksp)
 46: {
 48:   PetscInt       i;
 49:   PetscScalar    alpha=0.0,beta=0.0,gamma,gammaold=0.0,delta;
 50:   PetscReal      dp   = 0.0;
 51:   Vec            X,B,Z,P,W,Q,U,M,N;
 52:   Mat            Amat,Pmat;
 53:   MatStructure   pflag;
 54:   PetscBool      diagonalscale;

 57:   PCGetDiagonalScale(ksp->pc,&diagonalscale);
 58:   if (diagonalscale) SETERRQ1(PetscObjectComm((PetscObject)ksp),PETSC_ERR_SUP,"Krylov method %s does not support diagonal scaling",((PetscObject)ksp)->type_name);

 60:   X = ksp->vec_sol;
 61:   B = ksp->vec_rhs;
 62:   M = ksp->work[0];
 63:   Z = ksp->work[1];
 64:   P = ksp->work[2];
 65:   N = ksp->work[3];
 66:   W = ksp->work[4];
 67:   Q = ksp->work[5];
 68:   U = ksp->work[6];

 70:   PCGetOperators(ksp->pc,&Amat,&Pmat,&pflag);

 72:   ksp->its = 0;
 73:   /* we don't have an R vector, so put the (unpreconditioned) residual in w for now */
 74:   if (!ksp->guess_zero) {
 75:     KSP_MatMult(ksp,Amat,X,W);            /*     w <- b - Ax     */
 76:     VecAYPX(W,-1.0,B);
 77:   } else {
 78:     VecCopy(B,W);                         /*     w <- b (x is 0) */
 79:   }
 80:   KSP_PCApply(ksp,W,U);                   /*     u <- Bw   */

 82:   switch (ksp->normtype) {
 83:   case KSP_NORM_PRECONDITIONED:
 84:     VecNormBegin(U,NORM_2,&dp);           /*     dp <- u'*u = e'*A'*B'*B*A'*e'     */
 85:     PetscCommSplitReductionBegin(PetscObjectComm((PetscObject)U));
 86:     KSP_MatMult(ksp,Amat,U,W);            /*     w <- Au   */
 87:     VecNormEnd(U,NORM_2,&dp);
 88:     break;
 89:   case KSP_NORM_NONE:
 90:     KSP_MatMult(ksp,Amat,U,W);
 91:     dp   = 0.0;
 92:     break;
 93:   default: SETERRQ1(PetscObjectComm((PetscObject)ksp),PETSC_ERR_SUP,"%s",KSPNormTypes[ksp->normtype]);
 94:   }
 95:   KSPLogResidualHistory(ksp,dp);
 96:   KSPMonitor(ksp,0,dp);
 97:   ksp->rnorm = dp;
 98:   (*ksp->converged)(ksp,0,dp,&ksp->reason,ksp->cnvP); /* test for convergence */
 99:   if (ksp->reason) return(0);

101:   i = 0;
102:   do {
103:     KSP_PCApply(ksp,W,M);            /*   m <- Bw       */

105:     if (i > 0 && ksp->normtype == KSP_NORM_PRECONDITIONED) {
106:       VecNormBegin(U,NORM_2,&dp);
107:     }
108:     VecDotBegin(W,U,&gamma);
109:     VecDotBegin(M,W,&delta);
110:     PetscCommSplitReductionBegin(PetscObjectComm((PetscObject)U));

112:     KSP_MatMult(ksp,Amat,M,N);       /*   n <- Am       */

114:     if (i > 0 && ksp->normtype == KSP_NORM_PRECONDITIONED) {
115:       VecNormEnd(U,NORM_2,&dp);
116:     }
117:     VecDotEnd(W,U,&gamma);
118:     VecDotEnd(M,W,&delta);

120:     if (i > 0) {
121:       if (ksp->normtype == KSP_NORM_NONE) dp = 0.0;
122:       ksp->rnorm = dp;
123:       KSPLogResidualHistory(ksp,dp);
124:       KSPMonitor(ksp,i,dp);
125:       (*ksp->converged)(ksp,i,dp,&ksp->reason,ksp->cnvP);
126:       if (ksp->reason) break;
127:     }

129:     if (i == 0) {
130:       alpha = gamma / delta;
131:       VecCopy(N,Z);        /*     z <- n          */
132:       VecCopy(M,Q);        /*     q <- m          */
133:       VecCopy(U,P);        /*     p <- u          */
134:     } else {
135:       beta  = gamma / gammaold;
136:       alpha = gamma / (delta - beta / alpha * gamma);
137:       VecAYPX(Z,beta,N);   /*     z <- n + beta * z   */
138:       VecAYPX(Q,beta,M);   /*     q <- m + beta * q   */
139:       VecAYPX(P,beta,U);   /*     p <- u + beta * p   */
140:     }
141:     VecAXPY(X, alpha,P); /*     x <- x + alpha * p   */
142:     VecAXPY(U,-alpha,Q); /*     u <- u - alpha * q   */
143:     VecAXPY(W,-alpha,Z); /*     w <- w - alpha * z   */
144:     gammaold = gamma;
145:     i++;
146:     ksp->its = i;

148:     /* if (i%50 == 0) { */
149:     /*   KSP_MatMult(ksp,Amat,X,W);            /\*     w <- b - Ax     *\/ */
150:     /*   VecAYPX(W,-1.0,B); */
151:     /*   KSP_PCApply(ksp,W,U); */
152:     /*   KSP_MatMult(ksp,Amat,U,W); */
153:     /* } */

155:   } while (i<ksp->max_it);
156:   if (i >= ksp->max_it) ksp->reason = KSP_DIVERGED_ITS;
157:   return(0);
158: }

162: PETSC_EXTERN PetscErrorCode KSPCreate_PIPECR(KSP ksp)
163: {

167:   KSPSetSupportedNorm(ksp,KSP_NORM_PRECONDITIONED,PC_LEFT,1);
168:   KSPSetSupportedNorm(ksp,KSP_NORM_NONE,PC_LEFT,1);

170:   ksp->ops->setup          = KSPSetUp_PIPECR;
171:   ksp->ops->solve          = KSPSolve_PIPECR;
172:   ksp->ops->destroy        = KSPDestroyDefault;
173:   ksp->ops->view           = 0;
174:   ksp->ops->setfromoptions = 0;
175:   ksp->ops->buildsolution  = KSPBuildSolutionDefault;
176:   ksp->ops->buildresidual  = KSPBuildResidualDefault;
177:   return(0);
178: }