Actual source code: pipegcr.c

petsc-3.9.4 2018-09-11
Report Typos and Errors
  1: /*
  2:     Contributed by Sascha M. Schnepp and Patrick Sanan
  3: */

  5: #include "petscsys.h"
  6:  #include <../src/ksp/ksp/impls/gcr/pipegcr/pipegcrimpl.h>

  8: static PetscBool  cited = PETSC_FALSE;
  9: static const char citation[] =
 10:   "@article{SSM2016,\n"
 11:   "  author = {P. Sanan and S.M. Schnepp and D.A. May},\n"
 12:   "  title = {Pipelined, Flexible Krylov Subspace Methods},\n"
 13:   "  journal = {SIAM Journal on Scientific Computing},\n"
 14:   "  volume = {38},\n"
 15:   "  number = {5},\n"
 16:   "  pages = {C441-C470},\n"
 17:   "  year = {2016},\n"
 18:   "  doi = {10.1137/15M1049130},\n"
 19:   "  URL = {http://dx.doi.org/10.1137/15M1049130},\n"
 20:   "  eprint = {http://dx.doi.org/10.1137/15M1049130}\n"
 21:   "}\n";

 23: #define KSPPIPEGCR_DEFAULT_MMAX 15
 24: #define KSPPIPEGCR_DEFAULT_NPREALLOC 5
 25: #define KSPPIPEGCR_DEFAULT_VECB 5
 26: #define KSPPIPEGCR_DEFAULT_TRUNCSTRAT KSP_FCD_TRUNC_TYPE_NOTAY
 27: #define KSPPIPEGCR_DEFAULT_UNROLL_W PETSC_TRUE

 29:  #include <petscksp.h>

 31: static PetscErrorCode KSPAllocateVectors_PIPEGCR(KSP ksp, PetscInt nvecsneeded, PetscInt chunksize)
 32: {
 33:   PetscErrorCode  ierr;
 34:   PetscInt        i;
 35:   KSP_PIPEGCR     *pipegcr;
 36:   PetscInt        nnewvecs, nvecsprev;

 39:   pipegcr = (KSP_PIPEGCR*)ksp->data;

 41:   /* Allocate enough new vectors to add chunksize new vectors, reach nvecsneedtotal, or to reach mmax+1, whichever is smallest */
 42:   if(pipegcr->nvecs < PetscMin(pipegcr->mmax+1,nvecsneeded)){
 43:     nvecsprev = pipegcr->nvecs;
 44:     nnewvecs = PetscMin(PetscMax(nvecsneeded-pipegcr->nvecs,chunksize),pipegcr->mmax+1-pipegcr->nvecs);
 45:     KSPCreateVecs(ksp,nnewvecs,&pipegcr->ppvecs[pipegcr->nchunks],0,NULL);
 46:     PetscLogObjectParents((PetscObject)ksp,nnewvecs,pipegcr->ppvecs[pipegcr->nchunks]);
 47:     KSPCreateVecs(ksp,nnewvecs,&pipegcr->psvecs[pipegcr->nchunks],0,NULL);
 48:     PetscLogObjectParents((PetscObject)ksp,nnewvecs,pipegcr->psvecs[pipegcr->nchunks]);
 49:     KSPCreateVecs(ksp,nnewvecs,&pipegcr->pqvecs[pipegcr->nchunks],0,NULL);
 50:     PetscLogObjectParents((PetscObject)ksp,nnewvecs,pipegcr->pqvecs[pipegcr->nchunks]);
 51:     if (pipegcr->unroll_w) {
 52:       KSPCreateVecs(ksp,nnewvecs,&pipegcr->ptvecs[pipegcr->nchunks],0,NULL);
 53:       PetscLogObjectParents((PetscObject)ksp,nnewvecs,pipegcr->ptvecs[pipegcr->nchunks]);
 54:     }
 55:     pipegcr->nvecs += nnewvecs;
 56:     for(i=0;i<nnewvecs;i++){
 57:       pipegcr->qvecs[nvecsprev+i] = pipegcr->pqvecs[pipegcr->nchunks][i];
 58:       pipegcr->pvecs[nvecsprev+i] = pipegcr->ppvecs[pipegcr->nchunks][i];
 59:       pipegcr->svecs[nvecsprev+i] = pipegcr->psvecs[pipegcr->nchunks][i];
 60:       if (pipegcr->unroll_w) {
 61:         pipegcr->tvecs[nvecsprev+i] = pipegcr->ptvecs[pipegcr->nchunks][i];
 62:       }
 63:     }
 64:     pipegcr->chunksizes[pipegcr->nchunks] = nnewvecs;
 65:     pipegcr->nchunks++;
 66:   }
 67:   return(0);
 68: }

 70: static PetscErrorCode KSPSolve_PIPEGCR_cycle(KSP ksp)
 71: {
 72:   KSP_PIPEGCR    *pipegcr = (KSP_PIPEGCR*)ksp->data;
 74:   Mat            A, B;
 75:   Vec            x,r,b,z,w,m,n,p,s,q,t,*redux;
 76:   PetscInt       i,j,k,idx,kdx,mi;
 77:   PetscScalar    alpha=0.0,gamma,*betas,*dots;
 78:   PetscReal      rnorm=0.0, delta,*eta,*etas;


 82:   /* !!PS We have not checked these routines for use with complex numbers. The inner products
 83:      are likely not defined correctly for that case */
 84: #if (defined(PETSC_USE_COMPLEX) && !defined(PETSC_SKIP_COMPLEX))
 85:   SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"PIPEGCR has not been implemented for use with complex scalars");
 86: #endif

 88:   KSPGetOperators(ksp, &A, &B);
 89:   x = ksp->vec_sol;
 90:   b = ksp->vec_rhs;
 91:   r = ksp->work[0];
 92:   z = ksp->work[1];
 93:   w = ksp->work[2]; /* w = Az = AB(r)                 (pipelining intermediate) */
 94:   m = ksp->work[3]; /* m = B(w) = B(Az) = B(AB(r))    (pipelining intermediate) */
 95:   n = ksp->work[4]; /* n = AB(w) = AB(Az) = AB(AB(r)) (pipelining intermediate) */
 96:   p = pipegcr->pvecs[0];
 97:   s = pipegcr->svecs[0];
 98:   q = pipegcr->qvecs[0];
 99:   t = pipegcr->unroll_w ? pipegcr->tvecs[0] : NULL;

101:   redux = pipegcr->redux;
102:   dots  = pipegcr->dots;
103:   etas  = pipegcr->etas;
104:   betas = dots;        /* dots takes the result of all dot products of which the betas are a subset */

106:   /* cycle initial residual */
107:   KSP_MatMult(ksp,A,x,r);
108:   VecAYPX(r,-1.0,b);                   /* r <- b - Ax */
109:   KSP_PCApply(ksp,r,z);                /* z <- B(r)   */
110:   KSP_MatMult(ksp,A,z,w);              /* w <- Az     */

112:   /* initialization of other variables and pipelining intermediates */
113:   VecCopy(z,p);
114:   KSP_MatMult(ksp,A,p,s);

116:   /* overlap initial computation of delta, gamma */
117:   redux[0] = w;
118:   redux[1] = r;
119:   VecMDotBegin(w,2,redux,dots);    /* Start split reductions for gamma = (w,r), delta = (w,w) */
120:   PetscCommSplitReductionBegin(PetscObjectComm((PetscObject)s)); /* perform asynchronous reduction */
121:   KSP_PCApply(ksp,s,q);            /* q = B(s) */
122:   if (pipegcr->unroll_w) {
123:     KSP_MatMult(ksp,A,q,t);        /* t = Aq   */
124:   }
125:   VecMDotEnd(w,2,redux,dots);      /* Finish split reduction */
126:   delta    = PetscRealPart(dots[0]);
127:   etas[0]  = delta;
128:   gamma    = dots[1];
129:   alpha    = gamma/delta;

131:   i = 0;
132:   do {
133:     PetscObjectSAWsTakeAccess((PetscObject)ksp);
134:     ksp->its++;
135:     PetscObjectSAWsGrantAccess((PetscObject)ksp);

137:     /* update solution, residuals, .. */
138:     VecAXPY(x,+alpha,p);
139:     VecAXPY(r,-alpha,s);
140:     VecAXPY(z,-alpha,q);
141:     if(pipegcr->unroll_w){
142:       VecAXPY(w,-alpha,t);
143:     } else {
144:       KSP_MatMult(ksp,A,z,w);
145:     }

147:     /* Computations of current iteration done */
148:     i++;

150:     if (pipegcr->modifypc) {
151:       (*pipegcr->modifypc)(ksp,ksp->its,ksp->rnorm,pipegcr->modifypc_ctx);
152:     }

154:     /* If needbe, allocate a new chunk of vectors */
155:     KSPAllocateVectors_PIPEGCR(ksp,i+1,pipegcr->vecb);

157:     /* Note that we wrap around and start clobbering old vectors */
158:     idx = i % (pipegcr->mmax+1);
159:     p   = pipegcr->pvecs[idx];
160:     s   = pipegcr->svecs[idx];
161:     q   = pipegcr->qvecs[idx];
162:     if (pipegcr->unroll_w) {
163:       t   = pipegcr->tvecs[idx];
164:     }
165:     eta = pipegcr->etas+idx;

167:     /* number of old directions to orthogonalize against */
168:     switch(pipegcr->truncstrat){
169:       case KSP_FCD_TRUNC_TYPE_STANDARD:
170:         mi = pipegcr->mmax;
171:         break;
172:       case KSP_FCD_TRUNC_TYPE_NOTAY:
173:         mi = ((i-1) % pipegcr->mmax)+1;
174:         break;
175:       default:
176:         SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Unrecognized Truncation Strategy");
177:     }

179:     /* Pick old p,s,q,zeta in a way suitable for VecMDot */
180:     for(k=PetscMax(0,i-mi),j=0;k<i;j++,k++){
181:       kdx = k % (pipegcr->mmax+1);
182:       pipegcr->pold[j] = pipegcr->pvecs[kdx];
183:       pipegcr->sold[j] = pipegcr->svecs[kdx];
184:       pipegcr->qold[j] = pipegcr->qvecs[kdx];
185:       if (pipegcr->unroll_w) {
186:         pipegcr->told[j] = pipegcr->tvecs[kdx];
187:       }
188:       redux[j]         = pipegcr->svecs[kdx];
189:     }
190:     /* If the above loop is not run redux contains only r and w => all beta_k = 0, only gamma, delta != 0 */
191:     redux[j]   = r;
192:     redux[j+1] = w;

194:     /* Dot products */
195:     /* Start split reductions for beta_k = (w,s_k), gamma = (w,r), delta = (w,w) */
196:     VecMDotBegin(w,j+2,redux,dots);
197:     PetscCommSplitReductionBegin(PetscObjectComm((PetscObject)w));

199:     /* B(w-r) + u stabilization */
200:     VecWAXPY(n,-1.0,r,w);              /* m = u + B(w-r): (a) ntmp = w-r              */
201:     KSP_PCApply(ksp,n,m);              /* m = u + B(w-r): (b) mtmp = B(ntmp) = B(w-r) */
202:     VecAXPY(m,1.0,z);                  /* m = u + B(w-r): (c) m = z + mtmp            */
203:     if(pipegcr->unroll_w){
204:       KSP_MatMult(ksp,A,m,n);          /* n = Am                                      */
205:     }

207:     /* Finish split reductions for beta_k = (w,s_k), gamma = (w,r), delta = (w,w) */
208:     VecMDotEnd(w,j+2,redux,dots);
209:     gamma = dots[j];
210:     delta = PetscRealPart(dots[j+1]);

212:     /* compute new residual norm.
213:        this cannot be done before this point so that the natural norm
214:        is available for free and the communication involved is overlapped */
215:     switch (ksp->normtype) {
216:     case KSP_NORM_PRECONDITIONED:
217:       VecNorm(z,NORM_2,&rnorm);        /* ||r|| <- sqrt(z'*z) */
218:       break;
219:     case KSP_NORM_UNPRECONDITIONED:
220:       VecNorm(r,NORM_2,&rnorm);        /* ||r|| <- sqrt(r'*r) */
221:       break;
222:     case KSP_NORM_NATURAL:
223:       rnorm = PetscSqrtReal(PetscAbsScalar(gamma));         /* ||r|| <- sqrt(r,w)  */
224:       break;
225:     case KSP_NORM_NONE:
226:       rnorm = 0.0;
227:       break;
228:     default: SETERRQ1(PetscObjectComm((PetscObject)ksp),PETSC_ERR_SUP,"%s",KSPNormTypes[ksp->normtype]);
229:     }

231:     /* Check for convergence */
232:     PetscObjectSAWsTakeAccess((PetscObject)ksp);
233:     ksp->rnorm = rnorm;
234:     PetscObjectSAWsGrantAccess((PetscObject)ksp);
235:     KSPLogResidualHistory(ksp,rnorm);
236:     KSPMonitor(ksp,ksp->its,rnorm);
237:     (*ksp->converged)(ksp,ksp->its+1,rnorm,&ksp->reason,ksp->cnvP);
238:     if (ksp->reason) break;

240:     /* compute new eta and scale beta */
241:     *eta = 0.;
242:     for(k=PetscMax(0,i-mi),j=0;k<i;j++,k++){
243:       kdx = k % (pipegcr->mmax+1);
244:       betas[j] /= -etas[kdx];                               /* betak  /= etak */
245:       *eta -= ((PetscReal)(PetscAbsScalar(betas[j])*PetscAbsScalar(betas[j]))) * etas[kdx];
246:                                                             /* etaitmp = -betaik^2 * etak */
247:     }
248:     *eta += delta;                                          /* etai    = delta -betaik^2 * etak */

250:     /* check breakdown of eta = (s,s) */
251:     if(*eta < 0.) {
252:       pipegcr->norm_breakdown = PETSC_TRUE;
253:       PetscInfo1(ksp,"Restart due to square root breakdown at it = \n",ksp->its);
254:       break;
255:     } else {
256:       alpha= gamma/(*eta);                                  /* alpha = gamma/etai */
257:     }

259:     /* project out stored search directions using classical G-S */
260:     VecCopy(z,p);
261:     VecCopy(w,s);
262:     VecCopy(m,q);
263:     if(pipegcr->unroll_w){
264:       VecCopy(n,t);
265:       VecMAXPY(t,j,betas,pipegcr->told); /* ti <- n  - sum_k beta_k t_k */
266:     }
267:     VecMAXPY(p,j,betas,pipegcr->pold); /* pi <- ui - sum_k beta_k p_k */
268:     VecMAXPY(s,j,betas,pipegcr->sold); /* si <- wi - sum_k beta_k s_k */
269:     VecMAXPY(q,j,betas,pipegcr->qold); /* qi <- m  - sum_k beta_k q_k */

271:   } while (ksp->its < ksp->max_it);
272:   return(0);
273: }

275: static PetscErrorCode KSPSolve_PIPEGCR(KSP ksp)
276: {
277:   KSP_PIPEGCR    *pipegcr = (KSP_PIPEGCR*)ksp->data;
279:   Mat            A, B;
280:   Vec            x,b,r,z,w;
281:   PetscScalar    gamma;
282:   PetscReal      rnorm=0.0;
283:   PetscBool      issym;

286:   PetscCitationsRegister(citation,&cited);

288:   KSPGetOperators(ksp, &A, &B);
289:   x = ksp->vec_sol;
290:   b = ksp->vec_rhs;
291:   r = ksp->work[0];
292:   z = ksp->work[1];
293:   w = ksp->work[2]; /* w = Az = AB(r)                 (pipelining intermediate) */

295:   /* compute initial residual */
296:   if (!ksp->guess_zero) {
297:     KSP_MatMult(ksp,A,x,r);
298:     VecAYPX(r,-1.0,b);                 /* r <- b - Ax       */
299:   } else {
300:     VecCopy(b,r);                      /* r <- b            */
301:   }

303:   /* initial residual norm */
304:   KSP_PCApply(ksp,r,z);                /* z <- B(r)         */
305:   KSP_MatMult(ksp,A,z,w);              /* w <- Az           */
306:   VecDot(r,w,&gamma);                  /* gamma = (r,w)     */

308:   switch (ksp->normtype) {
309:     case KSP_NORM_PRECONDITIONED:
310:       VecNorm(z,NORM_2,&rnorm);        /* ||r|| <- sqrt(z'*z) */
311:       break;
312:     case KSP_NORM_UNPRECONDITIONED:
313:       VecNorm(r,NORM_2,&rnorm);        /* ||r|| <- sqrt(r'*r) */
314:       break;
315:     case KSP_NORM_NATURAL:
316:       rnorm = PetscSqrtReal(PetscAbsScalar(gamma));         /* ||r|| <- sqrt(r,w)  */
317:       break;
318:     case KSP_NORM_NONE:
319:       rnorm = 0.0;
320:       break;
321:     default: SETERRQ1(PetscObjectComm((PetscObject)ksp),PETSC_ERR_SUP,"%s",KSPNormTypes[ksp->normtype]);
322:   }

324:   /* Is A symmetric? */
325:   PetscObjectTypeCompareAny((PetscObject)A,&issym,"sbaij","seqsbaij","mpibaij","");
326:   if (!issym) {
327:     PetscInfo(A,"Matrix type is not any of MATSBAIJ,MATSEQSBAIJ,MATMPIBAIJ. Is matrix A symmetric (as required by CR methods)?");
328:   }

330:   /* logging */
331:   PetscObjectSAWsTakeAccess((PetscObject)ksp);
332:   ksp->its    = 0;
333:   ksp->rnorm0 = rnorm;
334:   PetscObjectSAWsGrantAccess((PetscObject)ksp);
335:   KSPLogResidualHistory(ksp,ksp->rnorm0);
336:   KSPMonitor(ksp,ksp->its,ksp->rnorm0);
337:   (*ksp->converged)(ksp,ksp->its,ksp->rnorm0,&ksp->reason,ksp->cnvP);
338:   if (ksp->reason) return(0);

340:   do {
341:     KSPSolve_PIPEGCR_cycle(ksp);
342:     if (ksp->reason) break;
343:     if (pipegcr->norm_breakdown) {
344:       pipegcr->n_restarts++;
345:       pipegcr->norm_breakdown = PETSC_FALSE;
346:     }
347:   } while (ksp->its < ksp->max_it);

349:   if (ksp->its >= ksp->max_it) ksp->reason = KSP_DIVERGED_ITS;
350:   return(0);
351: }

353: static PetscErrorCode KSPView_PIPEGCR(KSP ksp, PetscViewer viewer)
354: {
355:   KSP_PIPEGCR    *pipegcr = (KSP_PIPEGCR*)ksp->data;
357:   PetscBool      isascii,isstring;
358:   const char     *truncstr;

361:   PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII, &isascii);
362:   PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSTRING,&isstring);

364:   if(pipegcr->truncstrat == KSP_FCD_TRUNC_TYPE_STANDARD){
365:     truncstr = "Using standard truncation strategy";
366:   } else if(pipegcr->truncstrat == KSP_FCD_TRUNC_TYPE_NOTAY){
367:     truncstr = "Using Notay's truncation strategy";
368:   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Undefined FCD truncation strategy");
369: 

371:   if (isascii) {
372:     PetscViewerASCIIPrintf(viewer,"  max previous directions = %D\n",pipegcr->mmax);
373:     PetscViewerASCIIPrintf(viewer,"  preallocated %D directions\n",PetscMin(pipegcr->nprealloc,pipegcr->mmax+1));
374:     PetscViewerASCIIPrintf(viewer,"  %s\n",truncstr);
375:     PetscViewerASCIIPrintf(viewer,"  w unrolling = %D \n", pipegcr->unroll_w);
376:     PetscViewerASCIIPrintf(viewer,"  restarts performed = %D \n", pipegcr->n_restarts);
377:   } else if (isstring) {
378:     PetscViewerStringSPrintf(viewer, "max previous directions = %D, preallocated %D directions, %s truncation strategy", pipegcr->mmax,pipegcr->nprealloc,truncstr);
379:   }
380:   return(0);
381: }


384: static PetscErrorCode KSPSetUp_PIPEGCR(KSP ksp)
385: {
386:   KSP_PIPEGCR   *pipegcr = (KSP_PIPEGCR*)ksp->data;
388:   Mat            A;
389:   PetscBool      diagonalscale;
390:   const PetscInt nworkstd = 5;

393:   PCGetDiagonalScale(ksp->pc,&diagonalscale);
394:   if (diagonalscale) SETERRQ1(PetscObjectComm((PetscObject)ksp),PETSC_ERR_SUP,"Krylov method %s does not support diagonal scaling",((PetscObject)ksp)->type_name);

396:   KSPGetOperators(ksp, &A, NULL);

398:   /* Allocate "standard" work vectors */
399:   KSPSetWorkVecs(ksp,nworkstd);

401:   /* Allocated space for pointers to additional work vectors
402:     note that mmax is the number of previous directions, so we add 1 for the current direction */
403:   PetscMalloc6(pipegcr->mmax+1,&(pipegcr->pvecs),pipegcr->mmax+1,&(pipegcr->ppvecs),pipegcr->mmax+1,&(pipegcr->svecs), pipegcr->mmax+1,&(pipegcr->psvecs),pipegcr->mmax+1,&(pipegcr->qvecs),pipegcr->mmax+1,&(pipegcr->pqvecs));
404:   if (pipegcr->unroll_w) {
405:     PetscMalloc3(pipegcr->mmax+1,&(pipegcr->tvecs),pipegcr->mmax+1,&(pipegcr->ptvecs),pipegcr->mmax+2,&(pipegcr->told));
406:   }
407:   PetscMalloc4(pipegcr->mmax+2,&(pipegcr->pold),pipegcr->mmax+2,&(pipegcr->sold),pipegcr->mmax+2,&(pipegcr->qold),pipegcr->mmax+2,&(pipegcr->chunksizes));
408:   PetscMalloc3(pipegcr->mmax+2,&(pipegcr->dots),pipegcr->mmax+1,&(pipegcr->etas),pipegcr->mmax+2,&(pipegcr->redux));
409:   /* If the requested number of preallocated vectors is greater than mmax reduce nprealloc */
410:   if(pipegcr->nprealloc > pipegcr->mmax+1){
411:     PetscInfo2(NULL,"Requested nprealloc=%d is greater than m_max+1=%d. Resetting nprealloc = m_max+1.\n",pipegcr->nprealloc, pipegcr->mmax+1);
412:   }

414:   /* Preallocate additional work vectors */
415:   KSPAllocateVectors_PIPEGCR(ksp,pipegcr->nprealloc,pipegcr->nprealloc);

417:   PetscLogObjectMemory(
418:     (PetscObject)ksp,
419:     (pipegcr->mmax + 1) * 4 * sizeof(Vec*) +        /* old dirs  */
420:     (pipegcr->mmax + 1) * 4 * sizeof(Vec**) +       /* old pdirs */
421:     (pipegcr->mmax + 1) * 4 * sizeof(Vec*) +        /* p/s/qold/told */
422:     (pipegcr->mmax + 1) *     sizeof(PetscInt) +    /* chunksizes */
423:     (pipegcr->mmax + 2) *     sizeof(Vec*) +        /* redux */
424:     (pipegcr->mmax + 2) *     sizeof(PetscScalar) + /* dots */
425:     (pipegcr->mmax + 1) *     sizeof(PetscReal)     /* etas */
426:   );
427:   return(0);
428: }

430: static PetscErrorCode KSPReset_PIPEGCR(KSP ksp)
431: {
433:   KSP_PIPEGCR    *pipegcr = (KSP_PIPEGCR*)ksp->data;

436:   if (pipegcr->modifypc_destroy) {
437:     (*pipegcr->modifypc_destroy)(pipegcr->modifypc_ctx);
438:   }
439:   return(0);
440: }

442: static PetscErrorCode KSPDestroy_PIPEGCR(KSP ksp)
443: {
445:   PetscInt       i;
446:   KSP_PIPEGCR    *pipegcr = (KSP_PIPEGCR*)ksp->data;

449:   VecDestroyVecs(ksp->nwork,&ksp->work); /* Destroy "standard" work vecs */

451:   /* Destroy vectors for old directions and the arrays that manage pointers to them */
452:   if(pipegcr->nvecs){
453:     for(i=0;i<pipegcr->nchunks;i++){
454:       VecDestroyVecs(pipegcr->chunksizes[i],&pipegcr->ppvecs[i]);
455:       VecDestroyVecs(pipegcr->chunksizes[i],&pipegcr->psvecs[i]);
456:       VecDestroyVecs(pipegcr->chunksizes[i],&pipegcr->pqvecs[i]);
457:       if (pipegcr->unroll_w) {
458:         VecDestroyVecs(pipegcr->chunksizes[i],&pipegcr->ptvecs[i]);
459:       }
460:     }
461:   }

463:   PetscFree6(pipegcr->pvecs,pipegcr->ppvecs,pipegcr->svecs,pipegcr->psvecs,pipegcr->qvecs,pipegcr->pqvecs);
464:   PetscFree4(pipegcr->pold,pipegcr->sold,pipegcr->qold,pipegcr->chunksizes);
465:   PetscFree3(pipegcr->dots,pipegcr->etas,pipegcr->redux);
466:   if (pipegcr->unroll_w) {
467:     PetscFree3(pipegcr->tvecs,pipegcr->ptvecs,pipegcr->told);
468:   }

470:   KSPReset_PIPEGCR(ksp);
471:   KSPDestroyDefault(ksp);
472:   return(0);
473: }

475: /*@
476:   KSPPIPEGCRSetUnrollW - Set to PETSC_TRUE to use PIPEGCR with unrolling of the w vector

478:   Logically Collective on KSP

480:   Input Parameters:
481: +  ksp - the Krylov space context
482: -  unroll_w - use unrolling

484:   Level: intermediate

486:   Options Database:
487: . -ksp_pipegcr_unroll_w

489: .seealso: KSPPIPEGCR, KSPPIPEGCRSetTruncationType(), KSPPIPEGCRSetNprealloc(),KSPPIPEGCRGetUnrollW()
490: @*/
491: PetscErrorCode KSPPIPEGCRSetUnrollW(KSP ksp,PetscBool unroll_w)
492: {
493:   KSP_PIPEGCR *pipegcr=(KSP_PIPEGCR*)ksp->data;;

498:   pipegcr->unroll_w=unroll_w;
499:   return(0);
500: }

502: /*@
503:   KSPPIPEGCRGetUnrollW - Get information on PIPEGCR unrolling the w vector

505:   Logically Collective on KSP

507:    Input Parameter:
508: .  ksp - the Krylov space context

510:    Output Parameter:
511: .  unroll_w - PIPEGCR uses unrolling (bool)

513:   Level: intermediate

515:   Options Database:
516: . -ksp_pipegcr_unroll_w

518: .seealso: KSPPIPEGCR, KSPPIPEGCRGetTruncationType(), KSPPIPEGCRGetNprealloc(),KSPPIPEGCRSetUnrollW()
519: @*/
520: PetscErrorCode KSPPIPEGCRGetUnrollW(KSP ksp,PetscBool *unroll_w)
521: {
522:   KSP_PIPEGCR *pipegcr=(KSP_PIPEGCR*)ksp->data;;

526:   *unroll_w=pipegcr->unroll_w;
527:   return(0);
528: }

530: /*@
531:   KSPPIPEGCRSetMmax - set the maximum number of previous directions PIPEGCR will store for orthogonalization

533:   Note: mmax + 1 directions are stored (mmax previous ones along with a current one)
534:   and whether all are used in each iteration also depends on the truncation strategy
535:   (see KSPPIPEGCRSetTruncationType)

537:   Logically Collective on KSP

539:   Input Parameters:
540: +  ksp - the Krylov space context
541: -  mmax - the maximum number of previous directions to orthogonalize againt

543:   Level: intermediate

545:   Options Database:
546: . -ksp_pipegcr_mmax <N>

548: .seealso: KSPPIPEGCR, KSPPIPEGCRSetTruncationType(), KSPPIPEGCRSetNprealloc()
549: @*/
550: PetscErrorCode KSPPIPEGCRSetMmax(KSP ksp,PetscInt mmax)
551: {
552:   KSP_PIPEGCR *pipegcr=(KSP_PIPEGCR*)ksp->data;;

557:   pipegcr->mmax=mmax;
558:   return(0);
559: }

561: /*@
562:   KSPPIPEGCRGetMmax - get the maximum number of previous directions PIPEGCR will store

564:   Note: PIPEGCR stores mmax+1 directions at most (mmax previous ones, and one current one)

566:    Not Collective

568:    Input Parameter:
569: .  ksp - the Krylov space context

571:    Output Parameter:
572: .  mmax - the maximum number of previous directons allowed for orthogonalization

574:   Options Database:
575: . -ksp_pipegcr_mmax <N>

577:    Level: intermediate

579: .keywords: KSP, PIPEGCR, truncation

581: .seealso: KSPPIPEGCR, KSPPIPEGCRGetTruncationType(), KSPPIPEGCRGetNprealloc(), KSPPIPEGCRSetMmax()
582: @*/

584: PetscErrorCode KSPPIPEGCRGetMmax(KSP ksp,PetscInt *mmax)
585: {
586:   KSP_PIPEGCR *pipegcr=(KSP_PIPEGCR*)ksp->data;;

590:   *mmax=pipegcr->mmax;
591:   return(0);
592: }

594: /*@
595:   KSPPIPEGCRSetNprealloc - set the number of directions to preallocate with PIPEGCR

597:   Logically Collective on KSP

599:   Input Parameters:
600: +  ksp - the Krylov space context
601: -  nprealloc - the number of vectors to preallocate

603:   Level: advanced

605:   Options Database:
606: . -ksp_pipegcr_nprealloc <N>

608: .seealso: KSPPIPEGCR, KSPPIPEGCRGetTruncationType(), KSPPIPEGCRGetNprealloc()
609: @*/
610: PetscErrorCode KSPPIPEGCRSetNprealloc(KSP ksp,PetscInt nprealloc)
611: {
612:   KSP_PIPEGCR *pipegcr=(KSP_PIPEGCR*)ksp->data;;

617:   pipegcr->nprealloc = nprealloc;
618:   return(0);
619: }

621: /*@
622:   KSPPIPEGCRGetNprealloc - get the number of directions preallocate by PIPEGCR

624:    Not Collective

626:    Input Parameter:
627: .  ksp - the Krylov space context

629:    Output Parameter:
630: .  nprealloc - the number of directions preallocated

632:   Options Database:
633: . -ksp_pipegcr_nprealloc <N>

635:    Level: advanced

637: .keywords: KSP, PIPEGCR, truncation

639: .seealso: KSPPIPEGCR, KSPPIPEGCRGetTruncationType(), KSPPIPEGCRSetNprealloc()
640: @*/
641: PetscErrorCode KSPPIPEGCRGetNprealloc(KSP ksp,PetscInt *nprealloc)
642: {
643:   KSP_PIPEGCR *pipegcr=(KSP_PIPEGCR*)ksp->data;;

647:   *nprealloc = pipegcr->nprealloc;
648:   return(0);
649: }

651: /*@
652:   KSPPIPEGCRSetTruncationType - specify how many of its stored previous directions PIPEGCR uses during orthoganalization

654:   Logically Collective on KSP

656:   KSP_FCD_TRUNC_TYPE_STANDARD uses all (up to mmax) stored directions
657:   KSP_FCD_TRUNC_TYPE_NOTAY uses the last max(1,mod(i,mmax)) directions at iteration i=0,1,..

659:   Input Parameters:
660: +  ksp - the Krylov space context
661: -  truncstrat - the choice of strategy

663:   Level: intermediate

665:   Options Database:
666: . -ksp_pipegcr_truncation_type <standard,notay> - which stored basis vectors to orthogonalize against

668: .seealso: KSPPIPEGCR, KSPPIPEGCRSetTruncationType, KSPPIPEGCRTruncationType, KSPFCDTruncationType
669: @*/
670: PetscErrorCode KSPPIPEGCRSetTruncationType(KSP ksp,KSPFCDTruncationType truncstrat)
671: {
672:   KSP_PIPEGCR *pipegcr=(KSP_PIPEGCR*)ksp->data;;

677:   pipegcr->truncstrat=truncstrat;
678:   return(0);
679: }

681: /*@
682:   KSPPIPEGCRGetTruncationType - get the truncation strategy employed by PIPEGCR

684:   Not Collective

686:   KSP_FCD_TRUNC_TYPE_STANDARD uses all (up to mmax) stored directions
687:   KSP_FCD_TRUNC_TYPE_NOTAY uses the last max(1,mod(i,mmax)) directions at iteration i=0,1,..

689:    Input Parameter:
690: .  ksp - the Krylov space context

692:    Output Parameter:
693: .  truncstrat - the strategy type

695:   Options Database:
696: . -ksp_pipegcr_truncation_type <standard,notay> - which stored basis vectors to orthogonalize against

698:    Level: intermediate

700: .keywords: KSP, PIPEGCR, truncation

702: .seealso: KSPPIPEGCR, KSPPIPEGCRSetTruncationType, KSPPIPEGCRTruncationType, KSPFCDTruncationType
703: @*/
704: PetscErrorCode KSPPIPEGCRGetTruncationType(KSP ksp,KSPFCDTruncationType *truncstrat)
705: {
706:   KSP_PIPEGCR *pipegcr=(KSP_PIPEGCR*)ksp->data;;

710:   *truncstrat=pipegcr->truncstrat;
711:   return(0);
712: }

714: static PetscErrorCode KSPSetFromOptions_PIPEGCR(PetscOptionItems *PetscOptionsObject,KSP ksp)
715: {
717:   KSP_PIPEGCR    *pipegcr = (KSP_PIPEGCR*)ksp->data;
718:   PetscInt       mmax,nprealloc;
719:   PetscBool      flg;

722:   PetscOptionsHead(PetscOptionsObject,"KSP PIPEGCR options");
723:   PetscOptionsInt("-ksp_pipegcr_mmax","Number of search directions to storue","KSPPIPEGCRSetMmax",pipegcr->mmax,&mmax,&flg);
724:   if (flg) KSPPIPEGCRSetMmax(ksp,mmax);
725:   PetscOptionsInt("-ksp_pipegcr_nprealloc","Number of directions to preallocate","KSPPIPEGCRSetNprealloc",pipegcr->nprealloc,&nprealloc,&flg);
726:   if (flg) { KSPPIPEGCRSetNprealloc(ksp,nprealloc); }
727:   PetscOptionsEnum("-ksp_pipegcr_truncation_type","Truncation approach for directions","KSPFCGSetTruncationType",KSPFCDTruncationTypes,(PetscEnum)pipegcr->truncstrat,(PetscEnum*)&pipegcr->truncstrat,NULL);
728:   PetscOptionsBool("-ksp_pipegcr_unroll_w","Use unrolling of w","KSPPIPEGCRSetUnrollW",pipegcr->unroll_w,&pipegcr->unroll_w,NULL);
729:   PetscOptionsTail();
730:   return(0);
731: }

734: typedef PetscErrorCode (*KSPPIPEGCRModifyPCFunction)(KSP,PetscInt,PetscReal,void*);
735: typedef PetscErrorCode (*KSPPIPEGCRDestroyFunction)(void*);

737: static PetscErrorCode  KSPPIPEGCRSetModifyPC_PIPEGCR(KSP ksp,KSPPIPEGCRModifyPCFunction function,void *data,KSPPIPEGCRDestroyFunction destroy)
738: {
739:   KSP_PIPEGCR *pipegcr = (KSP_PIPEGCR*)ksp->data;

743:   pipegcr->modifypc         = function;
744:   pipegcr->modifypc_destroy = destroy;
745:   pipegcr->modifypc_ctx     = data;
746:   return(0);
747: }

749: /*@C
750:  KSPPIPEGCRSetModifyPC - Sets the routine used by PIPEGCR to modify the preconditioner.

752:  Logically Collective on KSP

754:  Input Parameters:
755:  +  ksp      - iterative context obtained from KSPCreate()
756:  .  function - user defined function to modify the preconditioner
757:  .  ctx      - user provided contex for the modify preconditioner function
758:  -  destroy  - the function to use to destroy the user provided application context.

760:  Calling Sequence of function:
761:   PetscErrorCode function (KSP ksp, PetscInt n, PetscReal rnorm, void *ctx)

763:  ksp   - iterative context
764:  n     - the total number of PIPEGCR iterations that have occurred
765:  rnorm - 2-norm residual value
766:  ctx   - the user provided application context

768:  Level: intermediate

770:  Notes:
771:  The default modifypc routine is KSPPIPEGCRModifyPCNoChange()

773:  .seealso: KSPPIPEGCRModifyPCNoChange()

775:  @*/
776: PetscErrorCode  KSPPIPEGCRSetModifyPC(KSP ksp,PetscErrorCode (*function)(KSP,PetscInt,PetscReal,void*),void *data,PetscErrorCode (*destroy)(void*))
777: {

781:   PetscUseMethod(ksp,"KSPPIPEGCRSetModifyPC_C",(KSP,PetscErrorCode (*)(KSP,PetscInt,PetscReal,void*),void *data,PetscErrorCode (*)(void*)),(ksp,function,data,destroy));
782:   return(0);
783: }

785: /*MC
786:      KSPPIPEGCR - Implements a Pipelined Generalized Conjugate Residual method.

788:   Options Database Keys:
789: .   -ksp_pipegcr_mmax <N>  - the max number of Krylov directions to orthogonalize against
790: .   -ksp_pipegcr_unroll_w - unroll w at the storage cost of a maximum of (mmax+1) extra vectors with the benefit of better pipelining (default: PETSC_TRUE)
791: .   -ksp_pipegcr_nprealloc <N> - the number of vectors to preallocated for storing Krylov directions. Once exhausted new directions are allocated blockwise (default: 5)
792: .   -ksp_pipegcr_truncation_type <standard,notay> - which previous search directions to orthogonalize against


795:   Notes:
796:     The PIPEGCR Krylov method supports non-symmetric matrices and permits the use of a preconditioner
797:     which may vary from one iteration to the next. Users can can define a method to vary the
798:     preconditioner between iterates via KSPPIPEGCRSetModifyPC().
799:     Restarts are solves with x0 not equal to zero. When a restart occurs, the initial starting
800:     solution is given by the current estimate for x which was obtained by the last restart
801:     iterations of the PIPEGCR algorithm.
802:     The method implemented requires at most the storage of 4 x mmax + 5 vectors, roughly twice as much as GCR.

804:     Only supports left preconditioning.

806:     The natural "norm" for this method is (u,Au), where u is the preconditioned residual. This norm is available at no additional computational cost, as with standard CG. Choosing preconditioned or unpreconditioned norm types involves a blocking reduction which prevents any benefit from pipelining.

808:   Reference:
809:     P. Sanan, S.M. Schnepp, and D.A. May,
810:     "Pipelined, Flexible Krylov Subspace Methods,"
811:     SIAM Journal on Scientific Computing 2016 38:5, C441-C470,
812:     DOI: 10.1137/15M1049130

814:    Level: intermediate

816: .seealso:  KSPCreate(), KSPSetType(), KSPType (for list of available types), KSP,
817:            KSPPIPEFGMRES, KSPPIPECG, KSPPIPECR, KSPPIPEFCG,KSPPIPEGCRSetTruncationType(),KSPPIPEGCRSetNprealloc(),KSPPIPEGCRSetUnrollW(),KSPPIPEGCRSetMmax()


820: M*/
821: PETSC_EXTERN PetscErrorCode KSPCreate_PIPEGCR(KSP ksp)
822: {
824:   KSP_PIPEGCR    *pipegcr;

827:   PetscNewLog(ksp,&pipegcr);
828:   pipegcr->mmax       = KSPPIPEGCR_DEFAULT_MMAX;
829:   pipegcr->nprealloc  = KSPPIPEGCR_DEFAULT_NPREALLOC;
830:   pipegcr->nvecs      = 0;
831:   pipegcr->vecb       = KSPPIPEGCR_DEFAULT_VECB;
832:   pipegcr->nchunks    = 0;
833:   pipegcr->truncstrat = KSPPIPEGCR_DEFAULT_TRUNCSTRAT;
834:   pipegcr->n_restarts = 0;
835:   pipegcr->unroll_w   = KSPPIPEGCR_DEFAULT_UNROLL_W;

837:   ksp->data       = (void*)pipegcr;

839:   /* natural norm is for free, precond+unprecond norm require non-overlapped reduction */
840:   KSPSetSupportedNorm(ksp,KSP_NORM_NATURAL,PC_LEFT,2);
841:   KSPSetSupportedNorm(ksp,KSP_NORM_PRECONDITIONED,PC_LEFT,1);
842:   KSPSetSupportedNorm(ksp,KSP_NORM_UNPRECONDITIONED,PC_LEFT,1);
843:   KSPSetSupportedNorm(ksp,KSP_NORM_NONE,PC_LEFT,1);

845:   ksp->ops->setup          = KSPSetUp_PIPEGCR;
846:   ksp->ops->solve          = KSPSolve_PIPEGCR;
847:   ksp->ops->reset          = KSPReset_PIPEGCR;
848:   ksp->ops->destroy        = KSPDestroy_PIPEGCR;
849:   ksp->ops->view           = KSPView_PIPEGCR;
850:   ksp->ops->setfromoptions = KSPSetFromOptions_PIPEGCR;
851:   ksp->ops->buildsolution  = KSPBuildSolutionDefault;
852:   ksp->ops->buildresidual  = KSPBuildResidualDefault;

854:   PetscObjectComposeFunction((PetscObject)ksp,"KSPPIPEGCRSetModifyPC_C",KSPPIPEGCRSetModifyPC_PIPEGCR);
855:   return(0);
856: }