Actual source code: pipegcr.c
1: /*
2: Contributed by Sascha M. Schnepp and Patrick Sanan
3: */
5: #include "petscsys.h"
6: #include <../src/ksp/ksp/impls/gcr/pipegcr/pipegcrimpl.h>
/* Flag handed to PetscCitationsRegister() together with the BibTeX entry below (see KSPSolve_PIPEGCR) */
8: static PetscBool cited = PETSC_FALSE;
/* BibTeX entry registered by KSPSolve_PIPEGCR; the text is emitted verbatim, so it must stay valid BibTeX */
9: static const char citation[] = "@article{SSM2016,\n"
10: " author = {P. Sanan and S.M. Schnepp and D.A. May},\n"
11: " title = {Pipelined, Flexible Krylov Subspace Methods},\n"
12: " journal = {SIAM Journal on Scientific Computing},\n"
13: " volume = {38},\n"
14: " number = {5},\n"
15: " pages = {C441-C470},\n"
16: " year = {2016},\n"
17: " doi = {10.1137/15M1049130},\n"
18: " URL = {http://dx.doi.org/10.1137/15M1049130},\n"
19: " eprint = {http://dx.doi.org/10.1137/15M1049130}\n"
20: "}\n";
/* Default settings stored in a freshly created KSP_PIPEGCR context by KSPCreate_PIPEGCR */
/* mmax: maximum number of previous directions kept for orthogonalization */
22: #define KSPPIPEGCR_DEFAULT_MMAX 15
/* nprealloc: number of directions requested from KSPAllocateVectors_PIPEGCR during setup */
23: #define KSPPIPEGCR_DEFAULT_NPREALLOC 5
/* vecb: chunk size passed to KSPAllocateVectors_PIPEGCR when the pool grows during a solve */
24: #define KSPPIPEGCR_DEFAULT_VECB 5
/* truncstrat: truncation strategy selecting how many stored directions are used per iteration */
25: #define KSPPIPEGCR_DEFAULT_TRUNCSTRAT KSP_FCD_TRUNC_TYPE_NOTAY
/* unroll_w: whether the additional t vectors are stored so that w is updated by an AXPY instead of a MatMult */
26: #define KSPPIPEGCR_DEFAULT_UNROLL_W PETSC_TRUE
28: #include <petscksp.h>
30: static PetscErrorCode KSPAllocateVectors_PIPEGCR(KSP ksp, PetscInt nvecsneeded, PetscInt chunksize)
31: {
32: PetscInt i;
33: KSP_PIPEGCR *pipegcr;
34: PetscInt nnewvecs, nvecsprev;
36: pipegcr = (KSP_PIPEGCR *)ksp->data;
38: /* Allocate enough new vectors to add chunksize new vectors, reach nvecsneedtotal, or to reach mmax+1, whichever is smallest */
39: if (pipegcr->nvecs < PetscMin(pipegcr->mmax + 1, nvecsneeded)) {
40: nvecsprev = pipegcr->nvecs;
41: nnewvecs = PetscMin(PetscMax(nvecsneeded - pipegcr->nvecs, chunksize), pipegcr->mmax + 1 - pipegcr->nvecs);
42: KSPCreateVecs(ksp, nnewvecs, &pipegcr->ppvecs[pipegcr->nchunks], 0, NULL);
43: KSPCreateVecs(ksp, nnewvecs, &pipegcr->psvecs[pipegcr->nchunks], 0, NULL);
44: KSPCreateVecs(ksp, nnewvecs, &pipegcr->pqvecs[pipegcr->nchunks], 0, NULL);
45: if (pipegcr->unroll_w) { KSPCreateVecs(ksp, nnewvecs, &pipegcr->ptvecs[pipegcr->nchunks], 0, NULL); }
46: pipegcr->nvecs += nnewvecs;
47: for (i = 0; i < nnewvecs; i++) {
48: pipegcr->qvecs[nvecsprev + i] = pipegcr->pqvecs[pipegcr->nchunks][i];
49: pipegcr->pvecs[nvecsprev + i] = pipegcr->ppvecs[pipegcr->nchunks][i];
50: pipegcr->svecs[nvecsprev + i] = pipegcr->psvecs[pipegcr->nchunks][i];
51: if (pipegcr->unroll_w) pipegcr->tvecs[nvecsprev + i] = pipegcr->ptvecs[pipegcr->nchunks][i];
52: }
53: pipegcr->chunksizes[pipegcr->nchunks] = nnewvecs;
54: pipegcr->nchunks++;
55: }
56: return 0;
57: }
59: static PetscErrorCode KSPSolve_PIPEGCR_cycle(KSP ksp)
60: {
61: KSP_PIPEGCR *pipegcr = (KSP_PIPEGCR *)ksp->data;
62: Mat A, B;
63: Vec x, r, b, z, w, m, n, p, s, q, t, *redux;
64: PetscInt i, j, k, idx, kdx, mi;
65: PetscScalar alpha = 0.0, gamma, *betas, *dots;
66: PetscReal rnorm = 0.0, delta, *eta, *etas;
68: /* !!PS We have not checked these routines for use with complex numbers. The inner products
69: are likely not defined correctly for that case */
72: KSPGetOperators(ksp, &A, &B);
73: x = ksp->vec_sol;
74: b = ksp->vec_rhs;
75: r = ksp->work[0];
76: z = ksp->work[1];
77: w = ksp->work[2]; /* w = Az = AB(r) (pipelining intermediate) */
78: m = ksp->work[3]; /* m = B(w) = B(Az) = B(AB(r)) (pipelining intermediate) */
79: n = ksp->work[4]; /* n = AB(w) = AB(Az) = AB(AB(r)) (pipelining intermediate) */
80: p = pipegcr->pvecs[0];
81: s = pipegcr->svecs[0];
82: q = pipegcr->qvecs[0];
83: t = pipegcr->unroll_w ? pipegcr->tvecs[0] : NULL;
85: redux = pipegcr->redux;
86: dots = pipegcr->dots;
87: etas = pipegcr->etas;
88: betas = dots; /* dots takes the result of all dot products of which the betas are a subset */
90: /* cycle initial residual */
91: KSP_MatMult(ksp, A, x, r);
92: VecAYPX(r, -1.0, b); /* r <- b - Ax */
93: KSP_PCApply(ksp, r, z); /* z <- B(r) */
94: KSP_MatMult(ksp, A, z, w); /* w <- Az */
96: /* initialization of other variables and pipelining intermediates */
97: VecCopy(z, p);
98: KSP_MatMult(ksp, A, p, s);
100: /* overlap initial computation of delta, gamma */
101: redux[0] = w;
102: redux[1] = r;
103: VecMDotBegin(w, 2, redux, dots); /* Start split reductions for gamma = (w,r), delta = (w,w) */
104: PetscCommSplitReductionBegin(PetscObjectComm((PetscObject)s)); /* perform asynchronous reduction */
105: KSP_PCApply(ksp, s, q); /* q = B(s) */
106: if (pipegcr->unroll_w) { KSP_MatMult(ksp, A, q, t); /* t = Aq */ }
107: VecMDotEnd(w, 2, redux, dots); /* Finish split reduction */
108: delta = PetscRealPart(dots[0]);
109: etas[0] = delta;
110: gamma = dots[1];
111: alpha = gamma / delta;
113: i = 0;
114: do {
115: PetscObjectSAWsTakeAccess((PetscObject)ksp);
116: ksp->its++;
117: PetscObjectSAWsGrantAccess((PetscObject)ksp);
119: /* update solution, residuals, .. */
120: VecAXPY(x, +alpha, p);
121: VecAXPY(r, -alpha, s);
122: VecAXPY(z, -alpha, q);
123: if (pipegcr->unroll_w) {
124: VecAXPY(w, -alpha, t);
125: } else {
126: KSP_MatMult(ksp, A, z, w);
127: }
129: /* Computations of current iteration done */
130: i++;
132: if (pipegcr->modifypc) (*pipegcr->modifypc)(ksp, ksp->its, ksp->rnorm, pipegcr->modifypc_ctx);
134: /* If needbe, allocate a new chunk of vectors */
135: KSPAllocateVectors_PIPEGCR(ksp, i + 1, pipegcr->vecb);
137: /* Note that we wrap around and start clobbering old vectors */
138: idx = i % (pipegcr->mmax + 1);
139: p = pipegcr->pvecs[idx];
140: s = pipegcr->svecs[idx];
141: q = pipegcr->qvecs[idx];
142: if (pipegcr->unroll_w) t = pipegcr->tvecs[idx];
143: eta = pipegcr->etas + idx;
145: /* number of old directions to orthogonalize against */
146: switch (pipegcr->truncstrat) {
147: case KSP_FCD_TRUNC_TYPE_STANDARD:
148: mi = pipegcr->mmax;
149: break;
150: case KSP_FCD_TRUNC_TYPE_NOTAY:
151: mi = ((i - 1) % pipegcr->mmax) + 1;
152: break;
153: default:
154: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Unrecognized Truncation Strategy");
155: }
157: /* Pick old p,s,q,zeta in a way suitable for VecMDot */
158: for (k = PetscMax(0, i - mi), j = 0; k < i; j++, k++) {
159: kdx = k % (pipegcr->mmax + 1);
160: pipegcr->pold[j] = pipegcr->pvecs[kdx];
161: pipegcr->sold[j] = pipegcr->svecs[kdx];
162: pipegcr->qold[j] = pipegcr->qvecs[kdx];
163: if (pipegcr->unroll_w) pipegcr->told[j] = pipegcr->tvecs[kdx];
164: redux[j] = pipegcr->svecs[kdx];
165: }
166: /* If the above loop is not run redux contains only r and w => all beta_k = 0, only gamma, delta != 0 */
167: redux[j] = r;
168: redux[j + 1] = w;
170: /* Dot products */
171: /* Start split reductions for beta_k = (w,s_k), gamma = (w,r), delta = (w,w) */
172: VecMDotBegin(w, j + 2, redux, dots);
173: PetscCommSplitReductionBegin(PetscObjectComm((PetscObject)w));
175: /* B(w-r) + u stabilization */
176: VecWAXPY(n, -1.0, r, w); /* m = u + B(w-r): (a) ntmp = w-r */
177: KSP_PCApply(ksp, n, m); /* m = u + B(w-r): (b) mtmp = B(ntmp) = B(w-r) */
178: VecAXPY(m, 1.0, z); /* m = u + B(w-r): (c) m = z + mtmp */
179: if (pipegcr->unroll_w) { KSP_MatMult(ksp, A, m, n); /* n = Am */ }
181: /* Finish split reductions for beta_k = (w,s_k), gamma = (w,r), delta = (w,w) */
182: VecMDotEnd(w, j + 2, redux, dots);
183: gamma = dots[j];
184: delta = PetscRealPart(dots[j + 1]);
186: /* compute new residual norm.
187: this cannot be done before this point so that the natural norm
188: is available for free and the communication involved is overlapped */
189: switch (ksp->normtype) {
190: case KSP_NORM_PRECONDITIONED:
191: VecNorm(z, NORM_2, &rnorm); /* ||r|| <- sqrt(z'*z) */
192: break;
193: case KSP_NORM_UNPRECONDITIONED:
194: VecNorm(r, NORM_2, &rnorm); /* ||r|| <- sqrt(r'*r) */
195: break;
196: case KSP_NORM_NATURAL:
197: rnorm = PetscSqrtReal(PetscAbsScalar(gamma)); /* ||r|| <- sqrt(r,w) */
198: break;
199: case KSP_NORM_NONE:
200: rnorm = 0.0;
201: break;
202: default:
203: SETERRQ(PetscObjectComm((PetscObject)ksp), PETSC_ERR_SUP, "%s", KSPNormTypes[ksp->normtype]);
204: }
206: /* Check for convergence */
207: PetscObjectSAWsTakeAccess((PetscObject)ksp);
208: ksp->rnorm = rnorm;
209: PetscObjectSAWsGrantAccess((PetscObject)ksp);
210: KSPLogResidualHistory(ksp, rnorm);
211: KSPMonitor(ksp, ksp->its, rnorm);
212: (*ksp->converged)(ksp, ksp->its, rnorm, &ksp->reason, ksp->cnvP);
213: if (ksp->reason) return 0;
215: /* compute new eta and scale beta */
216: *eta = 0.;
217: for (k = PetscMax(0, i - mi), j = 0; k < i; j++, k++) {
218: kdx = k % (pipegcr->mmax + 1);
219: betas[j] /= -etas[kdx]; /* betak /= etak */
220: *eta -= ((PetscReal)(PetscAbsScalar(betas[j]) * PetscAbsScalar(betas[j]))) * etas[kdx];
221: /* etaitmp = -betaik^2 * etak */
222: }
223: *eta += delta; /* etai = delta -betaik^2 * etak */
225: /* check breakdown of eta = (s,s) */
226: if (*eta < 0.) {
227: pipegcr->norm_breakdown = PETSC_TRUE;
228: PetscInfo(ksp, "Restart due to square root breakdown at it = %" PetscInt_FMT "\n", ksp->its);
229: break;
230: } else {
231: alpha = gamma / (*eta); /* alpha = gamma/etai */
232: }
234: /* project out stored search directions using classical G-S */
235: VecCopy(z, p);
236: VecCopy(w, s);
237: VecCopy(m, q);
238: if (pipegcr->unroll_w) {
239: VecCopy(n, t);
240: VecMAXPY(t, j, betas, pipegcr->told); /* ti <- n - sum_k beta_k t_k */
241: }
242: VecMAXPY(p, j, betas, pipegcr->pold); /* pi <- ui - sum_k beta_k p_k */
243: VecMAXPY(s, j, betas, pipegcr->sold); /* si <- wi - sum_k beta_k s_k */
244: VecMAXPY(q, j, betas, pipegcr->qold); /* qi <- m - sum_k beta_k q_k */
246: } while (ksp->its < ksp->max_it);
247: if (ksp->its >= ksp->max_it) ksp->reason = KSP_DIVERGED_ITS;
248: return 0;
249: }
251: static PetscErrorCode KSPSolve_PIPEGCR(KSP ksp)
252: {
253: KSP_PIPEGCR *pipegcr = (KSP_PIPEGCR *)ksp->data;
254: Mat A, B;
255: Vec x, b, r, z, w;
256: PetscScalar gamma;
257: PetscReal rnorm = 0.0;
258: PetscBool issym;
260: PetscCitationsRegister(citation, &cited);
262: KSPGetOperators(ksp, &A, &B);
263: x = ksp->vec_sol;
264: b = ksp->vec_rhs;
265: r = ksp->work[0];
266: z = ksp->work[1];
267: w = ksp->work[2]; /* w = Az = AB(r) (pipelining intermediate) */
269: /* compute initial residual */
270: if (!ksp->guess_zero) {
271: KSP_MatMult(ksp, A, x, r);
272: VecAYPX(r, -1.0, b); /* r <- b - Ax */
273: } else {
274: VecCopy(b, r); /* r <- b */
275: }
277: /* initial residual norm */
278: KSP_PCApply(ksp, r, z); /* z <- B(r) */
279: KSP_MatMult(ksp, A, z, w); /* w <- Az */
280: VecDot(r, w, &gamma); /* gamma = (r,w) */
282: switch (ksp->normtype) {
283: case KSP_NORM_PRECONDITIONED:
284: VecNorm(z, NORM_2, &rnorm); /* ||r|| <- sqrt(z'*z) */
285: break;
286: case KSP_NORM_UNPRECONDITIONED:
287: VecNorm(r, NORM_2, &rnorm); /* ||r|| <- sqrt(r'*r) */
288: break;
289: case KSP_NORM_NATURAL:
290: rnorm = PetscSqrtReal(PetscAbsScalar(gamma)); /* ||r|| <- sqrt(r,w) */
291: break;
292: case KSP_NORM_NONE:
293: rnorm = 0.0;
294: break;
295: default:
296: SETERRQ(PetscObjectComm((PetscObject)ksp), PETSC_ERR_SUP, "%s", KSPNormTypes[ksp->normtype]);
297: }
299: /* Is A symmetric? */
300: PetscObjectTypeCompareAny((PetscObject)A, &issym, MATSBAIJ, MATSEQSBAIJ, MATMPISBAIJ, "");
301: if (!issym) PetscInfo(A, "Matrix type is not any of MATSBAIJ,MATSEQSBAIJ,MATMPISBAIJ. Is matrix A symmetric (as required by CR methods)?");
303: /* logging */
304: PetscObjectSAWsTakeAccess((PetscObject)ksp);
305: ksp->its = 0;
306: ksp->rnorm0 = rnorm;
307: PetscObjectSAWsGrantAccess((PetscObject)ksp);
308: KSPLogResidualHistory(ksp, ksp->rnorm0);
309: KSPMonitor(ksp, ksp->its, ksp->rnorm0);
310: (*ksp->converged)(ksp, ksp->its, ksp->rnorm0, &ksp->reason, ksp->cnvP);
311: if (ksp->reason) return 0;
313: do {
314: KSPSolve_PIPEGCR_cycle(ksp);
315: if (ksp->reason) return 0;
316: if (pipegcr->norm_breakdown) {
317: pipegcr->n_restarts++;
318: pipegcr->norm_breakdown = PETSC_FALSE;
319: }
320: } while (ksp->its < ksp->max_it);
322: if (ksp->its >= ksp->max_it) ksp->reason = KSP_DIVERGED_ITS;
323: return 0;
324: }
326: static PetscErrorCode KSPView_PIPEGCR(KSP ksp, PetscViewer viewer)
327: {
328: KSP_PIPEGCR *pipegcr = (KSP_PIPEGCR *)ksp->data;
329: PetscBool isascii, isstring;
330: const char *truncstr;
332: PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii);
333: PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSTRING, &isstring);
335: if (pipegcr->truncstrat == KSP_FCD_TRUNC_TYPE_STANDARD) {
336: truncstr = "Using standard truncation strategy";
337: } else if (pipegcr->truncstrat == KSP_FCD_TRUNC_TYPE_NOTAY) {
338: truncstr = "Using Notay's truncation strategy";
339: } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Undefined FCD truncation strategy");
341: if (isascii) {
342: PetscViewerASCIIPrintf(viewer, " max previous directions = %" PetscInt_FMT "\n", pipegcr->mmax);
343: PetscViewerASCIIPrintf(viewer, " preallocated %" PetscInt_FMT " directions\n", PetscMin(pipegcr->nprealloc, pipegcr->mmax + 1));
344: PetscViewerASCIIPrintf(viewer, " %s\n", truncstr);
345: PetscViewerASCIIPrintf(viewer, " w unrolling = %s \n", PetscBools[pipegcr->unroll_w]);
346: PetscViewerASCIIPrintf(viewer, " restarts performed = %" PetscInt_FMT " \n", pipegcr->n_restarts);
347: } else if (isstring) {
348: PetscViewerStringSPrintf(viewer, "max previous directions = %" PetscInt_FMT ", preallocated %" PetscInt_FMT " directions, %s truncation strategy", pipegcr->mmax, pipegcr->nprealloc, truncstr);
349: }
350: return 0;
351: }
353: static PetscErrorCode KSPSetUp_PIPEGCR(KSP ksp)
354: {
355: KSP_PIPEGCR *pipegcr = (KSP_PIPEGCR *)ksp->data;
356: Mat A;
357: PetscBool diagonalscale;
358: const PetscInt nworkstd = 5;
360: PCGetDiagonalScale(ksp->pc, &diagonalscale);
363: KSPGetOperators(ksp, &A, NULL);
365: /* Allocate "standard" work vectors */
366: KSPSetWorkVecs(ksp, nworkstd);
368: /* Allocated space for pointers to additional work vectors
369: note that mmax is the number of previous directions, so we add 1 for the current direction */
370: PetscMalloc6(pipegcr->mmax + 1, &(pipegcr->pvecs), pipegcr->mmax + 1, &(pipegcr->ppvecs), pipegcr->mmax + 1, &(pipegcr->svecs), pipegcr->mmax + 1, &(pipegcr->psvecs), pipegcr->mmax + 1, &(pipegcr->qvecs), pipegcr->mmax + 1, &(pipegcr->pqvecs));
371: if (pipegcr->unroll_w) PetscMalloc3(pipegcr->mmax + 1, &(pipegcr->tvecs), pipegcr->mmax + 1, &(pipegcr->ptvecs), pipegcr->mmax + 2, &(pipegcr->told));
372: PetscMalloc4(pipegcr->mmax + 2, &(pipegcr->pold), pipegcr->mmax + 2, &(pipegcr->sold), pipegcr->mmax + 2, &(pipegcr->qold), pipegcr->mmax + 2, &(pipegcr->chunksizes));
373: PetscMalloc3(pipegcr->mmax + 2, &(pipegcr->dots), pipegcr->mmax + 1, &(pipegcr->etas), pipegcr->mmax + 2, &(pipegcr->redux));
374: /* If the requested number of preallocated vectors is greater than mmax reduce nprealloc */
375: if (pipegcr->nprealloc > pipegcr->mmax + 1) PetscInfo(NULL, "Requested nprealloc=%" PetscInt_FMT " is greater than m_max+1=%" PetscInt_FMT ". Resetting nprealloc = m_max+1.\n", pipegcr->nprealloc, pipegcr->mmax + 1);
377: /* Preallocate additional work vectors */
378: KSPAllocateVectors_PIPEGCR(ksp, pipegcr->nprealloc, pipegcr->nprealloc);
379: return 0;
380: }
382: static PetscErrorCode KSPReset_PIPEGCR(KSP ksp)
383: {
384: KSP_PIPEGCR *pipegcr = (KSP_PIPEGCR *)ksp->data;
386: if (pipegcr->modifypc_destroy) (*pipegcr->modifypc_destroy)(pipegcr->modifypc_ctx);
387: return 0;
388: }
390: static PetscErrorCode KSPDestroy_PIPEGCR(KSP ksp)
391: {
392: PetscInt i;
393: KSP_PIPEGCR *pipegcr = (KSP_PIPEGCR *)ksp->data;
395: VecDestroyVecs(ksp->nwork, &ksp->work); /* Destroy "standard" work vecs */
397: /* Destroy vectors for old directions and the arrays that manage pointers to them */
398: if (pipegcr->nvecs) {
399: for (i = 0; i < pipegcr->nchunks; i++) {
400: VecDestroyVecs(pipegcr->chunksizes[i], &pipegcr->ppvecs[i]);
401: VecDestroyVecs(pipegcr->chunksizes[i], &pipegcr->psvecs[i]);
402: VecDestroyVecs(pipegcr->chunksizes[i], &pipegcr->pqvecs[i]);
403: if (pipegcr->unroll_w) VecDestroyVecs(pipegcr->chunksizes[i], &pipegcr->ptvecs[i]);
404: }
405: }
407: PetscFree6(pipegcr->pvecs, pipegcr->ppvecs, pipegcr->svecs, pipegcr->psvecs, pipegcr->qvecs, pipegcr->pqvecs);
408: PetscFree4(pipegcr->pold, pipegcr->sold, pipegcr->qold, pipegcr->chunksizes);
409: PetscFree3(pipegcr->dots, pipegcr->etas, pipegcr->redux);
410: if (pipegcr->unroll_w) PetscFree3(pipegcr->tvecs, pipegcr->ptvecs, pipegcr->told);
412: KSPReset_PIPEGCR(ksp);
413: PetscObjectComposeFunction((PetscObject)ksp, "KSPPIPEGCRSetModifyPC_C", NULL);
414: KSPDestroyDefault(ksp);
415: return 0;
416: }
418: /*@
419: KSPPIPEGCRSetUnrollW - Set to PETSC_TRUE to use PIPEGCR with unrolling of the w vector
421: Logically Collective on ksp
423: Input Parameters:
424: + ksp - the Krylov space context
425: - unroll_w - use unrolling
427: Level: intermediate
429: Options Database:
430: . -ksp_pipegcr_unroll_w <bool> - use unrolling
432: .seealso: `KSPPIPEGCR`, `KSPPIPEGCRSetTruncationType()`, `KSPPIPEGCRSetNprealloc()`, `KSPPIPEGCRGetUnrollW()`
433: @*/
434: PetscErrorCode KSPPIPEGCRSetUnrollW(KSP ksp, PetscBool unroll_w)
435: {
436: KSP_PIPEGCR *pipegcr = (KSP_PIPEGCR *)ksp->data;
440: pipegcr->unroll_w = unroll_w;
441: return 0;
442: }
444: /*@
445: KSPPIPEGCRGetUnrollW - Get information on PIPEGCR unrolling the w vector
447: Logically Collective on ksp
449: Input Parameter:
450: . ksp - the Krylov space context
452: Output Parameter:
453: . unroll_w - PIPEGCR uses unrolling (bool)
455: Level: intermediate
457: .seealso: `KSPPIPEGCR`, `KSPPIPEGCRGetTruncationType()`, `KSPPIPEGCRGetNprealloc()`, `KSPPIPEGCRSetUnrollW()`
458: @*/
459: PetscErrorCode KSPPIPEGCRGetUnrollW(KSP ksp, PetscBool *unroll_w)
460: {
461: KSP_PIPEGCR *pipegcr = (KSP_PIPEGCR *)ksp->data;
464: *unroll_w = pipegcr->unroll_w;
465: return 0;
466: }
468: /*@
469: KSPPIPEGCRSetMmax - set the maximum number of previous directions PIPEGCR will store for orthogonalization
471: Note: mmax + 1 directions are stored (mmax previous ones along with a current one)
472: and whether all are used in each iteration also depends on the truncation strategy
473: (see KSPPIPEGCRSetTruncationType)
475: Logically Collective on ksp
477: Input Parameters:
478: + ksp - the Krylov space context
479: - mmax - the maximum number of previous directions to orthogonalize againt
481: Level: intermediate
483: Options Database:
484: . -ksp_pipegcr_mmax <N> - maximum number of previous directions
486: .seealso: `KSPPIPEGCR`, `KSPPIPEGCRSetTruncationType()`, `KSPPIPEGCRSetNprealloc()`
487: @*/
488: PetscErrorCode KSPPIPEGCRSetMmax(KSP ksp, PetscInt mmax)
489: {
490: KSP_PIPEGCR *pipegcr = (KSP_PIPEGCR *)ksp->data;
494: pipegcr->mmax = mmax;
495: return 0;
496: }
498: /*@
499: KSPPIPEGCRGetMmax - get the maximum number of previous directions PIPEGCR will store
501: Note: PIPEGCR stores mmax+1 directions at most (mmax previous ones, and one current one)
503: Not Collective
505: Input Parameter:
506: . ksp - the Krylov space context
508: Output Parameter:
509: . mmax - the maximum number of previous directions allowed for orthogonalization
511: Level: intermediate
513: .seealso: `KSPPIPEGCR`, `KSPPIPEGCRGetTruncationType()`, `KSPPIPEGCRGetNprealloc()`, `KSPPIPEGCRSetMmax()`
514: @*/
516: PetscErrorCode KSPPIPEGCRGetMmax(KSP ksp, PetscInt *mmax)
517: {
518: KSP_PIPEGCR *pipegcr = (KSP_PIPEGCR *)ksp->data;
521: *mmax = pipegcr->mmax;
522: return 0;
523: }
525: /*@
526: KSPPIPEGCRSetNprealloc - set the number of directions to preallocate with PIPEGCR
528: Logically Collective on ksp
530: Input Parameters:
531: + ksp - the Krylov space context
532: - nprealloc - the number of vectors to preallocate
534: Level: advanced
536: Options Database:
537: . -ksp_pipegcr_nprealloc <N> - number of vectors to preallocate
539: .seealso: `KSPPIPEGCR`, `KSPPIPEGCRGetTruncationType()`, `KSPPIPEGCRGetNprealloc()`
540: @*/
541: PetscErrorCode KSPPIPEGCRSetNprealloc(KSP ksp, PetscInt nprealloc)
542: {
543: KSP_PIPEGCR *pipegcr = (KSP_PIPEGCR *)ksp->data;
547: pipegcr->nprealloc = nprealloc;
548: return 0;
549: }
551: /*@
552: KSPPIPEGCRGetNprealloc - get the number of directions preallocate by PIPEGCR
554: Not Collective
556: Input Parameter:
557: . ksp - the Krylov space context
559: Output Parameter:
560: . nprealloc - the number of directions preallocated
562: Level: advanced
564: .seealso: `KSPPIPEGCR`, `KSPPIPEGCRGetTruncationType()`, `KSPPIPEGCRSetNprealloc()`
565: @*/
566: PetscErrorCode KSPPIPEGCRGetNprealloc(KSP ksp, PetscInt *nprealloc)
567: {
568: KSP_PIPEGCR *pipegcr = (KSP_PIPEGCR *)ksp->data;
571: *nprealloc = pipegcr->nprealloc;
572: return 0;
573: }
575: /*@
576: KSPPIPEGCRSetTruncationType - specify how many of its stored previous directions PIPEGCR uses during orthoganalization
578: Logically Collective on ksp
580: KSP_FCD_TRUNC_TYPE_STANDARD uses all (up to mmax) stored directions
581: KSP_FCD_TRUNC_TYPE_NOTAY uses the last max(1,mod(i,mmax)) directions at iteration i=0,1,..
583: Input Parameters:
584: + ksp - the Krylov space context
585: - truncstrat - the choice of strategy
587: Level: intermediate
589: Options Database:
590: . -ksp_pipegcr_truncation_type <standard,notay> - which stored basis vectors to orthogonalize against
592: .seealso: `KSPPIPEGCR`, `KSPPIPEGCRSetTruncationType`, `KSPPIPEGCRTruncationType`, `KSPFCDTruncationType`
593: @*/
594: PetscErrorCode KSPPIPEGCRSetTruncationType(KSP ksp, KSPFCDTruncationType truncstrat)
595: {
596: KSP_PIPEGCR *pipegcr = (KSP_PIPEGCR *)ksp->data;
600: pipegcr->truncstrat = truncstrat;
601: return 0;
602: }
604: /*@
605: KSPPIPEGCRGetTruncationType - get the truncation strategy employed by PIPEGCR
607: Not Collective
609: KSP_FCD_TRUNC_TYPE_STANDARD uses all (up to mmax) stored directions
610: KSP_FCD_TRUNC_TYPE_NOTAY uses the last max(1,mod(i,mmax)) directions at iteration i=0,1,..
612: Input Parameter:
613: . ksp - the Krylov space context
615: Output Parameter:
616: . truncstrat - the strategy type
618: Options Database:
619: . -ksp_pipegcr_truncation_type <standard,notay> - which stored basis vectors to orthogonalize against
621: Level: intermediate
623: .seealso: `KSPPIPEGCR`, `KSPPIPEGCRSetTruncationType`, `KSPPIPEGCRTruncationType`, `KSPFCDTruncationType`
624: @*/
625: PetscErrorCode KSPPIPEGCRGetTruncationType(KSP ksp, KSPFCDTruncationType *truncstrat)
626: {
627: KSP_PIPEGCR *pipegcr = (KSP_PIPEGCR *)ksp->data;
630: *truncstrat = pipegcr->truncstrat;
631: return 0;
632: }
634: static PetscErrorCode KSPSetFromOptions_PIPEGCR(KSP ksp, PetscOptionItems *PetscOptionsObject)
635: {
636: KSP_PIPEGCR *pipegcr = (KSP_PIPEGCR *)ksp->data;
637: PetscInt mmax, nprealloc;
638: PetscBool flg;
640: PetscOptionsHeadBegin(PetscOptionsObject, "KSP PIPEGCR options");
641: PetscOptionsInt("-ksp_pipegcr_mmax", "Number of search directions to storue", "KSPPIPEGCRSetMmax", pipegcr->mmax, &mmax, &flg);
642: if (flg) KSPPIPEGCRSetMmax(ksp, mmax);
643: PetscOptionsInt("-ksp_pipegcr_nprealloc", "Number of directions to preallocate", "KSPPIPEGCRSetNprealloc", pipegcr->nprealloc, &nprealloc, &flg);
644: if (flg) KSPPIPEGCRSetNprealloc(ksp, nprealloc);
645: PetscOptionsEnum("-ksp_pipegcr_truncation_type", "Truncation approach for directions", "KSPFCGSetTruncationType", KSPFCDTruncationTypes, (PetscEnum)pipegcr->truncstrat, (PetscEnum *)&pipegcr->truncstrat, NULL);
646: PetscOptionsBool("-ksp_pipegcr_unroll_w", "Use unrolling of w", "KSPPIPEGCRSetUnrollW", pipegcr->unroll_w, &pipegcr->unroll_w, NULL);
647: PetscOptionsHeadEnd();
648: return 0;
649: }
652: typedef PetscErrorCode (*KSPPIPEGCRModifyPCFunction)(KSP, PetscInt, PetscReal, void *);
653: typedef PetscErrorCode (*KSPPIPEGCRDestroyFunction)(void *);
655: static PetscErrorCode KSPPIPEGCRSetModifyPC_PIPEGCR(KSP ksp, KSPPIPEGCRModifyPCFunction function, void *data, KSPPIPEGCRDestroyFunction destroy)
656: {
657: KSP_PIPEGCR *pipegcr = (KSP_PIPEGCR *)ksp->data;
660: pipegcr->modifypc = function;
661: pipegcr->modifypc_destroy = destroy;
662: pipegcr->modifypc_ctx = data;
663: return 0;
664: }
666: /*@C
667: KSPPIPEGCRSetModifyPC - Sets the routine used by PIPEGCR to modify the preconditioner.
669: Logically Collective on ksp
671: Input Parameters:
672: + ksp - iterative context obtained from KSPCreate()
673: . function - user defined function to modify the preconditioner
674: . ctx - user provided context for the modify preconditioner function
675: - destroy - the function to use to destroy the user provided application context.
677: Calling Sequence of function:
678: PetscErrorCode function (KSP ksp, PetscInt n, PetscReal rnorm, void *ctx)
680: ksp - iterative context
681: n - the total number of PIPEGCR iterations that have occurred
682: rnorm - 2-norm residual value
683: ctx - the user provided application context
685: Level: intermediate
687: Notes:
688: The default modifypc routine is KSPPIPEGCRModifyPCNoChange()
690: .seealso: `KSPPIPEGCRModifyPCNoChange()`
692: @*/
693: PetscErrorCode KSPPIPEGCRSetModifyPC(KSP ksp, PetscErrorCode (*function)(KSP, PetscInt, PetscReal, void *), void *data, PetscErrorCode (*destroy)(void *))
694: {
695: PetscUseMethod(ksp, "KSPPIPEGCRSetModifyPC_C", (KSP, PetscErrorCode(*)(KSP, PetscInt, PetscReal, void *), void *data, PetscErrorCode (*)(void *)), (ksp, function, data, destroy));
696: return 0;
697: }
699: /*MC
700: KSPPIPEGCR - Implements a Pipelined Generalized Conjugate Residual method.
702: Options Database Keys:
703: + -ksp_pipegcr_mmax <N> - the max number of Krylov directions to orthogonalize against
704: . -ksp_pipegcr_unroll_w - unroll w at the storage cost of a maximum of (mmax+1) extra vectors with the benefit of better pipelining (default: PETSC_TRUE)
705: . -ksp_pipegcr_nprealloc <N> - the number of vectors to preallocated for storing Krylov directions. Once exhausted new directions are allocated blockwise (default: 5)
706: - -ksp_pipegcr_truncation_type <standard,notay> - which previous search directions to orthogonalize against
708: Notes:
709: The PIPEGCR Krylov method supports non-symmetric matrices and permits the use of a preconditioner
710: which may vary from one iteration to the next. Users can can define a method to vary the
711: preconditioner between iterates via KSPPIPEGCRSetModifyPC().
712: Restarts are solves with x0 not equal to zero. When a restart occurs, the initial starting
713: solution is given by the current estimate for x which was obtained by the last restart
714: iterations of the PIPEGCR algorithm.
715: The method implemented requires at most the storage of 4 x mmax + 5 vectors, roughly twice as much as GCR.
717: Only supports left preconditioning.
719: The natural "norm" for this method is (u,Au), where u is the preconditioned residual. This norm is available at no additional computational cost, as with standard CG. Choosing preconditioned or unpreconditioned norm types involves a blocking reduction which prevents any benefit from pipelining.
721: Reference:
722: P. Sanan, S.M. Schnepp, and D.A. May,
723: "Pipelined, Flexible Krylov Subspace Methods,"
724: SIAM Journal on Scientific Computing 2016 38:5, C441-C470,
725: DOI: 10.1137/15M1049130
727: Level: intermediate
729: .seealso: `KSPCreate()`, `KSPSetType()`, `KSPType`, `KSP`,
730: `KSPPIPEFGMRES`, `KSPPIPECG`, `KSPPIPECR`, `KSPPIPEFCG`, `KSPPIPEGCRSetTruncationType()`, `KSPPIPEGCRSetNprealloc()`, `KSPPIPEGCRSetUnrollW()`, `KSPPIPEGCRSetMmax()`
732: M*/
733: PETSC_EXTERN PetscErrorCode KSPCreate_PIPEGCR(KSP ksp)
734: {
735: KSP_PIPEGCR *pipegcr;
737: PetscNew(&pipegcr);
738: pipegcr->mmax = KSPPIPEGCR_DEFAULT_MMAX;
739: pipegcr->nprealloc = KSPPIPEGCR_DEFAULT_NPREALLOC;
740: pipegcr->nvecs = 0;
741: pipegcr->vecb = KSPPIPEGCR_DEFAULT_VECB;
742: pipegcr->nchunks = 0;
743: pipegcr->truncstrat = KSPPIPEGCR_DEFAULT_TRUNCSTRAT;
744: pipegcr->n_restarts = 0;
745: pipegcr->unroll_w = KSPPIPEGCR_DEFAULT_UNROLL_W;
747: ksp->data = (void *)pipegcr;
749: /* natural norm is for free, precond+unprecond norm require non-overlapped reduction */
750: KSPSetSupportedNorm(ksp, KSP_NORM_NATURAL, PC_LEFT, 2);
751: KSPSetSupportedNorm(ksp, KSP_NORM_PRECONDITIONED, PC_LEFT, 1);
752: KSPSetSupportedNorm(ksp, KSP_NORM_UNPRECONDITIONED, PC_LEFT, 1);
753: KSPSetSupportedNorm(ksp, KSP_NORM_NONE, PC_LEFT, 1);
755: ksp->ops->setup = KSPSetUp_PIPEGCR;
756: ksp->ops->solve = KSPSolve_PIPEGCR;
757: ksp->ops->reset = KSPReset_PIPEGCR;
758: ksp->ops->destroy = KSPDestroy_PIPEGCR;
759: ksp->ops->view = KSPView_PIPEGCR;
760: ksp->ops->setfromoptions = KSPSetFromOptions_PIPEGCR;
761: ksp->ops->buildsolution = KSPBuildSolutionDefault;
762: ksp->ops->buildresidual = KSPBuildResidualDefault;
764: PetscObjectComposeFunction((PetscObject)ksp, "KSPPIPEGCRSetModifyPC_C", KSPPIPEGCRSetModifyPC_PIPEGCR);
765: return 0;
766: }