Actual source code: dgmres.c
petsc-3.6.4 2016-04-12
2: /*
3: This file implements the deflated GMRES.
4: References:
5: [1]J. Erhel, K. Burrage and B. Pohl, Restarted GMRES preconditioned by deflation,J. Computational and Applied Mathematics, 69(1996), 303-318.
6: [2] D. NUENTSA WAKAM and F. PACULL, Memory Efficient Hybrid Algebraic Solvers for Linear Systems Arising from Compressible Flows, Computers and Fluids, In Press, http://dx.doi.org/10.1016/j.compfluid.2012.03.023
8: */
10: #include <../src/ksp/ksp/impls/gmres/dgmres/dgmresimpl.h> /*I "petscksp.h" I*/
12: PetscLogEvent KSP_DGMRESComputeDeflationData, KSP_DGMRESApplyDeflation;
14: #define GMRES_DELTA_DIRECTIONS 10
15: #define GMRES_DEFAULT_MAXK 30
16: static PetscErrorCode KSPDGMRESGetNewVectors(KSP,PetscInt);
17: static PetscErrorCode KSPDGMRESUpdateHessenberg(KSP,PetscInt,PetscBool,PetscReal*);
18: static PetscErrorCode KSPDGMRESBuildSoln(PetscScalar*,Vec,Vec,KSP,PetscInt);
22: PetscErrorCode KSPDGMRESSetEigen(KSP ksp,PetscInt nb_eig)
23: {
27: PetscTryMethod((ksp),"KSPDGMRESSetEigen_C",(KSP,PetscInt),(ksp,nb_eig));
28: return(0);
29: }
32: PetscErrorCode KSPDGMRESSetMaxEigen(KSP ksp,PetscInt max_neig)
33: {
37: PetscTryMethod((ksp),"KSPDGMRESSetMaxEigen_C",(KSP,PetscInt),(ksp,max_neig));
38: return(0);
39: }
42: PetscErrorCode KSPDGMRESForce(KSP ksp,PetscBool force)
43: {
47: PetscTryMethod((ksp),"KSPDGMRESForce_C",(KSP,PetscBool),(ksp,force));
48: return(0);
49: }
52: PetscErrorCode KSPDGMRESSetRatio(KSP ksp,PetscReal ratio)
53: {
57: PetscTryMethod((ksp),"KSPDGMRESSetRatio_C",(KSP,PetscReal),(ksp,ratio));
58: return(0);
59: }
62: PetscErrorCode KSPDGMRESComputeSchurForm(KSP ksp,PetscInt *neig)
63: {
67: PetscTryMethod((ksp),"KSPDGMRESComputeSchurForm_C",(KSP, PetscInt*),(ksp, neig));
68: return(0);
69: }
72: PetscErrorCode KSPDGMRESComputeDeflationData(KSP ksp)
73: {
77: PetscTryMethod((ksp),"KSPDGMRESComputeDeflationData_C",(KSP),(ksp));
78: return(0);
79: }
82: PetscErrorCode KSPDGMRESApplyDeflation(KSP ksp, Vec x, Vec y)
83: {
87: PetscTryMethod((ksp),"KSPDGMRESApplyDeflation_C",(KSP, Vec, Vec),(ksp, x, y));
88: return(0);
89: }
93: PetscErrorCode KSPDGMRESImproveEig(KSP ksp, PetscInt neig)
94: {
98: PetscTryMethod((ksp), "KSPDGMRESImproveEig_C",(KSP, PetscInt),(ksp, neig));
99: return(0);
100: }
104: PetscErrorCode KSPSetUp_DGMRES(KSP ksp)
105: {
107: KSP_DGMRES *dgmres = (KSP_DGMRES*) ksp->data;
108: PetscInt neig = dgmres->neig+EIG_OFFSET;
109: PetscInt max_k = dgmres->max_k+1;
112: KSPSetUp_GMRES(ksp);
113: if (!dgmres->neig) return(0);
115: /* Allocate workspace for the Schur vectors*/
116: PetscMalloc1(neig*max_k, &SR);
117: dgmres->wr = NULL;
118: dgmres->wi = NULL;
119: dgmres->perm = NULL;
120: dgmres->modul = NULL;
121: dgmres->Q = NULL;
122: dgmres->Z = NULL;
124: UU = NULL;
125: XX = NULL;
126: MX = NULL;
127: AUU = NULL;
128: XMX = NULL;
129: XMU = NULL;
130: UMX = NULL;
131: AUAU = NULL;
132: TT = NULL;
133: TTF = NULL;
134: INVP = NULL;
135: X1 = NULL;
136: X2 = NULL;
137: MU = NULL;
138: return(0);
139: }
141: /*
142: Run GMRES, possibly with restart. Return residual history if requested.
143: input parameters:
145: . gmres - structure containing parameters and work areas
147: output parameters:
148: . nres - residuals (from preconditioned system) at each step.
149: If restarting, consider passing nres+it. If null,
150: ignored
151: . itcount - number of iterations used. nres[0] to nres[itcount]
152: are defined. If null, ignored.
154: Notes:
155: On entry, the value in vector VEC_VV(0) should be the initial residual
156: (this allows shortcuts where the initial preconditioned residual is 0).
157: */
160: PetscErrorCode KSPDGMRESCycle(PetscInt *itcount,KSP ksp)
161: {
162: KSP_DGMRES *dgmres = (KSP_DGMRES*)(ksp->data);
163: PetscReal res_norm,res,hapbnd,tt;
165: PetscInt it = 0;
166: PetscInt max_k = dgmres->max_k;
167: PetscBool hapend = PETSC_FALSE;
168: PetscReal res_old;
169: PetscInt test = 0;
172: VecNormalize(VEC_VV(0),&res_norm);
173: res = res_norm;
174: *GRS(0) = res_norm;
176: /* check for the convergence */
177: PetscObjectSAWsTakeAccess((PetscObject)ksp);
178: ksp->rnorm = res;
179: PetscObjectSAWsGrantAccess((PetscObject)ksp);
180: dgmres->it = (it - 1);
181: KSPLogResidualHistory(ksp,res);
182: KSPMonitor(ksp,ksp->its,res);
183: if (!res) {
184: if (itcount) *itcount = 0;
185: ksp->reason = KSP_CONVERGED_ATOL;
186: PetscInfo(ksp,"Converged due to zero residual norm on entry\n");
187: return(0);
188: }
189: /* record the residual norm to test if deflation is needed */
190: res_old = res;
192: (*ksp->converged)(ksp,ksp->its,res,&ksp->reason,ksp->cnvP);
193: while (!ksp->reason && it < max_k && ksp->its < ksp->max_it) {
194: if (it) {
195: KSPLogResidualHistory(ksp,res);
196: KSPMonitor(ksp,ksp->its,res);
197: }
198: dgmres->it = (it - 1);
199: if (dgmres->vv_allocated <= it + VEC_OFFSET + 1) {
200: KSPDGMRESGetNewVectors(ksp,it+1);
201: }
202: if (dgmres->r > 0) {
203: if (ksp->pc_side == PC_LEFT) {
204: /* Apply the first preconditioner */
205: KSP_PCApplyBAorAB(ksp,VEC_VV(it), VEC_TEMP,VEC_TEMP_MATOP);
206: /* Then apply Deflation as a preconditioner */
207: KSPDGMRESApplyDeflation(ksp, VEC_TEMP, VEC_VV(1+it));
208: } else if (ksp->pc_side == PC_RIGHT) {
209: KSPDGMRESApplyDeflation(ksp, VEC_VV(it), VEC_TEMP);
210: KSP_PCApplyBAorAB(ksp, VEC_TEMP, VEC_VV(1+it), VEC_TEMP_MATOP);
211: }
212: } else {
213: KSP_PCApplyBAorAB(ksp,VEC_VV(it),VEC_VV(1+it),VEC_TEMP_MATOP);
214: }
215: dgmres->matvecs += 1;
216: /* update hessenberg matrix and do Gram-Schmidt */
217: (*dgmres->orthog)(ksp,it);
219: /* vv(i+1) . vv(i+1) */
220: VecNormalize(VEC_VV(it+1),&tt);
221: /* save the magnitude */
222: *HH(it+1,it) = tt;
223: *HES(it+1,it) = tt;
225: /* check for the happy breakdown */
226: hapbnd = PetscAbsScalar(tt / *GRS(it));
227: if (hapbnd > dgmres->haptol) hapbnd = dgmres->haptol;
228: if (tt < hapbnd) {
229: PetscInfo2(ksp,"Detected happy breakdown, current hapbnd = %g tt = %g\n",(double)hapbnd,(double)tt);
230: hapend = PETSC_TRUE;
231: }
232: KSPDGMRESUpdateHessenberg(ksp,it,hapend,&res);
234: it++;
235: dgmres->it = (it-1); /* For converged */
236: ksp->its++;
237: ksp->rnorm = res;
238: if (ksp->reason) break;
240: (*ksp->converged)(ksp,ksp->its,res,&ksp->reason,ksp->cnvP);
242: /* Catch error in happy breakdown and signal convergence and break from loop */
243: if (hapend) {
244: if (!ksp->reason) {
245: if (ksp->errorifnotconverged) SETERRQ1(PetscObjectComm((PetscObject)ksp),PETSC_ERR_NOT_CONVERGED,"You reached the happy break down, but convergence was not indicated. Residual norm = %g",(double)res);
246: else {
247: ksp->reason = KSP_DIVERGED_BREAKDOWN;
248: break;
249: }
250: }
251: }
252: }
254: /* Monitor if we know that we will not return for a restart */
255: if (it && (ksp->reason || ksp->its >= ksp->max_it)) {
256: KSPLogResidualHistory(ksp,res);
257: KSPMonitor(ksp,ksp->its,res);
258: }
259: if (itcount) *itcount = it;
261: /*
262: Down here we have to solve for the "best" coefficients of the Krylov
263: columns, add the solution values together, and possibly unwind the
264: preconditioning from the solution
265: */
266: /* Form the solution (or the solution so far) */
267: KSPDGMRESBuildSoln(GRS(0),ksp->vec_sol,ksp->vec_sol,ksp,it-1);
269: /* Compute data for the deflation to be used during the next restart */
270: if (!ksp->reason && ksp->its < ksp->max_it) {
271: test = max_k *PetscLogReal(ksp->rtol/res) /PetscLogReal(res/res_old);
272: /* Compute data for the deflation if the residual rtol will not be reached in the remaining number of steps allowed */
273: if ((test > dgmres->smv*(ksp->max_it-ksp->its)) || dgmres->force) {
274: KSPDGMRESComputeDeflationData(ksp);
275: }
276: }
277: return(0);
278: }
282: PetscErrorCode KSPSolve_DGMRES(KSP ksp)
283: {
285: PetscInt i,its,itcount;
286: KSP_DGMRES *dgmres = (KSP_DGMRES*) ksp->data;
287: PetscBool guess_zero = ksp->guess_zero;
290: if (ksp->calc_sings && !dgmres->Rsvd) SETERRQ(PetscObjectComm((PetscObject)ksp), PETSC_ERR_ORDER,"Must call KSPSetComputeSingularValues() before KSPSetUp() is called");
292: PetscObjectSAWsTakeAccess((PetscObject)ksp);
293: ksp->its = 0;
294: dgmres->matvecs = 0;
295: PetscObjectSAWsGrantAccess((PetscObject)ksp);
297: itcount = 0;
298: ksp->reason = KSP_CONVERGED_ITERATING;
299: while (!ksp->reason) {
300: KSPInitialResidual(ksp,ksp->vec_sol,VEC_TEMP,VEC_TEMP_MATOP,VEC_VV(0),ksp->vec_rhs);
301: if (ksp->pc_side == PC_LEFT) {
302: dgmres->matvecs += 1;
303: if (dgmres->r > 0) {
304: KSPDGMRESApplyDeflation(ksp, VEC_VV(0), VEC_TEMP);
305: VecCopy(VEC_TEMP, VEC_VV(0));
306: }
307: }
309: KSPDGMRESCycle(&its,ksp);
310: itcount += its;
311: if (itcount >= ksp->max_it) {
312: if (!ksp->reason) ksp->reason = KSP_DIVERGED_ITS;
313: break;
314: }
315: ksp->guess_zero = PETSC_FALSE; /* every future call to KSPInitialResidual() will have nonzero guess */
316: }
317: ksp->guess_zero = guess_zero; /* restore if user provided nonzero initial guess */
319: for (i = 0; i < dgmres->r; i++) {
320: VecViewFromOptions(UU[i],(PetscObject)ksp,"-ksp_dgmres_view_deflation_vecs");
321: }
322: return(0);
323: }
327: PetscErrorCode KSPDestroy_DGMRES(KSP ksp)
328: {
330: KSP_DGMRES *dgmres = (KSP_DGMRES*) ksp->data;
331: PetscInt neig1 = dgmres->neig+EIG_OFFSET;
332: PetscInt max_neig = dgmres->max_neig;
335: if (dgmres->r) {
336: VecDestroyVecs(max_neig, &UU);
337: VecDestroyVecs(max_neig, &MU);
338: if (XX) {
339: VecDestroyVecs(neig1, &XX);
340: VecDestroyVecs(neig1, &MX);
341: }
343: PetscFree(TT);
344: PetscFree(TTF);
345: PetscFree(INVP);
347: PetscFree(XMX);
348: PetscFree(UMX);
349: PetscFree(XMU);
350: PetscFree(X1);
351: PetscFree(X2);
352: PetscFree(dgmres->work);
353: PetscFree(dgmres->iwork);
354: PetscFree(dgmres->wr);
355: PetscFree(dgmres->wi);
356: PetscFree(dgmres->modul);
357: PetscFree(dgmres->Q);
358: PetscFree(ORTH);
359: PetscFree(AUAU);
360: PetscFree(AUU);
361: PetscFree(SR2);
362: }
363: PetscFree(SR);
364: KSPDestroy_GMRES(ksp);
365: return(0);
366: }
367: /*
368: KSPDGMRESBuildSoln - create the solution from the starting vector and the
369: current iterates.
371: Input parameters:
372: nrs - work area of size it + 1.
373: vs - index of initial guess
374: vdest - index of result. Note that vs may == vdest (replace
375: guess with the solution).
377: This is an internal routine that knows about the GMRES internals.
378: */
381: static PetscErrorCode KSPDGMRESBuildSoln(PetscScalar *nrs,Vec vs,Vec vdest,KSP ksp,PetscInt it)
382: {
383: PetscScalar tt;
385: PetscInt ii,k,j;
386: KSP_DGMRES *dgmres = (KSP_DGMRES*) (ksp->data);
388: /* Solve for solution vector that minimizes the residual */
391: /* If it is < 0, no gmres steps have been performed */
392: if (it < 0) {
393: VecCopy(vs,vdest); /* VecCopy() is smart, exists immediately if vguess == vdest */
394: return(0);
395: }
396: if (*HH(it,it) == 0.0) SETERRQ2(PetscObjectComm((PetscObject)ksp), PETSC_ERR_CONV_FAILED,"Likely your matrix is the zero operator. HH(it,it) is identically zero; it = %D GRS(it) = %g",it,(double)PetscAbsScalar(*GRS(it)));
397: if (*HH(it,it) != 0.0) nrs[it] = *GRS(it) / *HH(it,it);
398: else nrs[it] = 0.0;
400: for (ii=1; ii<=it; ii++) {
401: k = it - ii;
402: tt = *GRS(k);
403: for (j=k+1; j<=it; j++) tt = tt - *HH(k,j) * nrs[j];
404: if (*HH(k,k) == 0.0) SETERRQ2(PetscObjectComm((PetscObject)ksp), PETSC_ERR_CONV_FAILED,"Likely your matrix is singular. HH(k,k) is identically zero; it = %D k = %D",it,k);
405: nrs[k] = tt / *HH(k,k);
406: }
408: /* Accumulate the correction to the solution of the preconditioned problem in TEMP */
409: VecSet(VEC_TEMP,0.0);
410: VecMAXPY(VEC_TEMP,it+1,nrs,&VEC_VV(0));
412: /* Apply deflation */
413: if (ksp->pc_side==PC_RIGHT && dgmres->r > 0) {
414: KSPDGMRESApplyDeflation(ksp, VEC_TEMP, VEC_TEMP_MATOP);
415: VecCopy(VEC_TEMP_MATOP, VEC_TEMP);
416: }
417: KSPUnwindPreconditioner(ksp,VEC_TEMP,VEC_TEMP_MATOP);
419: /* add solution to previous solution */
420: if (vdest != vs) {
421: VecCopy(vs,vdest);
422: }
423: VecAXPY(vdest,1.0,VEC_TEMP);
424: return(0);
425: }
426: /*
427: Do the scalar work for the orthogonalization. Return new residual norm.
428: */
431: static PetscErrorCode KSPDGMRESUpdateHessenberg(KSP ksp,PetscInt it,PetscBool hapend,PetscReal *res)
432: {
433: PetscScalar *hh,*cc,*ss,tt;
434: PetscInt j;
435: KSP_DGMRES *dgmres = (KSP_DGMRES*) (ksp->data);
438: hh = HH(0,it);
439: cc = CC(0);
440: ss = SS(0);
442: /* Apply all the previously computed plane rotations to the new column
443: of the Hessenberg matrix */
444: for (j=1; j<=it; j++) {
445: tt = *hh;
446: *hh = PetscConj(*cc) * tt + *ss * *(hh+1);
447: hh++;
448: *hh = *cc++ * *hh -(*ss++ * tt);
449: }
451: /*
452: compute the new plane rotation, and apply it to:
453: 1) the right-hand-side of the Hessenberg system
454: 2) the new column of the Hessenberg matrix
455: thus obtaining the updated value of the residual
456: */
457: if (!hapend) {
458: tt = PetscSqrtScalar(PetscConj(*hh) * *hh + PetscConj(*(hh+1)) * *(hh+1));
459: if (tt == 0.0) {
460: ksp->reason = KSP_DIVERGED_NULL;
461: return(0);
462: }
463: *cc = *hh / tt;
464: *ss = *(hh+1) / tt;
465: *GRS(it+1) = -(*ss * *GRS(it));
466: *GRS(it) = PetscConj(*cc) * *GRS(it);
467: *hh = PetscConj(*cc) * *hh + *ss * *(hh+1);
468: *res = PetscAbsScalar(*GRS(it+1));
469: } else {
470: /* happy breakdown: HH(it+1, it) = 0, therfore we don't need to apply
471: another rotation matrix (so RH doesn't change). The new residual is
472: always the new sine term times the residual from last time (GRS(it)),
473: but now the new sine rotation would be zero...so the residual should
474: be zero...so we will multiply "zero" by the last residual. This might
475: not be exactly what we want to do here -could just return "zero". */
477: *res = 0.0;
478: }
479: return(0);
480: }
481: /*
482: This routine allocates more work vectors, starting from VEC_VV(it).
483: */
486: static PetscErrorCode KSPDGMRESGetNewVectors(KSP ksp,PetscInt it)
487: {
488: KSP_DGMRES *dgmres = (KSP_DGMRES*) ksp->data;
490: PetscInt nwork = dgmres->nwork_alloc,k,nalloc;
493: nalloc = PetscMin(ksp->max_it,dgmres->delta_allocate);
494: /* Adjust the number to allocate to make sure that we don't exceed the
495: number of available slots */
496: if (it + VEC_OFFSET + nalloc >= dgmres->vecs_allocated) {
497: nalloc = dgmres->vecs_allocated - it - VEC_OFFSET;
498: }
499: if (!nalloc) return(0);
501: dgmres->vv_allocated += nalloc;
503: KSPCreateVecs(ksp,nalloc,&dgmres->user_work[nwork],0,NULL);
504: PetscLogObjectParents(ksp,nalloc,dgmres->user_work[nwork]);
506: dgmres->mwork_alloc[nwork] = nalloc;
507: for (k=0; k<nalloc; k++) {
508: dgmres->vecs[it+VEC_OFFSET+k] = dgmres->user_work[nwork][k];
509: }
510: dgmres->nwork_alloc++;
511: return(0);
512: }
516: PetscErrorCode KSPBuildSolution_DGMRES(KSP ksp,Vec ptr,Vec *result)
517: {
518: KSP_DGMRES *dgmres = (KSP_DGMRES*) ksp->data;
522: if (!ptr) {
523: if (!dgmres->sol_temp) {
524: VecDuplicate(ksp->vec_sol,&dgmres->sol_temp);
525: PetscLogObjectParent((PetscObject)ksp,(PetscObject)dgmres->sol_temp);
526: }
527: ptr = dgmres->sol_temp;
528: }
529: if (!dgmres->nrs) {
530: /* allocate the work area */
531: PetscMalloc1(dgmres->max_k,&dgmres->nrs);
532: PetscLogObjectMemory((PetscObject)ksp,dgmres->max_k*sizeof(PetscScalar));
533: }
535: KSPDGMRESBuildSoln(dgmres->nrs,ksp->vec_sol,ptr,ksp,dgmres->it);
536: if (result) *result = ptr;
537: return(0);
538: }
542: PetscErrorCode KSPView_DGMRES(KSP ksp,PetscViewer viewer)
543: {
544: KSP_DGMRES *dgmres = (KSP_DGMRES*) ksp->data;
546: PetscBool iascii,isharmonic;
549: KSPView_GMRES(ksp,viewer);
550: PetscObjectTypeCompare((PetscObject) viewer,PETSCVIEWERASCII,&iascii);
551: if (iascii) {
552: if (dgmres->force) PetscViewerASCIIPrintf(viewer, " DGMRES: Adaptive strategy is used: FALSE\n");
553: else PetscViewerASCIIPrintf(viewer, " DGMRES: Adaptive strategy is used: TRUE\n");
554: PetscOptionsHasName(NULL, "-ksp_dgmres_harmonic_ritz", &isharmonic);
555: if (isharmonic) {
556: PetscViewerASCIIPrintf(viewer, " DGMRES: Frequency of extracted eigenvalues = %D using Harmonic Ritz values \n", dgmres->neig);
557: } else {
558: PetscViewerASCIIPrintf(viewer, " DGMRES: Frequency of extracted eigenvalues = %D using Ritz values \n", dgmres->neig);
559: }
560: PetscViewerASCIIPrintf(viewer, " DGMRES: Total number of extracted eigenvalues = %D\n", dgmres->r);
561: PetscViewerASCIIPrintf(viewer, " DGMRES: Maximum number of eigenvalues set to be extracted = %D\n", dgmres->max_neig);
562: PetscViewerASCIIPrintf(viewer, " DGMRES: relaxation parameter for the adaptive strategy(smv) = %g\n", dgmres->smv);
563: PetscViewerASCIIPrintf(viewer, " DGMRES: Number of matvecs : %D\n", dgmres->matvecs);
564: }
565: return(0);
566: }
568: /* New DGMRES functions */
572: static PetscErrorCode KSPDGMRESSetEigen_DGMRES(KSP ksp,PetscInt neig)
573: {
574: KSP_DGMRES *dgmres = (KSP_DGMRES*) ksp->data;
577: if (neig< 0 && neig >dgmres->max_k) SETERRQ(PetscObjectComm((PetscObject)ksp), PETSC_ERR_ARG_OUTOFRANGE,"The value of neig must be positive and less than the restart value ");
578: dgmres->neig=neig;
579: return(0);
580: }
584: static PetscErrorCode KSPDGMRESSetMaxEigen_DGMRES(KSP ksp,PetscInt max_neig)
585: {
586: KSP_DGMRES *dgmres = (KSP_DGMRES*) ksp->data;
589: if (max_neig < 0 && max_neig >dgmres->max_k) SETERRQ(PetscObjectComm((PetscObject)ksp), PETSC_ERR_ARG_OUTOFRANGE,"The value of max_neig must be positive and less than the restart value ");
590: dgmres->max_neig=max_neig;
591: return(0);
592: }
596: static PetscErrorCode KSPDGMRESSetRatio_DGMRES(KSP ksp,PetscReal ratio)
597: {
598: KSP_DGMRES *dgmres = (KSP_DGMRES*) ksp->data;
601: if (ratio <= 0) SETERRQ(PetscObjectComm((PetscObject)ksp), PETSC_ERR_ARG_OUTOFRANGE,"The relaxation parameter value must be positive");
602: dgmres->smv=ratio;
603: return(0);
604: }
608: static PetscErrorCode KSPDGMRESForce_DGMRES(KSP ksp,PetscBool force)
609: {
610: KSP_DGMRES *dgmres = (KSP_DGMRES*) ksp->data;
613: dgmres->force = force;
614: return(0);
615: }
617: extern PetscErrorCode KSPSetFromOptions_GMRES(PetscOptions *PetscOptionsObject,KSP);
621: PetscErrorCode KSPSetFromOptions_DGMRES(PetscOptions *PetscOptionsObject,KSP ksp)
622: {
624: PetscInt neig;
625: PetscInt max_neig;
626: KSP_DGMRES *dgmres = (KSP_DGMRES*) ksp->data;
627: PetscBool flg;
630: KSPSetFromOptions_GMRES(PetscOptionsObject,ksp);
631: PetscOptionsHead(PetscOptionsObject,"KSP DGMRES Options");
632: PetscOptionsInt("-ksp_dgmres_eigen","Number of smallest eigenvalues to extract at each restart","KSPDGMRESSetEigen",dgmres->neig, &neig, &flg);
633: if (flg) {
634: KSPDGMRESSetEigen(ksp, neig);
635: }
636: PetscOptionsInt("-ksp_dgmres_max_eigen","Maximum Number of smallest eigenvalues to extract ","KSPDGMRESSetMaxEigen",dgmres->max_neig, &max_neig, &flg);
637: if (flg) {
638: KSPDGMRESSetMaxEigen(ksp, max_neig);
639: }
640: PetscOptionsReal("-ksp_dgmres_ratio","Relaxation parameter for the smaller number of matrix-vectors product allowed","KSPDGMRESSetRatio",dgmres->smv,&dgmres->smv,NULL);
641: PetscOptionsBool("-ksp_dgmres_improve","Improve the computation of eigenvalues by solving a new generalized eigenvalue problem (experimental - not stable at this time)",NULL,dgmres->improve,&dgmres->improve,NULL);
642: PetscOptionsBool("-ksp_dgmres_force","Sets DGMRES always at restart active, i.e do not use the adaptive strategy","KSPDGMRESForce",dgmres->force,&dgmres->force,NULL);
643: PetscOptionsTail();
644: return(0);
645: }
649: static PetscErrorCode KSPDGMRESComputeDeflationData_DGMRES(KSP ksp, PetscInt *ExtrNeig)
650: {
651: KSP_DGMRES *dgmres = (KSP_DGMRES*) ksp->data;
653: PetscInt i,j, k;
654: PetscBLASInt nr, bmax;
655: PetscInt r = dgmres->r;
656: PetscInt neig; /* number of eigenvalues to extract at each restart */
657: PetscInt neig1 = dgmres->neig + EIG_OFFSET; /* max number of eig that can be extracted at each restart */
658: PetscInt max_neig = dgmres->max_neig; /* Max number of eigenvalues to extract during the iterative process */
659: PetscInt N = dgmres->max_k+1;
660: PetscInt n = dgmres->it+1;
661: PetscReal alpha;
664: PetscLogEventBegin(KSP_DGMRESComputeDeflationData, ksp, 0,0,0);
665: if (dgmres->neig == 0) return(0);
666: if (max_neig < (r+neig1) && !dgmres->improve) {
667: PetscLogEventEnd(KSP_DGMRESComputeDeflationData, ksp, 0,0,0);
668: return(0);
669: }
671: KSPDGMRESComputeSchurForm(ksp, &neig);
672: /* Form the extended Schur vectors X=VV*Sr */
673: if (!XX) {
674: VecDuplicateVecs(VEC_VV(0), neig1, &XX);
675: }
676: for (j = 0; j<neig; j++) {
677: VecZeroEntries(XX[j]);
678: VecMAXPY(XX[j], n, &SR[j*N], &VEC_VV(0));
679: }
681: /* Orthogonalize X against U */
682: if (!ORTH) {
683: PetscMalloc1(max_neig, &ORTH);
684: }
685: if (r > 0) {
686: /* modified Gram-Schmidt */
687: for (j = 0; j<neig; j++) {
688: for (i=0; i<r; i++) {
689: /* First, compute U'*X[j] */
690: VecDot(XX[j], UU[i], &alpha);
691: /* Then, compute X(j)=X(j)-U*U'*X(j) */
692: VecAXPY(XX[j], -alpha, UU[i]);
693: }
694: }
695: }
696: /* Compute MX = M^{-1}*A*X */
697: if (!MX) {
698: VecDuplicateVecs(VEC_VV(0), neig1, &MX);
699: }
700: for (j = 0; j<neig; j++) {
701: KSP_PCApplyBAorAB(ksp, XX[j], MX[j], VEC_TEMP_MATOP);
702: }
703: dgmres->matvecs += neig;
705: if ((r+neig1) > max_neig && dgmres->improve) { /* Improve the approximate eigenvectors in X by solving a new generalized eigenvalue -- Quite expensive to do this actually */
706: KSPDGMRESImproveEig(ksp, neig);
707: return(0); /* We return here since data for M have been improved in KSPDGMRESImproveEig()*/
708: }
710: /* Compute XMX = X'*M^{-1}*A*X -- size (neig, neig) */
711: if (!XMX) {
712: PetscMalloc1(neig1*neig1, &XMX);
713: }
714: for (j = 0; j < neig; j++) {
715: VecMDot(MX[j], neig, XX, &(XMX[j*neig1]));
716: }
718: if (r > 0) {
719: /* Compute UMX = U'*M^{-1}*A*X -- size (r, neig) */
720: if (!UMX) {
721: PetscMalloc1(max_neig*neig1, &UMX);
722: }
723: for (j = 0; j < neig; j++) {
724: VecMDot(MX[j], r, UU, &(UMX[j*max_neig]));
725: }
726: /* Compute XMU = X'*M^{-1}*A*U -- size(neig, r) */
727: if (!XMU) {
728: PetscMalloc1(max_neig*neig1, &XMU);
729: }
730: for (j = 0; j<r; j++) {
731: VecMDot(MU[j], neig, XX, &(XMU[j*neig1]));
732: }
733: }
735: /* Form the new matrix T = [T UMX; XMU XMX]; */
736: if (!TT) {
737: PetscMalloc1(max_neig*max_neig, &TT);
738: }
739: if (r > 0) {
740: /* Add XMU to T */
741: for (j = 0; j < r; j++) {
742: PetscMemcpy(&(TT[max_neig*j+r]), &(XMU[neig1*j]), neig*sizeof(PetscReal));
743: }
744: /* Add [UMX; XMX] to T */
745: for (j = 0; j < neig; j++) {
746: k = r+j;
747: PetscMemcpy(&(TT[max_neig*k]), &(UMX[max_neig*j]), r*sizeof(PetscReal));
748: PetscMemcpy(&(TT[max_neig*k + r]), &(XMX[neig1*j]), neig*sizeof(PetscReal));
749: }
750: } else { /* Add XMX to T */
751: for (j = 0; j < neig; j++) {
752: PetscMemcpy(&(TT[max_neig*j]), &(XMX[neig1*j]), neig*sizeof(PetscReal));
753: }
754: }
756: dgmres->r += neig;
757: r = dgmres->r;
758: PetscBLASIntCast(r,&nr);
759: /*LU Factorize T with Lapack xgetrf routine */
761: PetscBLASIntCast(max_neig,&bmax);
762: if (!TTF) {
763: PetscMalloc1(bmax*bmax, &TTF);
764: }
765: PetscMemcpy(TTF, TT, bmax*r*sizeof(PetscReal));
766: if (!INVP) {
767: PetscMalloc1(bmax, &INVP);
768: }
769: #if defined(PETSC_MISSING_LAPACK_GETRF)
770: SETERRQ(PetscObjectComm((PetscObject)ksp),PETSC_ERR_SUP,"GETRF - Lapack routine is unavailable.");
771: #else
772: {
773: PetscBLASInt info;
774: PetscStackCallBLAS("LAPACKgetrf",LAPACKgetrf_(&nr, &nr, TTF, &bmax, INVP, &info));
775: if (info) SETERRQ1(PetscObjectComm((PetscObject)ksp), PETSC_ERR_LIB,"Error in LAPACK routine XGETRF INFO=%d",(int) info);
776: }
777: #endif
779: /* Save X in U and MX in MU for the next cycles and increase the size of the invariant subspace */
780: if (!UU) {
781: VecDuplicateVecs(VEC_VV(0), max_neig, &UU);
782: VecDuplicateVecs(VEC_VV(0), max_neig, &MU);
783: }
784: for (j=0; j<neig; j++) {
785: VecCopy(XX[j], UU[r-neig+j]);
786: VecCopy(MX[j], MU[r-neig+j]);
787: }
788: PetscLogEventEnd(KSP_DGMRESComputeDeflationData, ksp, 0,0,0);
789: return(0);
790: }
794: static PetscErrorCode KSPDGMRESComputeSchurForm_DGMRES(KSP ksp, PetscInt *neig)
795: {
796: KSP_DGMRES *dgmres = (KSP_DGMRES*) ksp->data;
798: PetscInt N = dgmres->max_k + 1, n=dgmres->it+1;
799: PetscBLASInt bn, bN;
800: PetscReal *A;
801: PetscBLASInt ihi;
802: PetscBLASInt ldA; /* leading dimension of A */
803: PetscBLASInt ldQ; /* leading dimension of Q */
804: PetscReal *Q; /* orthogonal matrix of (left) schur vectors */
805: PetscReal *work; /* working vector */
806: PetscBLASInt lwork; /* size of the working vector */
807: PetscInt *perm; /* Permutation vector to sort eigenvalues */
808: PetscInt i, j;
809: PetscBLASInt NbrEig; /* Number of eigenvalues really extracted */
810: PetscReal *wr, *wi, *modul; /* Real and imaginary part and modul of the eigenvalues of A*/
811: PetscBLASInt *select;
812: PetscBLASInt *iwork;
813: PetscBLASInt liwork;
814: PetscScalar *Ht; /* Transpose of the Hessenberg matrix */
815: PetscScalar *t; /* Store the result of the solution of H^T*t=h_{m+1,m}e_m */
816: PetscBLASInt *ipiv; /* Permutation vector to be used in LAPACK */
817: PetscBool flag; /* determine whether to use Ritz vectors or harmonic Ritz vectors */
820: PetscBLASIntCast(n,&bn);
821: PetscBLASIntCast(N,&bN);
822: ihi = ldQ = bn;
823: ldA = bN;
824: PetscBLASIntCast(5*N,&lwork);
826: #if defined(PETSC_USE_COMPLEX)
827: SETERRQ(PetscObjectComm((PetscObject)ksp), -1, "NO SUPPORT FOR COMPLEX VALUES AT THIS TIME");
828: #endif
830: PetscMalloc1(ldA*ldA, &A);
831: PetscMalloc1(ldQ*n, &Q);
832: PetscMalloc1(lwork, &work);
833: if (!dgmres->wr) {
834: PetscMalloc1(n, &dgmres->wr);
835: PetscMalloc1(n, &dgmres->wi);
836: }
837: wr = dgmres->wr;
838: wi = dgmres->wi;
839: PetscMalloc1(n,&modul);
840: PetscMalloc1(n,&perm);
841: /* copy the Hessenberg matrix to work space */
842: PetscMemcpy(A, dgmres->hes_origin, ldA*ldA*sizeof(PetscReal));
843: PetscOptionsHasName(NULL, "-ksp_dgmres_harmonic_ritz", &flag);
844: if (flag) {
845: /* Compute the matrix H + H^{-T}*h^2_{m+1,m}e_m*e_m^T */
846: /* Transpose the Hessenberg matrix */
847: PetscMalloc1(bn*bn, &Ht);
848: for (i = 0; i < bn; i++) {
849: for (j = 0; j < bn; j++) {
850: Ht[i * bn + j] = dgmres->hes_origin[j * ldA + i];
851: }
852: }
854: /* Solve the system H^T*t = h_{m+1,m}e_m */
855: PetscCalloc1(bn, &t);
856: t[bn-1] = dgmres->hes_origin[(bn -1) * ldA + bn]; /* Pick the last element H(m+1,m) */
857: PetscMalloc1(bn, &ipiv);
858: /* Call the LAPACK routine dgesv to solve the system Ht^-1 * t */
859: #if defined(PETSC_MISSING_LAPACK_GESV)
860: SETERRQ(PetscObjectComm((PetscObject)ksp),PETSC_ERR_SUP,"GESV - Lapack routine is unavailable.");
861: #else
862: {
863: PetscBLASInt info;
864: PetscBLASInt nrhs = 1;
865: PetscStackCallBLAS("LAPACKgesv",LAPACKgesv_(&bn, &nrhs, Ht, &bn, ipiv, t, &bn, &info));
866: if (info) SETERRQ(PetscObjectComm((PetscObject)ksp),PETSC_ERR_PLIB, "Error while calling the Lapack routine DGESV");
867: }
868: #endif
869: /* Now form H + H^{-T}*h^2_{m+1,m}e_m*e_m^T */
870: for (i = 0; i < bn; i++) A[(bn-1)*bn+i] += t[i];
871: PetscFree(t);
872: PetscFree(Ht);
873: }
874: /* Compute eigenvalues with the Schur form */
875: #if defined(PETSC_MISSING_LAPACK_HSEQR)
876: SETERRQ(PetscObjectComm((PetscObject)ksp),PETSC_ERR_SUP,"HSEQR - Lapack routine is unavailable.");
877: #else
878: {
879: PetscBLASInt info;
880: PetscBLASInt ilo = 1;
881: PetscStackCallBLAS("LAPACKhseqr",LAPACKhseqr_("S", "I", &bn, &ilo, &ihi, A, &ldA, wr, wi, Q, &ldQ, work, &lwork, &info));
882: if (info) SETERRQ1(PetscObjectComm((PetscObject)ksp), PETSC_ERR_LIB,"Error in LAPACK routine XHSEQR %d",(int) info);
883: }
884: #endif
885: PetscFree(work);
887: /* sort the eigenvalues */
888: for (i=0; i<n; i++) modul[i] = PetscSqrtReal(wr[i]*wr[i]+wi[i]*wi[i]);
889: for (i=0; i<n; i++) perm[i] = i;
891: PetscSortRealWithPermutation(n, modul, perm);
892: /* save the complex modulus of the largest eigenvalue in magnitude */
893: if (dgmres->lambdaN < modul[perm[n-1]]) dgmres->lambdaN=modul[perm[n-1]];
894: /* count the number of extracted eigenvalues (with complex conjugates) */
895: NbrEig = 0;
896: while (NbrEig < dgmres->neig) {
897: if (wi[perm[NbrEig]] != 0) NbrEig += 2;
898: else NbrEig += 1;
899: }
900: /* Reorder the Schur decomposition so that the cluster of smallest eigenvalues appears in the leading diagonal blocks of A */
902: PetscCalloc1(n, &select);
904: if (!dgmres->GreatestEig) {
905: for (j = 0; j < NbrEig; j++) select[perm[j]] = 1;
906: } else {
907: for (j = 0; j < NbrEig; j++) select[perm[n-j-1]] = 1;
908: }
909: /* call Lapack dtrsen */
910: lwork = PetscMax(1, 4 * NbrEig *(bn-NbrEig));
911: liwork = PetscMax(1, 2 * NbrEig *(bn-NbrEig));
912: PetscMalloc1(lwork, &work);
913: PetscMalloc1(liwork, &iwork);
914: #if defined(PETSC_MISSING_LAPACK_TRSEN)
915: SETERRQ(PetscObjectComm((PetscObject)ksp),PETSC_ERR_SUP,"TRSEN - Lapack routine is unavailable.");
916: #else
917: {
918: PetscBLASInt info;
919: PetscReal CondEig; /* lower bound on the reciprocal condition number for the selected cluster of eigenvalues */
920: PetscReal CondSub; /* estimated reciprocal condition number of the specified invariant subspace. */
921: PetscStackCallBLAS("LAPACKtrsen",LAPACKtrsen_("B", "V", select, &bn, A, &ldA, Q, &ldQ, wr, wi, &NbrEig, &CondEig, &CondSub, work, &lwork, iwork, &liwork, &info));
922: if (info == 1) SETERRQ(PetscObjectComm((PetscObject)ksp), PETSC_ERR_LIB, "UNABLE TO REORDER THE EIGENVALUES WITH THE LAPACK ROUTINE : ILL-CONDITIONED PROBLEM");
923: }
924: #endif
925: PetscFree(select);
927: /* Extract the Schur vectors */
928: for (j = 0; j < NbrEig; j++) {
929: PetscMemcpy(&SR[j*N], &(Q[j*ldQ]), n*sizeof(PetscReal));
930: }
931: *neig = NbrEig;
932: PetscFree(A);
933: PetscFree(work);
934: PetscFree(perm);
935: PetscFree(work);
936: PetscFree(iwork);
937: PetscFree(modul);
938: PetscFree(Q);
939: return(0);
940: }
944: static PetscErrorCode KSPDGMRESApplyDeflation_DGMRES(KSP ksp, Vec x, Vec y)
945: {
946: KSP_DGMRES *dgmres = (KSP_DGMRES*) ksp->data;
947: PetscInt i, r = dgmres->r;
949: PetscReal alpha = 1.0;
950: PetscInt max_neig = dgmres->max_neig;
951: PetscBLASInt br,bmax;
952: PetscReal lambda = dgmres->lambdaN;
955: PetscBLASIntCast(r,&br);
956: PetscBLASIntCast(max_neig,&bmax);
957: PetscLogEventBegin(KSP_DGMRESApplyDeflation, ksp, 0, 0, 0);
958: if (!r) {
959: VecCopy(x,y);
960: return(0);
961: }
962: /* Compute U'*x */
963: if (!X1) {
964: PetscMalloc1(bmax, &X1);
965: PetscMalloc1(bmax, &X2);
966: }
967: VecMDot(x, r, UU, X1);
969: /* Solve T*X1=X2 for X1*/
970: PetscMemcpy(X2, X1, br*sizeof(PetscReal));
971: #if defined(PETSC_MISSING_LAPACK_GETRS)
972: SETERRQ(PetscObjectComm((PetscObject)ksp),PETSC_ERR_SUP,"GETRS - Lapack routine is unavailable.");
973: #else
974: {
975: PetscBLASInt info;
976: PetscBLASInt nrhs = 1;
977: PetscStackCallBLAS("LAPACKgetrs",LAPACKgetrs_("N", &br, &nrhs, TTF, &bmax, INVP, X1, &bmax, &info));
978: if (info) SETERRQ1(PetscObjectComm((PetscObject)ksp), PETSC_ERR_LIB,"Error in LAPACK routine XGETRS %d", (int) info);
979: }
980: #endif
981: /* Iterative refinement -- is it really necessary ?? */
982: if (!WORK) {
983: PetscMalloc1(3*bmax, &WORK);
984: PetscMalloc1(bmax, &IWORK);
985: }
986: #if defined(PETSC_MISSING_LAPACK_GERFS)
987: SETERRQ(PetscObjectComm((PetscObject)ksp),PETSC_ERR_SUP,"GERFS - Lapack routine is unavailable.");
988: #else
989: {
990: PetscBLASInt info;
991: PetscReal berr, ferr;
992: PetscBLASInt nrhs = 1;
993: PetscStackCallBLAS("LAPACKgerfs",LAPACKgerfs_("N", &br, &nrhs, TT, &bmax, TTF, &bmax, INVP, X2, &bmax,X1, &bmax, &ferr, &berr, WORK, IWORK, &info));
994: if (info) SETERRQ1(PetscObjectComm((PetscObject)ksp), PETSC_ERR_LIB,"Error in LAPACK routine XGERFS %d", (int) info);
995: }
996: #endif
998: for (i = 0; i < r; i++) X2[i] = X1[i]/lambda - X2[i];
1000: /* Compute X2=U*X2 */
1001: VecZeroEntries(y);
1002: VecMAXPY(y, r, X2, UU);
1003: VecAXPY(y, alpha, x);
1005: PetscLogEventEnd(KSP_DGMRESApplyDeflation, ksp, 0, 0, 0);
1006: return(0);
1007: }
1011: static PetscErrorCode KSPDGMRESImproveEig_DGMRES(KSP ksp, PetscInt neig)
1012: {
1013: KSP_DGMRES *dgmres = (KSP_DGMRES*) ksp->data;
1014: PetscInt j,r_old, r = dgmres->r;
1015: PetscBLASInt i = 0;
1016: PetscInt neig1 = dgmres->neig + EIG_OFFSET;
1017: PetscInt bmax = dgmres->max_neig;
1018: PetscInt aug = r + neig; /* actual size of the augmented invariant basis */
1019: PetscInt aug1 = bmax+neig1; /* maximum size of the augmented invariant basis */
1020: PetscBLASInt ldA; /* leading dimension of AUAU and AUU*/
1021: PetscBLASInt N; /* size of AUAU */
1022: PetscReal *Q; /* orthogonal matrix of (left) schur vectors */
1023: PetscReal *Z; /* orthogonal matrix of (right) schur vectors */
1024: PetscReal *work; /* working vector */
1025: PetscBLASInt lwork; /* size of the working vector */
1026: PetscInt *perm; /* Permutation vector to sort eigenvalues */
1027: PetscReal *wr, *wi, *beta, *modul; /* Real and imaginary part and modul of the eigenvalues of A*/
1028: PetscInt ierr;
1029: PetscBLASInt NbrEig = 0,nr,bm;
1030: PetscBLASInt *select;
1031: PetscBLASInt liwork, *iwork;
1034: /* Block construction of the matrices AUU=(AU)'*U and (AU)'*AU*/
1035: if (!AUU) {
1036: PetscMalloc1(aug1*aug1, &AUU);
1037: PetscMalloc1(aug1*aug1, &AUAU);
1038: }
1039: /* AUU = (AU)'*U = [(MU)'*U (MU)'*X; (MX)'*U (MX)'*X]
1040: * Note that MU and MX have been computed previously either in ComputeDataDeflation() or down here in a previous call to this function */
1041: /* (MU)'*U size (r x r) -- store in the <r> first columns of AUU*/
1042: for (j=0; j < r; j++) {
1043: VecMDot(UU[j], r, MU, &AUU[j*aug1]);
1044: }
1045: /* (MU)'*X size (r x neig) -- store in AUU from the column <r>*/
1046: for (j = 0; j < neig; j++) {
1047: VecMDot(XX[j], r, MU, &AUU[(r+j) *aug1]);
1048: }
1049: /* (MX)'*U size (neig x r) -- store in the <r> first columns of AUU from the row <r>*/
1050: for (j = 0; j < r; j++) {
1051: VecMDot(UU[j], neig, MX, &AUU[j*aug1+r]);
1052: }
1053: /* (MX)'*X size (neig neig) -- store in AUU from the column <r> and the row <r>*/
1054: for (j = 0; j < neig; j++) {
1055: VecMDot(XX[j], neig, MX, &AUU[(r+j) *aug1 + r]);
1056: }
1058: /* AUAU = (AU)'*AU = [(MU)'*MU (MU)'*MX; (MX)'*MU (MX)'*MX] */
1059: /* (MU)'*MU size (r x r) -- store in the <r> first columns of AUAU*/
1060: for (j=0; j < r; j++) {
1061: VecMDot(MU[j], r, MU, &AUAU[j*aug1]);
1062: }
1063: /* (MU)'*MX size (r x neig) -- store in AUAU from the column <r>*/
1064: for (j = 0; j < neig; j++) {
1065: VecMDot(MX[j], r, MU, &AUAU[(r+j) *aug1]);
1066: }
1067: /* (MX)'*MU size (neig x r) -- store in the <r> first columns of AUAU from the row <r>*/
1068: for (j = 0; j < r; j++) {
1069: VecMDot(MU[j], neig, MX, &AUAU[j*aug1+r]);
1070: }
1071: /* (MX)'*MX size (neig neig) -- store in AUAU from the column <r> and the row <r>*/
1072: for (j = 0; j < neig; j++) {
1073: VecMDot(MX[j], neig, MX, &AUAU[(r+j) *aug1 + r]);
1074: }
1076: /* Computation of the eigenvectors */
1077: PetscBLASIntCast(aug1,&ldA);
1078: PetscBLASIntCast(aug,&N);
1079: lwork = 8 * N + 20; /* sizeof the working space */
1080: PetscMalloc1(N, &wr);
1081: PetscMalloc1(N, &wi);
1082: PetscMalloc1(N, &beta);
1083: PetscMalloc1(N, &modul);
1084: PetscMalloc1(N, &perm);
1085: PetscMalloc1(N*N, &Q);
1086: PetscMalloc1(N*N, &Z);
1087: PetscMalloc1(lwork, &work);
1088: #if defined(PETSC_MISSING_LAPACK_GGES)
1089: SETERRQ(PetscObjectComm((PetscObject)ksp),PETSC_ERR_SUP,"GGES - Lapack routine is unavailable.");
1090: #else
1091: {
1092: PetscBLASInt info;
1093: PetscStackCallBLAS("LAPACKgges",LAPACKgges_("V", "V", "N", NULL, &N, AUAU, &ldA, AUU, &ldA, &i, wr, wi, beta, Q, &N, Z, &N, work, &lwork, NULL, &info));
1094: if (info) SETERRQ1 (PetscObjectComm((PetscObject)ksp), PETSC_ERR_LIB,"Error in LAPACK routine XGGES %d", (int) info);
1095: }
1096: #endif
1097: for (i=0; i<N; i++) {
1098: if (beta[i] !=0.0) {
1099: wr[i] /=beta[i];
1100: wi[i] /=beta[i];
1101: }
1102: }
1103: /* sort the eigenvalues */
1104: for (i=0; i<N; i++) modul[i]=PetscSqrtReal(wr[i]*wr[i]+wi[i]*wi[i]);
1105: for (i=0; i<N; i++) perm[i] = i;
1106: PetscSortRealWithPermutation(N, modul, perm);
1107: /* Save the norm of the largest eigenvalue */
1108: if (dgmres->lambdaN < modul[perm[N-1]]) dgmres->lambdaN = modul[perm[N-1]];
1109: /* Allocate space to extract the first r schur vectors */
1110: if (!SR2) {
1111: PetscMalloc1(aug1*bmax, &SR2);
1112: }
1113: /* count the number of extracted eigenvalues (complex conjugates count as 2) */
1114: while (NbrEig < bmax) {
1115: if (wi[perm[NbrEig]] == 0) NbrEig += 1;
1116: else NbrEig += 2;
1117: }
1118: if (NbrEig > bmax) NbrEig = bmax - 1;
1119: r_old = r; /* previous size of r */
1120: dgmres->r = r = NbrEig;
1122: /* Select the eigenvalues to reorder */
1123: PetscCalloc1(N, &select);
1124: if (!dgmres->GreatestEig) {
1125: for (j = 0; j < NbrEig; j++) select[perm[j]] = 1;
1126: } else {
1127: for (j = 0; j < NbrEig; j++) select[perm[N-j-1]] = 1;
1128: }
1129: /* Reorder and extract the new <r> schur vectors */
1130: lwork = PetscMax(4 * N + 16, 2 * NbrEig *(N - NbrEig));
1131: liwork = PetscMax(N + 6, 2 * NbrEig *(N - NbrEig));
1132: PetscFree(work);
1133: PetscMalloc1(lwork, &work);
1134: PetscMalloc1(liwork, &iwork);
1135: #if defined(PETSC_MISSING_LAPACK_TGSEN)
1136: SETERRQ(PetscObjectComm((PetscObject)ksp),PETSC_ERR_SUP,"TGSEN - Lapack routine is unavailable.");
1137: #else
1138: {
1139: PetscBLASInt info;
1140: PetscReal Dif[2];
1141: PetscBLASInt ijob = 2;
1142: PetscBLASInt wantQ = 1, wantZ = 1;
1143: PetscStackCallBLAS("LAPACKtgsen",LAPACKtgsen_(&ijob, &wantQ, &wantZ, select, &N, AUAU, &ldA, AUU, &ldA, wr, wi, beta, Q, &N, Z, &N, &NbrEig, NULL, NULL, &(Dif[0]), work, &lwork, iwork, &liwork, &info));
1144: if (info == 1) SETERRQ(PetscObjectComm((PetscObject)ksp), -1, "UNABLE TO REORDER THE EIGENVALUES WITH THE LAPACK ROUTINE : ILL-CONDITIONED PROBLEM");
1145: }
1146: #endif
1147: PetscFree(select);
1149: for (j=0; j<r; j++) {
1150: PetscMemcpy(&SR2[j*aug1], &(Z[j*N]), N*sizeof(PetscReal));
1151: }
1153: /* Multiply the Schur vectors SR2 by U (and X) to get a new U
1154: -- save it temporarily in MU */
1155: for (j = 0; j < r; j++) {
1156: VecZeroEntries(MU[j]);
1157: VecMAXPY(MU[j], r_old, &SR2[j*aug1], UU);
1158: VecMAXPY(MU[j], neig, &SR2[j*aug1+r_old], XX);
1159: }
1160: /* Form T = U'*MU*U */
1161: for (j = 0; j < r; j++) {
1162: VecCopy(MU[j], UU[j]);
1163: KSP_PCApplyBAorAB(ksp, UU[j], MU[j], VEC_TEMP_MATOP);
1164: }
1165: dgmres->matvecs += r;
1166: for (j = 0; j < r; j++) {
1167: VecMDot(MU[j], r, UU, &TT[j*bmax]);
1168: }
1169: /* Factorize T */
1170: PetscMemcpy(TTF, TT, bmax*r*sizeof(PetscReal));
1171: PetscBLASIntCast(r,&nr);
1172: PetscBLASIntCast(bmax,&bm);
1173: #if defined(PETSC_MISSING_LAPACK_GETRF)
1174: SETERRQ(PetscObjectComm((PetscObject)ksp),PETSC_ERR_SUP,"GETRF - Lapack routine is unavailable.");
1175: #else
1176: {
1177: PetscBLASInt info;
1178: PetscStackCallBLAS("LAPACKgetrf",LAPACKgetrf_(&nr, &nr, TTF, &bm, INVP, &info));
1179: if (info) SETERRQ1(PetscObjectComm((PetscObject)ksp), PETSC_ERR_LIB,"Error in LAPACK routine XGETRF INFO=%d",(int) info);
1180: }
1181: #endif
1182: /* Free Memory */
1183: PetscFree(wr);
1184: PetscFree(wi);
1185: PetscFree(beta);
1186: PetscFree(modul);
1187: PetscFree(perm);
1188: PetscFree(Q);
1189: PetscFree(Z);
1190: PetscFree(work);
1191: PetscFree(iwork);
1192: return(0);
1193: }
1195: /* end new DGMRES functions */
1197: /*MC
1198: KSPDGMRES - Implements the deflated GMRES as defined in [1,2].
1199: In this implementation, the adaptive strategy allows to switch to the deflated GMRES when the
1200: stagnation occurs.
1202: Options Database Keys:
1203: GMRES Options (inherited):
1204: + -ksp_gmres_restart <restart> - the number of Krylov directions to orthogonalize against
1205: . -ksp_gmres_haptol <tol> - sets the tolerance for "happy ending" (exact convergence)
1206: . -ksp_gmres_preallocate - preallocate all the Krylov search directions initially (otherwise groups of
1207: vectors are allocated as needed)
1208: . -ksp_gmres_classicalgramschmidt - use classical (unmodified) Gram-Schmidt to orthogonalize against the Krylov space (fast) (the default)
1209: . -ksp_gmres_modifiedgramschmidt - use modified Gram-Schmidt in the orthogonalization (more stable, but slower)
1210: . -ksp_gmres_cgs_refinement_type <never,ifneeded,always> - determine if iterative refinement is used to increase the
1211: stability of the classical Gram-Schmidt orthogonalization.
1212: - -ksp_gmres_krylov_monitor - plot the Krylov space generated
1214: DGMRES Options Database Keys:
1215: + -ksp_dgmres_eigen <neig> - number of smallest eigenvalues to extract at each restart
1216: . -ksp_dgmres_max_eigen <max_neig> - maximum number of eigenvalues that can be extracted during the iterative
1217: process
1218: . -ksp_dgmres_force - use the deflation at each restart; switch off the adaptive strategy.
1219: - -ksp_dgmres_view_deflation_vecs <viewerspec> - View the deflation vectors, where viewerspec is a key that can be
1220: parsed by PetscOptionsGetViewer(). If neig > 1, viewerspec should
1221: end with ":append". No vectors will be viewed if the adaptive
1222: strategy chooses not to deflate, so -ksp_dgmres_force should also
1223: be given.
1224: The deflation vectors span a subspace that may be a good
1225: approximation of the subspace of smallest eigenvectors of the
1226: preconditioned operator, so this option can aid in understanding
1227: the performance of a preconditioner.
1229: Level: beginner
1231: Notes: Left and right preconditioning are supported, but not symmetric preconditioning. Complex arithmetic is not yet supported
1233: References:
1235: [1]Restarted GMRES preconditioned by deflation,J. Computational and Applied Mathematics, 69(1996), 303-318.
1236: [2]On the performance of various adaptive preconditioned GMRES strategies, 5(1998), 101-121.
1237: [3] D. NUENTSA WAKAM and F. PACULL, Memory Efficient Hybrid Algebraic Solvers for Linear Systems Arising from Compressible Flows, Computers and Fluids, In Press, http://dx.doi.org/10.1016/j.compfluid.2012.03.023
1239: Contributed by: Desire NUENTSA WAKAM,INRIA
1241: .seealso: KSPCreate(), KSPSetType(), KSPType (for list of available types), KSP, KSPFGMRES, KSPLGMRES,
1242: KSPGMRESSetRestart(), KSPGMRESSetHapTol(), KSPGMRESSetPreAllocateVectors(), KSPGMRESSetOrthogonalization(), KSPGMRESGetOrthogonalization(),
1243: KSPGMRESClassicalGramSchmidtOrthogonalization(), KSPGMRESModifiedGramSchmidtOrthogonalization(),
1244: KSPGMRESCGSRefinementType, KSPGMRESSetCGSRefinementType(), KSPGMRESGetCGSRefinementType(), KSPGMRESMonitorKrylov(), KSPSetPCSide()
1246: M*/
1250: PETSC_EXTERN PetscErrorCode KSPCreate_DGMRES(KSP ksp)
1251: {
1252: KSP_DGMRES *dgmres;
1256: PetscNewLog(ksp,&dgmres);
1257: ksp->data = (void*) dgmres;
1259: KSPSetSupportedNorm(ksp,KSP_NORM_PRECONDITIONED,PC_LEFT,3);
1260: KSPSetSupportedNorm(ksp,KSP_NORM_UNPRECONDITIONED,PC_RIGHT,2);
1262: ksp->ops->buildsolution = KSPBuildSolution_DGMRES;
1263: ksp->ops->setup = KSPSetUp_DGMRES;
1264: ksp->ops->solve = KSPSolve_DGMRES;
1265: ksp->ops->destroy = KSPDestroy_DGMRES;
1266: ksp->ops->view = KSPView_DGMRES;
1267: ksp->ops->setfromoptions = KSPSetFromOptions_DGMRES;
1268: ksp->ops->computeextremesingularvalues = KSPComputeExtremeSingularValues_GMRES;
1269: ksp->ops->computeeigenvalues = KSPComputeEigenvalues_GMRES;
1271: PetscObjectComposeFunction((PetscObject)ksp,"KSPGMRESSetPreAllocateVectors_C",KSPGMRESSetPreAllocateVectors_GMRES);
1272: PetscObjectComposeFunction((PetscObject)ksp,"KSPGMRESSetOrthogonalization_C",KSPGMRESSetOrthogonalization_GMRES);
1273: PetscObjectComposeFunction((PetscObject)ksp,"KSPGMRESSetRestart_C",KSPGMRESSetRestart_GMRES);
1274: PetscObjectComposeFunction((PetscObject)ksp,"KSPGMRESSetHapTol_C",KSPGMRESSetHapTol_GMRES);
1275: PetscObjectComposeFunction((PetscObject)ksp,"KSPGMRESSetCGSRefinementType_C",KSPGMRESSetCGSRefinementType_GMRES);
1276: /* -- New functions defined in DGMRES -- */
1277: PetscObjectComposeFunction((PetscObject)ksp, "KSPDGMRESSetEigen_C",KSPDGMRESSetEigen_DGMRES);
1278: PetscObjectComposeFunction((PetscObject)ksp, "KSPDGMRESSetMaxEigen_C",KSPDGMRESSetMaxEigen_DGMRES);
1279: PetscObjectComposeFunction((PetscObject)ksp, "KSPDGMRESSetRatio_C",KSPDGMRESSetRatio_DGMRES);
1280: PetscObjectComposeFunction((PetscObject)ksp, "KSPDGMRESForce_C",KSPDGMRESForce_DGMRES);
1281: PetscObjectComposeFunction((PetscObject)ksp, "KSPDGMRESComputeSchurForm_C",KSPDGMRESComputeSchurForm_DGMRES);
1282: PetscObjectComposeFunction((PetscObject)ksp, "KSPDGMRESComputeDeflationData_C",KSPDGMRESComputeDeflationData_DGMRES);
1283: PetscObjectComposeFunction((PetscObject)ksp, "KSPDGMRESApplyDeflation_C",KSPDGMRESApplyDeflation_DGMRES);
1284: PetscObjectComposeFunction((PetscObject)ksp, "KSPDGMRESImproveEig_C", KSPDGMRESImproveEig_DGMRES);
1286: PetscLogEventRegister("DGMRESComputeDefl", KSP_CLASSID, &KSP_DGMRESComputeDeflationData);
1287: PetscLogEventRegister("DGMRESApplyDefl", KSP_CLASSID, &KSP_DGMRESApplyDeflation);
1289: dgmres->haptol = 1.0e-30;
1290: dgmres->q_preallocate = 0;
1291: dgmres->delta_allocate = GMRES_DELTA_DIRECTIONS;
1292: dgmres->orthog = KSPGMRESClassicalGramSchmidtOrthogonalization;
1293: dgmres->nrs = 0;
1294: dgmres->sol_temp = 0;
1295: dgmres->max_k = GMRES_DEFAULT_MAXK;
1296: dgmres->Rsvd = 0;
1297: dgmres->cgstype = KSP_GMRES_CGS_REFINE_NEVER;
1298: dgmres->orthogwork = 0;
1300: /* Default values for the deflation */
1301: dgmres->r = 0;
1302: dgmres->neig = DGMRES_DEFAULT_EIG;
1303: dgmres->max_neig = DGMRES_DEFAULT_MAXEIG-1;
1304: dgmres->lambdaN = 0.0;
1305: dgmres->smv = SMV;
1306: dgmres->matvecs = 0;
1307: dgmres->GreatestEig = PETSC_FALSE; /* experimental */
1308: dgmres->HasSchur = PETSC_FALSE;
1309: return(0);
1310: }