Actual source code: dgmres.c
petsc-3.10.5 2019-03-28
1: /*
2: This file implements the deflated GMRES.
4: */
6: #include <../src/ksp/ksp/impls/gmres/dgmres/dgmresimpl.h>
8: PetscLogEvent KSP_DGMRESComputeDeflationData, KSP_DGMRESApplyDeflation;
10: #define GMRES_DELTA_DIRECTIONS 10
11: #define GMRES_DEFAULT_MAXK 30
12: static PetscErrorCode KSPDGMRESGetNewVectors(KSP,PetscInt);
13: static PetscErrorCode KSPDGMRESUpdateHessenberg(KSP,PetscInt,PetscBool,PetscReal*);
14: static PetscErrorCode KSPDGMRESBuildSoln(PetscScalar*,Vec,Vec,KSP,PetscInt);
16: PetscErrorCode KSPDGMRESSetEigen(KSP ksp,PetscInt nb_eig)
17: {
21: PetscTryMethod((ksp),"KSPDGMRESSetEigen_C",(KSP,PetscInt),(ksp,nb_eig));
22: return(0);
23: }
24: PetscErrorCode KSPDGMRESSetMaxEigen(KSP ksp,PetscInt max_neig)
25: {
29: PetscTryMethod((ksp),"KSPDGMRESSetMaxEigen_C",(KSP,PetscInt),(ksp,max_neig));
30: return(0);
31: }
32: PetscErrorCode KSPDGMRESForce(KSP ksp,PetscBool force)
33: {
37: PetscTryMethod((ksp),"KSPDGMRESForce_C",(KSP,PetscBool),(ksp,force));
38: return(0);
39: }
40: PetscErrorCode KSPDGMRESSetRatio(KSP ksp,PetscReal ratio)
41: {
45: PetscTryMethod((ksp),"KSPDGMRESSetRatio_C",(KSP,PetscReal),(ksp,ratio));
46: return(0);
47: }
48: PetscErrorCode KSPDGMRESComputeSchurForm(KSP ksp,PetscInt *neig)
49: {
53: PetscUseMethod((ksp),"KSPDGMRESComputeSchurForm_C",(KSP, PetscInt*),(ksp, neig));
54: return(0);
55: }
56: PetscErrorCode KSPDGMRESComputeDeflationData(KSP ksp,PetscInt *curneigh)
57: {
61: PetscUseMethod((ksp),"KSPDGMRESComputeDeflationData_C",(KSP,PetscInt*),(ksp,curneigh));
62: return(0);
63: }
64: PetscErrorCode KSPDGMRESApplyDeflation(KSP ksp, Vec x, Vec y)
65: {
69: PetscUseMethod((ksp),"KSPDGMRESApplyDeflation_C",(KSP, Vec, Vec),(ksp, x, y));
70: return(0);
71: }
73: PetscErrorCode KSPDGMRESImproveEig(KSP ksp, PetscInt neig)
74: {
78: PetscUseMethod((ksp), "KSPDGMRESImproveEig_C",(KSP, PetscInt),(ksp, neig));
79: return(0);
80: }
82: PetscErrorCode KSPSetUp_DGMRES(KSP ksp)
83: {
85: KSP_DGMRES *dgmres = (KSP_DGMRES*) ksp->data;
86: PetscInt neig = dgmres->neig+EIG_OFFSET;
87: PetscInt max_k = dgmres->max_k+1;
90: KSPSetUp_GMRES(ksp);
91: if (!dgmres->neig) return(0);
93: /* Allocate workspace for the Schur vectors*/
94: PetscMalloc1(neig*max_k, &SR);
95: dgmres->wr = NULL;
96: dgmres->wi = NULL;
97: dgmres->perm = NULL;
98: dgmres->modul = NULL;
99: dgmres->Q = NULL;
100: dgmres->Z = NULL;
102: UU = NULL;
103: XX = NULL;
104: MX = NULL;
105: AUU = NULL;
106: XMX = NULL;
107: XMU = NULL;
108: UMX = NULL;
109: AUAU = NULL;
110: TT = NULL;
111: TTF = NULL;
112: INVP = NULL;
113: X1 = NULL;
114: X2 = NULL;
115: MU = NULL;
116: return(0);
117: }
119: /*
120: Run GMRES, possibly with restart. Return residual history if requested.
121: input parameters:
123: . gmres - structure containing parameters and work areas
125: output parameters:
126: . nres - residuals (from preconditioned system) at each step.
127: If restarting, consider passing nres+it. If null,
128: ignored
129: . itcount - number of iterations used. nres[0] to nres[itcount]
130: are defined. If null, ignored.
132: Notes:
133: On entry, the value in vector VEC_VV(0) should be the initial residual
134: (this allows shortcuts where the initial preconditioned residual is 0).
135: */
136: PetscErrorCode KSPDGMRESCycle(PetscInt *itcount,KSP ksp)
137: {
138: KSP_DGMRES *dgmres = (KSP_DGMRES*)(ksp->data);
139: PetscReal res_norm,res,hapbnd,tt;
141: PetscInt it = 0;
142: PetscInt max_k = dgmres->max_k;
143: PetscBool hapend = PETSC_FALSE;
144: PetscReal res_old;
145: PetscInt test = 0;
148: VecNormalize(VEC_VV(0),&res_norm);
149: KSPCheckNorm(ksp,res_norm);
150: res = res_norm;
151: *GRS(0) = res_norm;
153: /* check for the convergence */
154: PetscObjectSAWsTakeAccess((PetscObject)ksp);
155: ksp->rnorm = res;
156: PetscObjectSAWsGrantAccess((PetscObject)ksp);
157: dgmres->it = (it - 1);
158: KSPLogResidualHistory(ksp,res);
159: KSPMonitor(ksp,ksp->its,res);
160: if (!res) {
161: if (itcount) *itcount = 0;
162: ksp->reason = KSP_CONVERGED_ATOL;
163: PetscInfo(ksp,"Converged due to zero residual norm on entry\n");
164: return(0);
165: }
166: /* record the residual norm to test if deflation is needed */
167: res_old = res;
169: (*ksp->converged)(ksp,ksp->its,res,&ksp->reason,ksp->cnvP);
170: while (!ksp->reason && it < max_k && ksp->its < ksp->max_it) {
171: if (it) {
172: KSPLogResidualHistory(ksp,res);
173: KSPMonitor(ksp,ksp->its,res);
174: }
175: dgmres->it = (it - 1);
176: if (dgmres->vv_allocated <= it + VEC_OFFSET + 1) {
177: KSPDGMRESGetNewVectors(ksp,it+1);
178: }
179: if (dgmres->r > 0) {
180: if (ksp->pc_side == PC_LEFT) {
181: /* Apply the first preconditioner */
182: KSP_PCApplyBAorAB(ksp,VEC_VV(it), VEC_TEMP,VEC_TEMP_MATOP);
183: /* Then apply Deflation as a preconditioner */
184: KSPDGMRESApplyDeflation(ksp, VEC_TEMP, VEC_VV(1+it));
185: } else if (ksp->pc_side == PC_RIGHT) {
186: KSPDGMRESApplyDeflation(ksp, VEC_VV(it), VEC_TEMP);
187: KSP_PCApplyBAorAB(ksp, VEC_TEMP, VEC_VV(1+it), VEC_TEMP_MATOP);
188: }
189: } else {
190: KSP_PCApplyBAorAB(ksp,VEC_VV(it),VEC_VV(1+it),VEC_TEMP_MATOP);
191: }
192: dgmres->matvecs += 1;
193: /* update hessenberg matrix and do Gram-Schmidt */
194: (*dgmres->orthog)(ksp,it);
196: /* vv(i+1) . vv(i+1) */
197: VecNormalize(VEC_VV(it+1),&tt);
198: /* save the magnitude */
199: *HH(it+1,it) = tt;
200: *HES(it+1,it) = tt;
202: /* check for the happy breakdown */
203: hapbnd = PetscAbsScalar(tt / *GRS(it));
204: if (hapbnd > dgmres->haptol) hapbnd = dgmres->haptol;
205: if (tt < hapbnd) {
206: PetscInfo2(ksp,"Detected happy breakdown, current hapbnd = %g tt = %g\n",(double)hapbnd,(double)tt);
207: hapend = PETSC_TRUE;
208: }
209: KSPDGMRESUpdateHessenberg(ksp,it,hapend,&res);
211: it++;
212: dgmres->it = (it-1); /* For converged */
213: ksp->its++;
214: ksp->rnorm = res;
215: if (ksp->reason) break;
217: (*ksp->converged)(ksp,ksp->its,res,&ksp->reason,ksp->cnvP);
219: /* Catch error in happy breakdown and signal convergence and break from loop */
220: if (hapend) {
221: if (!ksp->reason) {
222: if (ksp->errorifnotconverged) SETERRQ1(PetscObjectComm((PetscObject)ksp),PETSC_ERR_NOT_CONVERGED,"You reached the happy break down, but convergence was not indicated. Residual norm = %g",(double)res);
223: else {
224: ksp->reason = KSP_DIVERGED_BREAKDOWN;
225: break;
226: }
227: }
228: }
229: }
231: /* Monitor if we know that we will not return for a restart */
232: if (it && (ksp->reason || ksp->its >= ksp->max_it)) {
233: KSPLogResidualHistory(ksp,res);
234: KSPMonitor(ksp,ksp->its,res);
235: }
236: if (itcount) *itcount = it;
238: /*
239: Down here we have to solve for the "best" coefficients of the Krylov
240: columns, add the solution values together, and possibly unwind the
241: preconditioning from the solution
242: */
243: /* Form the solution (or the solution so far) */
244: KSPDGMRESBuildSoln(GRS(0),ksp->vec_sol,ksp->vec_sol,ksp,it-1);
246: /* Compute data for the deflation to be used during the next restart */
247: if (!ksp->reason && ksp->its < ksp->max_it) {
248: test = max_k *PetscLogReal(ksp->rtol/res) /PetscLogReal(res/res_old);
249: /* Compute data for the deflation if the residual rtol will not be reached in the remaining number of steps allowed */
250: if ((test > dgmres->smv*(ksp->max_it-ksp->its)) || dgmres->force) {
251: KSPDGMRESComputeDeflationData(ksp,NULL);
252: }
253: }
254: return(0);
255: }
257: PetscErrorCode KSPSolve_DGMRES(KSP ksp)
258: {
260: PetscInt i,its,itcount;
261: KSP_DGMRES *dgmres = (KSP_DGMRES*) ksp->data;
262: PetscBool guess_zero = ksp->guess_zero;
265: if (ksp->calc_sings && !dgmres->Rsvd) SETERRQ(PetscObjectComm((PetscObject)ksp), PETSC_ERR_ORDER,"Must call KSPSetComputeSingularValues() before KSPSetUp() is called");
267: PetscObjectSAWsTakeAccess((PetscObject)ksp);
268: ksp->its = 0;
269: dgmres->matvecs = 0;
270: PetscObjectSAWsGrantAccess((PetscObject)ksp);
272: itcount = 0;
273: ksp->reason = KSP_CONVERGED_ITERATING;
274: while (!ksp->reason) {
275: KSPInitialResidual(ksp,ksp->vec_sol,VEC_TEMP,VEC_TEMP_MATOP,VEC_VV(0),ksp->vec_rhs);
276: if (ksp->pc_side == PC_LEFT) {
277: dgmres->matvecs += 1;
278: if (dgmres->r > 0) {
279: KSPDGMRESApplyDeflation(ksp, VEC_VV(0), VEC_TEMP);
280: VecCopy(VEC_TEMP, VEC_VV(0));
281: }
282: }
284: KSPDGMRESCycle(&its,ksp);
285: itcount += its;
286: if (itcount >= ksp->max_it) {
287: if (!ksp->reason) ksp->reason = KSP_DIVERGED_ITS;
288: break;
289: }
290: ksp->guess_zero = PETSC_FALSE; /* every future call to KSPInitialResidual() will have nonzero guess */
291: }
292: ksp->guess_zero = guess_zero; /* restore if user provided nonzero initial guess */
294: for (i = 0; i < dgmres->r; i++) {
295: VecViewFromOptions(UU[i],(PetscObject)ksp,"-ksp_dgmres_view_deflation_vecs");
296: }
297: return(0);
298: }
300: PetscErrorCode KSPDestroy_DGMRES(KSP ksp)
301: {
303: KSP_DGMRES *dgmres = (KSP_DGMRES*) ksp->data;
304: PetscInt neig1 = dgmres->neig+EIG_OFFSET;
305: PetscInt max_neig = dgmres->max_neig;
308: if (dgmres->r) {
309: VecDestroyVecs(max_neig, &UU);
310: VecDestroyVecs(max_neig, &MU);
311: if (XX) {
312: VecDestroyVecs(neig1, &XX);
313: VecDestroyVecs(neig1, &MX);
314: }
316: PetscFree(TT);
317: PetscFree(TTF);
318: PetscFree(INVP);
320: PetscFree(XMX);
321: PetscFree(UMX);
322: PetscFree(XMU);
323: PetscFree(X1);
324: PetscFree(X2);
325: PetscFree(dgmres->work);
326: PetscFree(dgmres->iwork);
327: PetscFree(dgmres->wr);
328: PetscFree(dgmres->wi);
329: PetscFree(dgmres->modul);
330: PetscFree(dgmres->Q);
331: PetscFree(ORTH);
332: PetscFree(AUAU);
333: PetscFree(AUU);
334: PetscFree(SR2);
335: }
336: PetscFree(SR);
337: KSPDestroy_GMRES(ksp);
338: return(0);
339: }
340: /*
341: KSPDGMRESBuildSoln - create the solution from the starting vector and the
342: current iterates.
344: Input parameters:
345: nrs - work area of size it + 1.
346: vs - index of initial guess
347: vdest - index of result. Note that vs may == vdest (replace
348: guess with the solution).
350: This is an internal routine that knows about the GMRES internals.
351: */
352: static PetscErrorCode KSPDGMRESBuildSoln(PetscScalar *nrs,Vec vs,Vec vdest,KSP ksp,PetscInt it)
353: {
354: PetscScalar tt;
356: PetscInt ii,k,j;
357: KSP_DGMRES *dgmres = (KSP_DGMRES*) (ksp->data);
359: /* Solve for solution vector that minimizes the residual */
362: /* If it is < 0, no gmres steps have been performed */
363: if (it < 0) {
364: VecCopy(vs,vdest); /* VecCopy() is smart, exists immediately if vguess == vdest */
365: return(0);
366: }
367: if (*HH(it,it) == 0.0) SETERRQ2(PetscObjectComm((PetscObject)ksp), PETSC_ERR_CONV_FAILED,"Likely your matrix is the zero operator. HH(it,it) is identically zero; it = %D GRS(it) = %g",it,(double)PetscAbsScalar(*GRS(it)));
368: if (*HH(it,it) != 0.0) nrs[it] = *GRS(it) / *HH(it,it);
369: else nrs[it] = 0.0;
371: for (ii=1; ii<=it; ii++) {
372: k = it - ii;
373: tt = *GRS(k);
374: for (j=k+1; j<=it; j++) tt = tt - *HH(k,j) * nrs[j];
375: if (*HH(k,k) == 0.0) SETERRQ2(PetscObjectComm((PetscObject)ksp), PETSC_ERR_CONV_FAILED,"Likely your matrix is singular. HH(k,k) is identically zero; it = %D k = %D",it,k);
376: nrs[k] = tt / *HH(k,k);
377: }
379: /* Accumulate the correction to the solution of the preconditioned problem in TEMP */
380: VecSet(VEC_TEMP,0.0);
381: VecMAXPY(VEC_TEMP,it+1,nrs,&VEC_VV(0));
383: /* Apply deflation */
384: if (ksp->pc_side==PC_RIGHT && dgmres->r > 0) {
385: KSPDGMRESApplyDeflation(ksp, VEC_TEMP, VEC_TEMP_MATOP);
386: VecCopy(VEC_TEMP_MATOP, VEC_TEMP);
387: }
388: KSPUnwindPreconditioner(ksp,VEC_TEMP,VEC_TEMP_MATOP);
390: /* add solution to previous solution */
391: if (vdest != vs) {
392: VecCopy(vs,vdest);
393: }
394: VecAXPY(vdest,1.0,VEC_TEMP);
395: return(0);
396: }
397: /*
398: Do the scalar work for the orthogonalization. Return new residual norm.
399: */
400: static PetscErrorCode KSPDGMRESUpdateHessenberg(KSP ksp,PetscInt it,PetscBool hapend,PetscReal *res)
401: {
402: PetscScalar *hh,*cc,*ss,tt;
403: PetscInt j;
404: KSP_DGMRES *dgmres = (KSP_DGMRES*) (ksp->data);
407: hh = HH(0,it);
408: cc = CC(0);
409: ss = SS(0);
411: /* Apply all the previously computed plane rotations to the new column
412: of the Hessenberg matrix */
413: for (j=1; j<=it; j++) {
414: tt = *hh;
415: *hh = PetscConj(*cc) * tt + *ss * *(hh+1);
416: hh++;
417: *hh = *cc++ * *hh -(*ss++ * tt);
418: }
420: /*
421: compute the new plane rotation, and apply it to:
422: 1) the right-hand-side of the Hessenberg system
423: 2) the new column of the Hessenberg matrix
424: thus obtaining the updated value of the residual
425: */
426: if (!hapend) {
427: tt = PetscSqrtScalar(PetscConj(*hh) * *hh + PetscConj(*(hh+1)) * *(hh+1));
428: if (tt == 0.0) {
429: ksp->reason = KSP_DIVERGED_NULL;
430: return(0);
431: }
432: *cc = *hh / tt;
433: *ss = *(hh+1) / tt;
434: *GRS(it+1) = -(*ss * *GRS(it));
435: *GRS(it) = PetscConj(*cc) * *GRS(it);
436: *hh = PetscConj(*cc) * *hh + *ss * *(hh+1);
437: *res = PetscAbsScalar(*GRS(it+1));
438: } else {
439: /* happy breakdown: HH(it+1, it) = 0, therfore we don't need to apply
440: another rotation matrix (so RH doesn't change). The new residual is
441: always the new sine term times the residual from last time (GRS(it)),
442: but now the new sine rotation would be zero...so the residual should
443: be zero...so we will multiply "zero" by the last residual. This might
444: not be exactly what we want to do here -could just return "zero". */
446: *res = 0.0;
447: }
448: return(0);
449: }
450: /*
451: This routine allocates more work vectors, starting from VEC_VV(it).
452: */
453: static PetscErrorCode KSPDGMRESGetNewVectors(KSP ksp,PetscInt it)
454: {
455: KSP_DGMRES *dgmres = (KSP_DGMRES*) ksp->data;
457: PetscInt nwork = dgmres->nwork_alloc,k,nalloc;
460: nalloc = PetscMin(ksp->max_it,dgmres->delta_allocate);
461: /* Adjust the number to allocate to make sure that we don't exceed the
462: number of available slots */
463: if (it + VEC_OFFSET + nalloc >= dgmres->vecs_allocated) {
464: nalloc = dgmres->vecs_allocated - it - VEC_OFFSET;
465: }
466: if (!nalloc) return(0);
468: dgmres->vv_allocated += nalloc;
470: KSPCreateVecs(ksp,nalloc,&dgmres->user_work[nwork],0,NULL);
471: PetscLogObjectParents(ksp,nalloc,dgmres->user_work[nwork]);
473: dgmres->mwork_alloc[nwork] = nalloc;
474: for (k=0; k<nalloc; k++) {
475: dgmres->vecs[it+VEC_OFFSET+k] = dgmres->user_work[nwork][k];
476: }
477: dgmres->nwork_alloc++;
478: return(0);
479: }
481: PetscErrorCode KSPBuildSolution_DGMRES(KSP ksp,Vec ptr,Vec *result)
482: {
483: KSP_DGMRES *dgmres = (KSP_DGMRES*) ksp->data;
487: if (!ptr) {
488: if (!dgmres->sol_temp) {
489: VecDuplicate(ksp->vec_sol,&dgmres->sol_temp);
490: PetscLogObjectParent((PetscObject)ksp,(PetscObject)dgmres->sol_temp);
491: }
492: ptr = dgmres->sol_temp;
493: }
494: if (!dgmres->nrs) {
495: /* allocate the work area */
496: PetscMalloc1(dgmres->max_k,&dgmres->nrs);
497: PetscLogObjectMemory((PetscObject)ksp,dgmres->max_k*sizeof(PetscScalar));
498: }
500: KSPDGMRESBuildSoln(dgmres->nrs,ksp->vec_sol,ptr,ksp,dgmres->it);
501: if (result) *result = ptr;
502: return(0);
503: }
505: PetscErrorCode KSPView_DGMRES(KSP ksp,PetscViewer viewer)
506: {
507: KSP_DGMRES *dgmres = (KSP_DGMRES*) ksp->data;
509: PetscBool iascii,isharmonic;
512: KSPView_GMRES(ksp,viewer);
513: PetscObjectTypeCompare((PetscObject) viewer,PETSCVIEWERASCII,&iascii);
514: if (iascii) {
515: if (dgmres->force) PetscViewerASCIIPrintf(viewer, " Adaptive strategy is used: FALSE\n");
516: else PetscViewerASCIIPrintf(viewer, " Adaptive strategy is used: TRUE\n");
517: PetscOptionsHasName(((PetscObject)ksp)->options,((PetscObject)ksp)->prefix, "-ksp_dgmres_harmonic_ritz", &isharmonic);
518: if (isharmonic) {
519: PetscViewerASCIIPrintf(viewer, " Frequency of extracted eigenvalues = %D using Harmonic Ritz values \n", dgmres->neig);
520: } else {
521: PetscViewerASCIIPrintf(viewer, " Frequency of extracted eigenvalues = %D using Ritz values \n", dgmres->neig);
522: }
523: PetscViewerASCIIPrintf(viewer, " Total number of extracted eigenvalues = %D\n", dgmres->r);
524: PetscViewerASCIIPrintf(viewer, " Maximum number of eigenvalues set to be extracted = %D\n", dgmres->max_neig);
525: PetscViewerASCIIPrintf(viewer, " relaxation parameter for the adaptive strategy(smv) = %g\n", dgmres->smv);
526: PetscViewerASCIIPrintf(viewer, " Number of matvecs : %D\n", dgmres->matvecs);
527: }
528: return(0);
529: }
531: /* New DGMRES functions */
533: PetscErrorCode KSPDGMRESSetEigen_DGMRES(KSP ksp,PetscInt neig)
534: {
535: KSP_DGMRES *dgmres = (KSP_DGMRES*) ksp->data;
538: if (neig< 0 && neig >dgmres->max_k) SETERRQ(PetscObjectComm((PetscObject)ksp), PETSC_ERR_ARG_OUTOFRANGE,"The value of neig must be positive and less than the restart value ");
539: dgmres->neig=neig;
540: return(0);
541: }
543: static PetscErrorCode KSPDGMRESSetMaxEigen_DGMRES(KSP ksp,PetscInt max_neig)
544: {
545: KSP_DGMRES *dgmres = (KSP_DGMRES*) ksp->data;
548: if (max_neig < 0 && max_neig >dgmres->max_k) SETERRQ(PetscObjectComm((PetscObject)ksp), PETSC_ERR_ARG_OUTOFRANGE,"The value of max_neig must be positive and less than the restart value ");
549: dgmres->max_neig=max_neig;
550: return(0);
551: }
553: static PetscErrorCode KSPDGMRESSetRatio_DGMRES(KSP ksp,PetscReal ratio)
554: {
555: KSP_DGMRES *dgmres = (KSP_DGMRES*) ksp->data;
558: if (ratio <= 0) SETERRQ(PetscObjectComm((PetscObject)ksp), PETSC_ERR_ARG_OUTOFRANGE,"The relaxation parameter value must be positive");
559: dgmres->smv=ratio;
560: return(0);
561: }
563: static PetscErrorCode KSPDGMRESForce_DGMRES(KSP ksp,PetscBool force)
564: {
565: KSP_DGMRES *dgmres = (KSP_DGMRES*) ksp->data;
568: dgmres->force = force;
569: return(0);
570: }
572: PetscErrorCode KSPSetFromOptions_DGMRES(PetscOptionItems *PetscOptionsObject,KSP ksp)
573: {
575: PetscInt neig;
576: PetscInt max_neig;
577: KSP_DGMRES *dgmres = (KSP_DGMRES*) ksp->data;
578: PetscBool flg;
581: KSPSetFromOptions_GMRES(PetscOptionsObject,ksp);
582: PetscOptionsHead(PetscOptionsObject,"KSP DGMRES Options");
583: PetscOptionsInt("-ksp_dgmres_eigen","Number of smallest eigenvalues to extract at each restart","KSPDGMRESSetEigen",dgmres->neig, &neig, &flg);
584: if (flg) {
585: KSPDGMRESSetEigen(ksp, neig);
586: }
587: PetscOptionsInt("-ksp_dgmres_max_eigen","Maximum Number of smallest eigenvalues to extract ","KSPDGMRESSetMaxEigen",dgmres->max_neig, &max_neig, &flg);
588: if (flg) {
589: KSPDGMRESSetMaxEigen(ksp, max_neig);
590: }
591: PetscOptionsReal("-ksp_dgmres_ratio","Relaxation parameter for the smaller number of matrix-vectors product allowed","KSPDGMRESSetRatio",dgmres->smv,&dgmres->smv,NULL);
592: PetscOptionsBool("-ksp_dgmres_improve","Improve the computation of eigenvalues by solving a new generalized eigenvalue problem (experimental - not stable at this time)",NULL,dgmres->improve,&dgmres->improve,NULL);
593: PetscOptionsBool("-ksp_dgmres_force","Sets DGMRES always at restart active, i.e do not use the adaptive strategy","KSPDGMRESForce",dgmres->force,&dgmres->force,NULL);
594: PetscOptionsTail();
595: return(0);
596: }
598: PetscErrorCode KSPDGMRESComputeDeflationData_DGMRES(KSP ksp, PetscInt *ExtrNeig)
599: {
600: KSP_DGMRES *dgmres = (KSP_DGMRES*) ksp->data;
602: PetscInt i,j, k;
603: PetscBLASInt nr, bmax;
604: PetscInt r = dgmres->r;
605: PetscInt neig; /* number of eigenvalues to extract at each restart */
606: PetscInt neig1 = dgmres->neig + EIG_OFFSET; /* max number of eig that can be extracted at each restart */
607: PetscInt max_neig = dgmres->max_neig; /* Max number of eigenvalues to extract during the iterative process */
608: PetscInt N = dgmres->max_k+1;
609: PetscInt n = dgmres->it+1;
610: PetscReal alpha;
613: PetscLogEventBegin(KSP_DGMRESComputeDeflationData, ksp, 0,0,0);
614: if (dgmres->neig == 0 || (max_neig < (r+neig1) && !dgmres->improve)) {
615: PetscLogEventEnd(KSP_DGMRESComputeDeflationData, ksp, 0,0,0);
616: return(0);
617: }
619: KSPDGMRESComputeSchurForm(ksp, &neig);
620: /* Form the extended Schur vectors X=VV*Sr */
621: if (!XX) {
622: VecDuplicateVecs(VEC_VV(0), neig1, &XX);
623: }
624: for (j = 0; j<neig; j++) {
625: VecZeroEntries(XX[j]);
626: VecMAXPY(XX[j], n, &SR[j*N], &VEC_VV(0));
627: }
629: /* Orthogonalize X against U */
630: if (!ORTH) {
631: PetscMalloc1(max_neig, &ORTH);
632: }
633: if (r > 0) {
634: /* modified Gram-Schmidt */
635: for (j = 0; j<neig; j++) {
636: for (i=0; i<r; i++) {
637: /* First, compute U'*X[j] */
638: VecDot(XX[j], UU[i], &alpha);
639: /* Then, compute X(j)=X(j)-U*U'*X(j) */
640: VecAXPY(XX[j], -alpha, UU[i]);
641: }
642: }
643: }
644: /* Compute MX = M^{-1}*A*X */
645: if (!MX) {
646: VecDuplicateVecs(VEC_VV(0), neig1, &MX);
647: }
648: for (j = 0; j<neig; j++) {
649: KSP_PCApplyBAorAB(ksp, XX[j], MX[j], VEC_TEMP_MATOP);
650: }
651: dgmres->matvecs += neig;
653: if ((r+neig1) > max_neig && dgmres->improve) { /* Improve the approximate eigenvectors in X by solving a new generalized eigenvalue -- Quite expensive to do this actually */
654: KSPDGMRESImproveEig(ksp, neig);
655: PetscLogEventEnd(KSP_DGMRESComputeDeflationData, ksp, 0,0,0);
656: return(0); /* We return here since data for M have been improved in KSPDGMRESImproveEig()*/
657: }
659: /* Compute XMX = X'*M^{-1}*A*X -- size (neig, neig) */
660: if (!XMX) {
661: PetscMalloc1(neig1*neig1, &XMX);
662: }
663: for (j = 0; j < neig; j++) {
664: VecMDot(MX[j], neig, XX, &(XMX[j*neig1]));
665: }
667: if (r > 0) {
668: /* Compute UMX = U'*M^{-1}*A*X -- size (r, neig) */
669: if (!UMX) {
670: PetscMalloc1(max_neig*neig1, &UMX);
671: }
672: for (j = 0; j < neig; j++) {
673: VecMDot(MX[j], r, UU, &(UMX[j*max_neig]));
674: }
675: /* Compute XMU = X'*M^{-1}*A*U -- size(neig, r) */
676: if (!XMU) {
677: PetscMalloc1(max_neig*neig1, &XMU);
678: }
679: for (j = 0; j<r; j++) {
680: VecMDot(MU[j], neig, XX, &(XMU[j*neig1]));
681: }
682: }
684: /* Form the new matrix T = [T UMX; XMU XMX]; */
685: if (!TT) {
686: PetscMalloc1(max_neig*max_neig, &TT);
687: }
688: if (r > 0) {
689: /* Add XMU to T */
690: for (j = 0; j < r; j++) {
691: PetscMemcpy(&(TT[max_neig*j+r]), &(XMU[neig1*j]), neig*sizeof(PetscReal));
692: }
693: /* Add [UMX; XMX] to T */
694: for (j = 0; j < neig; j++) {
695: k = r+j;
696: PetscMemcpy(&(TT[max_neig*k]), &(UMX[max_neig*j]), r*sizeof(PetscReal));
697: PetscMemcpy(&(TT[max_neig*k + r]), &(XMX[neig1*j]), neig*sizeof(PetscReal));
698: }
699: } else { /* Add XMX to T */
700: for (j = 0; j < neig; j++) {
701: PetscMemcpy(&(TT[max_neig*j]), &(XMX[neig1*j]), neig*sizeof(PetscReal));
702: }
703: }
705: dgmres->r += neig;
706: r = dgmres->r;
707: PetscBLASIntCast(r,&nr);
708: /*LU Factorize T with Lapack xgetrf routine */
710: PetscBLASIntCast(max_neig,&bmax);
711: if (!TTF) {
712: PetscMalloc1(bmax*bmax, &TTF);
713: }
714: PetscMemcpy(TTF, TT, bmax*r*sizeof(PetscReal));
715: if (!INVP) {
716: PetscMalloc1(bmax, &INVP);
717: }
718: #if defined(PETSC_MISSING_LAPACK_GETRF)
719: SETERRQ(PetscObjectComm((PetscObject)ksp),PETSC_ERR_SUP,"GETRF - Lapack routine is unavailable.");
720: #else
721: {
722: PetscBLASInt info;
723: PetscStackCallBLAS("LAPACKgetrf",LAPACKgetrf_(&nr, &nr, TTF, &bmax, INVP, &info));
724: if (info) SETERRQ1(PetscObjectComm((PetscObject)ksp), PETSC_ERR_LIB,"Error in LAPACK routine XGETRF INFO=%d",(int) info);
725: }
726: #endif
728: /* Save X in U and MX in MU for the next cycles and increase the size of the invariant subspace */
729: if (!UU) {
730: VecDuplicateVecs(VEC_VV(0), max_neig, &UU);
731: VecDuplicateVecs(VEC_VV(0), max_neig, &MU);
732: }
733: for (j=0; j<neig; j++) {
734: VecCopy(XX[j], UU[r-neig+j]);
735: VecCopy(MX[j], MU[r-neig+j]);
736: }
737: PetscLogEventEnd(KSP_DGMRESComputeDeflationData, ksp, 0,0,0);
738: return(0);
739: }
741: PetscErrorCode KSPDGMRESComputeSchurForm_DGMRES(KSP ksp, PetscInt *neig)
742: {
743: KSP_DGMRES *dgmres = (KSP_DGMRES*) ksp->data;
745: PetscInt N = dgmres->max_k + 1, n=dgmres->it+1;
746: PetscBLASInt bn, bN;
747: PetscReal *A;
748: PetscBLASInt ihi;
749: PetscBLASInt ldA; /* leading dimension of A */
750: PetscBLASInt ldQ; /* leading dimension of Q */
751: PetscReal *Q; /* orthogonal matrix of (left) schur vectors */
752: PetscReal *work; /* working vector */
753: PetscBLASInt lwork; /* size of the working vector */
754: PetscInt *perm; /* Permutation vector to sort eigenvalues */
755: PetscInt i, j;
756: PetscBLASInt NbrEig; /* Number of eigenvalues really extracted */
757: PetscReal *wr, *wi, *modul; /* Real and imaginary part and modul of the eigenvalues of A*/
758: PetscBLASInt *select;
759: PetscBLASInt *iwork;
760: PetscBLASInt liwork;
761: PetscScalar *Ht; /* Transpose of the Hessenberg matrix */
762: PetscScalar *t; /* Store the result of the solution of H^T*t=h_{m+1,m}e_m */
763: PetscBLASInt *ipiv; /* Permutation vector to be used in LAPACK */
764: PetscBool flag; /* determine whether to use Ritz vectors or harmonic Ritz vectors */
767: PetscBLASIntCast(n,&bn);
768: PetscBLASIntCast(N,&bN);
769: ihi = ldQ = bn;
770: ldA = bN;
771: PetscBLASIntCast(5*N,&lwork);
773: #if defined(PETSC_USE_COMPLEX)
774: SETERRQ(PetscObjectComm((PetscObject)ksp), -1, "NO SUPPORT FOR COMPLEX VALUES AT THIS TIME");
775: #endif
777: PetscMalloc1(ldA*ldA, &A);
778: PetscMalloc1(ldQ*n, &Q);
779: PetscMalloc1(lwork, &work);
780: if (!dgmres->wr) {
781: PetscMalloc1(n, &dgmres->wr);
782: PetscMalloc1(n, &dgmres->wi);
783: }
784: wr = dgmres->wr;
785: wi = dgmres->wi;
786: PetscMalloc1(n,&modul);
787: PetscMalloc1(n,&perm);
788: /* copy the Hessenberg matrix to work space */
789: PetscMemcpy(A, dgmres->hes_origin, ldA*ldA*sizeof(PetscReal));
790: PetscOptionsHasName(((PetscObject)ksp)->options,((PetscObject)ksp)->prefix, "-ksp_dgmres_harmonic_ritz", &flag);
791: if (flag) {
792: /* Compute the matrix H + H^{-T}*h^2_{m+1,m}e_m*e_m^T */
793: /* Transpose the Hessenberg matrix */
794: PetscMalloc1(bn*bn, &Ht);
795: for (i = 0; i < bn; i++) {
796: for (j = 0; j < bn; j++) {
797: Ht[i * bn + j] = dgmres->hes_origin[j * ldA + i];
798: }
799: }
801: /* Solve the system H^T*t = h_{m+1,m}e_m */
802: PetscCalloc1(bn, &t);
803: t[bn-1] = dgmres->hes_origin[(bn -1) * ldA + bn]; /* Pick the last element H(m+1,m) */
804: PetscMalloc1(bn, &ipiv);
805: /* Call the LAPACK routine dgesv to solve the system Ht^-1 * t */
806: #if defined(PETSC_MISSING_LAPACK_GESV)
807: SETERRQ(PetscObjectComm((PetscObject)ksp),PETSC_ERR_SUP,"GESV - Lapack routine is unavailable.");
808: #else
809: {
810: PetscBLASInt info;
811: PetscBLASInt nrhs = 1;
812: PetscStackCallBLAS("LAPACKgesv",LAPACKgesv_(&bn, &nrhs, Ht, &bn, ipiv, t, &bn, &info));
813: if (info) SETERRQ(PetscObjectComm((PetscObject)ksp),PETSC_ERR_PLIB, "Error while calling the Lapack routine DGESV");
814: }
815: #endif
816: /* Now form H + H^{-T}*h^2_{m+1,m}e_m*e_m^T */
817: for (i = 0; i < bn; i++) A[(bn-1)*bn+i] += t[i];
818: PetscFree(t);
819: PetscFree(Ht);
820: }
821: /* Compute eigenvalues with the Schur form */
822: #if defined(PETSC_MISSING_LAPACK_HSEQR)
823: SETERRQ(PetscObjectComm((PetscObject)ksp),PETSC_ERR_SUP,"HSEQR - Lapack routine is unavailable.");
824: #else
825: {
826: PetscBLASInt info;
827: PetscBLASInt ilo = 1;
828: PetscStackCallBLAS("LAPACKhseqr",LAPACKhseqr_("S", "I", &bn, &ilo, &ihi, A, &ldA, wr, wi, Q, &ldQ, work, &lwork, &info));
829: if (info) SETERRQ1(PetscObjectComm((PetscObject)ksp), PETSC_ERR_LIB,"Error in LAPACK routine XHSEQR %d",(int) info);
830: }
831: #endif
832: PetscFree(work);
834: /* sort the eigenvalues */
835: for (i=0; i<n; i++) modul[i] = PetscSqrtReal(wr[i]*wr[i]+wi[i]*wi[i]);
836: for (i=0; i<n; i++) perm[i] = i;
838: PetscSortRealWithPermutation(n, modul, perm);
839: /* save the complex modulus of the largest eigenvalue in magnitude */
840: if (dgmres->lambdaN < modul[perm[n-1]]) dgmres->lambdaN=modul[perm[n-1]];
841: /* count the number of extracted eigenvalues (with complex conjugates) */
842: NbrEig = 0;
843: while (NbrEig < dgmres->neig) {
844: if (wi[perm[NbrEig]] != 0) NbrEig += 2;
845: else NbrEig += 1;
846: }
847: /* Reorder the Schur decomposition so that the cluster of smallest eigenvalues appears in the leading diagonal blocks of A */
849: PetscCalloc1(n, &select);
851: if (!dgmres->GreatestEig) {
852: for (j = 0; j < NbrEig; j++) select[perm[j]] = 1;
853: } else {
854: for (j = 0; j < NbrEig; j++) select[perm[n-j-1]] = 1;
855: }
856: /* call Lapack dtrsen */
857: lwork = PetscMax(1, 4 * NbrEig *(bn-NbrEig));
858: liwork = PetscMax(1, 2 * NbrEig *(bn-NbrEig));
859: PetscMalloc1(lwork, &work);
860: PetscMalloc1(liwork, &iwork);
861: #if defined(PETSC_MISSING_LAPACK_TRSEN)
862: SETERRQ(PetscObjectComm((PetscObject)ksp),PETSC_ERR_SUP,"TRSEN - Lapack routine is unavailable.");
863: #else
864: {
865: PetscBLASInt info;
866: PetscReal CondEig; /* lower bound on the reciprocal condition number for the selected cluster of eigenvalues */
867: PetscReal CondSub; /* estimated reciprocal condition number of the specified invariant subspace. */
868: PetscStackCallBLAS("LAPACKtrsen",LAPACKtrsen_("B", "V", select, &bn, A, &ldA, Q, &ldQ, wr, wi, &NbrEig, &CondEig, &CondSub, work, &lwork, iwork, &liwork, &info));
869: if (info == 1) SETERRQ(PetscObjectComm((PetscObject)ksp), PETSC_ERR_LIB, "UNABLE TO REORDER THE EIGENVALUES WITH THE LAPACK ROUTINE : ILL-CONDITIONED PROBLEM");
870: }
871: #endif
872: PetscFree(select);
874: /* Extract the Schur vectors */
875: for (j = 0; j < NbrEig; j++) {
876: PetscMemcpy(&SR[j*N], &(Q[j*ldQ]), n*sizeof(PetscReal));
877: }
878: *neig = NbrEig;
879: PetscFree(A);
880: PetscFree(work);
881: PetscFree(perm);
882: PetscFree(work);
883: PetscFree(iwork);
884: PetscFree(modul);
885: PetscFree(Q);
886: return(0);
887: }
889: PetscErrorCode KSPDGMRESApplyDeflation_DGMRES(KSP ksp, Vec x, Vec y)
890: {
891: KSP_DGMRES *dgmres = (KSP_DGMRES*) ksp->data;
892: PetscInt i, r = dgmres->r;
894: PetscReal alpha = 1.0;
895: PetscInt max_neig = dgmres->max_neig;
896: PetscBLASInt br,bmax;
897: PetscReal lambda = dgmres->lambdaN;
900: PetscBLASIntCast(r,&br);
901: PetscBLASIntCast(max_neig,&bmax);
902: PetscLogEventBegin(KSP_DGMRESApplyDeflation, ksp, 0, 0, 0);
903: if (!r) {
904: VecCopy(x,y);
905: return(0);
906: }
907: /* Compute U'*x */
908: if (!X1) {
909: PetscMalloc1(bmax, &X1);
910: PetscMalloc1(bmax, &X2);
911: }
912: VecMDot(x, r, UU, X1);
914: /* Solve T*X1=X2 for X1*/
915: PetscMemcpy(X2, X1, br*sizeof(PetscReal));
916: #if defined(PETSC_MISSING_LAPACK_GETRS)
917: SETERRQ(PetscObjectComm((PetscObject)ksp),PETSC_ERR_SUP,"GETRS - Lapack routine is unavailable.");
918: #else
919: {
920: PetscBLASInt info;
921: PetscBLASInt nrhs = 1;
922: PetscStackCallBLAS("LAPACKgetrs",LAPACKgetrs_("N", &br, &nrhs, TTF, &bmax, INVP, X1, &bmax, &info));
923: if (info) SETERRQ1(PetscObjectComm((PetscObject)ksp), PETSC_ERR_LIB,"Error in LAPACK routine XGETRS %d", (int) info);
924: }
925: #endif
926: /* Iterative refinement -- is it really necessary ?? */
927: if (!WORK) {
928: PetscMalloc1(3*bmax, &WORK);
929: PetscMalloc1(bmax, &IWORK);
930: }
931: #if defined(PETSC_MISSING_LAPACK_GERFS)
932: SETERRQ(PetscObjectComm((PetscObject)ksp),PETSC_ERR_SUP,"GERFS - Lapack routine is unavailable.");
933: #else
934: {
935: PetscBLASInt info;
936: PetscReal berr, ferr;
937: PetscBLASInt nrhs = 1;
938: PetscStackCallBLAS("LAPACKgerfs",LAPACKgerfs_("N", &br, &nrhs, TT, &bmax, TTF, &bmax, INVP, X2, &bmax,X1, &bmax, &ferr, &berr, WORK, IWORK, &info));
939: if (info) SETERRQ1(PetscObjectComm((PetscObject)ksp), PETSC_ERR_LIB,"Error in LAPACK routine XGERFS %d", (int) info);
940: }
941: #endif
943: for (i = 0; i < r; i++) X2[i] = X1[i]/lambda - X2[i];
945: /* Compute X2=U*X2 */
946: VecZeroEntries(y);
947: VecMAXPY(y, r, X2, UU);
948: VecAXPY(y, alpha, x);
950: PetscLogEventEnd(KSP_DGMRESApplyDeflation, ksp, 0, 0, 0);
951: return(0);
952: }
954: static PetscErrorCode KSPDGMRESImproveEig_DGMRES(KSP ksp, PetscInt neig)
955: {
956: KSP_DGMRES *dgmres = (KSP_DGMRES*) ksp->data;
957: PetscInt j,r_old, r = dgmres->r;
958: PetscBLASInt i = 0;
959: PetscInt neig1 = dgmres->neig + EIG_OFFSET;
960: PetscInt bmax = dgmres->max_neig;
961: PetscInt aug = r + neig; /* actual size of the augmented invariant basis */
962: PetscInt aug1 = bmax+neig1; /* maximum size of the augmented invariant basis */
963: PetscBLASInt ldA; /* leading dimension of AUAU and AUU*/
964: PetscBLASInt N; /* size of AUAU */
965: PetscReal *Q; /* orthogonal matrix of (left) schur vectors */
966: PetscReal *Z; /* orthogonal matrix of (right) schur vectors */
967: PetscReal *work; /* working vector */
968: PetscBLASInt lwork; /* size of the working vector */
969: PetscInt *perm; /* Permutation vector to sort eigenvalues */
970: PetscReal *wr, *wi, *beta, *modul; /* Real and imaginary part and modul of the eigenvalues of A*/
971: PetscInt ierr;
972: PetscBLASInt NbrEig = 0,nr,bm;
973: PetscBLASInt *select;
974: PetscBLASInt liwork, *iwork;
977: /* Block construction of the matrices AUU=(AU)'*U and (AU)'*AU*/
978: if (!AUU) {
979: PetscMalloc1(aug1*aug1, &AUU);
980: PetscMalloc1(aug1*aug1, &AUAU);
981: }
982: /* AUU = (AU)'*U = [(MU)'*U (MU)'*X; (MX)'*U (MX)'*X]
983: * Note that MU and MX have been computed previously either in ComputeDataDeflation() or down here in a previous call to this function */
984: /* (MU)'*U size (r x r) -- store in the <r> first columns of AUU*/
985: for (j=0; j < r; j++) {
986: VecMDot(UU[j], r, MU, &AUU[j*aug1]);
987: }
988: /* (MU)'*X size (r x neig) -- store in AUU from the column <r>*/
989: for (j = 0; j < neig; j++) {
990: VecMDot(XX[j], r, MU, &AUU[(r+j) *aug1]);
991: }
992: /* (MX)'*U size (neig x r) -- store in the <r> first columns of AUU from the row <r>*/
993: for (j = 0; j < r; j++) {
994: VecMDot(UU[j], neig, MX, &AUU[j*aug1+r]);
995: }
996: /* (MX)'*X size (neig neig) -- store in AUU from the column <r> and the row <r>*/
997: for (j = 0; j < neig; j++) {
998: VecMDot(XX[j], neig, MX, &AUU[(r+j) *aug1 + r]);
999: }
1001: /* AUAU = (AU)'*AU = [(MU)'*MU (MU)'*MX; (MX)'*MU (MX)'*MX] */
1002: /* (MU)'*MU size (r x r) -- store in the <r> first columns of AUAU*/
1003: for (j=0; j < r; j++) {
1004: VecMDot(MU[j], r, MU, &AUAU[j*aug1]);
1005: }
1006: /* (MU)'*MX size (r x neig) -- store in AUAU from the column <r>*/
1007: for (j = 0; j < neig; j++) {
1008: VecMDot(MX[j], r, MU, &AUAU[(r+j) *aug1]);
1009: }
1010: /* (MX)'*MU size (neig x r) -- store in the <r> first columns of AUAU from the row <r>*/
1011: for (j = 0; j < r; j++) {
1012: VecMDot(MU[j], neig, MX, &AUAU[j*aug1+r]);
1013: }
1014: /* (MX)'*MX size (neig neig) -- store in AUAU from the column <r> and the row <r>*/
1015: for (j = 0; j < neig; j++) {
1016: VecMDot(MX[j], neig, MX, &AUAU[(r+j) *aug1 + r]);
1017: }
1019: /* Computation of the eigenvectors */
1020: PetscBLASIntCast(aug1,&ldA);
1021: PetscBLASIntCast(aug,&N);
1022: lwork = 8 * N + 20; /* sizeof the working space */
1023: PetscMalloc1(N, &wr);
1024: PetscMalloc1(N, &wi);
1025: PetscMalloc1(N, &beta);
1026: PetscMalloc1(N, &modul);
1027: PetscMalloc1(N, &perm);
1028: PetscMalloc1(N*N, &Q);
1029: PetscMalloc1(N*N, &Z);
1030: PetscMalloc1(lwork, &work);
1031: #if defined(PETSC_MISSING_LAPACK_GGES)
1032: SETERRQ(PetscObjectComm((PetscObject)ksp),PETSC_ERR_SUP,"GGES - Lapack routine is unavailable.");
1033: #else
1034: {
1035: PetscBLASInt info;
1036: PetscStackCallBLAS("LAPACKgges",LAPACKgges_("V", "V", "N", NULL, &N, AUAU, &ldA, AUU, &ldA, &i, wr, wi, beta, Q, &N, Z, &N, work, &lwork, NULL, &info));
1037: if (info) SETERRQ1 (PetscObjectComm((PetscObject)ksp), PETSC_ERR_LIB,"Error in LAPACK routine XGGES %d", (int) info);
1038: }
1039: #endif
1040: for (i=0; i<N; i++) {
1041: if (beta[i] !=0.0) {
1042: wr[i] /=beta[i];
1043: wi[i] /=beta[i];
1044: }
1045: }
1046: /* sort the eigenvalues */
1047: for (i=0; i<N; i++) modul[i]=PetscSqrtReal(wr[i]*wr[i]+wi[i]*wi[i]);
1048: for (i=0; i<N; i++) perm[i] = i;
1049: PetscSortRealWithPermutation(N, modul, perm);
1050: /* Save the norm of the largest eigenvalue */
1051: if (dgmres->lambdaN < modul[perm[N-1]]) dgmres->lambdaN = modul[perm[N-1]];
1052: /* Allocate space to extract the first r schur vectors */
1053: if (!SR2) {
1054: PetscMalloc1(aug1*bmax, &SR2);
1055: }
1056: /* count the number of extracted eigenvalues (complex conjugates count as 2) */
1057: while (NbrEig < bmax) {
1058: if (wi[perm[NbrEig]] == 0) NbrEig += 1;
1059: else NbrEig += 2;
1060: }
1061: if (NbrEig > bmax) NbrEig = bmax - 1;
1062: r_old = r; /* previous size of r */
1063: dgmres->r = r = NbrEig;
1065: /* Select the eigenvalues to reorder */
1066: PetscCalloc1(N, &select);
1067: if (!dgmres->GreatestEig) {
1068: for (j = 0; j < NbrEig; j++) select[perm[j]] = 1;
1069: } else {
1070: for (j = 0; j < NbrEig; j++) select[perm[N-j-1]] = 1;
1071: }
1072: /* Reorder and extract the new <r> schur vectors */
1073: lwork = PetscMax(4 * N + 16, 2 * NbrEig *(N - NbrEig));
1074: liwork = PetscMax(N + 6, 2 * NbrEig *(N - NbrEig));
1075: PetscFree(work);
1076: PetscMalloc1(lwork, &work);
1077: PetscMalloc1(liwork, &iwork);
1078: #if defined(PETSC_MISSING_LAPACK_TGSEN)
1079: SETERRQ(PetscObjectComm((PetscObject)ksp),PETSC_ERR_SUP,"TGSEN - Lapack routine is unavailable.");
1080: #else
1081: {
1082: PetscBLASInt info;
1083: PetscReal Dif[2];
1084: PetscBLASInt ijob = 2;
1085: PetscBLASInt wantQ = 1, wantZ = 1;
1086: PetscStackCallBLAS("LAPACKtgsen",LAPACKtgsen_(&ijob, &wantQ, &wantZ, select, &N, AUAU, &ldA, AUU, &ldA, wr, wi, beta, Q, &N, Z, &N, &NbrEig, NULL, NULL, &(Dif[0]), work, &lwork, iwork, &liwork, &info));
1087: if (info == 1) SETERRQ(PetscObjectComm((PetscObject)ksp), -1, "UNABLE TO REORDER THE EIGENVALUES WITH THE LAPACK ROUTINE : ILL-CONDITIONED PROBLEM");
1088: }
1089: #endif
1090: PetscFree(select);
1092: for (j=0; j<r; j++) {
1093: PetscMemcpy(&SR2[j*aug1], &(Z[j*N]), N*sizeof(PetscReal));
1094: }
1096: /* Multiply the Schur vectors SR2 by U (and X) to get a new U
1097: -- save it temporarily in MU */
1098: for (j = 0; j < r; j++) {
1099: VecZeroEntries(MU[j]);
1100: VecMAXPY(MU[j], r_old, &SR2[j*aug1], UU);
1101: VecMAXPY(MU[j], neig, &SR2[j*aug1+r_old], XX);
1102: }
1103: /* Form T = U'*MU*U */
1104: for (j = 0; j < r; j++) {
1105: VecCopy(MU[j], UU[j]);
1106: KSP_PCApplyBAorAB(ksp, UU[j], MU[j], VEC_TEMP_MATOP);
1107: }
1108: dgmres->matvecs += r;
1109: for (j = 0; j < r; j++) {
1110: VecMDot(MU[j], r, UU, &TT[j*bmax]);
1111: }
1112: /* Factorize T */
1113: PetscMemcpy(TTF, TT, bmax*r*sizeof(PetscReal));
1114: PetscBLASIntCast(r,&nr);
1115: PetscBLASIntCast(bmax,&bm);
1116: #if defined(PETSC_MISSING_LAPACK_GETRF)
1117: SETERRQ(PetscObjectComm((PetscObject)ksp),PETSC_ERR_SUP,"GETRF - Lapack routine is unavailable.");
1118: #else
1119: {
1120: PetscBLASInt info;
1121: PetscStackCallBLAS("LAPACKgetrf",LAPACKgetrf_(&nr, &nr, TTF, &bm, INVP, &info));
1122: if (info) SETERRQ1(PetscObjectComm((PetscObject)ksp), PETSC_ERR_LIB,"Error in LAPACK routine XGETRF INFO=%d",(int) info);
1123: }
1124: #endif
1125: /* Free Memory */
1126: PetscFree(wr);
1127: PetscFree(wi);
1128: PetscFree(beta);
1129: PetscFree(modul);
1130: PetscFree(perm);
1131: PetscFree(Q);
1132: PetscFree(Z);
1133: PetscFree(work);
1134: PetscFree(iwork);
1135: return(0);
1136: }
1138: /* end new DGMRES functions */
1140: /*MC
1141: KSPDGMRES - Implements the deflated GMRES as defined in [1,2].
1142: In this implementation, the adaptive strategy allows to switch to the deflated GMRES when the
1143: stagnation occurs.
1145: Options Database Keys:
1146: GMRES Options (inherited):
1147: + -ksp_gmres_restart <restart> - the number of Krylov directions to orthogonalize against
1148: . -ksp_gmres_haptol <tol> - sets the tolerance for "happy ending" (exact convergence)
1149: . -ksp_gmres_preallocate - preallocate all the Krylov search directions initially (otherwise groups of
1150: vectors are allocated as needed)
1151: . -ksp_gmres_classicalgramschmidt - use classical (unmodified) Gram-Schmidt to orthogonalize against the Krylov space (fast) (the default)
1152: . -ksp_gmres_modifiedgramschmidt - use modified Gram-Schmidt in the orthogonalization (more stable, but slower)
1153: . -ksp_gmres_cgs_refinement_type <never,ifneeded,always> - determine if iterative refinement is used to increase the
1154: stability of the classical Gram-Schmidt orthogonalization.
1155: - -ksp_gmres_krylov_monitor - plot the Krylov space generated
1157: DGMRES Options Database Keys:
1158: + -ksp_dgmres_eigen <neig> - number of smallest eigenvalues to extract at each restart
1159: . -ksp_dgmres_max_eigen <max_neig> - maximum number of eigenvalues that can be extracted during the iterative
1160: process
1161: . -ksp_dgmres_force - use the deflation at each restart; switch off the adaptive strategy.
1162: - -ksp_dgmres_view_deflation_vecs <viewerspec> - View the deflation vectors, where viewerspec is a key that can be
1163: parsed by PetscOptionsGetViewer(). If neig > 1, viewerspec should
1164: end with ":append". No vectors will be viewed if the adaptive
1165: strategy chooses not to deflate, so -ksp_dgmres_force should also
1166: be given.
1167: The deflation vectors span a subspace that may be a good
1168: approximation of the subspace of smallest eigenvectors of the
1169: preconditioned operator, so this option can aid in understanding
1170: the performance of a preconditioner.
1172: Level: beginner
1174: Notes:
1175: Left and right preconditioning are supported, but not symmetric preconditioning. Complex arithmetic is not yet supported
1177: References:
1178: + 1. - J. Erhel, K. Burrage and B. Pohl, Restarted GMRES preconditioned by deflation,J. Computational and Applied Mathematics, 69(1996).
1179: - 2. - D. NUENTSA WAKAM and F. PACULL, Memory Efficient Hybrid Algebraic Solvers for Linear Systems Arising from Compressible Flows, Computers and Fluids,
1180: In Press, http://dx.doi.org/10.1016/j.compfluid.2012.03.023
1182: Contributed by: Desire NUENTSA WAKAM,INRIA
1184: .seealso: KSPCreate(), KSPSetType(), KSPType (for list of available types), KSP, KSPFGMRES, KSPLGMRES,
1185: KSPGMRESSetRestart(), KSPGMRESSetHapTol(), KSPGMRESSetPreAllocateVectors(), KSPGMRESSetOrthogonalization(), KSPGMRESGetOrthogonalization(),
1186: KSPGMRESClassicalGramSchmidtOrthogonalization(), KSPGMRESModifiedGramSchmidtOrthogonalization(),
1187: KSPGMRESCGSRefinementType, KSPGMRESSetCGSRefinementType(), KSPGMRESGetCGSRefinementType(), KSPGMRESMonitorKrylov(), KSPSetPCSide()
1189: M*/
1191: PETSC_EXTERN PetscErrorCode KSPCreate_DGMRES(KSP ksp)
1192: {
1193: KSP_DGMRES *dgmres;
1197: PetscNewLog(ksp,&dgmres);
1198: ksp->data = (void*) dgmres;
1200: KSPSetSupportedNorm(ksp,KSP_NORM_PRECONDITIONED,PC_LEFT,3);
1201: KSPSetSupportedNorm(ksp,KSP_NORM_UNPRECONDITIONED,PC_RIGHT,2);
1203: ksp->ops->buildsolution = KSPBuildSolution_DGMRES;
1204: ksp->ops->setup = KSPSetUp_DGMRES;
1205: ksp->ops->solve = KSPSolve_DGMRES;
1206: ksp->ops->destroy = KSPDestroy_DGMRES;
1207: ksp->ops->view = KSPView_DGMRES;
1208: ksp->ops->setfromoptions = KSPSetFromOptions_DGMRES;
1209: ksp->ops->computeextremesingularvalues = KSPComputeExtremeSingularValues_GMRES;
1210: ksp->ops->computeeigenvalues = KSPComputeEigenvalues_GMRES;
1212: PetscObjectComposeFunction((PetscObject)ksp,"KSPGMRESSetPreAllocateVectors_C",KSPGMRESSetPreAllocateVectors_GMRES);
1213: PetscObjectComposeFunction((PetscObject)ksp,"KSPGMRESSetOrthogonalization_C",KSPGMRESSetOrthogonalization_GMRES);
1214: PetscObjectComposeFunction((PetscObject)ksp,"KSPGMRESSetRestart_C",KSPGMRESSetRestart_GMRES);
1215: PetscObjectComposeFunction((PetscObject)ksp,"KSPGMRESSetHapTol_C",KSPGMRESSetHapTol_GMRES);
1216: PetscObjectComposeFunction((PetscObject)ksp,"KSPGMRESSetCGSRefinementType_C",KSPGMRESSetCGSRefinementType_GMRES);
1217: /* -- New functions defined in DGMRES -- */
1218: PetscObjectComposeFunction((PetscObject)ksp, "KSPDGMRESSetEigen_C",KSPDGMRESSetEigen_DGMRES);
1219: PetscObjectComposeFunction((PetscObject)ksp, "KSPDGMRESSetMaxEigen_C",KSPDGMRESSetMaxEigen_DGMRES);
1220: PetscObjectComposeFunction((PetscObject)ksp, "KSPDGMRESSetRatio_C",KSPDGMRESSetRatio_DGMRES);
1221: PetscObjectComposeFunction((PetscObject)ksp, "KSPDGMRESForce_C",KSPDGMRESForce_DGMRES);
1222: PetscObjectComposeFunction((PetscObject)ksp, "KSPDGMRESComputeSchurForm_C",KSPDGMRESComputeSchurForm_DGMRES);
1223: PetscObjectComposeFunction((PetscObject)ksp, "KSPDGMRESComputeDeflationData_C",KSPDGMRESComputeDeflationData_DGMRES);
1224: PetscObjectComposeFunction((PetscObject)ksp, "KSPDGMRESApplyDeflation_C",KSPDGMRESApplyDeflation_DGMRES);
1225: PetscObjectComposeFunction((PetscObject)ksp, "KSPDGMRESImproveEig_C", KSPDGMRESImproveEig_DGMRES);
1227: PetscLogEventRegister("DGMRESCompDefl", KSP_CLASSID, &KSP_DGMRESComputeDeflationData);
1228: PetscLogEventRegister("DGMRESApplyDefl", KSP_CLASSID, &KSP_DGMRESApplyDeflation);
1230: dgmres->haptol = 1.0e-30;
1231: dgmres->q_preallocate = 0;
1232: dgmres->delta_allocate = GMRES_DELTA_DIRECTIONS;
1233: dgmres->orthog = KSPGMRESClassicalGramSchmidtOrthogonalization;
1234: dgmres->nrs = 0;
1235: dgmres->sol_temp = 0;
1236: dgmres->max_k = GMRES_DEFAULT_MAXK;
1237: dgmres->Rsvd = 0;
1238: dgmres->cgstype = KSP_GMRES_CGS_REFINE_NEVER;
1239: dgmres->orthogwork = 0;
1241: /* Default values for the deflation */
1242: dgmres->r = 0;
1243: dgmres->neig = DGMRES_DEFAULT_EIG;
1244: dgmres->max_neig = DGMRES_DEFAULT_MAXEIG-1;
1245: dgmres->lambdaN = 0.0;
1246: dgmres->smv = SMV;
1247: dgmres->matvecs = 0;
1248: dgmres->GreatestEig = PETSC_FALSE; /* experimental */
1249: dgmres->HasSchur = PETSC_FALSE;
1250: return(0);
1251: }