Actual source code: gmreig.c
petsc-3.14.6 2021-03-30
2: #include <../src/ksp/ksp/impls/gmres/gmresimpl.h>
3: #include <petscblaslapack.h>
5: PetscErrorCode KSPComputeExtremeSingularValues_GMRES(KSP ksp,PetscReal *emax,PetscReal *emin)
6: {
7: KSP_GMRES *gmres = (KSP_GMRES*)ksp->data;
9: PetscInt n = gmres->it + 1,i,N = gmres->max_k + 2;
10: PetscBLASInt bn, bN,lwork, idummy,lierr;
11: PetscScalar *R = gmres->Rsvd,*work = R + N*N,sdummy = 0;
12: PetscReal *realpart = gmres->Dsvd;
15: PetscBLASIntCast(n,&bn);
16: PetscBLASIntCast(N,&bN);
17: PetscBLASIntCast(5*N,&lwork);
18: PetscBLASIntCast(N,&idummy);
19: if (n <= 0) {
20: *emax = *emin = 1.0;
21: return(0);
22: }
23: /* copy R matrix to work space */
24: PetscArraycpy(R,gmres->hh_origin,(gmres->max_k+2)*(gmres->max_k+1));
26: /* zero below diagonal garbage */
27: for (i=0; i<n; i++) R[i*N+i+1] = 0.0;
29: /* compute Singular Values */
30: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
31: #if !defined(PETSC_USE_COMPLEX)
32: PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("N","N",&bn,&bn,R,&bN,realpart,&sdummy,&idummy,&sdummy,&idummy,work,&lwork,&lierr));
33: #else
34: PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("N","N",&bn,&bn,R,&bN,realpart,&sdummy,&idummy,&sdummy,&idummy,work,&lwork,realpart+N,&lierr));
35: #endif
36: if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SVD Lapack routine %d",(int)lierr);
37: PetscFPTrapPop();
39: *emin = realpart[n-1];
40: *emax = realpart[0];
41: return(0);
42: }
44: /* ------------------------------------------------------------------------ */
45: /* ESSL has a different calling sequence for dgeev() and zgeev() than standard LAPACK */
46: PetscErrorCode KSPComputeEigenvalues_GMRES(KSP ksp,PetscInt nmax,PetscReal *r,PetscReal *c,PetscInt *neig)
47: {
48: #if defined(PETSC_HAVE_ESSL)
49: KSP_GMRES *gmres = (KSP_GMRES*)ksp->data;
51: PetscInt n = gmres->it + 1,N = gmres->max_k + 1;
52: PetscInt i,*perm;
53: PetscScalar *R = gmres->Rsvd;
54: PetscScalar *cwork = R + N*N,sdummy = 0;
55: PetscReal *work,*realpart = gmres->Dsvd;
56: PetscBLASInt zero = 0,bn,bN,idummy = -1,lwork;
59: PetscBLASIntCast(n,&bn);
60: PetscBLASIntCast(N,&bN);
61: PetscBLASIntCast(5*N,&lwork);
62: if (nmax < n) SETERRQ(PetscObjectComm((PetscObject)ksp),PETSC_ERR_ARG_SIZ,"Not enough room in work space r and c for eigenvalues");
63: *neig = n;
65: if (!n) return(0);
67: /* copy R matrix to work space */
68: PetscArraycpy(R,gmres->hes_origin,N*N);
70: /* compute eigenvalues */
72: /* for ESSL version need really cwork of length N (complex), 2N
73: (real); already at least 5N of space has been allocated */
75: PetscMalloc1(lwork,&work);
76: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
77: PetscStackCallBLAS("LAPACKgeev",LAPACKgeev_(&zero,R,&bN,cwork,&sdummy,&idummy,&idummy,&bn,work,&lwork));
78: PetscFPTrapPop();
79: PetscFree(work);
81: /* For now we stick with the convention of storing the real and imaginary
82: components of evalues separately. But is this what we really want? */
83: PetscMalloc1(n,&perm);
85: #if !defined(PETSC_USE_COMPLEX)
86: for (i=0; i<n; i++) {
87: realpart[i] = cwork[2*i];
88: perm[i] = i;
89: }
90: PetscSortRealWithPermutation(n,realpart,perm);
91: for (i=0; i<n; i++) {
92: r[i] = cwork[2*perm[i]];
93: c[i] = cwork[2*perm[i]+1];
94: }
95: #else
96: for (i=0; i<n; i++) {
97: realpart[i] = PetscRealPart(cwork[i]);
98: perm[i] = i;
99: }
100: PetscSortRealWithPermutation(n,realpart,perm);
101: for (i=0; i<n; i++) {
102: r[i] = PetscRealPart(cwork[perm[i]]);
103: c[i] = PetscImaginaryPart(cwork[perm[i]]);
104: }
105: #endif
106: PetscFree(perm);
107: #elif !defined(PETSC_USE_COMPLEX)
108: KSP_GMRES *gmres = (KSP_GMRES*)ksp->data;
110: PetscInt n = gmres->it + 1,N = gmres->max_k + 1,i,*perm;
111: PetscBLASInt bn, bN, lwork, idummy, l-1;
112: PetscScalar *R = gmres->Rsvd,*work = R + N*N;
113: PetscScalar *realpart = gmres->Dsvd,*imagpart = realpart + N,sdummy = 0;
116: PetscBLASIntCast(n,&bn);
117: PetscBLASIntCast(N,&bN);
118: PetscBLASIntCast(5*N,&lwork);
119: PetscBLASIntCast(N,&idummy);
120: if (nmax < n) SETERRQ(PetscObjectComm((PetscObject)ksp),PETSC_ERR_ARG_SIZ,"Not enough room in work space r and c for eigenvalues");
121: *neig = n;
123: if (!n) return(0);
125: /* copy R matrix to work space */
126: PetscArraycpy(R,gmres->hes_origin,N*N);
128: /* compute eigenvalues */
129: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
130: PetscStackCallBLAS("LAPACKgeev",LAPACKgeev_("N","N",&bn,R,&bN,realpart,imagpart,&sdummy,&idummy,&sdummy,&idummy,work,&lwork,&lierr));
131: if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in LAPACK routine %d",(int)lierr);
132: PetscFPTrapPop();
133: PetscMalloc1(n,&perm);
134: for (i=0; i<n; i++) perm[i] = i;
135: PetscSortRealWithPermutation(n,realpart,perm);
136: for (i=0; i<n; i++) {
137: r[i] = realpart[perm[i]];
138: c[i] = imagpart[perm[i]];
139: }
140: PetscFree(perm);
141: #else
142: KSP_GMRES *gmres = (KSP_GMRES*)ksp->data;
144: PetscInt n = gmres->it + 1,N = gmres->max_k + 1,i,*perm;
145: PetscScalar *R = gmres->Rsvd,*work = R + N*N,*eigs = work + 5*N,sdummy;
146: PetscBLASInt bn,bN,lwork,idummy,l-1;
149: PetscBLASIntCast(n,&bn);
150: PetscBLASIntCast(N,&bN);
151: PetscBLASIntCast(5*N,&lwork);
152: PetscBLASIntCast(N,&idummy);
153: if (nmax < n) SETERRQ(PetscObjectComm((PetscObject)ksp),PETSC_ERR_ARG_SIZ,"Not enough room in work space r and c for eigenvalues");
154: *neig = n;
156: if (!n) return(0);
158: /* copy R matrix to work space */
159: PetscArraycpy(R,gmres->hes_origin,N*N);
161: /* compute eigenvalues */
162: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
163: PetscStackCallBLAS("LAPACKgeev",LAPACKgeev_("N","N",&bn,R,&bN,eigs,&sdummy,&idummy,&sdummy,&idummy,work,&lwork,gmres->Dsvd,&lierr));
164: if (lierr) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in LAPACK routine");
165: PetscFPTrapPop();
166: PetscMalloc1(n,&perm);
167: for (i=0; i<n; i++) perm[i] = i;
168: for (i=0; i<n; i++) r[i] = PetscRealPart(eigs[i]);
169: PetscSortRealWithPermutation(n,r,perm);
170: for (i=0; i<n; i++) {
171: r[i] = PetscRealPart(eigs[perm[i]]);
172: c[i] = PetscImaginaryPart(eigs[perm[i]]);
173: }
174: PetscFree(perm);
175: #endif
176: return(0);
177: }
179: #if !defined(PETSC_USE_COMPLEX) && !defined(PETSC_HAVE_ESSL)
180: PetscErrorCode KSPComputeRitz_GMRES(KSP ksp,PetscBool ritz,PetscBool small,PetscInt *nrit,Vec S[],PetscReal *tetar,PetscReal *tetai)
181: {
182: KSP_GMRES *gmres = (KSP_GMRES*)ksp->data;
184: PetscInt n = gmres->it + 1,N = gmres->max_k + 1,NbrRitz,nb=0;
185: PetscInt i,j,*perm;
186: PetscReal *H,*Q,*Ht; /* H Hessenberg Matrix and Q matrix of eigenvectors of H*/
187: PetscReal *wr,*wi,*modul; /* Real and imaginary part and modul of the Ritz values*/
188: PetscReal *SR,*work;
189: PetscBLASInt bn,bN,lwork,idummy;
190: PetscScalar *t,sdummy = 0;
193: /* n: size of the Hessenberg matrix */
194: if (gmres->fullcycle) n = N-1;
195: /* NbrRitz: number of (harmonic) Ritz pairs to extract */
196: NbrRitz = PetscMin(*nrit,n);
198: /* Definition of PetscBLASInt for lapack routines*/
199: PetscBLASIntCast(n,&bn);
200: PetscBLASIntCast(N,&bN);
201: PetscBLASIntCast(N,&idummy);
202: PetscBLASIntCast(5*N,&lwork);
203: /* Memory allocation */
204: PetscMalloc1(bN*bN,&H);
205: PetscMalloc1(bn*bn,&Q);
206: PetscMalloc1(lwork,&work);
207: PetscMalloc1(n,&wr);
208: PetscMalloc1(n,&wi);
210: /* copy H matrix to work space */
211: if (gmres->fullcycle) {
212: PetscArraycpy(H,gmres->hes_ritz,bN*bN);
213: } else {
214: PetscArraycpy(H,gmres->hes_origin,bN*bN);
215: }
217: /* Modify H to compute Harmonic Ritz pairs H = H + H^{-T}*h^2_{m+1,m}e_m*e_m^T */
218: if (!ritz) {
219: /* Transpose the Hessenberg matrix => Ht */
220: PetscMalloc1(bn*bn,&Ht);
221: for (i=0; i<bn; i++) {
222: for (j=0; j<bn; j++) {
223: Ht[i*bn+j] = H[j*bN+i];
224: }
225: }
226: /* Solve the system H^T*t = h^2_{m+1,m}e_m */
227: PetscCalloc1(bn,&t);
228: /* t = h^2_{m+1,m}e_m */
229: if (gmres->fullcycle) {
230: t[bn-1] = PetscSqr(gmres->hes_ritz[(bn-1)*bN+bn]);
231: } else {
232: t[bn-1] = PetscSqr(gmres->hes_origin[(bn-1)*bN+bn]);
233: }
234: /* Call the LAPACK routine dgesv to compute t = H^{-T}*t */
235: {
236: PetscBLASInt info;
237: PetscBLASInt nrhs = 1;
238: PetscBLASInt *ipiv;
239: PetscMalloc1(bn,&ipiv);
240: PetscStackCallBLAS("LAPACKgesv",LAPACKgesv_(&bn,&nrhs,Ht,&bn,ipiv,t,&bn,&info));
241: if (info) SETERRQ(PetscObjectComm((PetscObject)ksp),PETSC_ERR_PLIB,"Error while calling the Lapack routine DGESV");
242: PetscFree(ipiv);
243: PetscFree(Ht);
244: }
245: /* Now form H + H^{-T}*h^2_{m+1,m}e_m*e_m^T */
246: for (i=0; i<bn; i++) H[(bn-1)*bn+i] += t[i];
247: PetscFree(t);
248: }
250: /* Compute (harmonic) Ritz pairs */
251: {
252: PetscBLASInt info;
253: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
254: PetscStackCallBLAS("LAPACKgeev",LAPACKgeev_("N","V",&bn,H,&bN,wr,wi,&sdummy,&idummy,Q,&bn,work,&lwork,&info));
255: if (info) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in LAPACK routine");
256: }
257: /* sort the (harmonic) Ritz values */
258: PetscMalloc1(n,&modul);
259: PetscMalloc1(n,&perm);
260: for (i=0; i<n; i++) modul[i] = PetscSqrtReal(wr[i]*wr[i]+wi[i]*wi[i]);
261: for (i=0; i<n; i++) perm[i] = i;
262: PetscSortRealWithPermutation(n,modul,perm);
263: /* count the number of extracted Ritz or Harmonic Ritz pairs (with complex conjugates) */
264: if (small) {
265: while (nb < NbrRitz) {
266: if (!wi[perm[nb]]) nb += 1;
267: else nb += 2;
268: }
269: PetscMalloc1(nb*n,&SR);
270: for (i=0; i<nb; i++) {
271: tetar[i] = wr[perm[i]];
272: tetai[i] = wi[perm[i]];
273: PetscArraycpy(&SR[i*n],&(Q[perm[i]*bn]),n);
274: }
275: } else {
276: while (nb < NbrRitz) {
277: if (wi[perm[n-nb-1]] == 0) nb += 1;
278: else nb += 2;
279: }
280: PetscMalloc1(nb*n,&SR);
281: for (i=0; i<nb; i++) {
282: tetar[i] = wr[perm[n-nb+i]];
283: tetai[i] = wi[perm[n-nb+i]];
284: PetscArraycpy(&SR[i*n], &(Q[perm[n-nb+i]*bn]), n);
285: }
286: }
287: PetscFree(modul);
288: PetscFree(perm);
290: /* Form the Ritz or Harmonic Ritz vectors S=VV*Sr,
291: where the columns of VV correspond to the basis of the Krylov subspace */
292: if (gmres->fullcycle) {
293: for (j=0; j<nb; j++) {
294: VecZeroEntries(S[j]);
295: VecMAXPY(S[j],n,&SR[j*n],gmres->vecb);
296: }
297: } else {
298: for (j=0; j<nb; j++) {
299: VecZeroEntries(S[j]);
300: VecMAXPY(S[j],n,&SR[j*n],&VEC_VV(0));
301: }
302: }
303: *nrit = nb;
304: PetscFree(H);
305: PetscFree(Q);
306: PetscFree(SR);
307: PetscFree(wr);
308: PetscFree(wi);
309: return(0);
310: }
311: #endif