Actual source code: gmreig.c
petsc-3.12.5 2020-03-29
2: #include <../src/ksp/ksp/impls/gmres/gmresimpl.h>
3: #include <petscblaslapack.h>
5: PetscErrorCode KSPComputeExtremeSingularValues_GMRES(KSP ksp,PetscReal *emax,PetscReal *emin)
6: {
7: #if defined(PETSC_MISSING_LAPACK_GESVD)
9: /*
10: The Cray math libraries on T3D/T3E, and early versions of Intel Math Kernel Libraries (MKL)
11: for PCs do not seem to have the DGESVD() lapack routines
12: */
13: SETERRQ(PetscObjectComm((PetscObject)ksp),PETSC_ERR_SUP,"GESVD - Lapack routine is unavailable\nNot able to provide singular value estimates.");
14: #else
15: KSP_GMRES *gmres = (KSP_GMRES*)ksp->data;
17: PetscInt n = gmres->it + 1,i,N = gmres->max_k + 2;
18: PetscBLASInt bn, bN,lwork, idummy,lierr;
19: PetscScalar *R = gmres->Rsvd,*work = R + N*N,sdummy = 0;
20: PetscReal *realpart = gmres->Dsvd;
23: PetscBLASIntCast(n,&bn);
24: PetscBLASIntCast(N,&bN);
25: PetscBLASIntCast(5*N,&lwork);
26: PetscBLASIntCast(N,&idummy);
27: if (n <= 0) {
28: *emax = *emin = 1.0;
29: return(0);
30: }
31: /* copy R matrix to work space */
32: PetscArraycpy(R,gmres->hh_origin,(gmres->max_k+2)*(gmres->max_k+1));
34: /* zero below diagonal garbage */
35: for (i=0; i<n; i++) R[i*N+i+1] = 0.0;
37: /* compute Singular Values */
38: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
39: #if !defined(PETSC_USE_COMPLEX)
40: PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("N","N",&bn,&bn,R,&bN,realpart,&sdummy,&idummy,&sdummy,&idummy,work,&lwork,&lierr));
41: #else
42: PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("N","N",&bn,&bn,R,&bN,realpart,&sdummy,&idummy,&sdummy,&idummy,work,&lwork,realpart+N,&lierr));
43: #endif
44: if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SVD Lapack routine %d",(int)lierr);
45: PetscFPTrapPop();
47: *emin = realpart[n-1];
48: *emax = realpart[0];
49: #endif
50: return(0);
51: }
53: /* ------------------------------------------------------------------------ */
54: /* ESSL has a different calling sequence for dgeev() and zgeev() than standard LAPACK */
55: PetscErrorCode KSPComputeEigenvalues_GMRES(KSP ksp,PetscInt nmax,PetscReal *r,PetscReal *c,PetscInt *neig)
56: {
57: #if defined(PETSC_HAVE_ESSL)
58: KSP_GMRES *gmres = (KSP_GMRES*)ksp->data;
60: PetscInt n = gmres->it + 1,N = gmres->max_k + 1;
61: PetscInt i,*perm;
62: PetscScalar *R = gmres->Rsvd;
63: PetscScalar *cwork = R + N*N,sdummy = 0;
64: PetscReal *work,*realpart = gmres->Dsvd;
65: PetscBLASInt zero = 0,bn,bN,idummy = -1,lwork;
68: PetscBLASIntCast(n,&bn);
69: PetscBLASIntCast(N,&bN);
70: PetscBLASIntCast(5*N,&lwork);
71: if (nmax < n) SETERRQ(PetscObjectComm((PetscObject)ksp),PETSC_ERR_ARG_SIZ,"Not enough room in work space r and c for eigenvalues");
72: *neig = n;
74: if (!n) return(0);
76: /* copy R matrix to work space */
77: PetscArraycpy(R,gmres->hes_origin,N*N);
79: /* compute eigenvalues */
81: /* for ESSL version need really cwork of length N (complex), 2N
82: (real); already at least 5N of space has been allocated */
84: PetscMalloc1(lwork,&work);
85: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
86: PetscStackCallBLAS("LAPACKgeev",LAPACKgeev_(&zero,R,&bN,cwork,&sdummy,&idummy,&idummy,&bn,work,&lwork));
87: PetscFPTrapPop();
88: PetscFree(work);
90: /* For now we stick with the convention of storing the real and imaginary
91: components of evalues separately. But is this what we really want? */
92: PetscMalloc1(n,&perm);
94: #if !defined(PETSC_USE_COMPLEX)
95: for (i=0; i<n; i++) {
96: realpart[i] = cwork[2*i];
97: perm[i] = i;
98: }
99: PetscSortRealWithPermutation(n,realpart,perm);
100: for (i=0; i<n; i++) {
101: r[i] = cwork[2*perm[i]];
102: c[i] = cwork[2*perm[i]+1];
103: }
104: #else
105: for (i=0; i<n; i++) {
106: realpart[i] = PetscRealPart(cwork[i]);
107: perm[i] = i;
108: }
109: PetscSortRealWithPermutation(n,realpart,perm);
110: for (i=0; i<n; i++) {
111: r[i] = PetscRealPart(cwork[perm[i]]);
112: c[i] = PetscImaginaryPart(cwork[perm[i]]);
113: }
114: #endif
115: PetscFree(perm);
116: #elif defined(PETSC_MISSING_LAPACK_GEEV)
118: SETERRQ(PetscObjectComm((PetscObject)ksp),PETSC_ERR_SUP,"GEEV - Lapack routine is unavailable\nNot able to provide eigen values.");
119: #elif !defined(PETSC_USE_COMPLEX)
120: KSP_GMRES *gmres = (KSP_GMRES*)ksp->data;
122: PetscInt n = gmres->it + 1,N = gmres->max_k + 1,i,*perm;
123: PetscBLASInt bn, bN, lwork, idummy, l-1;
124: PetscScalar *R = gmres->Rsvd,*work = R + N*N;
125: PetscScalar *realpart = gmres->Dsvd,*imagpart = realpart + N,sdummy = 0;
128: PetscBLASIntCast(n,&bn);
129: PetscBLASIntCast(N,&bN);
130: PetscBLASIntCast(5*N,&lwork);
131: PetscBLASIntCast(N,&idummy);
132: if (nmax < n) SETERRQ(PetscObjectComm((PetscObject)ksp),PETSC_ERR_ARG_SIZ,"Not enough room in work space r and c for eigenvalues");
133: *neig = n;
135: if (!n) return(0);
137: /* copy R matrix to work space */
138: PetscArraycpy(R,gmres->hes_origin,N*N);
140: /* compute eigenvalues */
141: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
142: PetscStackCallBLAS("LAPACKgeev",LAPACKgeev_("N","N",&bn,R,&bN,realpart,imagpart,&sdummy,&idummy,&sdummy,&idummy,work,&lwork,&lierr));
143: if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in LAPACK routine %d",(int)lierr);
144: PetscFPTrapPop();
145: PetscMalloc1(n,&perm);
146: for (i=0; i<n; i++) perm[i] = i;
147: PetscSortRealWithPermutation(n,realpart,perm);
148: for (i=0; i<n; i++) {
149: r[i] = realpart[perm[i]];
150: c[i] = imagpart[perm[i]];
151: }
152: PetscFree(perm);
153: #else
154: KSP_GMRES *gmres = (KSP_GMRES*)ksp->data;
156: PetscInt n = gmres->it + 1,N = gmres->max_k + 1,i,*perm;
157: PetscScalar *R = gmres->Rsvd,*work = R + N*N,*eigs = work + 5*N,sdummy;
158: PetscBLASInt bn,bN,lwork,idummy,l-1;
161: PetscBLASIntCast(n,&bn);
162: PetscBLASIntCast(N,&bN);
163: PetscBLASIntCast(5*N,&lwork);
164: PetscBLASIntCast(N,&idummy);
165: if (nmax < n) SETERRQ(PetscObjectComm((PetscObject)ksp),PETSC_ERR_ARG_SIZ,"Not enough room in work space r and c for eigenvalues");
166: *neig = n;
168: if (!n) return(0);
170: /* copy R matrix to work space */
171: PetscArraycpy(R,gmres->hes_origin,N*N);
173: /* compute eigenvalues */
174: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
175: PetscStackCallBLAS("LAPACKgeev",LAPACKgeev_("N","N",&bn,R,&bN,eigs,&sdummy,&idummy,&sdummy,&idummy,work,&lwork,gmres->Dsvd,&lierr));
176: if (lierr) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in LAPACK routine");
177: PetscFPTrapPop();
178: PetscMalloc1(n,&perm);
179: for (i=0; i<n; i++) perm[i] = i;
180: for (i=0; i<n; i++) r[i] = PetscRealPart(eigs[i]);
181: PetscSortRealWithPermutation(n,r,perm);
182: for (i=0; i<n; i++) {
183: r[i] = PetscRealPart(eigs[perm[i]]);
184: c[i] = PetscImaginaryPart(eigs[perm[i]]);
185: }
186: PetscFree(perm);
187: #endif
188: return(0);
189: }
191: #if !defined(PETSC_USE_COMPLEX) && !defined(PETSC_HAVE_ESSL)
192: PetscErrorCode KSPComputeRitz_GMRES(KSP ksp,PetscBool ritz,PetscBool small,PetscInt *nrit,Vec S[],PetscReal *tetar,PetscReal *tetai)
193: {
194: KSP_GMRES *gmres = (KSP_GMRES*)ksp->data;
196: PetscInt n = gmres->it + 1,N = gmres->max_k + 1,NbrRitz,nb=0;
197: PetscInt i,j,*perm;
198: PetscReal *H,*Q,*Ht; /* H Hessenberg Matrix and Q matrix of eigenvectors of H*/
199: PetscReal *wr,*wi,*modul; /* Real and imaginary part and modul of the Ritz values*/
200: PetscReal *SR,*work;
201: PetscBLASInt bn,bN,lwork,idummy;
202: PetscScalar *t,sdummy = 0;
205: /* n: size of the Hessenberg matrix */
206: if (gmres->fullcycle) n = N-1;
207: /* NbrRitz: number of (harmonic) Ritz pairs to extract */
208: NbrRitz = PetscMin(*nrit,n);
210: /* Definition of PetscBLASInt for lapack routines*/
211: PetscBLASIntCast(n,&bn);
212: PetscBLASIntCast(N,&bN);
213: PetscBLASIntCast(N,&idummy);
214: PetscBLASIntCast(5*N,&lwork);
215: /* Memory allocation */
216: PetscMalloc1(bN*bN,&H);
217: PetscMalloc1(bn*bn,&Q);
218: PetscMalloc1(lwork,&work);
219: PetscMalloc1(n,&wr);
220: PetscMalloc1(n,&wi);
222: /* copy H matrix to work space */
223: if (gmres->fullcycle) {
224: PetscArraycpy(H,gmres->hes_ritz,bN*bN);
225: } else {
226: PetscArraycpy(H,gmres->hes_origin,bN*bN);
227: }
229: /* Modify H to compute Harmonic Ritz pairs H = H + H^{-T}*h^2_{m+1,m}e_m*e_m^T */
230: if (!ritz) {
231: /* Transpose the Hessenberg matrix => Ht */
232: PetscMalloc1(bn*bn,&Ht);
233: for (i=0; i<bn; i++) {
234: for (j=0; j<bn; j++) {
235: Ht[i*bn+j] = H[j*bN+i];
236: }
237: }
238: /* Solve the system H^T*t = h^2_{m+1,m}e_m */
239: PetscCalloc1(bn,&t);
240: /* t = h^2_{m+1,m}e_m */
241: if (gmres->fullcycle) {
242: t[bn-1] = PetscSqr(gmres->hes_ritz[(bn-1)*bN+bn]);
243: } else {
244: t[bn-1] = PetscSqr(gmres->hes_origin[(bn-1)*bN+bn]);
245: }
246: /* Call the LAPACK routine dgesv to compute t = H^{-T}*t */
247: #if defined(PETSC_MISSING_LAPACK_GESV)
248: SETERRQ(PetscObjectComm((PetscObject)ksp),PETSC_ERR_SUP,"GESV - Lapack routine is unavailable.");
249: #else
250: {
251: PetscBLASInt info;
252: PetscBLASInt nrhs = 1;
253: PetscBLASInt *ipiv;
254: PetscMalloc1(bn,&ipiv);
255: PetscStackCallBLAS("LAPACKgesv",LAPACKgesv_(&bn,&nrhs,Ht,&bn,ipiv,t,&bn,&info));
256: if (info) SETERRQ(PetscObjectComm((PetscObject)ksp),PETSC_ERR_PLIB,"Error while calling the Lapack routine DGESV");
257: PetscFree(ipiv);
258: PetscFree(Ht);
259: }
260: #endif
261: /* Now form H + H^{-T}*h^2_{m+1,m}e_m*e_m^T */
262: for (i=0; i<bn; i++) H[(bn-1)*bn+i] += t[i];
263: PetscFree(t);
264: }
266: /* Compute (harmonic) Ritz pairs */
267: #if defined(PETSC_MISSING_LAPACK_HSEQR)
268: SETERRQ(PetscObjectComm((PetscObject)ksp),PETSC_ERR_SUP,"GEEV - Lapack routine is unavailable\nNot able to provide eigen values.");
269: #else
270: {
271: PetscBLASInt info;
272: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
273: PetscStackCallBLAS("LAPACKgeev",LAPACKgeev_("N","V",&bn,H,&bN,wr,wi,&sdummy,&idummy,Q,&bn,work,&lwork,&info));
274: if (info) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in LAPACK routine");
275: }
276: #endif
277: /* sort the (harmonic) Ritz values */
278: PetscMalloc1(n,&modul);
279: PetscMalloc1(n,&perm);
280: for (i=0; i<n; i++) modul[i] = PetscSqrtReal(wr[i]*wr[i]+wi[i]*wi[i]);
281: for (i=0; i<n; i++) perm[i] = i;
282: PetscSortRealWithPermutation(n,modul,perm);
283: /* count the number of extracted Ritz or Harmonic Ritz pairs (with complex conjugates) */
284: if (small) {
285: while (nb < NbrRitz) {
286: if (!wi[perm[nb]]) nb += 1;
287: else nb += 2;
288: }
289: PetscMalloc1(nb*n,&SR);
290: for (i=0; i<nb; i++) {
291: tetar[i] = wr[perm[i]];
292: tetai[i] = wi[perm[i]];
293: PetscArraycpy(&SR[i*n],&(Q[perm[i]*bn]),n);
294: }
295: } else {
296: while (nb < NbrRitz) {
297: if (wi[perm[n-nb-1]] == 0) nb += 1;
298: else nb += 2;
299: }
300: PetscMalloc1(nb*n,&SR);
301: for (i=0; i<nb; i++) {
302: tetar[i] = wr[perm[n-nb+i]];
303: tetai[i] = wi[perm[n-nb+i]];
304: PetscArraycpy(&SR[i*n], &(Q[perm[n-nb+i]*bn]), n);
305: }
306: }
307: PetscFree(modul);
308: PetscFree(perm);
310: /* Form the Ritz or Harmonic Ritz vectors S=VV*Sr,
311: where the columns of VV correspond to the basis of the Krylov subspace */
312: if (gmres->fullcycle) {
313: for (j=0; j<nb; j++) {
314: VecZeroEntries(S[j]);
315: VecMAXPY(S[j],n,&SR[j*n],gmres->vecb);
316: }
317: } else {
318: for (j=0; j<nb; j++) {
319: VecZeroEntries(S[j]);
320: VecMAXPY(S[j],n,&SR[j*n],&VEC_VV(0));
321: }
322: }
323: *nrit = nb;
324: PetscFree(H);
325: PetscFree(Q);
326: PetscFree(SR);
327: PetscFree(wr);
328: PetscFree(wi);
329: return(0);
330: }
331: #endif