Actual source code: gmreig.c

petsc-3.14.6 2021-03-30
Report Typos and Errors

  2: #include <../src/ksp/ksp/impls/gmres/gmresimpl.h>
  3: #include <petscblaslapack.h>

  5: PetscErrorCode KSPComputeExtremeSingularValues_GMRES(KSP ksp,PetscReal *emax,PetscReal *emin)
  6: {
  7:   KSP_GMRES      *gmres = (KSP_GMRES*)ksp->data;
  9:   PetscInt       n = gmres->it + 1,i,N = gmres->max_k + 2;
 10:   PetscBLASInt   bn, bN,lwork, idummy,lierr;
 11:   PetscScalar    *R        = gmres->Rsvd,*work = R + N*N,sdummy = 0;
 12:   PetscReal      *realpart = gmres->Dsvd;

 15:   PetscBLASIntCast(n,&bn);
 16:   PetscBLASIntCast(N,&bN);
 17:   PetscBLASIntCast(5*N,&lwork);
 18:   PetscBLASIntCast(N,&idummy);
 19:   if (n <= 0) {
 20:     *emax = *emin = 1.0;
 21:     return(0);
 22:   }
 23:   /* copy R matrix to work space */
 24:   PetscArraycpy(R,gmres->hh_origin,(gmres->max_k+2)*(gmres->max_k+1));

 26:   /* zero below diagonal garbage */
 27:   for (i=0; i<n; i++) R[i*N+i+1] = 0.0;

 29:   /* compute Singular Values */
 30:   PetscFPTrapPush(PETSC_FP_TRAP_OFF);
 31: #if !defined(PETSC_USE_COMPLEX)
 32:   PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("N","N",&bn,&bn,R,&bN,realpart,&sdummy,&idummy,&sdummy,&idummy,work,&lwork,&lierr));
 33: #else
 34:   PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("N","N",&bn,&bn,R,&bN,realpart,&sdummy,&idummy,&sdummy,&idummy,work,&lwork,realpart+N,&lierr));
 35: #endif
 36:   if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SVD Lapack routine %d",(int)lierr);
 37:   PetscFPTrapPop();

 39:   *emin = realpart[n-1];
 40:   *emax = realpart[0];
 41:   return(0);
 42: }

 44: /* ------------------------------------------------------------------------ */
 45: /* ESSL has a different calling sequence for dgeev() and zgeev() than standard LAPACK */
 46: PetscErrorCode KSPComputeEigenvalues_GMRES(KSP ksp,PetscInt nmax,PetscReal *r,PetscReal *c,PetscInt *neig)
 47: {
 48: #if defined(PETSC_HAVE_ESSL)
 49:   KSP_GMRES      *gmres = (KSP_GMRES*)ksp->data;
 51:   PetscInt       n = gmres->it + 1,N = gmres->max_k + 1;
 52:   PetscInt       i,*perm;
 53:   PetscScalar    *R     = gmres->Rsvd;
 54:   PetscScalar    *cwork = R + N*N,sdummy = 0;
 55:   PetscReal      *work,*realpart = gmres->Dsvd;
 56:   PetscBLASInt   zero = 0,bn,bN,idummy = -1,lwork;

 59:   PetscBLASIntCast(n,&bn);
 60:   PetscBLASIntCast(N,&bN);
 61:   PetscBLASIntCast(5*N,&lwork);
 62:   if (nmax < n) SETERRQ(PetscObjectComm((PetscObject)ksp),PETSC_ERR_ARG_SIZ,"Not enough room in work space r and c for eigenvalues");
 63:   *neig = n;

 65:   if (!n) return(0);

 67:   /* copy R matrix to work space */
 68:   PetscArraycpy(R,gmres->hes_origin,N*N);

 70:   /* compute eigenvalues */

 72:   /* for ESSL version need really cwork of length N (complex), 2N
 73:      (real); already at least 5N of space has been allocated */

 75:   PetscMalloc1(lwork,&work);
 76:   PetscFPTrapPush(PETSC_FP_TRAP_OFF);
 77:   PetscStackCallBLAS("LAPACKgeev",LAPACKgeev_(&zero,R,&bN,cwork,&sdummy,&idummy,&idummy,&bn,work,&lwork));
 78:   PetscFPTrapPop();
 79:   PetscFree(work);

 81:   /* For now we stick with the convention of storing the real and imaginary
 82:      components of evalues separately.  But is this what we really want? */
 83:   PetscMalloc1(n,&perm);

 85: #if !defined(PETSC_USE_COMPLEX)
 86:   for (i=0; i<n; i++) {
 87:     realpart[i] = cwork[2*i];
 88:     perm[i]     = i;
 89:   }
 90:   PetscSortRealWithPermutation(n,realpart,perm);
 91:   for (i=0; i<n; i++) {
 92:     r[i] = cwork[2*perm[i]];
 93:     c[i] = cwork[2*perm[i]+1];
 94:   }
 95: #else
 96:   for (i=0; i<n; i++) {
 97:     realpart[i] = PetscRealPart(cwork[i]);
 98:     perm[i]     = i;
 99:   }
100:   PetscSortRealWithPermutation(n,realpart,perm);
101:   for (i=0; i<n; i++) {
102:     r[i] = PetscRealPart(cwork[perm[i]]);
103:     c[i] = PetscImaginaryPart(cwork[perm[i]]);
104:   }
105: #endif
106:   PetscFree(perm);
107: #elif !defined(PETSC_USE_COMPLEX)
108:   KSP_GMRES      *gmres = (KSP_GMRES*)ksp->data;
110:   PetscInt       n = gmres->it + 1,N = gmres->max_k + 1,i,*perm;
111:   PetscBLASInt   bn, bN, lwork, idummy, l-1;
112:   PetscScalar    *R        = gmres->Rsvd,*work = R + N*N;
113:   PetscScalar    *realpart = gmres->Dsvd,*imagpart = realpart + N,sdummy = 0;

116:   PetscBLASIntCast(n,&bn);
117:   PetscBLASIntCast(N,&bN);
118:   PetscBLASIntCast(5*N,&lwork);
119:   PetscBLASIntCast(N,&idummy);
120:   if (nmax < n) SETERRQ(PetscObjectComm((PetscObject)ksp),PETSC_ERR_ARG_SIZ,"Not enough room in work space r and c for eigenvalues");
121:   *neig = n;

123:   if (!n) return(0);

125:   /* copy R matrix to work space */
126:   PetscArraycpy(R,gmres->hes_origin,N*N);

128:   /* compute eigenvalues */
129:   PetscFPTrapPush(PETSC_FP_TRAP_OFF);
130:   PetscStackCallBLAS("LAPACKgeev",LAPACKgeev_("N","N",&bn,R,&bN,realpart,imagpart,&sdummy,&idummy,&sdummy,&idummy,work,&lwork,&lierr));
131:   if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in LAPACK routine %d",(int)lierr);
132:   PetscFPTrapPop();
133:   PetscMalloc1(n,&perm);
134:   for (i=0; i<n; i++) perm[i] = i;
135:   PetscSortRealWithPermutation(n,realpart,perm);
136:   for (i=0; i<n; i++) {
137:     r[i] = realpart[perm[i]];
138:     c[i] = imagpart[perm[i]];
139:   }
140:   PetscFree(perm);
141: #else
142:   KSP_GMRES      *gmres = (KSP_GMRES*)ksp->data;
144:   PetscInt       n  = gmres->it + 1,N = gmres->max_k + 1,i,*perm;
145:   PetscScalar    *R = gmres->Rsvd,*work = R + N*N,*eigs = work + 5*N,sdummy;
146:   PetscBLASInt   bn,bN,lwork,idummy,l-1;

149:   PetscBLASIntCast(n,&bn);
150:   PetscBLASIntCast(N,&bN);
151:   PetscBLASIntCast(5*N,&lwork);
152:   PetscBLASIntCast(N,&idummy);
153:   if (nmax < n) SETERRQ(PetscObjectComm((PetscObject)ksp),PETSC_ERR_ARG_SIZ,"Not enough room in work space r and c for eigenvalues");
154:   *neig = n;

156:   if (!n) return(0);

158:   /* copy R matrix to work space */
159:   PetscArraycpy(R,gmres->hes_origin,N*N);

161:   /* compute eigenvalues */
162:   PetscFPTrapPush(PETSC_FP_TRAP_OFF);
163:   PetscStackCallBLAS("LAPACKgeev",LAPACKgeev_("N","N",&bn,R,&bN,eigs,&sdummy,&idummy,&sdummy,&idummy,work,&lwork,gmres->Dsvd,&lierr));
164:   if (lierr) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in LAPACK routine");
165:   PetscFPTrapPop();
166:   PetscMalloc1(n,&perm);
167:   for (i=0; i<n; i++) perm[i] = i;
168:   for (i=0; i<n; i++) r[i] = PetscRealPart(eigs[i]);
169:   PetscSortRealWithPermutation(n,r,perm);
170:   for (i=0; i<n; i++) {
171:     r[i] = PetscRealPart(eigs[perm[i]]);
172:     c[i] = PetscImaginaryPart(eigs[perm[i]]);
173:   }
174:   PetscFree(perm);
175: #endif
176:   return(0);
177: }

179: #if !defined(PETSC_USE_COMPLEX) && !defined(PETSC_HAVE_ESSL)
180: PetscErrorCode KSPComputeRitz_GMRES(KSP ksp,PetscBool ritz,PetscBool small,PetscInt *nrit,Vec S[],PetscReal *tetar,PetscReal *tetai)
181: {
182:   KSP_GMRES      *gmres = (KSP_GMRES*)ksp->data;
184:   PetscInt       n = gmres->it + 1,N = gmres->max_k + 1,NbrRitz,nb=0;
185:   PetscInt       i,j,*perm;
186:   PetscReal      *H,*Q,*Ht;              /* H Hessenberg Matrix and Q matrix of eigenvectors of H*/
187:   PetscReal      *wr,*wi,*modul;       /* Real and imaginary part and modul of the Ritz values*/
188:   PetscReal      *SR,*work;
189:   PetscBLASInt   bn,bN,lwork,idummy;
190:   PetscScalar    *t,sdummy = 0;

193:   /* n: size of the Hessenberg matrix */
194:   if (gmres->fullcycle) n = N-1;
195:   /* NbrRitz: number of (harmonic) Ritz pairs to extract */
196:   NbrRitz = PetscMin(*nrit,n);

198:   /* Definition of PetscBLASInt for lapack routines*/
199:   PetscBLASIntCast(n,&bn);
200:   PetscBLASIntCast(N,&bN);
201:   PetscBLASIntCast(N,&idummy);
202:   PetscBLASIntCast(5*N,&lwork);
203:   /* Memory allocation */
204:   PetscMalloc1(bN*bN,&H);
205:   PetscMalloc1(bn*bn,&Q);
206:   PetscMalloc1(lwork,&work);
207:   PetscMalloc1(n,&wr);
208:   PetscMalloc1(n,&wi);

210:   /* copy H matrix to work space */
211:   if (gmres->fullcycle) {
212:     PetscArraycpy(H,gmres->hes_ritz,bN*bN);
213:   } else {
214:     PetscArraycpy(H,gmres->hes_origin,bN*bN);
215:   }

217:   /* Modify H to compute Harmonic Ritz pairs H = H + H^{-T}*h^2_{m+1,m}e_m*e_m^T */
218:   if (!ritz) {
219:     /* Transpose the Hessenberg matrix => Ht */
220:     PetscMalloc1(bn*bn,&Ht);
221:     for (i=0; i<bn; i++) {
222:       for (j=0; j<bn; j++) {
223:         Ht[i*bn+j] = H[j*bN+i];
224:       }
225:     }
226:     /* Solve the system H^T*t = h^2_{m+1,m}e_m */
227:     PetscCalloc1(bn,&t);
228:     /* t = h^2_{m+1,m}e_m */
229:     if (gmres->fullcycle) {
230:       t[bn-1] = PetscSqr(gmres->hes_ritz[(bn-1)*bN+bn]);
231:     } else {
232:       t[bn-1] = PetscSqr(gmres->hes_origin[(bn-1)*bN+bn]);
233:     }
234:     /* Call the LAPACK routine dgesv to compute t = H^{-T}*t */
235:     {
236:       PetscBLASInt info;
237:       PetscBLASInt nrhs = 1;
238:       PetscBLASInt *ipiv;
239:       PetscMalloc1(bn,&ipiv);
240:       PetscStackCallBLAS("LAPACKgesv",LAPACKgesv_(&bn,&nrhs,Ht,&bn,ipiv,t,&bn,&info));
241:       if (info) SETERRQ(PetscObjectComm((PetscObject)ksp),PETSC_ERR_PLIB,"Error while calling the Lapack routine DGESV");
242:       PetscFree(ipiv);
243:       PetscFree(Ht);
244:     }
245:     /* Now form H + H^{-T}*h^2_{m+1,m}e_m*e_m^T */
246:     for (i=0; i<bn; i++) H[(bn-1)*bn+i] += t[i];
247:     PetscFree(t);
248:   }

250:   /* Compute (harmonic) Ritz pairs */
251:   {
252:     PetscBLASInt info;
253:     PetscFPTrapPush(PETSC_FP_TRAP_OFF);
254:     PetscStackCallBLAS("LAPACKgeev",LAPACKgeev_("N","V",&bn,H,&bN,wr,wi,&sdummy,&idummy,Q,&bn,work,&lwork,&info));
255:     if (info) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in LAPACK routine");
256:   }
257:   /* sort the (harmonic) Ritz values */
258:   PetscMalloc1(n,&modul);
259:   PetscMalloc1(n,&perm);
260:   for (i=0; i<n; i++) modul[i] = PetscSqrtReal(wr[i]*wr[i]+wi[i]*wi[i]);
261:   for (i=0; i<n; i++) perm[i] = i;
262:   PetscSortRealWithPermutation(n,modul,perm);
263:   /* count the number of extracted Ritz or Harmonic Ritz pairs (with complex conjugates) */
264:   if (small) {
265:     while (nb < NbrRitz) {
266:       if (!wi[perm[nb]]) nb += 1;
267:       else nb += 2;
268:     }
269:     PetscMalloc1(nb*n,&SR);
270:     for (i=0; i<nb; i++) {
271:       tetar[i] = wr[perm[i]];
272:       tetai[i] = wi[perm[i]];
273:       PetscArraycpy(&SR[i*n],&(Q[perm[i]*bn]),n);
274:     }
275:   } else {
276:     while (nb < NbrRitz) {
277:       if (wi[perm[n-nb-1]] == 0) nb += 1;
278:       else nb += 2;
279:     }
280:     PetscMalloc1(nb*n,&SR);
281:     for (i=0; i<nb; i++) {
282:       tetar[i] = wr[perm[n-nb+i]];
283:       tetai[i] = wi[perm[n-nb+i]];
284:       PetscArraycpy(&SR[i*n], &(Q[perm[n-nb+i]*bn]), n);
285:     }
286:   }
287:   PetscFree(modul);
288:   PetscFree(perm);

290:   /* Form the Ritz or Harmonic Ritz vectors S=VV*Sr,
291:     where the columns of VV correspond to the basis of the Krylov subspace */
292:   if (gmres->fullcycle) {
293:     for (j=0; j<nb; j++) {
294:       VecZeroEntries(S[j]);
295:       VecMAXPY(S[j],n,&SR[j*n],gmres->vecb);
296:     }
297:   } else {
298:     for (j=0; j<nb; j++) {
299:       VecZeroEntries(S[j]);
300:       VecMAXPY(S[j],n,&SR[j*n],&VEC_VV(0));
301:     }
302:   }
303:   *nrit = nb;
304:   PetscFree(H);
305:   PetscFree(Q);
306:   PetscFree(SR);
307:   PetscFree(wr);
308:   PetscFree(wi);
309:   return(0);
310: }
311: #endif