Actual source code: gmreig.c

petsc-3.9.4 2018-09-11
Report Typos and Errors

  2:  #include <../src/ksp/ksp/impls/gmres/gmresimpl.h>
  3:  #include <petscblaslapack.h>

  5: PetscErrorCode KSPComputeExtremeSingularValues_GMRES(KSP ksp,PetscReal *emax,PetscReal *emin)
  6: {
  7: #if defined(PETSC_MISSING_LAPACK_GESVD)
  9:   /*
 10:       The Cray math libraries on T3D/T3E, and early versions of Intel Math Kernel Libraries (MKL)
 11:       for PCs do not seem to have the DGESVD() lapack routines
 12:   */
 13:   SETERRQ(PetscObjectComm((PetscObject)ksp),PETSC_ERR_SUP,"GESVD - Lapack routine is unavailable\nNot able to provide singular value estimates.");
 14: #else
 15:   KSP_GMRES      *gmres = (KSP_GMRES*)ksp->data;
 17:   PetscInt       n = gmres->it + 1,i,N = gmres->max_k + 2;
 18:   PetscBLASInt   bn, bN,lwork, idummy,lierr;
 19:   PetscScalar    *R        = gmres->Rsvd,*work = R + N*N,sdummy;
 20:   PetscReal      *realpart = gmres->Dsvd;

 23:   PetscBLASIntCast(n,&bn);
 24:   PetscBLASIntCast(N,&bN);
 25:   PetscBLASIntCast(5*N,&lwork);
 26:   PetscBLASIntCast(N,&idummy);
 27:   if (n <= 0) {
 28:     *emax = *emin = 1.0;
 29:     return(0);
 30:   }
 31:   /* copy R matrix to work space */
 32:   PetscMemcpy(R,gmres->hh_origin,(gmres->max_k+2)*(gmres->max_k+1)*sizeof(PetscScalar));

 34:   /* zero below diagonal garbage */
 35:   for (i=0; i<n; i++) R[i*N+i+1] = 0.0;

 37:   /* compute Singular Values */
 38:   PetscFPTrapPush(PETSC_FP_TRAP_OFF);
 39: #if !defined(PETSC_USE_COMPLEX)
 40:   PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("N","N",&bn,&bn,R,&bN,realpart,&sdummy,&idummy,&sdummy,&idummy,work,&lwork,&lierr));
 41: #else
 42:   PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("N","N",&bn,&bn,R,&bN,realpart,&sdummy,&idummy,&sdummy,&idummy,work,&lwork,realpart+N,&lierr));
 43: #endif
 44:   if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SVD Lapack routine %d",(int)lierr);
 45:   PetscFPTrapPop();

 47:   *emin = realpart[n-1];
 48:   *emax = realpart[0];
 49: #endif
 50:   return(0);
 51: }

 53: /* ------------------------------------------------------------------------ */
 54: /* ESSL has a different calling sequence for dgeev() and zgeev() than standard LAPACK */
 55: PetscErrorCode KSPComputeEigenvalues_GMRES(KSP ksp,PetscInt nmax,PetscReal *r,PetscReal *c,PetscInt *neig)
 56: {
 57: #if defined(PETSC_HAVE_ESSL)
 58:   KSP_GMRES      *gmres = (KSP_GMRES*)ksp->data;
 60:   PetscInt       n = gmres->it + 1,N = gmres->max_k + 1;
 61:   PetscInt       i,*perm;
 62:   PetscScalar    *R     = gmres->Rsvd;
 63:   PetscScalar    *cwork = R + N*N,sdummy;
 64:   PetscReal      *work,*realpart = gmres->Dsvd;
 65:   PetscBLASInt   zero = 0,bn,bN,idummy,lwork;

 68:   PetscBLASIntCast(n,&bn);
 69:   PetscBLASIntCast(N,&bN);
 70:   idummy = -1;                  /* unused */
 71:   PetscBLASIntCast(5*N,&lwork);
 72:   if (nmax < n) SETERRQ(PetscObjectComm((PetscObject)ksp),PETSC_ERR_ARG_SIZ,"Not enough room in work space r and c for eigenvalues");
 73:   *neig = n;

 75:   if (!n) return(0);

 77:   /* copy R matrix to work space */
 78:   PetscMemcpy(R,gmres->hes_origin,N*N*sizeof(PetscScalar));

 80:   /* compute eigenvalues */

 82:   /* for ESSL version need really cwork of length N (complex), 2N
 83:      (real); already at least 5N of space has been allocated */

 85:   PetscMalloc1(lwork,&work);
 86:   PetscFPTrapPush(PETSC_FP_TRAP_OFF);
 87:   PetscStackCallBLAS("LAPACKgeev",LAPACKgeev_(&zero,R,&bN,cwork,&sdummy,&idummy,&idummy,&bn,work,&lwork));
 88:   PetscFPTrapPop();
 89:   PetscFree(work);

 91:   /* For now we stick with the convention of storing the real and imaginary
 92:      components of evalues separately.  But is this what we really want? */
 93:   PetscMalloc1(n,&perm);

 95: #if !defined(PETSC_USE_COMPLEX)
 96:   for (i=0; i<n; i++) {
 97:     realpart[i] = cwork[2*i];
 98:     perm[i]     = i;
 99:   }
100:   PetscSortRealWithPermutation(n,realpart,perm);
101:   for (i=0; i<n; i++) {
102:     r[i] = cwork[2*perm[i]];
103:     c[i] = cwork[2*perm[i]+1];
104:   }
105: #else
106:   for (i=0; i<n; i++) {
107:     realpart[i] = PetscRealPart(cwork[i]);
108:     perm[i]     = i;
109:   }
110:   PetscSortRealWithPermutation(n,realpart,perm);
111:   for (i=0; i<n; i++) {
112:     r[i] = PetscRealPart(cwork[perm[i]]);
113:     c[i] = PetscImaginaryPart(cwork[perm[i]]);
114:   }
115: #endif
116:   PetscFree(perm);
117: #elif defined(PETSC_MISSING_LAPACK_GEEV)
119:   SETERRQ(PetscObjectComm((PetscObject)ksp),PETSC_ERR_SUP,"GEEV - Lapack routine is unavailable\nNot able to provide eigen values.");
120: #elif !defined(PETSC_USE_COMPLEX)
121:   KSP_GMRES      *gmres = (KSP_GMRES*)ksp->data;
123:   PetscInt       n = gmres->it + 1,N = gmres->max_k + 1,i,*perm;
124:   PetscBLASInt   bn, bN, lwork, idummy, lierr;
125:   PetscScalar    *R        = gmres->Rsvd,*work = R + N*N;
126:   PetscScalar    *realpart = gmres->Dsvd,*imagpart = realpart + N,sdummy;

129:   PetscBLASIntCast(n,&bn);
130:   PetscBLASIntCast(N,&bN);
131:   PetscBLASIntCast(5*N,&lwork);
132:   PetscBLASIntCast(N,&idummy);
133:   if (nmax < n) SETERRQ(PetscObjectComm((PetscObject)ksp),PETSC_ERR_ARG_SIZ,"Not enough room in work space r and c for eigenvalues");
134:   *neig = n;

136:   if (!n) return(0);

138:   /* copy R matrix to work space */
139:   PetscMemcpy(R,gmres->hes_origin,N*N*sizeof(PetscScalar));

141:   /* compute eigenvalues */
142:   PetscFPTrapPush(PETSC_FP_TRAP_OFF);
143:   PetscStackCallBLAS("LAPACKgeev",LAPACKgeev_("N","N",&bn,R,&bN,realpart,imagpart,&sdummy,&idummy,&sdummy,&idummy,work,&lwork,&lierr));
144:   if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in LAPACK routine %d",(int)lierr);
145:   PetscFPTrapPop();
146:   PetscMalloc1(n,&perm);
147:   for (i=0; i<n; i++) perm[i] = i;
148:   PetscSortRealWithPermutation(n,realpart,perm);
149:   for (i=0; i<n; i++) {
150:     r[i] = realpart[perm[i]];
151:     c[i] = imagpart[perm[i]];
152:   }
153:   PetscFree(perm);
154: #else
155:   KSP_GMRES      *gmres = (KSP_GMRES*)ksp->data;
157:   PetscInt       n  = gmres->it + 1,N = gmres->max_k + 1,i,*perm;
158:   PetscScalar    *R = gmres->Rsvd,*work = R + N*N,*eigs = work + 5*N,sdummy;
159:   PetscBLASInt   bn,bN,lwork,idummy,lierr;

162:   PetscBLASIntCast(n,&bn);
163:   PetscBLASIntCast(N,&bN);
164:   PetscBLASIntCast(5*N,&lwork);
165:   PetscBLASIntCast(N,&idummy);
166:   if (nmax < n) SETERRQ(PetscObjectComm((PetscObject)ksp),PETSC_ERR_ARG_SIZ,"Not enough room in work space r and c for eigenvalues");
167:   *neig = n;

169:   if (!n) return(0);

171:   /* copy R matrix to work space */
172:   PetscMemcpy(R,gmres->hes_origin,N*N*sizeof(PetscScalar));

174:   /* compute eigenvalues */
175:   PetscFPTrapPush(PETSC_FP_TRAP_OFF);
176:   PetscStackCallBLAS("LAPACKgeev",LAPACKgeev_("N","N",&bn,R,&bN,eigs,&sdummy,&idummy,&sdummy,&idummy,work,&lwork,gmres->Dsvd,&lierr));
177:   if (lierr) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in LAPACK routine");
178:   PetscFPTrapPop();
179:   PetscMalloc1(n,&perm);
180:   for (i=0; i<n; i++) perm[i] = i;
181:   for (i=0; i<n; i++) r[i] = PetscRealPart(eigs[i]);
182:   PetscSortRealWithPermutation(n,r,perm);
183:   for (i=0; i<n; i++) {
184:     r[i] = PetscRealPart(eigs[perm[i]]);
185:     c[i] = PetscImaginaryPart(eigs[perm[i]]);
186:   }
187:   PetscFree(perm);
188: #endif
189:   return(0);
190: }

192: #if !defined(PETSC_USE_COMPLEX) && !defined(PETSC_HAVE_ESSL)
193: PetscErrorCode KSPComputeRitz_GMRES(KSP ksp,PetscBool ritz,PetscBool small,PetscInt *nrit,Vec S[],PetscReal *tetar,PetscReal *tetai)
194: {
195:   KSP_GMRES      *gmres = (KSP_GMRES*)ksp->data;
197:   PetscInt       n = gmres->it + 1,N = gmres->max_k + 1,NbrRitz,nb=0;
198:   PetscInt       i,j,*perm;
199:   PetscReal      *H,*Q,*Ht;              /* H Hessenberg Matrix and Q matrix of eigenvectors of H*/
200:   PetscReal      *wr,*wi,*modul;       /* Real and imaginary part and modul of the Ritz values*/
201:   PetscReal      *SR,*work;
202:   PetscBLASInt   bn,bN,lwork,idummy;
203:   PetscScalar    *t,sdummy;

206:   /* n: size of the Hessenberg matrix */
207:   if (gmres->fullcycle) n = N-1;
208:   /* NbrRitz: number of (harmonic) Ritz pairs to extract */
209:   NbrRitz = PetscMin(*nrit,n);

211:   /* Definition of PetscBLASInt for lapack routines*/
212:   PetscBLASIntCast(n,&bn);
213:   PetscBLASIntCast(N,&bN);
214:   PetscBLASIntCast(N,&idummy);
215:   PetscBLASIntCast(5*N,&lwork);
216:   /* Memory allocation */
217:   PetscMalloc1(bN*bN,&H);
218:   PetscMalloc1(bn*bn,&Q);
219:   PetscMalloc1(lwork,&work);
220:   PetscMalloc1(n,&wr);
221:   PetscMalloc1(n,&wi);

223:   /* copy H matrix to work space */
224:   if (gmres->fullcycle) {
225:     PetscMemcpy(H,gmres->hes_ritz,bN*bN*sizeof(PetscReal));
226:   } else {
227:     PetscMemcpy(H,gmres->hes_origin,bN*bN*sizeof(PetscReal));
228:   }

230:   /* Modify H to compute Harmonic Ritz pairs H = H + H^{-T}*h^2_{m+1,m}e_m*e_m^T */
231:   if (!ritz) {
232:     /* Transpose the Hessenberg matrix => Ht */
233:     PetscMalloc1(bn*bn,&Ht);
234:     for (i=0; i<bn; i++) {
235:       for (j=0; j<bn; j++) {
236:         Ht[i*bn+j] = H[j*bN+i];
237:       }
238:     }
239:     /* Solve the system H^T*t = h^2_{m+1,m}e_m */
240:     PetscCalloc1(bn,&t);
241:     /* t = h^2_{m+1,m}e_m */
242:     if (gmres->fullcycle) {
243:       t[bn-1] = PetscSqr(gmres->hes_ritz[(bn-1)*bN+bn]);
244:     } else {
245:       t[bn-1] = PetscSqr(gmres->hes_origin[(bn-1)*bN+bn]);
246:     }
247:     /* Call the LAPACK routine dgesv to compute t = H^{-T}*t */
248: #if   defined(PETSC_MISSING_LAPACK_GESV)
249:     SETERRQ(PetscObjectComm((PetscObject)ksp),PETSC_ERR_SUP,"GESV - Lapack routine is unavailable.");
250: #else
251:     {
252:       PetscBLASInt info;
253:       PetscBLASInt nrhs = 1;
254:       PetscBLASInt *ipiv;
255:       PetscMalloc1(bn,&ipiv);
256:       PetscStackCallBLAS("LAPACKgesv",LAPACKgesv_(&bn,&nrhs,Ht,&bn,ipiv,t,&bn,&info));
257:       if (info) SETERRQ(PetscObjectComm((PetscObject)ksp),PETSC_ERR_PLIB,"Error while calling the Lapack routine DGESV");
258:       PetscFree(ipiv);
259:       PetscFree(Ht);
260:     }
261: #endif
262:     /* Now form H + H^{-T}*h^2_{m+1,m}e_m*e_m^T */
263:     for (i=0; i<bn; i++) H[(bn-1)*bn+i] += t[i];
264:     PetscFree(t);
265:   }

267:   /* Compute (harmonic) Ritz pairs */
268: #if defined(PETSC_MISSING_LAPACK_HSEQR)
269:   SETERRQ(PetscObjectComm((PetscObject)ksp),PETSC_ERR_SUP,"GEEV - Lapack routine is unavailable\nNot able to provide eigen values.");
270: #else
271:   {
272:     PetscBLASInt info;
273:     PetscFPTrapPush(PETSC_FP_TRAP_OFF);
274:     PetscStackCallBLAS("LAPACKgeev",LAPACKgeev_("N","V",&bn,H,&bN,wr,wi,&sdummy,&idummy,Q,&bn,work,&lwork,&info));
275:     if (info) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in LAPACK routine");
276:   }
277: #endif
278:   /* sort the (harmonic) Ritz values */
279:   PetscMalloc1(n,&modul);
280:   PetscMalloc1(n,&perm);
281:   for (i=0; i<n; i++) modul[i] = PetscSqrtReal(wr[i]*wr[i]+wi[i]*wi[i]);
282:   for (i=0; i<n; i++) perm[i] = i;
283:   PetscSortRealWithPermutation(n,modul,perm);
284:   /* count the number of extracted Ritz or Harmonic Ritz pairs (with complex conjugates) */
285:   if (small) {
286:     while (nb < NbrRitz) {
287:       if (!wi[perm[nb]]) nb += 1;
288:       else nb += 2;
289:     }
290:     PetscMalloc1(nb*n,&SR);
291:     for (i=0; i<nb; i++) {
292:       tetar[i] = wr[perm[i]];
293:       tetai[i] = wi[perm[i]];
294:       PetscMemcpy(&SR[i*n],&(Q[perm[i]*bn]),n*sizeof(PetscReal));
295:     }
296:   } else {
297:     while (nb < NbrRitz) {
298:       if (wi[perm[n-nb-1]] == 0) nb += 1;
299:       else nb += 2;
300:     }
301:     PetscMalloc1(nb*n,&SR);
302:     for (i=0; i<nb; i++) {
303:       tetar[i] = wr[perm[n-nb+i]];
304:       tetai[i] = wi[perm[n-nb+i]];
305:       PetscMemcpy(&SR[i*n], &(Q[perm[n-nb+i]*bn]), n*sizeof(PetscReal));
306:     }
307:   }
308:   PetscFree(modul);
309:   PetscFree(perm);

311:   /* Form the Ritz or Harmonic Ritz vectors S=VV*Sr, 
312:     where the columns of VV correspond to the basis of the Krylov subspace */
313:   if (gmres->fullcycle) {
314:     for (j=0; j<nb; j++) {
315:       VecZeroEntries(S[j]);
316:       VecMAXPY(S[j],n,&SR[j*n],gmres->vecb);
317:     }
318:   } else {
319:     for (j=0; j<nb; j++) {
320:       VecZeroEntries(S[j]);
321:       VecMAXPY(S[j],n,&SR[j*n],&VEC_VV(0));
322:     }
323:   }
324:   *nrit = nb;
325:   PetscFree(H);
326:   PetscFree(Q);
327:   PetscFree(SR);
328:   PetscFree(wr);
329:   PetscFree(wi);
330:   return(0);
331: }
332: #endif