xref: /petsc/src/ksp/ksp/impls/gmres/gmreig.c (revision c87ba875e4007ad659b117ea274f03d5f4cd5ea7)
1 
2 #include <../src/ksp/ksp/impls/gmres/gmresimpl.h>
3 #include <petscblaslapack.h>
4 
5 PetscErrorCode KSPComputeExtremeSingularValues_GMRES(KSP ksp,PetscReal *emax,PetscReal *emin)
6 {
7   KSP_GMRES      *gmres = (KSP_GMRES*)ksp->data;
8   PetscErrorCode ierr;
9   PetscInt       n = gmres->it + 1,i,N = gmres->max_k + 2;
10   PetscBLASInt   bn, bN,lwork, idummy,lierr;
11   PetscScalar    *R        = gmres->Rsvd,*work = R + N*N,sdummy = 0;
12   PetscReal      *realpart = gmres->Dsvd;
13 
14   PetscFunctionBegin;
15   ierr = PetscBLASIntCast(n,&bn);CHKERRQ(ierr);
16   ierr = PetscBLASIntCast(N,&bN);CHKERRQ(ierr);
17   ierr = PetscBLASIntCast(5*N,&lwork);CHKERRQ(ierr);
18   ierr = PetscBLASIntCast(N,&idummy);CHKERRQ(ierr);
19   if (n <= 0) {
20     *emax = *emin = 1.0;
21     PetscFunctionReturn(0);
22   }
23   /* copy R matrix to work space */
24   ierr = PetscArraycpy(R,gmres->hh_origin,(gmres->max_k+2)*(gmres->max_k+1));CHKERRQ(ierr);
25 
26   /* zero below diagonal garbage */
27   for (i=0; i<n; i++) R[i*N+i+1] = 0.0;
28 
29   /* compute Singular Values */
30   ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
31 #if !defined(PETSC_USE_COMPLEX)
32   PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("N","N",&bn,&bn,R,&bN,realpart,&sdummy,&idummy,&sdummy,&idummy,work,&lwork,&lierr));
33 #else
34   PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("N","N",&bn,&bn,R,&bN,realpart,&sdummy,&idummy,&sdummy,&idummy,work,&lwork,realpart+N,&lierr));
35 #endif
36   if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SVD Lapack routine %d",(int)lierr);
37   ierr = PetscFPTrapPop();CHKERRQ(ierr);
38 
39   *emin = realpart[n-1];
40   *emax = realpart[0];
41   PetscFunctionReturn(0);
42 }
43 
44 /* ------------------------------------------------------------------------ */
45 /* ESSL has a different calling sequence for dgeev() and zgeev() than standard LAPACK */
46 PetscErrorCode KSPComputeEigenvalues_GMRES(KSP ksp,PetscInt nmax,PetscReal *r,PetscReal *c,PetscInt *neig)
47 {
48 #if defined(PETSC_HAVE_ESSL)
49   KSP_GMRES      *gmres = (KSP_GMRES*)ksp->data;
50   PetscErrorCode ierr;
51   PetscInt       n = gmres->it + 1,N = gmres->max_k + 1;
52   PetscInt       i,*perm;
53   PetscScalar    *R     = gmres->Rsvd;
54   PetscScalar    *cwork = R + N*N,sdummy = 0;
55   PetscReal      *work,*realpart = gmres->Dsvd;
56   PetscBLASInt   zero = 0,bn,bN,idummy = -1,lwork;
57 
58   PetscFunctionBegin;
59   ierr   = PetscBLASIntCast(n,&bn);CHKERRQ(ierr);
60   ierr   = PetscBLASIntCast(N,&bN);CHKERRQ(ierr);
61   ierr   = PetscBLASIntCast(5*N,&lwork);CHKERRQ(ierr);
62   if (nmax < n) SETERRQ(PetscObjectComm((PetscObject)ksp),PETSC_ERR_ARG_SIZ,"Not enough room in work space r and c for eigenvalues");
63   *neig = n;
64 
65   if (!n) PetscFunctionReturn(0);
66 
67   /* copy R matrix to work space */
68   ierr = PetscArraycpy(R,gmres->hes_origin,N*N);CHKERRQ(ierr);
69 
70   /* compute eigenvalues */
71 
72   /* for ESSL version need really cwork of length N (complex), 2N
73      (real); already at least 5N of space has been allocated */
74 
75   ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr);
76   ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
77   PetscStackCallBLAS("LAPACKgeev",LAPACKgeev_(&zero,R,&bN,cwork,&sdummy,&idummy,&idummy,&bn,work,&lwork));
78   ierr = PetscFPTrapPop();CHKERRQ(ierr);
79   ierr = PetscFree(work);CHKERRQ(ierr);
80 
81   /* For now we stick with the convention of storing the real and imaginary
82      components of evalues separately.  But is this what we really want? */
83   ierr = PetscMalloc1(n,&perm);CHKERRQ(ierr);
84 
85 #if !defined(PETSC_USE_COMPLEX)
86   for (i=0; i<n; i++) {
87     realpart[i] = cwork[2*i];
88     perm[i]     = i;
89   }
90   ierr = PetscSortRealWithPermutation(n,realpart,perm);CHKERRQ(ierr);
91   for (i=0; i<n; i++) {
92     r[i] = cwork[2*perm[i]];
93     c[i] = cwork[2*perm[i]+1];
94   }
95 #else
96   for (i=0; i<n; i++) {
97     realpart[i] = PetscRealPart(cwork[i]);
98     perm[i]     = i;
99   }
100   ierr = PetscSortRealWithPermutation(n,realpart,perm);CHKERRQ(ierr);
101   for (i=0; i<n; i++) {
102     r[i] = PetscRealPart(cwork[perm[i]]);
103     c[i] = PetscImaginaryPart(cwork[perm[i]]);
104   }
105 #endif
106   ierr = PetscFree(perm);CHKERRQ(ierr);
107 #elif !defined(PETSC_USE_COMPLEX)
108   KSP_GMRES      *gmres = (KSP_GMRES*)ksp->data;
109   PetscErrorCode ierr;
110   PetscInt       n = gmres->it + 1,N = gmres->max_k + 1,i,*perm;
111   PetscBLASInt   bn, bN, lwork, idummy, lierr = -1;
112   PetscScalar    *R        = gmres->Rsvd,*work = R + N*N;
113   PetscScalar    *realpart = gmres->Dsvd,*imagpart = realpart + N,sdummy = 0;
114 
115   PetscFunctionBegin;
116   ierr = PetscBLASIntCast(n,&bn);CHKERRQ(ierr);
117   ierr = PetscBLASIntCast(N,&bN);CHKERRQ(ierr);
118   ierr = PetscBLASIntCast(5*N,&lwork);CHKERRQ(ierr);
119   ierr = PetscBLASIntCast(N,&idummy);CHKERRQ(ierr);
120   if (nmax < n) SETERRQ(PetscObjectComm((PetscObject)ksp),PETSC_ERR_ARG_SIZ,"Not enough room in work space r and c for eigenvalues");
121   *neig = n;
122 
123   if (!n) PetscFunctionReturn(0);
124 
125   /* copy R matrix to work space */
126   ierr = PetscArraycpy(R,gmres->hes_origin,N*N);CHKERRQ(ierr);
127 
128   /* compute eigenvalues */
129   ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
130   PetscStackCallBLAS("LAPACKgeev",LAPACKgeev_("N","N",&bn,R,&bN,realpart,imagpart,&sdummy,&idummy,&sdummy,&idummy,work,&lwork,&lierr));
131   if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in LAPACK routine %d",(int)lierr);
132   ierr = PetscFPTrapPop();CHKERRQ(ierr);
133   ierr = PetscMalloc1(n,&perm);CHKERRQ(ierr);
134   for (i=0; i<n; i++) perm[i] = i;
135   ierr = PetscSortRealWithPermutation(n,realpart,perm);CHKERRQ(ierr);
136   for (i=0; i<n; i++) {
137     r[i] = realpart[perm[i]];
138     c[i] = imagpart[perm[i]];
139   }
140   ierr = PetscFree(perm);CHKERRQ(ierr);
141 #else
142   KSP_GMRES      *gmres = (KSP_GMRES*)ksp->data;
143   PetscErrorCode ierr;
144   PetscInt       n  = gmres->it + 1,N = gmres->max_k + 1,i,*perm;
145   PetscScalar    *R = gmres->Rsvd,*work = R + N*N,*eigs = work + 5*N,sdummy;
146   PetscBLASInt   bn,bN,lwork,idummy,lierr = -1;
147 
148   PetscFunctionBegin;
149   ierr = PetscBLASIntCast(n,&bn);CHKERRQ(ierr);
150   ierr = PetscBLASIntCast(N,&bN);CHKERRQ(ierr);
151   ierr = PetscBLASIntCast(5*N,&lwork);CHKERRQ(ierr);
152   ierr = PetscBLASIntCast(N,&idummy);CHKERRQ(ierr);
153   if (nmax < n) SETERRQ(PetscObjectComm((PetscObject)ksp),PETSC_ERR_ARG_SIZ,"Not enough room in work space r and c for eigenvalues");
154   *neig = n;
155 
156   if (!n) PetscFunctionReturn(0);
157 
158   /* copy R matrix to work space */
159   ierr = PetscArraycpy(R,gmres->hes_origin,N*N);CHKERRQ(ierr);
160 
161   /* compute eigenvalues */
162   ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
163   PetscStackCallBLAS("LAPACKgeev",LAPACKgeev_("N","N",&bn,R,&bN,eigs,&sdummy,&idummy,&sdummy,&idummy,work,&lwork,gmres->Dsvd,&lierr));
164   if (lierr) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in LAPACK routine");
165   ierr = PetscFPTrapPop();CHKERRQ(ierr);
166   ierr = PetscMalloc1(n,&perm);CHKERRQ(ierr);
167   for (i=0; i<n; i++) perm[i] = i;
168   for (i=0; i<n; i++) r[i] = PetscRealPart(eigs[i]);
169   ierr = PetscSortRealWithPermutation(n,r,perm);CHKERRQ(ierr);
170   for (i=0; i<n; i++) {
171     r[i] = PetscRealPart(eigs[perm[i]]);
172     c[i] = PetscImaginaryPart(eigs[perm[i]]);
173   }
174   ierr = PetscFree(perm);CHKERRQ(ierr);
175 #endif
176   PetscFunctionReturn(0);
177 }
178 
179 #if !defined(PETSC_USE_COMPLEX) && !defined(PETSC_HAVE_ESSL)
180 PetscErrorCode KSPComputeRitz_GMRES(KSP ksp,PetscBool ritz,PetscBool small,PetscInt *nrit,Vec S[],PetscReal *tetar,PetscReal *tetai)
181 {
182   KSP_GMRES      *gmres = (KSP_GMRES*)ksp->data;
183   PetscErrorCode ierr;
184   PetscInt       n = gmres->it + 1,N = gmres->max_k + 1,NbrRitz,nb=0;
185   PetscInt       i,j,*perm;
186   PetscReal      *H,*Q,*Ht;              /* H Hessenberg Matrix and Q matrix of eigenvectors of H*/
187   PetscReal      *wr,*wi,*modul;       /* Real and imaginary part and modul of the Ritz values*/
188   PetscReal      *SR,*work;
189   PetscBLASInt   bn,bN,lwork,idummy;
190   PetscScalar    *t,sdummy = 0;
191 
192   PetscFunctionBegin;
193   /* n: size of the Hessenberg matrix */
194   if (gmres->fullcycle) n = N-1;
195   /* NbrRitz: number of (harmonic) Ritz pairs to extract */
196   NbrRitz = PetscMin(*nrit,n);
197 
198   /* Definition of PetscBLASInt for lapack routines*/
199   ierr = PetscBLASIntCast(n,&bn);CHKERRQ(ierr);
200   ierr = PetscBLASIntCast(N,&bN);CHKERRQ(ierr);
201   ierr = PetscBLASIntCast(N,&idummy);CHKERRQ(ierr);
202   ierr = PetscBLASIntCast(5*N,&lwork);CHKERRQ(ierr);
203   /* Memory allocation */
204   ierr = PetscMalloc1(bN*bN,&H);CHKERRQ(ierr);
205   ierr = PetscMalloc1(bn*bn,&Q);CHKERRQ(ierr);
206   ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr);
207   ierr = PetscMalloc1(n,&wr);CHKERRQ(ierr);
208   ierr = PetscMalloc1(n,&wi);CHKERRQ(ierr);
209 
210   /* copy H matrix to work space */
211   if (gmres->fullcycle) {
212     ierr = PetscArraycpy(H,gmres->hes_ritz,bN*bN);CHKERRQ(ierr);
213   } else {
214     ierr = PetscArraycpy(H,gmres->hes_origin,bN*bN);CHKERRQ(ierr);
215   }
216 
217   /* Modify H to compute Harmonic Ritz pairs H = H + H^{-T}*h^2_{m+1,m}e_m*e_m^T */
218   if (!ritz) {
219     /* Transpose the Hessenberg matrix => Ht */
220     ierr = PetscMalloc1(bn*bn,&Ht);CHKERRQ(ierr);
221     for (i=0; i<bn; i++) {
222       for (j=0; j<bn; j++) {
223         Ht[i*bn+j] = H[j*bN+i];
224       }
225     }
226     /* Solve the system H^T*t = h^2_{m+1,m}e_m */
227     ierr = PetscCalloc1(bn,&t);CHKERRQ(ierr);
228     /* t = h^2_{m+1,m}e_m */
229     if (gmres->fullcycle) {
230       t[bn-1] = PetscSqr(gmres->hes_ritz[(bn-1)*bN+bn]);
231     } else {
232       t[bn-1] = PetscSqr(gmres->hes_origin[(bn-1)*bN+bn]);
233     }
234     /* Call the LAPACK routine dgesv to compute t = H^{-T}*t */
235     {
236       PetscBLASInt info;
237       PetscBLASInt nrhs = 1;
238       PetscBLASInt *ipiv;
239       ierr = PetscMalloc1(bn,&ipiv);CHKERRQ(ierr);
240       PetscStackCallBLAS("LAPACKgesv",LAPACKgesv_(&bn,&nrhs,Ht,&bn,ipiv,t,&bn,&info));
241       if (info) SETERRQ(PetscObjectComm((PetscObject)ksp),PETSC_ERR_PLIB,"Error while calling the Lapack routine DGESV");
242       ierr = PetscFree(ipiv);CHKERRQ(ierr);
243       ierr = PetscFree(Ht);CHKERRQ(ierr);
244     }
245     /* Now form H + H^{-T}*h^2_{m+1,m}e_m*e_m^T */
246     for (i=0; i<bn; i++) H[(bn-1)*bn+i] += t[i];
247     ierr = PetscFree(t);CHKERRQ(ierr);
248   }
249 
250   /* Compute (harmonic) Ritz pairs */
251   {
252     PetscBLASInt info;
253     ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
254     PetscStackCallBLAS("LAPACKgeev",LAPACKgeev_("N","V",&bn,H,&bN,wr,wi,&sdummy,&idummy,Q,&bn,work,&lwork,&info));
255     if (info) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in LAPACK routine");
256   }
257   /* sort the (harmonic) Ritz values */
258   ierr = PetscMalloc1(n,&modul);CHKERRQ(ierr);
259   ierr = PetscMalloc1(n,&perm);CHKERRQ(ierr);
260   for (i=0; i<n; i++) modul[i] = PetscSqrtReal(wr[i]*wr[i]+wi[i]*wi[i]);
261   for (i=0; i<n; i++) perm[i] = i;
262   ierr = PetscSortRealWithPermutation(n,modul,perm);CHKERRQ(ierr);
263   /* count the number of extracted Ritz or Harmonic Ritz pairs (with complex conjugates) */
264   if (small) {
265     while (nb < NbrRitz) {
266       if (!wi[perm[nb]]) nb += 1;
267       else nb += 2;
268     }
269     ierr = PetscMalloc1(nb*n,&SR);CHKERRQ(ierr);
270     for (i=0; i<nb; i++) {
271       tetar[i] = wr[perm[i]];
272       tetai[i] = wi[perm[i]];
273       ierr = PetscArraycpy(&SR[i*n],&(Q[perm[i]*bn]),n);CHKERRQ(ierr);
274     }
275   } else {
276     while (nb < NbrRitz) {
277       if (wi[perm[n-nb-1]] == 0) nb += 1;
278       else nb += 2;
279     }
280     ierr = PetscMalloc1(nb*n,&SR);CHKERRQ(ierr);
281     for (i=0; i<nb; i++) {
282       tetar[i] = wr[perm[n-nb+i]];
283       tetai[i] = wi[perm[n-nb+i]];
284       ierr = PetscArraycpy(&SR[i*n], &(Q[perm[n-nb+i]*bn]), n);CHKERRQ(ierr);
285     }
286   }
287   ierr = PetscFree(modul);CHKERRQ(ierr);
288   ierr = PetscFree(perm);CHKERRQ(ierr);
289 
290   /* Form the Ritz or Harmonic Ritz vectors S=VV*Sr,
291     where the columns of VV correspond to the basis of the Krylov subspace */
292   if (gmres->fullcycle) {
293     for (j=0; j<nb; j++) {
294       ierr = VecZeroEntries(S[j]);CHKERRQ(ierr);
295       ierr = VecMAXPY(S[j],n,&SR[j*n],gmres->vecb);CHKERRQ(ierr);
296     }
297   } else {
298     for (j=0; j<nb; j++) {
299       ierr = VecZeroEntries(S[j]);CHKERRQ(ierr);
300       ierr = VecMAXPY(S[j],n,&SR[j*n],&VEC_VV(0));CHKERRQ(ierr);
301     }
302   }
303   *nrit = nb;
304   ierr  = PetscFree(H);CHKERRQ(ierr);
305   ierr  = PetscFree(Q);CHKERRQ(ierr);
306   ierr  = PetscFree(SR);CHKERRQ(ierr);
307   ierr  = PetscFree(wr);CHKERRQ(ierr);
308   ierr  = PetscFree(wi);CHKERRQ(ierr);
309   PetscFunctionReturn(0);
310 }
311 #endif
312 
313 
314