xref: /petsc/src/ksp/ksp/impls/gmres/gmreig.c (revision b41ce5d507ea9a58bfa83cf403107a702e77a67d)
1 
2 #include <../src/ksp/ksp/impls/gmres/gmresimpl.h>
3 #include <petscblaslapack.h>
4 
5 PetscErrorCode KSPComputeExtremeSingularValues_GMRES(KSP ksp,PetscReal *emax,PetscReal *emin)
6 {
7 #if defined(PETSC_MISSING_LAPACK_GESVD)
8   PetscFunctionBegin;
9   /*
10       The Cray math libraries on T3D/T3E, and early versions of Intel Math Kernel Libraries (MKL)
11       for PCs do not seem to have the DGESVD() lapack routines
12   */
13   SETERRQ(PetscObjectComm((PetscObject)ksp),PETSC_ERR_SUP,"GESVD - Lapack routine is unavailable\nNot able to provide singular value estimates.");
14 #else
15   KSP_GMRES      *gmres = (KSP_GMRES*)ksp->data;
16   PetscErrorCode ierr;
17   PetscInt       n = gmres->it + 1,i,N = gmres->max_k + 2;
18   PetscBLASInt   bn, bN,lwork, idummy,lierr;
19   PetscScalar    *R        = gmres->Rsvd,*work = R + N*N,sdummy;
20   PetscReal      *realpart = gmres->Dsvd;
21 
22   PetscFunctionBegin;
23   ierr = PetscBLASIntCast(n,&bn);CHKERRQ(ierr);
24   ierr = PetscBLASIntCast(N,&bN);CHKERRQ(ierr);
25   ierr = PetscBLASIntCast(5*N,&lwork);CHKERRQ(ierr);
26   ierr = PetscBLASIntCast(N,&idummy);CHKERRQ(ierr);
27   if (n <= 0) {
28     *emax = *emin = 1.0;
29     PetscFunctionReturn(0);
30   }
31   /* copy R matrix to work space */
32   ierr = PetscMemcpy(R,gmres->hh_origin,(gmres->max_k+2)*(gmres->max_k+1)*sizeof(PetscScalar));CHKERRQ(ierr);
33 
34   /* zero below diagonal garbage */
35   for (i=0; i<n; i++) R[i*N+i+1] = 0.0;
36 
37   /* compute Singular Values */
38   ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
39 #if !defined(PETSC_USE_COMPLEX)
40   PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("N","N",&bn,&bn,R,&bN,realpart,&sdummy,&idummy,&sdummy,&idummy,work,&lwork,&lierr));
41 #else
42   PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("N","N",&bn,&bn,R,&bN,realpart,&sdummy,&idummy,&sdummy,&idummy,work,&lwork,realpart+N,&lierr));
43 #endif
44   if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SVD Lapack routine %d",(int)lierr);
45   ierr = PetscFPTrapPop();CHKERRQ(ierr);
46 
47   *emin = realpart[n-1];
48   *emax = realpart[0];
49 #endif
50   PetscFunctionReturn(0);
51 }
52 
53 /* ------------------------------------------------------------------------ */
54 /* ESSL has a different calling sequence for dgeev() and zgeev() than standard LAPACK */
55 PetscErrorCode KSPComputeEigenvalues_GMRES(KSP ksp,PetscInt nmax,PetscReal *r,PetscReal *c,PetscInt *neig)
56 {
57 #if defined(PETSC_HAVE_ESSL)
58   KSP_GMRES      *gmres = (KSP_GMRES*)ksp->data;
59   PetscErrorCode ierr;
60   PetscInt       n = gmres->it + 1,N = gmres->max_k + 1;
61   PetscInt       i,*perm;
62   PetscScalar    *R     = gmres->Rsvd;
63   PetscScalar    *cwork = R + N*N,sdummy;
64   PetscReal      *work,*realpart = gmres->Dsvd;
65   PetscBLASInt   zero = 0,bn,bN,idummy,lwork;
66 
67   PetscFunctionBegin;
68   ierr   = PetscBLASIntCast(n,&bn);CHKERRQ(ierr);
69   ierr   = PetscBLASIntCast(N,&bN);CHKERRQ(ierr);
70   idummy = -1;                  /* unused */
71   ierr   = PetscBLASIntCast(5*N,&lwork);CHKERRQ(ierr);
72   if (nmax < n) SETERRQ(PetscObjectComm((PetscObject)ksp),PETSC_ERR_ARG_SIZ,"Not enough room in work space r and c for eigenvalues");
73   *neig = n;
74 
75   if (!n) PetscFunctionReturn(0);
76 
77   /* copy R matrix to work space */
78   ierr = PetscMemcpy(R,gmres->hes_origin,N*N*sizeof(PetscScalar));CHKERRQ(ierr);
79 
80   /* compute eigenvalues */
81 
82   /* for ESSL version need really cwork of length N (complex), 2N
83      (real); already at least 5N of space has been allocated */
84 
85   ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr);
86   ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
87   PetscStackCallBLAS("LAPACKgeev",LAPACKgeev_(&zero,R,&bN,cwork,&sdummy,&idummy,&idummy,&bn,work,&lwork));
88   ierr = PetscFPTrapPop();CHKERRQ(ierr);
89   ierr = PetscFree(work);CHKERRQ(ierr);
90 
91   /* For now we stick with the convention of storing the real and imaginary
92      components of evalues separately.  But is this what we really want? */
93   ierr = PetscMalloc1(n,&perm);CHKERRQ(ierr);
94 
95 #if !defined(PETSC_USE_COMPLEX)
96   for (i=0; i<n; i++) {
97     realpart[i] = cwork[2*i];
98     perm[i]     = i;
99   }
100   ierr = PetscSortRealWithPermutation(n,realpart,perm);CHKERRQ(ierr);
101   for (i=0; i<n; i++) {
102     r[i] = cwork[2*perm[i]];
103     c[i] = cwork[2*perm[i]+1];
104   }
105 #else
106   for (i=0; i<n; i++) {
107     realpart[i] = PetscRealPart(cwork[i]);
108     perm[i]     = i;
109   }
110   ierr = PetscSortRealWithPermutation(n,realpart,perm);CHKERRQ(ierr);
111   for (i=0; i<n; i++) {
112     r[i] = PetscRealPart(cwork[perm[i]]);
113     c[i] = PetscImaginaryPart(cwork[perm[i]]);
114   }
115 #endif
116   ierr = PetscFree(perm);CHKERRQ(ierr);
117 #elif defined(PETSC_MISSING_LAPACK_GEEV)
118   PetscFunctionBegin;
119   SETERRQ(PetscObjectComm((PetscObject)ksp),PETSC_ERR_SUP,"GEEV - Lapack routine is unavailable\nNot able to provide eigen values.");
120 #elif !defined(PETSC_USE_COMPLEX)
121   KSP_GMRES      *gmres = (KSP_GMRES*)ksp->data;
122   PetscErrorCode ierr;
123   PetscInt       n = gmres->it + 1,N = gmres->max_k + 1,i,*perm;
124   PetscBLASInt   bn, bN, lwork, idummy, lierr;
125   PetscScalar    *R        = gmres->Rsvd,*work = R + N*N;
126   PetscScalar    *realpart = gmres->Dsvd,*imagpart = realpart + N,sdummy;
127 
128   PetscFunctionBegin;
129   ierr = PetscBLASIntCast(n,&bn);CHKERRQ(ierr);
130   ierr = PetscBLASIntCast(N,&bN);CHKERRQ(ierr);
131   ierr = PetscBLASIntCast(5*N,&lwork);CHKERRQ(ierr);
132   ierr = PetscBLASIntCast(N,&idummy);CHKERRQ(ierr);
133   if (nmax < n) SETERRQ(PetscObjectComm((PetscObject)ksp),PETSC_ERR_ARG_SIZ,"Not enough room in work space r and c for eigenvalues");
134   *neig = n;
135 
136   if (!n) PetscFunctionReturn(0);
137 
138   /* copy R matrix to work space */
139   ierr = PetscMemcpy(R,gmres->hes_origin,N*N*sizeof(PetscScalar));CHKERRQ(ierr);
140 
141   /* compute eigenvalues */
142   ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
143   PetscStackCallBLAS("LAPACKgeev",LAPACKgeev_("N","N",&bn,R,&bN,realpart,imagpart,&sdummy,&idummy,&sdummy,&idummy,work,&lwork,&lierr));
144   if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in LAPACK routine %d",(int)lierr);
145   ierr = PetscFPTrapPop();CHKERRQ(ierr);
146   ierr = PetscMalloc1(n,&perm);CHKERRQ(ierr);
147   for (i=0; i<n; i++) perm[i] = i;
148   ierr = PetscSortRealWithPermutation(n,realpart,perm);CHKERRQ(ierr);
149   for (i=0; i<n; i++) {
150     r[i] = realpart[perm[i]];
151     c[i] = imagpart[perm[i]];
152   }
153   ierr = PetscFree(perm);CHKERRQ(ierr);
154 #else
155   KSP_GMRES      *gmres = (KSP_GMRES*)ksp->data;
156   PetscErrorCode ierr;
157   PetscInt       n  = gmres->it + 1,N = gmres->max_k + 1,i,*perm;
158   PetscScalar    *R = gmres->Rsvd,*work = R + N*N,*eigs = work + 5*N,sdummy;
159   PetscBLASInt   bn,bN,lwork,idummy,lierr;
160 
161   PetscFunctionBegin;
162   ierr = PetscBLASIntCast(n,&bn);CHKERRQ(ierr);
163   ierr = PetscBLASIntCast(N,&bN);CHKERRQ(ierr);
164   ierr = PetscBLASIntCast(5*N,&lwork);CHKERRQ(ierr);
165   ierr = PetscBLASIntCast(N,&idummy);CHKERRQ(ierr);
166   if (nmax < n) SETERRQ(PetscObjectComm((PetscObject)ksp),PETSC_ERR_ARG_SIZ,"Not enough room in work space r and c for eigenvalues");
167   *neig = n;
168 
169   if (!n) PetscFunctionReturn(0);
170 
171   /* copy R matrix to work space */
172   ierr = PetscMemcpy(R,gmres->hes_origin,N*N*sizeof(PetscScalar));CHKERRQ(ierr);
173 
174   /* compute eigenvalues */
175   ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
176   PetscStackCallBLAS("LAPACKgeev",LAPACKgeev_("N","N",&bn,R,&bN,eigs,&sdummy,&idummy,&sdummy,&idummy,work,&lwork,gmres->Dsvd,&lierr));
177   if (lierr) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in LAPACK routine");
178   ierr = PetscFPTrapPop();CHKERRQ(ierr);
179   ierr = PetscMalloc1(n,&perm);CHKERRQ(ierr);
180   for (i=0; i<n; i++) perm[i] = i;
181   for (i=0; i<n; i++) r[i] = PetscRealPart(eigs[i]);
182   ierr = PetscSortRealWithPermutation(n,r,perm);CHKERRQ(ierr);
183   for (i=0; i<n; i++) {
184     r[i] = PetscRealPart(eigs[perm[i]]);
185     c[i] = PetscImaginaryPart(eigs[perm[i]]);
186   }
187   ierr = PetscFree(perm);CHKERRQ(ierr);
188 #endif
189   PetscFunctionReturn(0);
190 }
191 
192 #if !defined(PETSC_USE_COMPLEX) && !defined(PETSC_HAVE_ESSL)
193 PetscErrorCode KSPComputeRitz_GMRES(KSP ksp,PetscBool ritz,PetscBool small,PetscInt *nrit,Vec S[],PetscReal *tetar,PetscReal *tetai)
194 {
195   KSP_GMRES      *gmres = (KSP_GMRES*)ksp->data;
196   PetscErrorCode ierr;
197   PetscInt       n = gmres->it + 1,N = gmres->max_k + 1,NbrRitz,nb=0;
198   PetscInt       i,j,*perm;
199   PetscReal      *H,*Q,*Ht;              /* H Hessenberg Matrix and Q matrix of eigenvectors of H*/
200   PetscReal      *wr,*wi,*modul;       /* Real and imaginary part and modul of the Ritz values*/
201   PetscReal      *SR,*work;
202   PetscBLASInt   bn,bN,lwork,idummy;
203   PetscScalar    *t,sdummy;
204 
205   PetscFunctionBegin;
206   /* n: size of the Hessenberg matrix */
207   if (gmres->fullcycle) n = N-1;
208   /* NbrRitz: number of (harmonic) Ritz pairs to extract */
209   NbrRitz = PetscMin(*nrit,n);
210 
211   /* Definition of PetscBLASInt for lapack routines*/
212   ierr = PetscBLASIntCast(n,&bn);CHKERRQ(ierr);
213   ierr = PetscBLASIntCast(N,&bN);CHKERRQ(ierr);
214   ierr = PetscBLASIntCast(N,&idummy);CHKERRQ(ierr);
215   ierr = PetscBLASIntCast(5*N,&lwork);CHKERRQ(ierr);
216   /* Memory allocation */
217   ierr = PetscMalloc1(bN*bN,&H);CHKERRQ(ierr);
218   ierr = PetscMalloc1(bn*bn,&Q);CHKERRQ(ierr);
219   ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr);
220   ierr = PetscMalloc1(n,&wr);CHKERRQ(ierr);
221   ierr = PetscMalloc1(n,&wi);CHKERRQ(ierr);
222 
223   /* copy H matrix to work space */
224   if (gmres->fullcycle) {
225     ierr = PetscMemcpy(H,gmres->hes_ritz,bN*bN*sizeof(PetscReal));CHKERRQ(ierr);
226   } else {
227     ierr = PetscMemcpy(H,gmres->hes_origin,bN*bN*sizeof(PetscReal));CHKERRQ(ierr);
228   }
229 
230   /* Modify H to compute Harmonic Ritz pairs H = H + H^{-T}*h^2_{m+1,m}e_m*e_m^T */
231   if (!ritz) {
232     /* Transpose the Hessenberg matrix => Ht */
233     ierr = PetscMalloc1(bn*bn,&Ht);CHKERRQ(ierr);
234     for (i=0; i<bn; i++) {
235       for (j=0; j<bn; j++) {
236         Ht[i*bn+j] = H[j*bN+i];
237       }
238     }
239     /* Solve the system H^T*t = h^2_{m+1,m}e_m */
240     ierr = PetscCalloc1(bn,&t);CHKERRQ(ierr);
241     /* t = h^2_{m+1,m}e_m */
242     if (gmres->fullcycle) {
243       t[bn-1] = PetscSqr(gmres->hes_ritz[(bn-1)*bN+bn]);
244     } else {
245       t[bn-1] = PetscSqr(gmres->hes_origin[(bn-1)*bN+bn]);
246     }
247     /* Call the LAPACK routine dgesv to compute t = H^{-T}*t */
248 #if   defined(PETSC_MISSING_LAPACK_GESV)
249     SETERRQ(PetscObjectComm((PetscObject)ksp),PETSC_ERR_SUP,"GESV - Lapack routine is unavailable.");
250 #else
251     {
252       PetscBLASInt info;
253       PetscBLASInt nrhs = 1;
254       PetscBLASInt *ipiv;
255       ierr = PetscMalloc1(bn,&ipiv);CHKERRQ(ierr);
256       PetscStackCallBLAS("LAPACKgesv",LAPACKgesv_(&bn,&nrhs,Ht,&bn,ipiv,t,&bn,&info));
257       if (info) SETERRQ(PetscObjectComm((PetscObject)ksp),PETSC_ERR_PLIB,"Error while calling the Lapack routine DGESV");
258       ierr = PetscFree(ipiv);CHKERRQ(ierr);
259       ierr = PetscFree(Ht);CHKERRQ(ierr);
260     }
261 #endif
262     /* Now form H + H^{-T}*h^2_{m+1,m}e_m*e_m^T */
263     for (i=0; i<bn; i++) H[(bn-1)*bn+i] += t[i];
264     ierr = PetscFree(t);CHKERRQ(ierr);
265   }
266 
267   /* Compute (harmonic) Ritz pairs */
268 #if defined(PETSC_MISSING_LAPACK_HSEQR)
269   SETERRQ(PetscObjectComm((PetscObject)ksp),PETSC_ERR_SUP,"GEEV - Lapack routine is unavailable\nNot able to provide eigen values.");
270 #else
271   {
272     PetscBLASInt info;
273     ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
274     PetscStackCallBLAS("LAPACKgeev",LAPACKgeev_("N","V",&bn,H,&bN,wr,wi,&sdummy,&idummy,Q,&bn,work,&lwork,&info));
275     if (info) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in LAPACK routine");
276   }
277 #endif
278   /* sort the (harmonic) Ritz values */
279   ierr = PetscMalloc1(n,&modul);CHKERRQ(ierr);
280   ierr = PetscMalloc1(n,&perm);CHKERRQ(ierr);
281   for (i=0; i<n; i++) modul[i] = PetscSqrtReal(wr[i]*wr[i]+wi[i]*wi[i]);
282   for (i=0; i<n; i++) perm[i] = i;
283   ierr = PetscSortRealWithPermutation(n,modul,perm);CHKERRQ(ierr);
284   /* count the number of extracted Ritz or Harmonic Ritz pairs (with complex conjugates) */
285   if (small) {
286     while (nb < NbrRitz) {
287       if (!wi[perm[nb]]) nb += 1;
288       else nb += 2;
289     }
290     ierr = PetscMalloc1(nb*n,&SR);CHKERRQ(ierr);
291     for (i=0; i<nb; i++) {
292       tetar[i] = wr[perm[i]];
293       tetai[i] = wi[perm[i]];
294       ierr = PetscMemcpy(&SR[i*n],&(Q[perm[i]*bn]),n*sizeof(PetscReal));CHKERRQ(ierr);
295     }
296   } else {
297     while (nb < NbrRitz) {
298       if (wi[perm[n-nb-1]] == 0) nb += 1;
299       else nb += 2;
300     }
301     ierr = PetscMalloc1(nb*n,&SR);CHKERRQ(ierr);
302     for (i=0; i<nb; i++) {
303       tetar[i] = wr[perm[n-nb+i]];
304       tetai[i] = wi[perm[n-nb+i]];
305       ierr = PetscMemcpy(&SR[i*n], &(Q[perm[n-nb+i]*bn]), n*sizeof(PetscReal));CHKERRQ(ierr);
306     }
307   }
308   ierr = PetscFree(modul);CHKERRQ(ierr);
309   ierr = PetscFree(perm);CHKERRQ(ierr);
310 
311   /* Form the Ritz or Harmonic Ritz vectors S=VV*Sr,
312     where the columns of VV correspond to the basis of the Krylov subspace */
313   if (gmres->fullcycle) {
314     for (j=0; j<nb; j++) {
315       ierr = VecZeroEntries(S[j]);CHKERRQ(ierr);
316       ierr = VecMAXPY(S[j],n,&SR[j*n],gmres->vecb);CHKERRQ(ierr);
317     }
318   } else {
319     for (j=0; j<nb; j++) {
320       ierr = VecZeroEntries(S[j]);CHKERRQ(ierr);
321       ierr = VecMAXPY(S[j],n,&SR[j*n],&VEC_VV(0));CHKERRQ(ierr);
322     }
323   }
324   *nrit = nb;
325   ierr  = PetscFree(H);CHKERRQ(ierr);
326   ierr  = PetscFree(Q);CHKERRQ(ierr);
327   ierr  = PetscFree(SR);CHKERRQ(ierr);
328   ierr  = PetscFree(wr);CHKERRQ(ierr);
329   ierr  = PetscFree(wi);CHKERRQ(ierr);
330   PetscFunctionReturn(0);
331 }
332 #endif
333 
334 
335