1 2 #include <../src/ksp/ksp/impls/gmres/gmresimpl.h> 3 #include <petscblaslapack.h> 4 5 #undef __FUNCT__ 6 #define __FUNCT__ "KSPComputeExtremeSingularValues_GMRES" 7 PetscErrorCode KSPComputeExtremeSingularValues_GMRES(KSP ksp,PetscReal *emax,PetscReal *emin) 8 { 9 #if defined(PETSC_MISSING_LAPACK_GESVD) 10 PetscFunctionBegin; 11 /* 12 The Cray math libraries on T3D/T3E, and early versions of Intel Math Kernel Libraries (MKL) 13 for PCs do not seem to have the DGESVD() lapack routines 14 */ 15 SETERRQ(PetscObjectComm((PetscObject)ksp),PETSC_ERR_SUP,"GESVD - Lapack routine is unavailable\nNot able to provide singular value estimates."); 16 #else 17 KSP_GMRES *gmres = (KSP_GMRES*)ksp->data; 18 PetscErrorCode ierr; 19 PetscInt n = gmres->it + 1,i,N = gmres->max_k + 2; 20 PetscBLASInt bn, bN,lwork, idummy,lierr; 21 PetscScalar *R = gmres->Rsvd,*work = R + N*N,sdummy; 22 PetscReal *realpart = gmres->Dsvd; 23 24 PetscFunctionBegin; 25 ierr = PetscBLASIntCast(n,&bn);CHKERRQ(ierr); 26 ierr = PetscBLASIntCast(N,&bN);CHKERRQ(ierr); 27 ierr = PetscBLASIntCast(5*N,&lwork);CHKERRQ(ierr); 28 ierr = PetscBLASIntCast(N,&idummy);CHKERRQ(ierr); 29 if (n <= 0) { 30 *emax = *emin = 1.0; 31 PetscFunctionReturn(0); 32 } 33 /* copy R matrix to work space */ 34 ierr = PetscMemcpy(R,gmres->hh_origin,(gmres->max_k+2)*(gmres->max_k+1)*sizeof(PetscScalar));CHKERRQ(ierr); 35 36 /* zero below diagonal garbage */ 37 for (i=0; i<n; i++) R[i*N+i+1] = 0.0; 38 39 /* compute Singular Values */ 40 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 41 #if !defined(PETSC_USE_COMPLEX) 42 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("N","N",&bn,&bn,R,&bN,realpart,&sdummy,&idummy,&sdummy,&idummy,work,&lwork,&lierr)); 43 #else 44 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("N","N",&bn,&bn,R,&bN,realpart,&sdummy,&idummy,&sdummy,&idummy,work,&lwork,realpart+N,&lierr)); 45 #endif 46 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SVD Lapack routine %d",(int)lierr); 47 ierr = PetscFPTrapPop();CHKERRQ(ierr); 48 49 *emin = realpart[n-1]; 50 *emax = realpart[0]; 51 #endif 52 PetscFunctionReturn(0); 53 } 54 55 /* ------------------------------------------------------------------------ */ 56 /* ESSL has a different calling sequence for dgeev() and zgeev() than standard LAPACK */ 57 #undef __FUNCT__ 58 #define __FUNCT__ "KSPComputeEigenvalues_GMRES" 59 PetscErrorCode KSPComputeEigenvalues_GMRES(KSP ksp,PetscInt nmax,PetscReal *r,PetscReal *c,PetscInt *neig) 60 { 61 #if defined(PETSC_HAVE_ESSL) 62 KSP_GMRES *gmres = (KSP_GMRES*)ksp->data; 63 PetscErrorCode ierr; 64 PetscInt n = gmres->it + 1,N = gmres->max_k + 1; 65 PetscInt i,*perm; 66 PetscScalar *R = gmres->Rsvd; 67 PetscScalar *cwork = R + N*N,sdummy; 68 PetscReal *work,*realpart = gmres->Dsvd; 69 PetscBLASInt zero = 0,bn,bN,idummy,lwork; 70 71 PetscFunctionBegin; 72 ierr = PetscBLASIntCast(n,&bn);CHKERRQ(ierr); 73 ierr = PetscBLASIntCast(N,&bN);CHKERRQ(ierr); 74 idummy = -1; /* unused */ 75 ierr = PetscBLASIntCast(5*N,&lwork);CHKERRQ(ierr); 76 if (nmax < n) SETERRQ(PetscObjectComm((PetscObject)ksp),PETSC_ERR_ARG_SIZ,"Not enough room in work space r and c for eigenvalues"); 77 *neig = n; 78 79 if (!n) PetscFunctionReturn(0); 80 81 /* copy R matrix to work space */ 82 ierr = PetscMemcpy(R,gmres->hes_origin,N*N*sizeof(PetscScalar));CHKERRQ(ierr); 83 84 /* compute eigenvalues */ 85 86 /* for ESSL version need really cwork of length N (complex), 2N 87 (real); already at least 5N of space has been allocated */ 88 89 ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr); 90 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 91 PetscStackCallBLAS("LAPACKgeev",LAPACKgeev_(&zero,R,&bN,cwork,&sdummy,&idummy,&idummy,&bn,work,&lwork)); 92 ierr = PetscFPTrapPop();CHKERRQ(ierr); 93 ierr = PetscFree(work);CHKERRQ(ierr); 94 95 /* For now we stick with the convention of storing the real and imaginary 96 components of evalues separately. But is this what we really want? */ 97 ierr = PetscMalloc1(n,&perm);CHKERRQ(ierr); 98 99 #if !defined(PETSC_USE_COMPLEX) 100 for (i=0; i<n; i++) { 101 realpart[i] = cwork[2*i]; 102 perm[i] = i; 103 } 104 ierr = PetscSortRealWithPermutation(n,realpart,perm);CHKERRQ(ierr); 105 for (i=0; i<n; i++) { 106 r[i] = cwork[2*perm[i]]; 107 c[i] = cwork[2*perm[i]+1]; 108 } 109 #else 110 for (i=0; i<n; i++) { 111 realpart[i] = PetscRealPart(cwork[i]); 112 perm[i] = i; 113 } 114 ierr = PetscSortRealWithPermutation(n,realpart,perm);CHKERRQ(ierr); 115 for (i=0; i<n; i++) { 116 r[i] = PetscRealPart(cwork[perm[i]]); 117 c[i] = PetscImaginaryPart(cwork[perm[i]]); 118 } 119 #endif 120 ierr = PetscFree(perm);CHKERRQ(ierr); 121 #elif defined(PETSC_MISSING_LAPACK_GEEV) 122 PetscFunctionBegin; 123 SETERRQ(PetscObjectComm((PetscObject)ksp),PETSC_ERR_SUP,"GEEV - Lapack routine is unavailable\nNot able to provide eigen values."); 124 #elif !defined(PETSC_USE_COMPLEX) 125 KSP_GMRES *gmres = (KSP_GMRES*)ksp->data; 126 PetscErrorCode ierr; 127 PetscInt n = gmres->it + 1,N = gmres->max_k + 1,i,*perm; 128 PetscBLASInt bn, bN, lwork, idummy, lierr; 129 PetscScalar *R = gmres->Rsvd,*work = R + N*N; 130 PetscScalar *realpart = gmres->Dsvd,*imagpart = realpart + N,sdummy; 131 132 PetscFunctionBegin; 133 ierr = PetscBLASIntCast(n,&bn);CHKERRQ(ierr); 134 ierr = PetscBLASIntCast(N,&bN);CHKERRQ(ierr); 135 ierr = PetscBLASIntCast(5*N,&lwork);CHKERRQ(ierr); 136 ierr = PetscBLASIntCast(N,&idummy);CHKERRQ(ierr); 137 if (nmax < n) SETERRQ(PetscObjectComm((PetscObject)ksp),PETSC_ERR_ARG_SIZ,"Not enough room in work space r and c for eigenvalues"); 138 *neig = n; 139 140 if (!n) PetscFunctionReturn(0); 141 142 /* copy R matrix to work space */ 143 ierr = PetscMemcpy(R,gmres->hes_origin,N*N*sizeof(PetscScalar));CHKERRQ(ierr); 144 145 /* compute eigenvalues */ 146 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 147 PetscStackCallBLAS("LAPACKgeev",LAPACKgeev_("N","N",&bn,R,&bN,realpart,imagpart,&sdummy,&idummy,&sdummy,&idummy,work,&lwork,&lierr)); 148 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in LAPACK routine %d",(int)lierr); 149 ierr = PetscFPTrapPop();CHKERRQ(ierr); 150 ierr = PetscMalloc1(n,&perm);CHKERRQ(ierr); 151 for (i=0; i<n; i++) perm[i] = i; 152 ierr = PetscSortRealWithPermutation(n,realpart,perm);CHKERRQ(ierr); 153 for (i=0; i<n; i++) { 154 r[i] = realpart[perm[i]]; 155 c[i] = imagpart[perm[i]]; 156 } 157 ierr = PetscFree(perm);CHKERRQ(ierr); 158 #else 159 KSP_GMRES *gmres = (KSP_GMRES*)ksp->data; 160 PetscErrorCode ierr; 161 PetscInt n = gmres->it + 1,N = gmres->max_k + 1,i,*perm; 162 PetscScalar *R = gmres->Rsvd,*work = R + N*N,*eigs = work + 5*N,sdummy; 163 PetscBLASInt bn,bN,lwork,idummy,lierr; 164 165 PetscFunctionBegin; 166 ierr = PetscBLASIntCast(n,&bn);CHKERRQ(ierr); 167 ierr = PetscBLASIntCast(N,&bN);CHKERRQ(ierr); 168 ierr = PetscBLASIntCast(5*N,&lwork);CHKERRQ(ierr); 169 ierr = PetscBLASIntCast(N,&idummy);CHKERRQ(ierr); 170 if (nmax < n) SETERRQ(PetscObjectComm((PetscObject)ksp),PETSC_ERR_ARG_SIZ,"Not enough room in work space r and c for eigenvalues"); 171 *neig = n; 172 173 if (!n) PetscFunctionReturn(0); 174 175 /* copy R matrix to work space */ 176 ierr = PetscMemcpy(R,gmres->hes_origin,N*N*sizeof(PetscScalar));CHKERRQ(ierr); 177 178 /* compute eigenvalues */ 179 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 180 PetscStackCallBLAS("LAPACKgeev",LAPACKgeev_("N","N",&bn,R,&bN,eigs,&sdummy,&idummy,&sdummy,&idummy,work,&lwork,gmres->Dsvd,&lierr)); 181 if (lierr) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in LAPACK routine"); 182 ierr = PetscFPTrapPop();CHKERRQ(ierr); 183 ierr = PetscMalloc1(n,&perm);CHKERRQ(ierr); 184 for (i=0; i<n; i++) perm[i] = i; 185 for (i=0; i<n; i++) r[i] = PetscRealPart(eigs[i]); 186 ierr = PetscSortRealWithPermutation(n,r,perm);CHKERRQ(ierr); 187 for (i=0; i<n; i++) { 188 r[i] = PetscRealPart(eigs[perm[i]]); 189 c[i] = PetscImaginaryPart(eigs[perm[i]]); 190 } 191 ierr = PetscFree(perm);CHKERRQ(ierr); 192 #endif 193 PetscFunctionReturn(0); 194 } 195 196 #if !defined(PETSC_USE_COMPLEX) && !defined(PETSC_HAVE_ESSL) 197 #undef __FUNCT__ 198 #define __FUNCT__ "KSPComputeRitz_GMRES" 199 PetscErrorCode KSPComputeRitz_GMRES(KSP ksp,PetscBool ritz,PetscBool small,PetscInt *nrit,Vec S[],PetscReal *tetar,PetscReal *tetai) 200 { 201 KSP_GMRES *gmres = (KSP_GMRES*)ksp->data; 202 PetscErrorCode ierr; 203 PetscInt n = gmres->it + 1,N = gmres->max_k + 1,NbrRitz,nb=0; 204 PetscInt i,j,*perm; 205 PetscReal *H,*Q,*Ht; /* H Hessenberg Matrix and Q matrix of eigenvectors of H*/ 206 PetscReal *wr,*wi,*modul; /* Real and imaginary part and modul of the Ritz values*/ 207 PetscReal *SR,*work; 208 PetscBLASInt bn,bN,lwork,idummy; 209 PetscScalar *t,sdummy; 210 211 PetscFunctionBegin; 212 /* n: size of the Hessenberg matrix */ 213 if (gmres->fullcycle) n = N-1; 214 /* NbrRitz: number of (harmonic) Ritz pairs to extract */ 215 NbrRitz = PetscMin(*nrit,n); 216 217 /* Definition of PetscBLASInt for lapack routines*/ 218 ierr = PetscBLASIntCast(n,&bn);CHKERRQ(ierr); 219 ierr = PetscBLASIntCast(N,&bN);CHKERRQ(ierr); 220 ierr = PetscBLASIntCast(N,&idummy);CHKERRQ(ierr); 221 ierr = PetscBLASIntCast(5*N,&lwork);CHKERRQ(ierr); 222 /* Memory allocation */ 223 ierr = PetscMalloc1(bN*bN,&H);CHKERRQ(ierr); 224 ierr = PetscMalloc1(bn*bn,&Q);CHKERRQ(ierr); 225 ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr); 226 ierr = PetscMalloc1(n,&wr);CHKERRQ(ierr); 227 ierr = PetscMalloc1(n,&wi);CHKERRQ(ierr); 228 229 /* copy H matrix to work space */ 230 if (gmres->fullcycle) { 231 ierr = PetscMemcpy(H,gmres->hes_ritz,bN*bN*sizeof(PetscReal));CHKERRQ(ierr); 232 } else { 233 ierr = PetscMemcpy(H,gmres->hes_origin,bN*bN*sizeof(PetscReal));CHKERRQ(ierr); 234 } 235 236 /* Modify H to compute Harmonic Ritz pairs H = H + H^{-T}*h^2_{m+1,m}e_m*e_m^T */ 237 if (!ritz) { 238 /* Transpose the Hessenberg matrix => Ht */ 239 ierr = PetscMalloc1(bn*bn,&Ht);CHKERRQ(ierr); 240 for (i=0; i<bn; i++) { 241 for (j=0; j<bn; j++) { 242 Ht[i*bn+j] = H[j*bN+i]; 243 } 244 } 245 /* Solve the system H^T*t = h^2_{m+1,m}e_m */ 246 ierr = PetscCalloc1(bn,&t);CHKERRQ(ierr); 247 /* t = h^2_{m+1,m}e_m */ 248 if (gmres->fullcycle) { 249 t[bn-1] = PetscSqr(gmres->hes_ritz[(bn-1)*bN+bn]); 250 } else { 251 t[bn-1] = PetscSqr(gmres->hes_origin[(bn-1)*bN+bn]); 252 } 253 /* Call the LAPACK routine dgesv to compute t = H^{-T}*t */ 254 #if defined(PETSC_MISSING_LAPACK_GESV) 255 SETERRQ(PetscObjectComm((PetscObject)ksp),PETSC_ERR_SUP,"GESV - Lapack routine is unavailable."); 256 #else 257 { 258 PetscBLASInt info; 259 PetscBLASInt nrhs = 1; 260 PetscBLASInt *ipiv; 261 ierr = PetscMalloc1(bn,&ipiv);CHKERRQ(ierr); 262 PetscStackCallBLAS("LAPACKgesv",LAPACKgesv_(&bn,&nrhs,Ht,&bn,ipiv,t,&bn,&info)); 263 if (info) SETERRQ(PetscObjectComm((PetscObject)ksp),PETSC_ERR_PLIB,"Error while calling the Lapack routine DGESV"); 264 ierr = PetscFree(ipiv);CHKERRQ(ierr); 265 ierr = PetscFree(Ht);CHKERRQ(ierr); 266 } 267 #endif 268 /* Now form H + H^{-T}*h^2_{m+1,m}e_m*e_m^T */ 269 for (i=0; i<bn; i++) H[(bn-1)*bn+i] += t[i]; 270 ierr = PetscFree(t);CHKERRQ(ierr); 271 } 272 273 /* Compute (harmonic) Ritz pairs */ 274 #if defined(PETSC_MISSING_LAPACK_HSEQR) 275 SETERRQ(PetscObjectComm((PetscObject)ksp),PETSC_ERR_SUP,"GEEV - Lapack routine is unavailable\nNot able to provide eigen values."); 276 #else 277 { 278 PetscBLASInt info; 279 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 280 PetscStackCallBLAS("LAPACKgeev",LAPACKgeev_("N","V",&bn,H,&bN,wr,wi,&sdummy,&idummy,Q,&bn,work,&lwork,&info)); 281 if (info) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in LAPACK routine"); 282 } 283 #endif 284 /* sort the (harmonic) Ritz values */ 285 ierr = PetscMalloc1(n,&modul);CHKERRQ(ierr); 286 ierr = PetscMalloc1(n,&perm);CHKERRQ(ierr); 287 for (i=0; i<n; i++) modul[i] = PetscSqrtReal(wr[i]*wr[i]+wi[i]*wi[i]); 288 for (i=0; i<n; i++) perm[i] = i; 289 ierr = PetscSortRealWithPermutation(n,modul,perm);CHKERRQ(ierr); 290 /* count the number of extracted Ritz or Harmonic Ritz pairs (with complex conjugates) */ 291 if (small) { 292 while (nb < NbrRitz) { 293 if (!wi[perm[nb]]) nb += 1; 294 else nb += 2; 295 } 296 ierr = PetscMalloc(nb*n*sizeof(PetscReal),&SR);CHKERRQ(ierr); 297 for (i=0; i<nb; i++) { 298 tetar[i] = wr[perm[i]]; 299 tetai[i] = wi[perm[i]]; 300 ierr = PetscMemcpy(&SR[i*n],&(Q[perm[i]*bn]),n*sizeof(PetscReal));CHKERRQ(ierr); 301 } 302 } else { 303 while (nb < NbrRitz) { 304 if (wi[perm[n-nb-1]] == 0) nb += 1; 305 else nb += 2; 306 } 307 ierr = PetscMalloc(nb*n*sizeof(PetscReal),&SR);CHKERRQ(ierr); 308 for (i=0; i<nb; i++) { 309 tetar[i] = wr[perm[n-nb+i]]; 310 tetai[i] = wi[perm[n-nb+i]]; 311 ierr = PetscMemcpy(&SR[i*n], &(Q[perm[n-nb+i]*bn]), n*sizeof(PetscReal));CHKERRQ(ierr); 312 } 313 } 314 ierr = PetscFree(modul);CHKERRQ(ierr); 315 ierr = PetscFree(perm);CHKERRQ(ierr); 316 317 /* Form the Ritz or Harmonic Ritz vectors S=VV*Sr, 318 where the columns of VV correspond to the basis of the Krylov subspace */ 319 if (gmres->fullcycle) { 320 for (j=0; j<nb; j++) { 321 ierr = VecZeroEntries(S[j]);CHKERRQ(ierr); 322 ierr = VecMAXPY(S[j],n,&SR[j*n],gmres->vecb);CHKERRQ(ierr); 323 } 324 } else { 325 for (j=0; j<nb; j++) { 326 ierr = VecZeroEntries(S[j]);CHKERRQ(ierr); 327 ierr = VecMAXPY(S[j],n,&SR[j*n],&VEC_VV(0));CHKERRQ(ierr); 328 } 329 } 330 *nrit = nb; 331 ierr = PetscFree(H);CHKERRQ(ierr); 332 ierr = PetscFree(Q);CHKERRQ(ierr); 333 ierr = PetscFree(SR);CHKERRQ(ierr); 334 ierr = PetscFree(wr);CHKERRQ(ierr); 335 ierr = PetscFree(wi);CHKERRQ(ierr); 336 PetscFunctionReturn(0); 337 } 338 #endif 339 340 341