1 2 #include <../src/ksp/ksp/impls/gmres/gmresimpl.h> 3 #include <petscblaslapack.h> 4 5 PetscErrorCode KSPComputeExtremeSingularValues_GMRES(KSP ksp,PetscReal *emax,PetscReal *emin) 6 { 7 #if defined(PETSC_MISSING_LAPACK_GESVD) 8 PetscFunctionBegin; 9 /* 10 The Cray math libraries on T3D/T3E, and early versions of Intel Math Kernel Libraries (MKL) 11 for PCs do not seem to have the DGESVD() lapack routines 12 */ 13 SETERRQ(PetscObjectComm((PetscObject)ksp),PETSC_ERR_SUP,"GESVD - Lapack routine is unavailable\nNot able to provide singular value estimates."); 14 #else 15 KSP_GMRES *gmres = (KSP_GMRES*)ksp->data; 16 PetscErrorCode ierr; 17 PetscInt n = gmres->it + 1,i,N = gmres->max_k + 2; 18 PetscBLASInt bn, bN,lwork, idummy,lierr; 19 PetscScalar *R = gmres->Rsvd,*work = R + N*N,sdummy; 20 PetscReal *realpart = gmres->Dsvd; 21 22 PetscFunctionBegin; 23 ierr = PetscBLASIntCast(n,&bn);CHKERRQ(ierr); 24 ierr = PetscBLASIntCast(N,&bN);CHKERRQ(ierr); 25 ierr = PetscBLASIntCast(5*N,&lwork);CHKERRQ(ierr); 26 ierr = PetscBLASIntCast(N,&idummy);CHKERRQ(ierr); 27 if (n <= 0) { 28 *emax = *emin = 1.0; 29 PetscFunctionReturn(0); 30 } 31 /* copy R matrix to work space */ 32 ierr = PetscMemcpy(R,gmres->hh_origin,(gmres->max_k+2)*(gmres->max_k+1)*sizeof(PetscScalar));CHKERRQ(ierr); 33 34 /* zero below diagonal garbage */ 35 for (i=0; i<n; i++) R[i*N+i+1] = 0.0; 36 37 /* compute Singular Values */ 38 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 39 #if !defined(PETSC_USE_COMPLEX) 40 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("N","N",&bn,&bn,R,&bN,realpart,&sdummy,&idummy,&sdummy,&idummy,work,&lwork,&lierr)); 41 #else 42 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("N","N",&bn,&bn,R,&bN,realpart,&sdummy,&idummy,&sdummy,&idummy,work,&lwork,realpart+N,&lierr)); 43 #endif 44 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SVD Lapack routine %d",(int)lierr); 45 ierr = PetscFPTrapPop();CHKERRQ(ierr); 46 47 *emin = realpart[n-1]; 48 *emax = realpart[0]; 49 #endif 50 PetscFunctionReturn(0); 51 } 52 53 /* ------------------------------------------------------------------------ */ 54 /* ESSL has a different calling sequence for dgeev() and zgeev() than standard LAPACK */ 55 PetscErrorCode KSPComputeEigenvalues_GMRES(KSP ksp,PetscInt nmax,PetscReal *r,PetscReal *c,PetscInt *neig) 56 { 57 #if defined(PETSC_HAVE_ESSL) 58 KSP_GMRES *gmres = (KSP_GMRES*)ksp->data; 59 PetscErrorCode ierr; 60 PetscInt n = gmres->it + 1,N = gmres->max_k + 1; 61 PetscInt i,*perm; 62 PetscScalar *R = gmres->Rsvd; 63 PetscScalar *cwork = R + N*N,sdummy; 64 PetscReal *work,*realpart = gmres->Dsvd; 65 PetscBLASInt zero = 0,bn,bN,idummy,lwork; 66 67 PetscFunctionBegin; 68 ierr = PetscBLASIntCast(n,&bn);CHKERRQ(ierr); 69 ierr = PetscBLASIntCast(N,&bN);CHKERRQ(ierr); 70 idummy = -1; /* unused */ 71 ierr = PetscBLASIntCast(5*N,&lwork);CHKERRQ(ierr); 72 if (nmax < n) SETERRQ(PetscObjectComm((PetscObject)ksp),PETSC_ERR_ARG_SIZ,"Not enough room in work space r and c for eigenvalues"); 73 *neig = n; 74 75 if (!n) PetscFunctionReturn(0); 76 77 /* copy R matrix to work space */ 78 ierr = PetscMemcpy(R,gmres->hes_origin,N*N*sizeof(PetscScalar));CHKERRQ(ierr); 79 80 /* compute eigenvalues */ 81 82 /* for ESSL version need really cwork of length N (complex), 2N 83 (real); already at least 5N of space has been allocated */ 84 85 ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr); 86 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 87 PetscStackCallBLAS("LAPACKgeev",LAPACKgeev_(&zero,R,&bN,cwork,&sdummy,&idummy,&idummy,&bn,work,&lwork)); 88 ierr = PetscFPTrapPop();CHKERRQ(ierr); 89 ierr = PetscFree(work);CHKERRQ(ierr); 90 91 /* For now we stick with the convention of storing the real and imaginary 92 components of evalues separately. But is this what we really want? */ 93 ierr = PetscMalloc1(n,&perm);CHKERRQ(ierr); 94 95 #if !defined(PETSC_USE_COMPLEX) 96 for (i=0; i<n; i++) { 97 realpart[i] = cwork[2*i]; 98 perm[i] = i; 99 } 100 ierr = PetscSortRealWithPermutation(n,realpart,perm);CHKERRQ(ierr); 101 for (i=0; i<n; i++) { 102 r[i] = cwork[2*perm[i]]; 103 c[i] = cwork[2*perm[i]+1]; 104 } 105 #else 106 for (i=0; i<n; i++) { 107 realpart[i] = PetscRealPart(cwork[i]); 108 perm[i] = i; 109 } 110 ierr = PetscSortRealWithPermutation(n,realpart,perm);CHKERRQ(ierr); 111 for (i=0; i<n; i++) { 112 r[i] = PetscRealPart(cwork[perm[i]]); 113 c[i] = PetscImaginaryPart(cwork[perm[i]]); 114 } 115 #endif 116 ierr = PetscFree(perm);CHKERRQ(ierr); 117 #elif defined(PETSC_MISSING_LAPACK_GEEV) 118 PetscFunctionBegin; 119 SETERRQ(PetscObjectComm((PetscObject)ksp),PETSC_ERR_SUP,"GEEV - Lapack routine is unavailable\nNot able to provide eigen values."); 120 #elif !defined(PETSC_USE_COMPLEX) 121 KSP_GMRES *gmres = (KSP_GMRES*)ksp->data; 122 PetscErrorCode ierr; 123 PetscInt n = gmres->it + 1,N = gmres->max_k + 1,i,*perm; 124 PetscBLASInt bn, bN, lwork, idummy, lierr; 125 PetscScalar *R = gmres->Rsvd,*work = R + N*N; 126 PetscScalar *realpart = gmres->Dsvd,*imagpart = realpart + N,sdummy; 127 128 PetscFunctionBegin; 129 ierr = PetscBLASIntCast(n,&bn);CHKERRQ(ierr); 130 ierr = PetscBLASIntCast(N,&bN);CHKERRQ(ierr); 131 ierr = PetscBLASIntCast(5*N,&lwork);CHKERRQ(ierr); 132 ierr = PetscBLASIntCast(N,&idummy);CHKERRQ(ierr); 133 if (nmax < n) SETERRQ(PetscObjectComm((PetscObject)ksp),PETSC_ERR_ARG_SIZ,"Not enough room in work space r and c for eigenvalues"); 134 *neig = n; 135 136 if (!n) PetscFunctionReturn(0); 137 138 /* copy R matrix to work space */ 139 ierr = PetscMemcpy(R,gmres->hes_origin,N*N*sizeof(PetscScalar));CHKERRQ(ierr); 140 141 /* compute eigenvalues */ 142 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 143 PetscStackCallBLAS("LAPACKgeev",LAPACKgeev_("N","N",&bn,R,&bN,realpart,imagpart,&sdummy,&idummy,&sdummy,&idummy,work,&lwork,&lierr)); 144 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in LAPACK routine %d",(int)lierr); 145 ierr = PetscFPTrapPop();CHKERRQ(ierr); 146 ierr = PetscMalloc1(n,&perm);CHKERRQ(ierr); 147 for (i=0; i<n; i++) perm[i] = i; 148 ierr = PetscSortRealWithPermutation(n,realpart,perm);CHKERRQ(ierr); 149 for (i=0; i<n; i++) { 150 r[i] = realpart[perm[i]]; 151 c[i] = imagpart[perm[i]]; 152 } 153 ierr = PetscFree(perm);CHKERRQ(ierr); 154 #else 155 KSP_GMRES *gmres = (KSP_GMRES*)ksp->data; 156 PetscErrorCode ierr; 157 PetscInt n = gmres->it + 1,N = gmres->max_k + 1,i,*perm; 158 PetscScalar *R = gmres->Rsvd,*work = R + N*N,*eigs = work + 5*N,sdummy; 159 PetscBLASInt bn,bN,lwork,idummy,lierr; 160 161 PetscFunctionBegin; 162 ierr = PetscBLASIntCast(n,&bn);CHKERRQ(ierr); 163 ierr = PetscBLASIntCast(N,&bN);CHKERRQ(ierr); 164 ierr = PetscBLASIntCast(5*N,&lwork);CHKERRQ(ierr); 165 ierr = PetscBLASIntCast(N,&idummy);CHKERRQ(ierr); 166 if (nmax < n) SETERRQ(PetscObjectComm((PetscObject)ksp),PETSC_ERR_ARG_SIZ,"Not enough room in work space r and c for eigenvalues"); 167 *neig = n; 168 169 if (!n) PetscFunctionReturn(0); 170 171 /* copy R matrix to work space */ 172 ierr = PetscMemcpy(R,gmres->hes_origin,N*N*sizeof(PetscScalar));CHKERRQ(ierr); 173 174 /* compute eigenvalues */ 175 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 176 PetscStackCallBLAS("LAPACKgeev",LAPACKgeev_("N","N",&bn,R,&bN,eigs,&sdummy,&idummy,&sdummy,&idummy,work,&lwork,gmres->Dsvd,&lierr)); 177 if (lierr) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in LAPACK routine"); 178 ierr = PetscFPTrapPop();CHKERRQ(ierr); 179 ierr = PetscMalloc1(n,&perm);CHKERRQ(ierr); 180 for (i=0; i<n; i++) perm[i] = i; 181 for (i=0; i<n; i++) r[i] = PetscRealPart(eigs[i]); 182 ierr = PetscSortRealWithPermutation(n,r,perm);CHKERRQ(ierr); 183 for (i=0; i<n; i++) { 184 r[i] = PetscRealPart(eigs[perm[i]]); 185 c[i] = PetscImaginaryPart(eigs[perm[i]]); 186 } 187 ierr = PetscFree(perm);CHKERRQ(ierr); 188 #endif 189 PetscFunctionReturn(0); 190 } 191 192 #if !defined(PETSC_USE_COMPLEX) && !defined(PETSC_HAVE_ESSL) 193 PetscErrorCode KSPComputeRitz_GMRES(KSP ksp,PetscBool ritz,PetscBool small,PetscInt *nrit,Vec S[],PetscReal *tetar,PetscReal *tetai) 194 { 195 KSP_GMRES *gmres = (KSP_GMRES*)ksp->data; 196 PetscErrorCode ierr; 197 PetscInt n = gmres->it + 1,N = gmres->max_k + 1,NbrRitz,nb=0; 198 PetscInt i,j,*perm; 199 PetscReal *H,*Q,*Ht; /* H Hessenberg Matrix and Q matrix of eigenvectors of H*/ 200 PetscReal *wr,*wi,*modul; /* Real and imaginary part and modul of the Ritz values*/ 201 PetscReal *SR,*work; 202 PetscBLASInt bn,bN,lwork,idummy; 203 PetscScalar *t,sdummy; 204 205 PetscFunctionBegin; 206 /* n: size of the Hessenberg matrix */ 207 if (gmres->fullcycle) n = N-1; 208 /* NbrRitz: number of (harmonic) Ritz pairs to extract */ 209 NbrRitz = PetscMin(*nrit,n); 210 211 /* Definition of PetscBLASInt for lapack routines*/ 212 ierr = PetscBLASIntCast(n,&bn);CHKERRQ(ierr); 213 ierr = PetscBLASIntCast(N,&bN);CHKERRQ(ierr); 214 ierr = PetscBLASIntCast(N,&idummy);CHKERRQ(ierr); 215 ierr = PetscBLASIntCast(5*N,&lwork);CHKERRQ(ierr); 216 /* Memory allocation */ 217 ierr = PetscMalloc1(bN*bN,&H);CHKERRQ(ierr); 218 ierr = PetscMalloc1(bn*bn,&Q);CHKERRQ(ierr); 219 ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr); 220 ierr = PetscMalloc1(n,&wr);CHKERRQ(ierr); 221 ierr = PetscMalloc1(n,&wi);CHKERRQ(ierr); 222 223 /* copy H matrix to work space */ 224 if (gmres->fullcycle) { 225 ierr = PetscMemcpy(H,gmres->hes_ritz,bN*bN*sizeof(PetscReal));CHKERRQ(ierr); 226 } else { 227 ierr = PetscMemcpy(H,gmres->hes_origin,bN*bN*sizeof(PetscReal));CHKERRQ(ierr); 228 } 229 230 /* Modify H to compute Harmonic Ritz pairs H = H + H^{-T}*h^2_{m+1,m}e_m*e_m^T */ 231 if (!ritz) { 232 /* Transpose the Hessenberg matrix => Ht */ 233 ierr = PetscMalloc1(bn*bn,&Ht);CHKERRQ(ierr); 234 for (i=0; i<bn; i++) { 235 for (j=0; j<bn; j++) { 236 Ht[i*bn+j] = H[j*bN+i]; 237 } 238 } 239 /* Solve the system H^T*t = h^2_{m+1,m}e_m */ 240 ierr = PetscCalloc1(bn,&t);CHKERRQ(ierr); 241 /* t = h^2_{m+1,m}e_m */ 242 if (gmres->fullcycle) { 243 t[bn-1] = PetscSqr(gmres->hes_ritz[(bn-1)*bN+bn]); 244 } else { 245 t[bn-1] = PetscSqr(gmres->hes_origin[(bn-1)*bN+bn]); 246 } 247 /* Call the LAPACK routine dgesv to compute t = H^{-T}*t */ 248 #if defined(PETSC_MISSING_LAPACK_GESV) 249 SETERRQ(PetscObjectComm((PetscObject)ksp),PETSC_ERR_SUP,"GESV - Lapack routine is unavailable."); 250 #else 251 { 252 PetscBLASInt info; 253 PetscBLASInt nrhs = 1; 254 PetscBLASInt *ipiv; 255 ierr = PetscMalloc1(bn,&ipiv);CHKERRQ(ierr); 256 PetscStackCallBLAS("LAPACKgesv",LAPACKgesv_(&bn,&nrhs,Ht,&bn,ipiv,t,&bn,&info)); 257 if (info) SETERRQ(PetscObjectComm((PetscObject)ksp),PETSC_ERR_PLIB,"Error while calling the Lapack routine DGESV"); 258 ierr = PetscFree(ipiv);CHKERRQ(ierr); 259 ierr = PetscFree(Ht);CHKERRQ(ierr); 260 } 261 #endif 262 /* Now form H + H^{-T}*h^2_{m+1,m}e_m*e_m^T */ 263 for (i=0; i<bn; i++) H[(bn-1)*bn+i] += t[i]; 264 ierr = PetscFree(t);CHKERRQ(ierr); 265 } 266 267 /* Compute (harmonic) Ritz pairs */ 268 #if defined(PETSC_MISSING_LAPACK_HSEQR) 269 SETERRQ(PetscObjectComm((PetscObject)ksp),PETSC_ERR_SUP,"GEEV - Lapack routine is unavailable\nNot able to provide eigen values."); 270 #else 271 { 272 PetscBLASInt info; 273 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 274 PetscStackCallBLAS("LAPACKgeev",LAPACKgeev_("N","V",&bn,H,&bN,wr,wi,&sdummy,&idummy,Q,&bn,work,&lwork,&info)); 275 if (info) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in LAPACK routine"); 276 } 277 #endif 278 /* sort the (harmonic) Ritz values */ 279 ierr = PetscMalloc1(n,&modul);CHKERRQ(ierr); 280 ierr = PetscMalloc1(n,&perm);CHKERRQ(ierr); 281 for (i=0; i<n; i++) modul[i] = PetscSqrtReal(wr[i]*wr[i]+wi[i]*wi[i]); 282 for (i=0; i<n; i++) perm[i] = i; 283 ierr = PetscSortRealWithPermutation(n,modul,perm);CHKERRQ(ierr); 284 /* count the number of extracted Ritz or Harmonic Ritz pairs (with complex conjugates) */ 285 if (small) { 286 while (nb < NbrRitz) { 287 if (!wi[perm[nb]]) nb += 1; 288 else nb += 2; 289 } 290 ierr = PetscMalloc1(nb*n,&SR);CHKERRQ(ierr); 291 for (i=0; i<nb; i++) { 292 tetar[i] = wr[perm[i]]; 293 tetai[i] = wi[perm[i]]; 294 ierr = PetscMemcpy(&SR[i*n],&(Q[perm[i]*bn]),n*sizeof(PetscReal));CHKERRQ(ierr); 295 } 296 } else { 297 while (nb < NbrRitz) { 298 if (wi[perm[n-nb-1]] == 0) nb += 1; 299 else nb += 2; 300 } 301 ierr = PetscMalloc1(nb*n,&SR);CHKERRQ(ierr); 302 for (i=0; i<nb; i++) { 303 tetar[i] = wr[perm[n-nb+i]]; 304 tetai[i] = wi[perm[n-nb+i]]; 305 ierr = PetscMemcpy(&SR[i*n], &(Q[perm[n-nb+i]*bn]), n*sizeof(PetscReal));CHKERRQ(ierr); 306 } 307 } 308 ierr = PetscFree(modul);CHKERRQ(ierr); 309 ierr = PetscFree(perm);CHKERRQ(ierr); 310 311 /* Form the Ritz or Harmonic Ritz vectors S=VV*Sr, 312 where the columns of VV correspond to the basis of the Krylov subspace */ 313 if (gmres->fullcycle) { 314 for (j=0; j<nb; j++) { 315 ierr = VecZeroEntries(S[j]);CHKERRQ(ierr); 316 ierr = VecMAXPY(S[j],n,&SR[j*n],gmres->vecb);CHKERRQ(ierr); 317 } 318 } else { 319 for (j=0; j<nb; j++) { 320 ierr = VecZeroEntries(S[j]);CHKERRQ(ierr); 321 ierr = VecMAXPY(S[j],n,&SR[j*n],&VEC_VV(0));CHKERRQ(ierr); 322 } 323 } 324 *nrit = nb; 325 ierr = PetscFree(H);CHKERRQ(ierr); 326 ierr = PetscFree(Q);CHKERRQ(ierr); 327 ierr = PetscFree(SR);CHKERRQ(ierr); 328 ierr = PetscFree(wr);CHKERRQ(ierr); 329 ierr = PetscFree(wi);CHKERRQ(ierr); 330 PetscFunctionReturn(0); 331 } 332 #endif 333 334 335