1 2 #include <../src/ksp/ksp/impls/gmres/gmresimpl.h> 3 #include <petscblaslapack.h> 4 5 PetscErrorCode KSPComputeExtremeSingularValues_GMRES(KSP ksp,PetscReal *emax,PetscReal *emin) 6 { 7 #if defined(PETSC_MISSING_LAPACK_GESVD) 8 PetscFunctionBegin; 9 /* 10 The Cray math libraries on T3D/T3E, and early versions of Intel Math Kernel Libraries (MKL) 11 for PCs do not seem to have the DGESVD() lapack routines 12 */ 13 SETERRQ(PetscObjectComm((PetscObject)ksp),PETSC_ERR_SUP,"GESVD - Lapack routine is unavailable\nNot able to provide singular value estimates."); 14 #else 15 KSP_GMRES *gmres = (KSP_GMRES*)ksp->data; 16 PetscErrorCode ierr; 17 PetscInt n = gmres->it + 1,i,N = gmres->max_k + 2; 18 PetscBLASInt bn, bN,lwork, idummy,lierr; 19 PetscScalar *R = gmres->Rsvd,*work = R + N*N,sdummy = 0; 20 PetscReal *realpart = gmres->Dsvd; 21 22 PetscFunctionBegin; 23 ierr = PetscBLASIntCast(n,&bn);CHKERRQ(ierr); 24 ierr = PetscBLASIntCast(N,&bN);CHKERRQ(ierr); 25 ierr = PetscBLASIntCast(5*N,&lwork);CHKERRQ(ierr); 26 ierr = PetscBLASIntCast(N,&idummy);CHKERRQ(ierr); 27 if (n <= 0) { 28 *emax = *emin = 1.0; 29 PetscFunctionReturn(0); 30 } 31 /* copy R matrix to work space */ 32 ierr = PetscArraycpy(R,gmres->hh_origin,(gmres->max_k+2)*(gmres->max_k+1));CHKERRQ(ierr); 33 34 /* zero below diagonal garbage */ 35 for (i=0; i<n; i++) R[i*N+i+1] = 0.0; 36 37 /* compute Singular Values */ 38 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 39 #if !defined(PETSC_USE_COMPLEX) 40 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("N","N",&bn,&bn,R,&bN,realpart,&sdummy,&idummy,&sdummy,&idummy,work,&lwork,&lierr)); 41 #else 42 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("N","N",&bn,&bn,R,&bN,realpart,&sdummy,&idummy,&sdummy,&idummy,work,&lwork,realpart+N,&lierr)); 43 #endif 44 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SVD Lapack routine %d",(int)lierr); 45 ierr = PetscFPTrapPop();CHKERRQ(ierr); 46 47 *emin = realpart[n-1]; 48 *emax = realpart[0]; 49 #endif 50 PetscFunctionReturn(0); 51 } 52 53 /* ------------------------------------------------------------------------ */ 54 /* ESSL has a different calling sequence for dgeev() and zgeev() than standard LAPACK */ 55 PetscErrorCode KSPComputeEigenvalues_GMRES(KSP ksp,PetscInt nmax,PetscReal *r,PetscReal *c,PetscInt *neig) 56 { 57 #if defined(PETSC_HAVE_ESSL) 58 KSP_GMRES *gmres = (KSP_GMRES*)ksp->data; 59 PetscErrorCode ierr; 60 PetscInt n = gmres->it + 1,N = gmres->max_k + 1; 61 PetscInt i,*perm; 62 PetscScalar *R = gmres->Rsvd; 63 PetscScalar *cwork = R + N*N,sdummy = 0; 64 PetscReal *work,*realpart = gmres->Dsvd; 65 PetscBLASInt zero = 0,bn,bN,idummy = -1,lwork; 66 67 PetscFunctionBegin; 68 ierr = PetscBLASIntCast(n,&bn);CHKERRQ(ierr); 69 ierr = PetscBLASIntCast(N,&bN);CHKERRQ(ierr); 70 ierr = PetscBLASIntCast(5*N,&lwork);CHKERRQ(ierr); 71 if (nmax < n) SETERRQ(PetscObjectComm((PetscObject)ksp),PETSC_ERR_ARG_SIZ,"Not enough room in work space r and c for eigenvalues"); 72 *neig = n; 73 74 if (!n) PetscFunctionReturn(0); 75 76 /* copy R matrix to work space */ 77 ierr = PetscArraycpy(R,gmres->hes_origin,N*N);CHKERRQ(ierr); 78 79 /* compute eigenvalues */ 80 81 /* for ESSL version need really cwork of length N (complex), 2N 82 (real); already at least 5N of space has been allocated */ 83 84 ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr); 85 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 86 PetscStackCallBLAS("LAPACKgeev",LAPACKgeev_(&zero,R,&bN,cwork,&sdummy,&idummy,&idummy,&bn,work,&lwork)); 87 ierr = PetscFPTrapPop();CHKERRQ(ierr); 88 ierr = PetscFree(work);CHKERRQ(ierr); 89 90 /* For now we stick with the convention of storing the real and imaginary 91 components of evalues separately. But is this what we really want? */ 92 ierr = PetscMalloc1(n,&perm);CHKERRQ(ierr); 93 94 #if !defined(PETSC_USE_COMPLEX) 95 for (i=0; i<n; i++) { 96 realpart[i] = cwork[2*i]; 97 perm[i] = i; 98 } 99 ierr = PetscSortRealWithPermutation(n,realpart,perm);CHKERRQ(ierr); 100 for (i=0; i<n; i++) { 101 r[i] = cwork[2*perm[i]]; 102 c[i] = cwork[2*perm[i]+1]; 103 } 104 #else 105 for (i=0; i<n; i++) { 106 realpart[i] = PetscRealPart(cwork[i]); 107 perm[i] = i; 108 } 109 ierr = PetscSortRealWithPermutation(n,realpart,perm);CHKERRQ(ierr); 110 for (i=0; i<n; i++) { 111 r[i] = PetscRealPart(cwork[perm[i]]); 112 c[i] = PetscImaginaryPart(cwork[perm[i]]); 113 } 114 #endif 115 ierr = PetscFree(perm);CHKERRQ(ierr); 116 #elif defined(PETSC_MISSING_LAPACK_GEEV) 117 PetscFunctionBegin; 118 SETERRQ(PetscObjectComm((PetscObject)ksp),PETSC_ERR_SUP,"GEEV - Lapack routine is unavailable\nNot able to provide eigen values."); 119 #elif !defined(PETSC_USE_COMPLEX) 120 KSP_GMRES *gmres = (KSP_GMRES*)ksp->data; 121 PetscErrorCode ierr; 122 PetscInt n = gmres->it + 1,N = gmres->max_k + 1,i,*perm; 123 PetscBLASInt bn, bN, lwork, idummy, lierr = -1; 124 PetscScalar *R = gmres->Rsvd,*work = R + N*N; 125 PetscScalar *realpart = gmres->Dsvd,*imagpart = realpart + N,sdummy = 0; 126 127 PetscFunctionBegin; 128 ierr = PetscBLASIntCast(n,&bn);CHKERRQ(ierr); 129 ierr = PetscBLASIntCast(N,&bN);CHKERRQ(ierr); 130 ierr = PetscBLASIntCast(5*N,&lwork);CHKERRQ(ierr); 131 ierr = PetscBLASIntCast(N,&idummy);CHKERRQ(ierr); 132 if (nmax < n) SETERRQ(PetscObjectComm((PetscObject)ksp),PETSC_ERR_ARG_SIZ,"Not enough room in work space r and c for eigenvalues"); 133 *neig = n; 134 135 if (!n) PetscFunctionReturn(0); 136 137 /* copy R matrix to work space */ 138 ierr = PetscArraycpy(R,gmres->hes_origin,N*N);CHKERRQ(ierr); 139 140 /* compute eigenvalues */ 141 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 142 PetscStackCallBLAS("LAPACKgeev",LAPACKgeev_("N","N",&bn,R,&bN,realpart,imagpart,&sdummy,&idummy,&sdummy,&idummy,work,&lwork,&lierr)); 143 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in LAPACK routine %d",(int)lierr); 144 ierr = PetscFPTrapPop();CHKERRQ(ierr); 145 ierr = PetscMalloc1(n,&perm);CHKERRQ(ierr); 146 for (i=0; i<n; i++) perm[i] = i; 147 ierr = PetscSortRealWithPermutation(n,realpart,perm);CHKERRQ(ierr); 148 for (i=0; i<n; i++) { 149 r[i] = realpart[perm[i]]; 150 c[i] = imagpart[perm[i]]; 151 } 152 ierr = PetscFree(perm);CHKERRQ(ierr); 153 #else 154 KSP_GMRES *gmres = (KSP_GMRES*)ksp->data; 155 PetscErrorCode ierr; 156 PetscInt n = gmres->it + 1,N = gmres->max_k + 1,i,*perm; 157 PetscScalar *R = gmres->Rsvd,*work = R + N*N,*eigs = work + 5*N,sdummy; 158 PetscBLASInt bn,bN,lwork,idummy,lierr = -1; 159 160 PetscFunctionBegin; 161 ierr = PetscBLASIntCast(n,&bn);CHKERRQ(ierr); 162 ierr = PetscBLASIntCast(N,&bN);CHKERRQ(ierr); 163 ierr = PetscBLASIntCast(5*N,&lwork);CHKERRQ(ierr); 164 ierr = PetscBLASIntCast(N,&idummy);CHKERRQ(ierr); 165 if (nmax < n) SETERRQ(PetscObjectComm((PetscObject)ksp),PETSC_ERR_ARG_SIZ,"Not enough room in work space r and c for eigenvalues"); 166 *neig = n; 167 168 if (!n) PetscFunctionReturn(0); 169 170 /* copy R matrix to work space */ 171 ierr = PetscArraycpy(R,gmres->hes_origin,N*N);CHKERRQ(ierr); 172 173 /* compute eigenvalues */ 174 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 175 PetscStackCallBLAS("LAPACKgeev",LAPACKgeev_("N","N",&bn,R,&bN,eigs,&sdummy,&idummy,&sdummy,&idummy,work,&lwork,gmres->Dsvd,&lierr)); 176 if (lierr) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in LAPACK routine"); 177 ierr = PetscFPTrapPop();CHKERRQ(ierr); 178 ierr = PetscMalloc1(n,&perm);CHKERRQ(ierr); 179 for (i=0; i<n; i++) perm[i] = i; 180 for (i=0; i<n; i++) r[i] = PetscRealPart(eigs[i]); 181 ierr = PetscSortRealWithPermutation(n,r,perm);CHKERRQ(ierr); 182 for (i=0; i<n; i++) { 183 r[i] = PetscRealPart(eigs[perm[i]]); 184 c[i] = PetscImaginaryPart(eigs[perm[i]]); 185 } 186 ierr = PetscFree(perm);CHKERRQ(ierr); 187 #endif 188 PetscFunctionReturn(0); 189 } 190 191 #if !defined(PETSC_USE_COMPLEX) && !defined(PETSC_HAVE_ESSL) 192 PetscErrorCode KSPComputeRitz_GMRES(KSP ksp,PetscBool ritz,PetscBool small,PetscInt *nrit,Vec S[],PetscReal *tetar,PetscReal *tetai) 193 { 194 KSP_GMRES *gmres = (KSP_GMRES*)ksp->data; 195 PetscErrorCode ierr; 196 PetscInt n = gmres->it + 1,N = gmres->max_k + 1,NbrRitz,nb=0; 197 PetscInt i,j,*perm; 198 PetscReal *H,*Q,*Ht; /* H Hessenberg Matrix and Q matrix of eigenvectors of H*/ 199 PetscReal *wr,*wi,*modul; /* Real and imaginary part and modul of the Ritz values*/ 200 PetscReal *SR,*work; 201 PetscBLASInt bn,bN,lwork,idummy; 202 PetscScalar *t,sdummy = 0; 203 204 PetscFunctionBegin; 205 /* n: size of the Hessenberg matrix */ 206 if (gmres->fullcycle) n = N-1; 207 /* NbrRitz: number of (harmonic) Ritz pairs to extract */ 208 NbrRitz = PetscMin(*nrit,n); 209 210 /* Definition of PetscBLASInt for lapack routines*/ 211 ierr = PetscBLASIntCast(n,&bn);CHKERRQ(ierr); 212 ierr = PetscBLASIntCast(N,&bN);CHKERRQ(ierr); 213 ierr = PetscBLASIntCast(N,&idummy);CHKERRQ(ierr); 214 ierr = PetscBLASIntCast(5*N,&lwork);CHKERRQ(ierr); 215 /* Memory allocation */ 216 ierr = PetscMalloc1(bN*bN,&H);CHKERRQ(ierr); 217 ierr = PetscMalloc1(bn*bn,&Q);CHKERRQ(ierr); 218 ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr); 219 ierr = PetscMalloc1(n,&wr);CHKERRQ(ierr); 220 ierr = PetscMalloc1(n,&wi);CHKERRQ(ierr); 221 222 /* copy H matrix to work space */ 223 if (gmres->fullcycle) { 224 ierr = PetscArraycpy(H,gmres->hes_ritz,bN*bN);CHKERRQ(ierr); 225 } else { 226 ierr = PetscArraycpy(H,gmres->hes_origin,bN*bN);CHKERRQ(ierr); 227 } 228 229 /* Modify H to compute Harmonic Ritz pairs H = H + H^{-T}*h^2_{m+1,m}e_m*e_m^T */ 230 if (!ritz) { 231 /* Transpose the Hessenberg matrix => Ht */ 232 ierr = PetscMalloc1(bn*bn,&Ht);CHKERRQ(ierr); 233 for (i=0; i<bn; i++) { 234 for (j=0; j<bn; j++) { 235 Ht[i*bn+j] = H[j*bN+i]; 236 } 237 } 238 /* Solve the system H^T*t = h^2_{m+1,m}e_m */ 239 ierr = PetscCalloc1(bn,&t);CHKERRQ(ierr); 240 /* t = h^2_{m+1,m}e_m */ 241 if (gmres->fullcycle) { 242 t[bn-1] = PetscSqr(gmres->hes_ritz[(bn-1)*bN+bn]); 243 } else { 244 t[bn-1] = PetscSqr(gmres->hes_origin[(bn-1)*bN+bn]); 245 } 246 /* Call the LAPACK routine dgesv to compute t = H^{-T}*t */ 247 #if defined(PETSC_MISSING_LAPACK_GESV) 248 SETERRQ(PetscObjectComm((PetscObject)ksp),PETSC_ERR_SUP,"GESV - Lapack routine is unavailable."); 249 #else 250 { 251 PetscBLASInt info; 252 PetscBLASInt nrhs = 1; 253 PetscBLASInt *ipiv; 254 ierr = PetscMalloc1(bn,&ipiv);CHKERRQ(ierr); 255 PetscStackCallBLAS("LAPACKgesv",LAPACKgesv_(&bn,&nrhs,Ht,&bn,ipiv,t,&bn,&info)); 256 if (info) SETERRQ(PetscObjectComm((PetscObject)ksp),PETSC_ERR_PLIB,"Error while calling the Lapack routine DGESV"); 257 ierr = PetscFree(ipiv);CHKERRQ(ierr); 258 ierr = PetscFree(Ht);CHKERRQ(ierr); 259 } 260 #endif 261 /* Now form H + H^{-T}*h^2_{m+1,m}e_m*e_m^T */ 262 for (i=0; i<bn; i++) H[(bn-1)*bn+i] += t[i]; 263 ierr = PetscFree(t);CHKERRQ(ierr); 264 } 265 266 /* Compute (harmonic) Ritz pairs */ 267 #if defined(PETSC_MISSING_LAPACK_HSEQR) 268 SETERRQ(PetscObjectComm((PetscObject)ksp),PETSC_ERR_SUP,"GEEV - Lapack routine is unavailable\nNot able to provide eigen values."); 269 #else 270 { 271 PetscBLASInt info; 272 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 273 PetscStackCallBLAS("LAPACKgeev",LAPACKgeev_("N","V",&bn,H,&bN,wr,wi,&sdummy,&idummy,Q,&bn,work,&lwork,&info)); 274 if (info) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in LAPACK routine"); 275 } 276 #endif 277 /* sort the (harmonic) Ritz values */ 278 ierr = PetscMalloc1(n,&modul);CHKERRQ(ierr); 279 ierr = PetscMalloc1(n,&perm);CHKERRQ(ierr); 280 for (i=0; i<n; i++) modul[i] = PetscSqrtReal(wr[i]*wr[i]+wi[i]*wi[i]); 281 for (i=0; i<n; i++) perm[i] = i; 282 ierr = PetscSortRealWithPermutation(n,modul,perm);CHKERRQ(ierr); 283 /* count the number of extracted Ritz or Harmonic Ritz pairs (with complex conjugates) */ 284 if (small) { 285 while (nb < NbrRitz) { 286 if (!wi[perm[nb]]) nb += 1; 287 else nb += 2; 288 } 289 ierr = PetscMalloc1(nb*n,&SR);CHKERRQ(ierr); 290 for (i=0; i<nb; i++) { 291 tetar[i] = wr[perm[i]]; 292 tetai[i] = wi[perm[i]]; 293 ierr = PetscArraycpy(&SR[i*n],&(Q[perm[i]*bn]),n);CHKERRQ(ierr); 294 } 295 } else { 296 while (nb < NbrRitz) { 297 if (wi[perm[n-nb-1]] == 0) nb += 1; 298 else nb += 2; 299 } 300 ierr = PetscMalloc1(nb*n,&SR);CHKERRQ(ierr); 301 for (i=0; i<nb; i++) { 302 tetar[i] = wr[perm[n-nb+i]]; 303 tetai[i] = wi[perm[n-nb+i]]; 304 ierr = PetscArraycpy(&SR[i*n], &(Q[perm[n-nb+i]*bn]), n);CHKERRQ(ierr); 305 } 306 } 307 ierr = PetscFree(modul);CHKERRQ(ierr); 308 ierr = PetscFree(perm);CHKERRQ(ierr); 309 310 /* Form the Ritz or Harmonic Ritz vectors S=VV*Sr, 311 where the columns of VV correspond to the basis of the Krylov subspace */ 312 if (gmres->fullcycle) { 313 for (j=0; j<nb; j++) { 314 ierr = VecZeroEntries(S[j]);CHKERRQ(ierr); 315 ierr = VecMAXPY(S[j],n,&SR[j*n],gmres->vecb);CHKERRQ(ierr); 316 } 317 } else { 318 for (j=0; j<nb; j++) { 319 ierr = VecZeroEntries(S[j]);CHKERRQ(ierr); 320 ierr = VecMAXPY(S[j],n,&SR[j*n],&VEC_VV(0));CHKERRQ(ierr); 321 } 322 } 323 *nrit = nb; 324 ierr = PetscFree(H);CHKERRQ(ierr); 325 ierr = PetscFree(Q);CHKERRQ(ierr); 326 ierr = PetscFree(SR);CHKERRQ(ierr); 327 ierr = PetscFree(wr);CHKERRQ(ierr); 328 ierr = PetscFree(wi);CHKERRQ(ierr); 329 PetscFunctionReturn(0); 330 } 331 #endif 332 333 334