1 2 #include <../src/ksp/ksp/impls/gmres/gmresimpl.h> 3 #include <petscblaslapack.h> 4 5 PetscErrorCode KSPComputeExtremeSingularValues_GMRES(KSP ksp,PetscReal *emax,PetscReal *emin) 6 { 7 KSP_GMRES *gmres = (KSP_GMRES*)ksp->data; 8 PetscErrorCode ierr; 9 PetscInt n = gmres->it + 1,i,N = gmres->max_k + 2; 10 PetscBLASInt bn, bN,lwork, idummy,lierr; 11 PetscScalar *R = gmres->Rsvd,*work = R + N*N,sdummy = 0; 12 PetscReal *realpart = gmres->Dsvd; 13 14 PetscFunctionBegin; 15 ierr = PetscBLASIntCast(n,&bn);CHKERRQ(ierr); 16 ierr = PetscBLASIntCast(N,&bN);CHKERRQ(ierr); 17 ierr = PetscBLASIntCast(5*N,&lwork);CHKERRQ(ierr); 18 ierr = PetscBLASIntCast(N,&idummy);CHKERRQ(ierr); 19 if (n <= 0) { 20 *emax = *emin = 1.0; 21 PetscFunctionReturn(0); 22 } 23 /* copy R matrix to work space */ 24 ierr = PetscArraycpy(R,gmres->hh_origin,(gmres->max_k+2)*(gmres->max_k+1));CHKERRQ(ierr); 25 26 /* zero below diagonal garbage */ 27 for (i=0; i<n; i++) R[i*N+i+1] = 0.0; 28 29 /* compute Singular Values */ 30 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 31 #if !defined(PETSC_USE_COMPLEX) 32 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("N","N",&bn,&bn,R,&bN,realpart,&sdummy,&idummy,&sdummy,&idummy,work,&lwork,&lierr)); 33 #else 34 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("N","N",&bn,&bn,R,&bN,realpart,&sdummy,&idummy,&sdummy,&idummy,work,&lwork,realpart+N,&lierr)); 35 #endif 36 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SVD Lapack routine %d",(int)lierr); 37 ierr = PetscFPTrapPop();CHKERRQ(ierr); 38 39 *emin = realpart[n-1]; 40 *emax = realpart[0]; 41 PetscFunctionReturn(0); 42 } 43 44 /* ------------------------------------------------------------------------ */ 45 /* ESSL has a different calling sequence for dgeev() and zgeev() than standard LAPACK */ 46 PetscErrorCode KSPComputeEigenvalues_GMRES(KSP ksp,PetscInt nmax,PetscReal *r,PetscReal *c,PetscInt *neig) 47 { 48 #if defined(PETSC_HAVE_ESSL) 49 KSP_GMRES *gmres = (KSP_GMRES*)ksp->data; 50 PetscErrorCode ierr; 51 PetscInt n = gmres->it + 1,N = gmres->max_k + 1; 52 PetscInt i,*perm; 53 PetscScalar *R = gmres->Rsvd; 54 PetscScalar *cwork = R + N*N,sdummy = 0; 55 PetscReal *work,*realpart = gmres->Dsvd; 56 PetscBLASInt zero = 0,bn,bN,idummy = -1,lwork; 57 58 PetscFunctionBegin; 59 ierr = PetscBLASIntCast(n,&bn);CHKERRQ(ierr); 60 ierr = PetscBLASIntCast(N,&bN);CHKERRQ(ierr); 61 ierr = PetscBLASIntCast(5*N,&lwork);CHKERRQ(ierr); 62 if (nmax < n) SETERRQ(PetscObjectComm((PetscObject)ksp),PETSC_ERR_ARG_SIZ,"Not enough room in work space r and c for eigenvalues"); 63 *neig = n; 64 65 if (!n) PetscFunctionReturn(0); 66 67 /* copy R matrix to work space */ 68 ierr = PetscArraycpy(R,gmres->hes_origin,N*N);CHKERRQ(ierr); 69 70 /* compute eigenvalues */ 71 72 /* for ESSL version need really cwork of length N (complex), 2N 73 (real); already at least 5N of space has been allocated */ 74 75 ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr); 76 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 77 PetscStackCallBLAS("LAPACKgeev",LAPACKgeev_(&zero,R,&bN,cwork,&sdummy,&idummy,&idummy,&bn,work,&lwork)); 78 ierr = PetscFPTrapPop();CHKERRQ(ierr); 79 ierr = PetscFree(work);CHKERRQ(ierr); 80 81 /* For now we stick with the convention of storing the real and imaginary 82 components of evalues separately. But is this what we really want? */ 83 ierr = PetscMalloc1(n,&perm);CHKERRQ(ierr); 84 85 #if !defined(PETSC_USE_COMPLEX) 86 for (i=0; i<n; i++) { 87 realpart[i] = cwork[2*i]; 88 perm[i] = i; 89 } 90 ierr = PetscSortRealWithPermutation(n,realpart,perm);CHKERRQ(ierr); 91 for (i=0; i<n; i++) { 92 r[i] = cwork[2*perm[i]]; 93 c[i] = cwork[2*perm[i]+1]; 94 } 95 #else 96 for (i=0; i<n; i++) { 97 realpart[i] = PetscRealPart(cwork[i]); 98 perm[i] = i; 99 } 100 ierr = PetscSortRealWithPermutation(n,realpart,perm);CHKERRQ(ierr); 101 for (i=0; i<n; i++) { 102 r[i] = PetscRealPart(cwork[perm[i]]); 103 c[i] = PetscImaginaryPart(cwork[perm[i]]); 104 } 105 #endif 106 ierr = PetscFree(perm);CHKERRQ(ierr); 107 #elif !defined(PETSC_USE_COMPLEX) 108 KSP_GMRES *gmres = (KSP_GMRES*)ksp->data; 109 PetscErrorCode ierr; 110 PetscInt n = gmres->it + 1,N = gmres->max_k + 1,i,*perm; 111 PetscBLASInt bn, bN, lwork, idummy, lierr = -1; 112 PetscScalar *R = gmres->Rsvd,*work = R + N*N; 113 PetscScalar *realpart = gmres->Dsvd,*imagpart = realpart + N,sdummy = 0; 114 115 PetscFunctionBegin; 116 ierr = PetscBLASIntCast(n,&bn);CHKERRQ(ierr); 117 ierr = PetscBLASIntCast(N,&bN);CHKERRQ(ierr); 118 ierr = PetscBLASIntCast(5*N,&lwork);CHKERRQ(ierr); 119 ierr = PetscBLASIntCast(N,&idummy);CHKERRQ(ierr); 120 if (nmax < n) SETERRQ(PetscObjectComm((PetscObject)ksp),PETSC_ERR_ARG_SIZ,"Not enough room in work space r and c for eigenvalues"); 121 *neig = n; 122 123 if (!n) PetscFunctionReturn(0); 124 125 /* copy R matrix to work space */ 126 ierr = PetscArraycpy(R,gmres->hes_origin,N*N);CHKERRQ(ierr); 127 128 /* compute eigenvalues */ 129 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 130 PetscStackCallBLAS("LAPACKgeev",LAPACKgeev_("N","N",&bn,R,&bN,realpart,imagpart,&sdummy,&idummy,&sdummy,&idummy,work,&lwork,&lierr)); 131 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in LAPACK routine %d",(int)lierr); 132 ierr = PetscFPTrapPop();CHKERRQ(ierr); 133 ierr = PetscMalloc1(n,&perm);CHKERRQ(ierr); 134 for (i=0; i<n; i++) perm[i] = i; 135 ierr = PetscSortRealWithPermutation(n,realpart,perm);CHKERRQ(ierr); 136 for (i=0; i<n; i++) { 137 r[i] = realpart[perm[i]]; 138 c[i] = imagpart[perm[i]]; 139 } 140 ierr = PetscFree(perm);CHKERRQ(ierr); 141 #else 142 KSP_GMRES *gmres = (KSP_GMRES*)ksp->data; 143 PetscErrorCode ierr; 144 PetscInt n = gmres->it + 1,N = gmres->max_k + 1,i,*perm; 145 PetscScalar *R = gmres->Rsvd,*work = R + N*N,*eigs = work + 5*N,sdummy; 146 PetscBLASInt bn,bN,lwork,idummy,lierr = -1; 147 148 PetscFunctionBegin; 149 ierr = PetscBLASIntCast(n,&bn);CHKERRQ(ierr); 150 ierr = PetscBLASIntCast(N,&bN);CHKERRQ(ierr); 151 ierr = PetscBLASIntCast(5*N,&lwork);CHKERRQ(ierr); 152 ierr = PetscBLASIntCast(N,&idummy);CHKERRQ(ierr); 153 if (nmax < n) SETERRQ(PetscObjectComm((PetscObject)ksp),PETSC_ERR_ARG_SIZ,"Not enough room in work space r and c for eigenvalues"); 154 *neig = n; 155 156 if (!n) PetscFunctionReturn(0); 157 158 /* copy R matrix to work space */ 159 ierr = PetscArraycpy(R,gmres->hes_origin,N*N);CHKERRQ(ierr); 160 161 /* compute eigenvalues */ 162 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 163 PetscStackCallBLAS("LAPACKgeev",LAPACKgeev_("N","N",&bn,R,&bN,eigs,&sdummy,&idummy,&sdummy,&idummy,work,&lwork,gmres->Dsvd,&lierr)); 164 if (lierr) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in LAPACK routine"); 165 ierr = PetscFPTrapPop();CHKERRQ(ierr); 166 ierr = PetscMalloc1(n,&perm);CHKERRQ(ierr); 167 for (i=0; i<n; i++) perm[i] = i; 168 for (i=0; i<n; i++) r[i] = PetscRealPart(eigs[i]); 169 ierr = PetscSortRealWithPermutation(n,r,perm);CHKERRQ(ierr); 170 for (i=0; i<n; i++) { 171 r[i] = PetscRealPart(eigs[perm[i]]); 172 c[i] = PetscImaginaryPart(eigs[perm[i]]); 173 } 174 ierr = PetscFree(perm);CHKERRQ(ierr); 175 #endif 176 PetscFunctionReturn(0); 177 } 178 179 #if !defined(PETSC_USE_COMPLEX) && !defined(PETSC_HAVE_ESSL) 180 PetscErrorCode KSPComputeRitz_GMRES(KSP ksp,PetscBool ritz,PetscBool small,PetscInt *nrit,Vec S[],PetscReal *tetar,PetscReal *tetai) 181 { 182 KSP_GMRES *gmres = (KSP_GMRES*)ksp->data; 183 PetscErrorCode ierr; 184 PetscInt n = gmres->it + 1,N = gmres->max_k + 1,NbrRitz,nb=0; 185 PetscInt i,j,*perm; 186 PetscReal *H,*Q,*Ht; /* H Hessenberg Matrix and Q matrix of eigenvectors of H*/ 187 PetscReal *wr,*wi,*modul; /* Real and imaginary part and modul of the Ritz values*/ 188 PetscReal *SR,*work; 189 PetscBLASInt bn,bN,lwork,idummy; 190 PetscScalar *t,sdummy = 0; 191 192 PetscFunctionBegin; 193 /* n: size of the Hessenberg matrix */ 194 if (gmres->fullcycle) n = N-1; 195 /* NbrRitz: number of (harmonic) Ritz pairs to extract */ 196 NbrRitz = PetscMin(*nrit,n); 197 198 /* Definition of PetscBLASInt for lapack routines*/ 199 ierr = PetscBLASIntCast(n,&bn);CHKERRQ(ierr); 200 ierr = PetscBLASIntCast(N,&bN);CHKERRQ(ierr); 201 ierr = PetscBLASIntCast(N,&idummy);CHKERRQ(ierr); 202 ierr = PetscBLASIntCast(5*N,&lwork);CHKERRQ(ierr); 203 /* Memory allocation */ 204 ierr = PetscMalloc1(bN*bN,&H);CHKERRQ(ierr); 205 ierr = PetscMalloc1(bn*bn,&Q);CHKERRQ(ierr); 206 ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr); 207 ierr = PetscMalloc1(n,&wr);CHKERRQ(ierr); 208 ierr = PetscMalloc1(n,&wi);CHKERRQ(ierr); 209 210 /* copy H matrix to work space */ 211 if (gmres->fullcycle) { 212 ierr = PetscArraycpy(H,gmres->hes_ritz,bN*bN);CHKERRQ(ierr); 213 } else { 214 ierr = PetscArraycpy(H,gmres->hes_origin,bN*bN);CHKERRQ(ierr); 215 } 216 217 /* Modify H to compute Harmonic Ritz pairs H = H + H^{-T}*h^2_{m+1,m}e_m*e_m^T */ 218 if (!ritz) { 219 /* Transpose the Hessenberg matrix => Ht */ 220 ierr = PetscMalloc1(bn*bn,&Ht);CHKERRQ(ierr); 221 for (i=0; i<bn; i++) { 222 for (j=0; j<bn; j++) { 223 Ht[i*bn+j] = H[j*bN+i]; 224 } 225 } 226 /* Solve the system H^T*t = h^2_{m+1,m}e_m */ 227 ierr = PetscCalloc1(bn,&t);CHKERRQ(ierr); 228 /* t = h^2_{m+1,m}e_m */ 229 if (gmres->fullcycle) { 230 t[bn-1] = PetscSqr(gmres->hes_ritz[(bn-1)*bN+bn]); 231 } else { 232 t[bn-1] = PetscSqr(gmres->hes_origin[(bn-1)*bN+bn]); 233 } 234 /* Call the LAPACK routine dgesv to compute t = H^{-T}*t */ 235 { 236 PetscBLASInt info; 237 PetscBLASInt nrhs = 1; 238 PetscBLASInt *ipiv; 239 ierr = PetscMalloc1(bn,&ipiv);CHKERRQ(ierr); 240 PetscStackCallBLAS("LAPACKgesv",LAPACKgesv_(&bn,&nrhs,Ht,&bn,ipiv,t,&bn,&info)); 241 if (info) SETERRQ(PetscObjectComm((PetscObject)ksp),PETSC_ERR_PLIB,"Error while calling the Lapack routine DGESV"); 242 ierr = PetscFree(ipiv);CHKERRQ(ierr); 243 ierr = PetscFree(Ht);CHKERRQ(ierr); 244 } 245 /* Now form H + H^{-T}*h^2_{m+1,m}e_m*e_m^T */ 246 for (i=0; i<bn; i++) H[(bn-1)*bn+i] += t[i]; 247 ierr = PetscFree(t);CHKERRQ(ierr); 248 } 249 250 /* Compute (harmonic) Ritz pairs */ 251 { 252 PetscBLASInt info; 253 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 254 PetscStackCallBLAS("LAPACKgeev",LAPACKgeev_("N","V",&bn,H,&bN,wr,wi,&sdummy,&idummy,Q,&bn,work,&lwork,&info)); 255 if (info) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in LAPACK routine"); 256 } 257 /* sort the (harmonic) Ritz values */ 258 ierr = PetscMalloc1(n,&modul);CHKERRQ(ierr); 259 ierr = PetscMalloc1(n,&perm);CHKERRQ(ierr); 260 for (i=0; i<n; i++) modul[i] = PetscSqrtReal(wr[i]*wr[i]+wi[i]*wi[i]); 261 for (i=0; i<n; i++) perm[i] = i; 262 ierr = PetscSortRealWithPermutation(n,modul,perm);CHKERRQ(ierr); 263 /* count the number of extracted Ritz or Harmonic Ritz pairs (with complex conjugates) */ 264 if (small) { 265 while (nb < NbrRitz) { 266 if (!wi[perm[nb]]) nb += 1; 267 else nb += 2; 268 } 269 ierr = PetscMalloc1(nb*n,&SR);CHKERRQ(ierr); 270 for (i=0; i<nb; i++) { 271 tetar[i] = wr[perm[i]]; 272 tetai[i] = wi[perm[i]]; 273 ierr = PetscArraycpy(&SR[i*n],&(Q[perm[i]*bn]),n);CHKERRQ(ierr); 274 } 275 } else { 276 while (nb < NbrRitz) { 277 if (wi[perm[n-nb-1]] == 0) nb += 1; 278 else nb += 2; 279 } 280 ierr = PetscMalloc1(nb*n,&SR);CHKERRQ(ierr); 281 for (i=0; i<nb; i++) { 282 tetar[i] = wr[perm[n-nb+i]]; 283 tetai[i] = wi[perm[n-nb+i]]; 284 ierr = PetscArraycpy(&SR[i*n], &(Q[perm[n-nb+i]*bn]), n);CHKERRQ(ierr); 285 } 286 } 287 ierr = PetscFree(modul);CHKERRQ(ierr); 288 ierr = PetscFree(perm);CHKERRQ(ierr); 289 290 /* Form the Ritz or Harmonic Ritz vectors S=VV*Sr, 291 where the columns of VV correspond to the basis of the Krylov subspace */ 292 if (gmres->fullcycle) { 293 for (j=0; j<nb; j++) { 294 ierr = VecZeroEntries(S[j]);CHKERRQ(ierr); 295 ierr = VecMAXPY(S[j],n,&SR[j*n],gmres->vecb);CHKERRQ(ierr); 296 } 297 } else { 298 for (j=0; j<nb; j++) { 299 ierr = VecZeroEntries(S[j]);CHKERRQ(ierr); 300 ierr = VecMAXPY(S[j],n,&SR[j*n],&VEC_VV(0));CHKERRQ(ierr); 301 } 302 } 303 *nrit = nb; 304 ierr = PetscFree(H);CHKERRQ(ierr); 305 ierr = PetscFree(Q);CHKERRQ(ierr); 306 ierr = PetscFree(SR);CHKERRQ(ierr); 307 ierr = PetscFree(wr);CHKERRQ(ierr); 308 ierr = PetscFree(wi);CHKERRQ(ierr); 309 PetscFunctionReturn(0); 310 } 311 #endif 312 313 314