1 #include <../src/ksp/ksp/impls/gmres/gmresimpl.h> 2 #include <petscblaslapack.h> 3 4 PetscErrorCode KSPComputeExtremeSingularValues_GMRES(KSP ksp, PetscReal *emax, PetscReal *emin) 5 { 6 KSP_GMRES *gmres = (KSP_GMRES *)ksp->data; 7 PetscInt n = gmres->it + 1, i, N = gmres->max_k + 2; 8 PetscBLASInt bn, bN, lwork, idummy, lierr; 9 PetscScalar *R = gmres->Rsvd, *work = R + N * N, sdummy = 0; 10 PetscReal *realpart = gmres->Dsvd; 11 12 PetscFunctionBegin; 13 PetscCall(PetscBLASIntCast(n, &bn)); 14 PetscCall(PetscBLASIntCast(N, &bN)); 15 PetscCall(PetscBLASIntCast(5 * N, &lwork)); 16 PetscCall(PetscBLASIntCast(N, &idummy)); 17 if (n <= 0) { 18 *emax = *emin = 1.0; 19 PetscFunctionReturn(PETSC_SUCCESS); 20 } 21 /* copy R matrix to work space */ 22 PetscCall(PetscArraycpy(R, gmres->hh_origin, (gmres->max_k + 2) * (gmres->max_k + 1))); 23 24 /* zero below diagonal garbage */ 25 for (i = 0; i < n; i++) R[i * N + i + 1] = 0.0; 26 27 /* compute Singular Values */ 28 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 29 #if !defined(PETSC_USE_COMPLEX) 30 PetscCallBLAS("LAPACKgesvd", LAPACKgesvd_("N", "N", &bn, &bn, R, &bN, realpart, &sdummy, &idummy, &sdummy, &idummy, work, &lwork, &lierr)); 31 #else 32 PetscCallBLAS("LAPACKgesvd", LAPACKgesvd_("N", "N", &bn, &bn, R, &bN, realpart, &sdummy, &idummy, &sdummy, &idummy, work, &lwork, realpart + N, &lierr)); 33 #endif 34 PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in SVD Lapack routine %d", (int)lierr); 35 PetscCall(PetscFPTrapPop()); 36 37 *emin = realpart[n - 1]; 38 *emax = realpart[0]; 39 PetscFunctionReturn(PETSC_SUCCESS); 40 } 41 42 PetscErrorCode KSPComputeEigenvalues_GMRES(KSP ksp, PetscInt nmax, PetscReal *r, PetscReal *c, PetscInt *neig) 43 { 44 #if !defined(PETSC_USE_COMPLEX) 45 KSP_GMRES *gmres = (KSP_GMRES *)ksp->data; 46 PetscInt n = gmres->it + 1, N = gmres->max_k + 1, i, *perm; 47 PetscBLASInt bn, bN, lwork, idummy, lierr = -1; 48 PetscScalar *R = gmres->Rsvd, *work = R + N * N; 49 PetscScalar *realpart = gmres->Dsvd, *imagpart = realpart + N, sdummy = 0; 50 51 PetscFunctionBegin; 52 PetscCall(PetscBLASIntCast(n, &bn)); 53 PetscCall(PetscBLASIntCast(N, &bN)); 54 PetscCall(PetscBLASIntCast(5 * N, &lwork)); 55 PetscCall(PetscBLASIntCast(N, &idummy)); 56 PetscCheck(nmax >= n, PetscObjectComm((PetscObject)ksp), PETSC_ERR_ARG_SIZ, "Not enough room in work space r and c for eigenvalues"); 57 *neig = n; 58 59 if (!n) PetscFunctionReturn(PETSC_SUCCESS); 60 61 /* copy R matrix to work space */ 62 PetscCall(PetscArraycpy(R, gmres->hes_origin, N * N)); 63 64 /* compute eigenvalues */ 65 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 66 PetscCallBLAS("LAPACKgeev", LAPACKgeev_("N", "N", &bn, R, &bN, realpart, imagpart, &sdummy, &idummy, &sdummy, &idummy, work, &lwork, &lierr)); 67 PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in LAPACK routine %d", (int)lierr); 68 PetscCall(PetscFPTrapPop()); 69 PetscCall(PetscMalloc1(n, &perm)); 70 for (i = 0; i < n; i++) perm[i] = i; 71 PetscCall(PetscSortRealWithPermutation(n, realpart, perm)); 72 for (i = 0; i < n; i++) { 73 r[i] = realpart[perm[i]]; 74 c[i] = imagpart[perm[i]]; 75 } 76 PetscCall(PetscFree(perm)); 77 #else 78 KSP_GMRES *gmres = (KSP_GMRES *)ksp->data; 79 PetscInt n = gmres->it + 1, N = gmres->max_k + 1, i, *perm; 80 PetscScalar *R = gmres->Rsvd, *work = R + N * N, *eigs = work + 5 * N, sdummy; 81 PetscBLASInt bn, bN, lwork, idummy, lierr = -1; 82 83 PetscFunctionBegin; 84 PetscCall(PetscBLASIntCast(n, &bn)); 85 PetscCall(PetscBLASIntCast(N, &bN)); 86 PetscCall(PetscBLASIntCast(5 * N, &lwork)); 87 PetscCall(PetscBLASIntCast(N, &idummy)); 88 PetscCheck(nmax >= n, PetscObjectComm((PetscObject)ksp), PETSC_ERR_ARG_SIZ, "Not enough room in work space r and c for eigenvalues"); 89 *neig = n; 90 91 if (!n) PetscFunctionReturn(PETSC_SUCCESS); 92 93 /* copy R matrix to work space */ 94 PetscCall(PetscArraycpy(R, gmres->hes_origin, N * N)); 95 96 /* compute eigenvalues */ 97 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 98 PetscCallBLAS("LAPACKgeev", LAPACKgeev_("N", "N", &bn, R, &bN, eigs, &sdummy, &idummy, &sdummy, &idummy, work, &lwork, gmres->Dsvd, &lierr)); 99 PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in LAPACK routine"); 100 PetscCall(PetscFPTrapPop()); 101 PetscCall(PetscMalloc1(n, &perm)); 102 for (i = 0; i < n; i++) perm[i] = i; 103 for (i = 0; i < n; i++) r[i] = PetscRealPart(eigs[i]); 104 PetscCall(PetscSortRealWithPermutation(n, r, perm)); 105 for (i = 0; i < n; i++) { 106 r[i] = PetscRealPart(eigs[perm[i]]); 107 c[i] = PetscImaginaryPart(eigs[perm[i]]); 108 } 109 PetscCall(PetscFree(perm)); 110 #endif 111 PetscFunctionReturn(PETSC_SUCCESS); 112 } 113 114 PetscErrorCode KSPComputeRitz_GMRES(KSP ksp, PetscBool ritz, PetscBool small, PetscInt *nrit, Vec S[], PetscReal *tetar, PetscReal *tetai) 115 { 116 KSP_GMRES *gmres = (KSP_GMRES *)ksp->data; 117 PetscInt NbrRitz, nb = 0, n; 118 PetscInt i, j, *perm; 119 PetscScalar *H, *Q, *Ht; /* H Hessenberg matrix; Q matrix of eigenvectors of H */ 120 PetscScalar *wr, *wi; /* Real and imaginary part of the Ritz values */ 121 PetscScalar *SR, *work; 122 PetscReal *modul; 123 PetscBLASInt bn, bN, lwork, idummy; 124 PetscScalar *t, sdummy = 0; 125 Mat A; 126 127 PetscFunctionBegin; 128 /* Express sizes in PetscBLASInt for LAPACK routines*/ 129 PetscCall(PetscBLASIntCast(gmres->fullcycle ? gmres->max_k : gmres->it + 1, &bn)); /* size of the Hessenberg matrix */ 130 PetscCall(PetscBLASIntCast(gmres->max_k + 1, &bN)); /* LDA of the Hessenberg matrix */ 131 PetscCall(PetscBLASIntCast(gmres->max_k + 1, &idummy)); 132 PetscCall(PetscBLASIntCast(5 * (gmres->max_k + 1) * (gmres->max_k + 1), &lwork)); 133 134 /* NbrRitz: number of (Harmonic) Ritz pairs to extract */ 135 NbrRitz = PetscMin(*nrit, bn); 136 PetscCall(KSPGetOperators(ksp, &A, NULL)); 137 PetscCall(MatGetSize(A, &n, NULL)); 138 NbrRitz = PetscMin(NbrRitz, n); 139 140 PetscCall(PetscMalloc4(bN * bN, &H, bn * bn, &Q, bn, &wr, bn, &wi)); 141 142 /* copy H matrix to work space */ 143 PetscCall(PetscArraycpy(H, gmres->fullcycle ? gmres->hes_ritz : gmres->hes_origin, bN * bN)); 144 145 /* Modify H to compute Harmonic Ritz pairs H = H + H^{-T}*h^2_{m+1,m}e_m*e_m^T */ 146 if (!ritz) { 147 /* Transpose the Hessenberg matrix => Ht */ 148 PetscCall(PetscMalloc1(bn * bn, &Ht)); 149 for (i = 0; i < bn; i++) { 150 for (j = 0; j < bn; j++) Ht[i * bn + j] = PetscConj(H[j * bN + i]); 151 } 152 /* Solve the system H^T*t = h^2_{m+1,m}e_m */ 153 PetscCall(PetscCalloc1(bn, &t)); 154 /* t = h^2_{m+1,m}e_m */ 155 if (gmres->fullcycle) t[bn - 1] = PetscSqr(gmres->hes_ritz[(bn - 1) * bN + bn]); 156 else t[bn - 1] = PetscSqr(gmres->hes_origin[(bn - 1) * bN + bn]); 157 158 /* Call the LAPACK routine dgesv to compute t = H^{-T}*t */ 159 { 160 PetscBLASInt info; 161 PetscBLASInt nrhs = 1; 162 PetscBLASInt *ipiv; 163 PetscCall(PetscMalloc1(bn, &ipiv)); 164 PetscCallBLAS("LAPACKgesv", LAPACKgesv_(&bn, &nrhs, Ht, &bn, ipiv, t, &bn, &info)); 165 PetscCheck(!info, PetscObjectComm((PetscObject)ksp), PETSC_ERR_PLIB, "Error while calling the Lapack routine DGESV"); 166 PetscCall(PetscFree(ipiv)); 167 PetscCall(PetscFree(Ht)); 168 } 169 /* Form H + H^{-T}*h^2_{m+1,m}e_m*e_m^T */ 170 for (i = 0; i < bn; i++) H[(bn - 1) * bn + i] += t[i]; 171 PetscCall(PetscFree(t)); 172 } 173 174 /* 175 Compute (Harmonic) Ritz pairs; 176 For a real Ritz eigenvector at wr(j) Q(:,j) columns contain the real right eigenvector 177 For a complex Ritz pair of eigenvectors at wr(j), wi(j), wr(j+1), and wi(j+1), Q(:,j) + i Q(:,j+1) and Q(:,j) - i Q(:,j+1) are the two eigenvectors 178 */ 179 { 180 PetscBLASInt info; 181 #if defined(PETSC_USE_COMPLEX) 182 PetscReal *rwork = NULL; 183 #endif 184 PetscCall(PetscMalloc1(lwork, &work)); 185 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 186 #if !defined(PETSC_USE_COMPLEX) 187 PetscCallBLAS("LAPACKgeev", LAPACKgeev_("N", "V", &bn, H, &bN, wr, wi, &sdummy, &idummy, Q, &bn, work, &lwork, &info)); 188 #else 189 PetscCall(PetscMalloc1(2 * n, &rwork)); 190 PetscCallBLAS("LAPACKgeev", LAPACKgeev_("N", "V", &bn, H, &bN, wr, &sdummy, &idummy, Q, &bn, work, &lwork, rwork, &info)); 191 PetscCall(PetscFree(rwork)); 192 #endif 193 PetscCheck(!info, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in LAPACK routine"); 194 PetscCall(PetscFPTrapPop()); 195 PetscCall(PetscFree(work)); 196 } 197 /* sort the (Harmonic) Ritz values */ 198 PetscCall(PetscMalloc2(bn, &modul, bn, &perm)); 199 #if defined(PETSC_USE_COMPLEX) 200 for (i = 0; i < bn; i++) modul[i] = PetscAbsScalar(wr[i]); 201 #else 202 for (i = 0; i < bn; i++) modul[i] = PetscSqrtReal(wr[i] * wr[i] + wi[i] * wi[i]); 203 #endif 204 for (i = 0; i < bn; i++) perm[i] = i; 205 PetscCall(PetscSortRealWithPermutation(bn, modul, perm)); 206 207 #if defined(PETSC_USE_COMPLEX) 208 /* sort extracted (Harmonic) Ritz pairs */ 209 nb = NbrRitz; 210 PetscCall(PetscMalloc1(nb * bn, &SR)); 211 for (i = 0; i < nb; i++) { 212 if (small) { 213 tetar[i] = PetscRealPart(wr[perm[i]]); 214 tetai[i] = PetscImaginaryPart(wr[perm[i]]); 215 PetscCall(PetscArraycpy(&SR[i * bn], &(Q[perm[i] * bn]), bn)); 216 } else { 217 tetar[i] = PetscRealPart(wr[perm[bn - nb + i]]); 218 tetai[i] = PetscImaginaryPart(wr[perm[bn - nb + i]]); 219 PetscCall(PetscArraycpy(&SR[i * bn], &(Q[perm[bn - nb + i] * bn]), bn)); /* permute columns of Q */ 220 } 221 } 222 #else 223 /* count the number of extracted (Harmonic) Ritz pairs (with complex conjugates) */ 224 if (small) { 225 while (nb < NbrRitz) { 226 if (!wi[perm[nb]]) nb += 1; 227 else { 228 if (nb < NbrRitz - 1) nb += 2; 229 else break; 230 } 231 } 232 PetscCall(PetscMalloc1(nb * bn, &SR)); 233 for (i = 0; i < nb; i++) { 234 tetar[i] = wr[perm[i]]; 235 tetai[i] = wi[perm[i]]; 236 PetscCall(PetscArraycpy(&SR[i * bn], &(Q[perm[i] * bn]), bn)); 237 } 238 } else { 239 while (nb < NbrRitz) { 240 if (wi[perm[bn - nb - 1]] == 0) nb += 1; 241 else { 242 if (nb < NbrRitz - 1) nb += 2; 243 else break; 244 } 245 } 246 PetscCall(PetscMalloc1(nb * bn, &SR)); /* bn rows, nb columns */ 247 for (i = 0; i < nb; i++) { 248 tetar[i] = wr[perm[bn - nb + i]]; 249 tetai[i] = wi[perm[bn - nb + i]]; 250 PetscCall(PetscArraycpy(&SR[i * bn], &(Q[perm[bn - nb + i] * bn]), bn)); /* permute columns of Q */ 251 } 252 } 253 #endif 254 PetscCall(PetscFree2(modul, perm)); 255 PetscCall(PetscFree4(H, Q, wr, wi)); 256 257 /* Form the (Harmonic) Ritz vectors S = SR*V, columns of VV correspond to the basis of the Krylov subspace */ 258 for (j = 0; j < nb; j++) PetscCall(VecMAXPBY(S[j], bn, &SR[j * bn], 0, gmres->fullcycle ? gmres->vecb : &VEC_VV(0))); 259 260 PetscCall(PetscFree(SR)); 261 *nrit = nb; 262 PetscFunctionReturn(PETSC_SUCCESS); 263 } 264