1 2 #include <../src/ksp/ksp/impls/gmres/gmresimpl.h> 3 #include <petscblaslapack.h> 4 5 PetscErrorCode KSPComputeExtremeSingularValues_GMRES(KSP ksp, PetscReal *emax, PetscReal *emin) 6 { 7 KSP_GMRES *gmres = (KSP_GMRES *)ksp->data; 8 PetscInt n = gmres->it + 1, i, N = gmres->max_k + 2; 9 PetscBLASInt bn, bN, lwork, idummy, lierr; 10 PetscScalar *R = gmres->Rsvd, *work = R + N * N, sdummy = 0; 11 PetscReal *realpart = gmres->Dsvd; 12 13 PetscFunctionBegin; 14 PetscCall(PetscBLASIntCast(n, &bn)); 15 PetscCall(PetscBLASIntCast(N, &bN)); 16 PetscCall(PetscBLASIntCast(5 * N, &lwork)); 17 PetscCall(PetscBLASIntCast(N, &idummy)); 18 if (n <= 0) { 19 *emax = *emin = 1.0; 20 PetscFunctionReturn(0); 21 } 22 /* copy R matrix to work space */ 23 PetscCall(PetscArraycpy(R, gmres->hh_origin, (gmres->max_k + 2) * (gmres->max_k + 1))); 24 25 /* zero below diagonal garbage */ 26 for (i = 0; i < n; i++) R[i * N + i + 1] = 0.0; 27 28 /* compute Singular Values */ 29 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 30 #if !defined(PETSC_USE_COMPLEX) 31 PetscCallBLAS("LAPACKgesvd", LAPACKgesvd_("N", "N", &bn, &bn, R, &bN, realpart, &sdummy, &idummy, &sdummy, &idummy, work, &lwork, &lierr)); 32 #else 33 PetscCallBLAS("LAPACKgesvd", LAPACKgesvd_("N", "N", &bn, &bn, R, &bN, realpart, &sdummy, &idummy, &sdummy, &idummy, work, &lwork, realpart + N, &lierr)); 34 #endif 35 PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in SVD Lapack routine %d", (int)lierr); 36 PetscCall(PetscFPTrapPop()); 37 38 *emin = realpart[n - 1]; 39 *emax = realpart[0]; 40 PetscFunctionReturn(0); 41 } 42 43 PetscErrorCode KSPComputeEigenvalues_GMRES(KSP ksp, PetscInt nmax, PetscReal *r, PetscReal *c, PetscInt *neig) 44 { 45 #if !defined(PETSC_USE_COMPLEX) 46 KSP_GMRES *gmres = (KSP_GMRES *)ksp->data; 47 PetscInt n = gmres->it + 1, N = gmres->max_k + 1, i, *perm; 48 PetscBLASInt bn, bN, lwork, idummy, lierr = -1; 49 PetscScalar *R = gmres->Rsvd, *work = R + N * N; 50 PetscScalar *realpart = gmres->Dsvd, *imagpart = realpart + N, sdummy = 0; 51 52 PetscFunctionBegin; 53 PetscCall(PetscBLASIntCast(n, &bn)); 54 PetscCall(PetscBLASIntCast(N, &bN)); 55 PetscCall(PetscBLASIntCast(5 * N, &lwork)); 56 PetscCall(PetscBLASIntCast(N, &idummy)); 57 PetscCheck(nmax >= n, PetscObjectComm((PetscObject)ksp), PETSC_ERR_ARG_SIZ, "Not enough room in work space r and c for eigenvalues"); 58 *neig = n; 59 60 if (!n) PetscFunctionReturn(0); 61 62 /* copy R matrix to work space */ 63 PetscCall(PetscArraycpy(R, gmres->hes_origin, N * N)); 64 65 /* compute eigenvalues */ 66 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 67 PetscCallBLAS("LAPACKgeev", LAPACKgeev_("N", "N", &bn, R, &bN, realpart, imagpart, &sdummy, &idummy, &sdummy, &idummy, work, &lwork, &lierr)); 68 PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in LAPACK routine %d", (int)lierr); 69 PetscCall(PetscFPTrapPop()); 70 PetscCall(PetscMalloc1(n, &perm)); 71 for (i = 0; i < n; i++) perm[i] = i; 72 PetscCall(PetscSortRealWithPermutation(n, realpart, perm)); 73 for (i = 0; i < n; i++) { 74 r[i] = realpart[perm[i]]; 75 c[i] = imagpart[perm[i]]; 76 } 77 PetscCall(PetscFree(perm)); 78 #else 79 KSP_GMRES *gmres = (KSP_GMRES *)ksp->data; 80 PetscInt n = gmres->it + 1, N = gmres->max_k + 1, i, *perm; 81 PetscScalar *R = gmres->Rsvd, *work = R + N * N, *eigs = work + 5 * N, sdummy; 82 PetscBLASInt bn, bN, lwork, idummy, lierr = -1; 83 84 PetscFunctionBegin; 85 PetscCall(PetscBLASIntCast(n, &bn)); 86 PetscCall(PetscBLASIntCast(N, &bN)); 87 PetscCall(PetscBLASIntCast(5 * N, &lwork)); 88 PetscCall(PetscBLASIntCast(N, &idummy)); 89 PetscCheck(nmax >= n, PetscObjectComm((PetscObject)ksp), PETSC_ERR_ARG_SIZ, "Not enough room in work space r and c for eigenvalues"); 90 *neig = n; 91 92 if (!n) PetscFunctionReturn(0); 93 94 /* copy R matrix to work space */ 95 PetscCall(PetscArraycpy(R, gmres->hes_origin, N * N)); 96 97 /* compute eigenvalues */ 98 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 99 PetscCallBLAS("LAPACKgeev", LAPACKgeev_("N", "N", &bn, R, &bN, eigs, &sdummy, &idummy, &sdummy, &idummy, work, &lwork, gmres->Dsvd, &lierr)); 100 PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in LAPACK routine"); 101 PetscCall(PetscFPTrapPop()); 102 PetscCall(PetscMalloc1(n, &perm)); 103 for (i = 0; i < n; i++) perm[i] = i; 104 for (i = 0; i < n; i++) r[i] = PetscRealPart(eigs[i]); 105 PetscCall(PetscSortRealWithPermutation(n, r, perm)); 106 for (i = 0; i < n; i++) { 107 r[i] = PetscRealPart(eigs[perm[i]]); 108 c[i] = PetscImaginaryPart(eigs[perm[i]]); 109 } 110 PetscCall(PetscFree(perm)); 111 #endif 112 PetscFunctionReturn(0); 113 } 114 115 PetscErrorCode KSPComputeRitz_GMRES(KSP ksp, PetscBool ritz, PetscBool small, PetscInt *nrit, Vec S[], PetscReal *tetar, PetscReal *tetai) 116 { 117 KSP_GMRES *gmres = (KSP_GMRES *)ksp->data; 118 PetscInt NbrRitz, nb = 0, n; 119 PetscInt i, j, *perm; 120 PetscScalar *H, *Q, *Ht; /* H Hessenberg matrix; Q matrix of eigenvectors of H */ 121 PetscScalar *wr, *wi; /* Real and imaginary part of the Ritz values */ 122 PetscScalar *SR, *work; 123 PetscReal *modul; 124 PetscBLASInt bn, bN, lwork, idummy; 125 PetscScalar *t, sdummy = 0; 126 Mat A; 127 128 PetscFunctionBegin; 129 /* Express sizes in PetscBLASInt for LAPACK routines*/ 130 PetscCall(PetscBLASIntCast(gmres->fullcycle ? gmres->max_k : gmres->it + 1, &bn)); /* size of the Hessenberg matrix */ 131 PetscCall(PetscBLASIntCast(gmres->max_k + 1, &bN)); /* LDA of the Hessenberg matrix */ 132 PetscCall(PetscBLASIntCast(gmres->max_k + 1, &idummy)); 133 PetscCall(PetscBLASIntCast(5 * (gmres->max_k + 1) * (gmres->max_k + 1), &lwork)); 134 135 /* NbrRitz: number of (Harmonic) Ritz pairs to extract */ 136 NbrRitz = PetscMin(*nrit, bn); 137 PetscCall(KSPGetOperators(ksp, &A, NULL)); 138 PetscCall(MatGetSize(A, &n, NULL)); 139 NbrRitz = PetscMin(NbrRitz, n); 140 141 PetscCall(PetscMalloc4(bN * bN, &H, bn * bn, &Q, bn, &wr, bn, &wi)); 142 143 /* copy H matrix to work space */ 144 PetscCall(PetscArraycpy(H, gmres->fullcycle ? gmres->hes_ritz : gmres->hes_origin, bN * bN)); 145 146 /* Modify H to compute Harmonic Ritz pairs H = H + H^{-T}*h^2_{m+1,m}e_m*e_m^T */ 147 if (!ritz) { 148 /* Transpose the Hessenberg matrix => Ht */ 149 PetscCall(PetscMalloc1(bn * bn, &Ht)); 150 for (i = 0; i < bn; i++) { 151 for (j = 0; j < bn; j++) Ht[i * bn + j] = PetscConj(H[j * bN + i]); 152 } 153 /* Solve the system H^T*t = h^2_{m+1,m}e_m */ 154 PetscCall(PetscCalloc1(bn, &t)); 155 /* t = h^2_{m+1,m}e_m */ 156 if (gmres->fullcycle) t[bn - 1] = PetscSqr(gmres->hes_ritz[(bn - 1) * bN + bn]); 157 else t[bn - 1] = PetscSqr(gmres->hes_origin[(bn - 1) * bN + bn]); 158 159 /* Call the LAPACK routine dgesv to compute t = H^{-T}*t */ 160 { 161 PetscBLASInt info; 162 PetscBLASInt nrhs = 1; 163 PetscBLASInt *ipiv; 164 PetscCall(PetscMalloc1(bn, &ipiv)); 165 PetscCallBLAS("LAPACKgesv", LAPACKgesv_(&bn, &nrhs, Ht, &bn, ipiv, t, &bn, &info)); 166 PetscCheck(!info, PetscObjectComm((PetscObject)ksp), PETSC_ERR_PLIB, "Error while calling the Lapack routine DGESV"); 167 PetscCall(PetscFree(ipiv)); 168 PetscCall(PetscFree(Ht)); 169 } 170 /* Form H + H^{-T}*h^2_{m+1,m}e_m*e_m^T */ 171 for (i = 0; i < bn; i++) H[(bn - 1) * bn + i] += t[i]; 172 PetscCall(PetscFree(t)); 173 } 174 175 /* 176 Compute (Harmonic) Ritz pairs; 177 For a real Ritz eigenvector at wr(j) Q(:,j) columns contain the real right eigenvector 178 For a complex Ritz pair of eigenvectors at wr(j), wi(j), wr(j+1), and wi(j+1), Q(:,j) + i Q(:,j+1) and Q(:,j) - i Q(:,j+1) are the two eigenvectors 179 */ 180 { 181 PetscBLASInt info; 182 #if defined(PETSC_USE_COMPLEX) 183 PetscReal *rwork = NULL; 184 #endif 185 PetscCall(PetscMalloc1(lwork, &work)); 186 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 187 #if !defined(PETSC_USE_COMPLEX) 188 PetscCallBLAS("LAPACKgeev", LAPACKgeev_("N", "V", &bn, H, &bN, wr, wi, &sdummy, &idummy, Q, &bn, work, &lwork, &info)); 189 #else 190 PetscCall(PetscMalloc1(2 * n, &rwork)); 191 PetscCallBLAS("LAPACKgeev", LAPACKgeev_("N", "V", &bn, H, &bN, wr, &sdummy, &idummy, Q, &bn, work, &lwork, rwork, &info)); 192 PetscCall(PetscFree(rwork)); 193 #endif 194 PetscCheck(!info, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in LAPACK routine"); 195 PetscCall(PetscFPTrapPop()); 196 PetscCall(PetscFree(work)); 197 } 198 /* sort the (Harmonic) Ritz values */ 199 PetscCall(PetscMalloc2(bn, &modul, bn, &perm)); 200 #if defined(PETSC_USE_COMPLEX) 201 for (i = 0; i < bn; i++) modul[i] = PetscAbsScalar(wr[i]); 202 #else 203 for (i = 0; i < bn; i++) modul[i] = PetscSqrtReal(wr[i] * wr[i] + wi[i] * wi[i]); 204 #endif 205 for (i = 0; i < bn; i++) perm[i] = i; 206 PetscCall(PetscSortRealWithPermutation(bn, modul, perm)); 207 208 #if defined(PETSC_USE_COMPLEX) 209 /* sort extracted (Harmonic) Ritz pairs */ 210 nb = NbrRitz; 211 PetscCall(PetscMalloc1(nb * bn, &SR)); 212 for (i = 0; i < nb; i++) { 213 if (small) { 214 tetar[i] = PetscRealPart(wr[perm[i]]); 215 tetai[i] = PetscImaginaryPart(wr[perm[i]]); 216 PetscCall(PetscArraycpy(&SR[i * bn], &(Q[perm[i] * bn]), bn)); 217 } else { 218 tetar[i] = PetscRealPart(wr[perm[bn - nb + i]]); 219 tetai[i] = PetscImaginaryPart(wr[perm[bn - nb + i]]); 220 PetscCall(PetscArraycpy(&SR[i * bn], &(Q[perm[bn - nb + i] * bn]), bn)); /* permute columns of Q */ 221 } 222 } 223 #else 224 /* count the number of extracted (Harmonic) Ritz pairs (with complex conjugates) */ 225 if (small) { 226 while (nb < NbrRitz) { 227 if (!wi[perm[nb]]) nb += 1; 228 else { 229 if (nb < NbrRitz - 1) nb += 2; 230 else break; 231 } 232 } 233 PetscCall(PetscMalloc1(nb * bn, &SR)); 234 for (i = 0; i < nb; i++) { 235 tetar[i] = wr[perm[i]]; 236 tetai[i] = wi[perm[i]]; 237 PetscCall(PetscArraycpy(&SR[i * bn], &(Q[perm[i] * bn]), bn)); 238 } 239 } else { 240 while (nb < NbrRitz) { 241 if (wi[perm[bn - nb - 1]] == 0) nb += 1; 242 else { 243 if (nb < NbrRitz - 1) nb += 2; 244 else break; 245 } 246 } 247 PetscCall(PetscMalloc1(nb * bn, &SR)); /* bn rows, nb columns */ 248 for (i = 0; i < nb; i++) { 249 tetar[i] = wr[perm[bn - nb + i]]; 250 tetai[i] = wi[perm[bn - nb + i]]; 251 PetscCall(PetscArraycpy(&SR[i * bn], &(Q[perm[bn - nb + i] * bn]), bn)); /* permute columns of Q */ 252 } 253 } 254 #endif 255 PetscCall(PetscFree2(modul, perm)); 256 PetscCall(PetscFree4(H, Q, wr, wi)); 257 258 /* Form the (Harmonic) Ritz vectors S = SR*V, columns of VV correspond to the basis of the Krylov subspace */ 259 for (j = 0; j < nb; j++) { 260 PetscCall(VecZeroEntries(S[j])); 261 PetscCall(VecMAXPY(S[j], bn, &SR[j * bn], gmres->fullcycle ? gmres->vecb : &VEC_VV(0))); 262 } 263 264 PetscCall(PetscFree(SR)); 265 *nrit = nb; 266 PetscFunctionReturn(0); 267 } 268