1 2 /* 3 This file implements FBiCGStab-R. 4 FBiCGStab-R is a mathematically equivalent variant of FBiCGStab. Differences are: 5 (1) There are fewer MPI_Allreduce calls. 6 (2) The convergence occasionally is much faster than that of FBiCGStab. 7 */ 8 #include <../src/ksp/ksp/impls/bcgs/bcgsimpl.h> /*I "petscksp.h" I*/ 9 #include <petsc/private/vecimpl.h> 10 11 static PetscErrorCode KSPSetUp_FBCGSR(KSP ksp) 12 { 13 PetscFunctionBegin; 14 PetscCall(KSPSetWorkVecs(ksp, 8)); 15 PetscFunctionReturn(0); 16 } 17 18 static PetscErrorCode KSPSolve_FBCGSR(KSP ksp) 19 { 20 PetscInt i, j, N; 21 PetscScalar tau, sigma, alpha, omega, beta; 22 PetscReal rho; 23 PetscScalar xi1, xi2, xi3, xi4; 24 Vec X, B, P, P2, RP, R, V, S, T, S2; 25 PetscScalar *PETSC_RESTRICT rp, *PETSC_RESTRICT r, *PETSC_RESTRICT p; 26 PetscScalar *PETSC_RESTRICT v, *PETSC_RESTRICT s, *PETSC_RESTRICT t, *PETSC_RESTRICT s2; 27 PetscScalar insums[4], outsums[4]; 28 KSP_BCGS *bcgs = (KSP_BCGS *)ksp->data; 29 PC pc; 30 Mat mat; 31 32 PetscFunctionBegin; 33 PetscCheck(ksp->vec_rhs->petscnative, PetscObjectComm((PetscObject)ksp), PETSC_ERR_SUP, "Only coded for PETSc vectors"); 34 PetscCall(VecGetLocalSize(ksp->vec_sol, &N)); 35 36 X = ksp->vec_sol; 37 B = ksp->vec_rhs; 38 P2 = ksp->work[0]; 39 40 /* The followings are involved in modified inner product calculations and vector updates */ 41 RP = ksp->work[1]; 42 PetscCall(VecGetArray(RP, (PetscScalar **)&rp)); 43 PetscCall(VecRestoreArray(RP, NULL)); 44 R = ksp->work[2]; 45 PetscCall(VecGetArray(R, (PetscScalar **)&r)); 46 PetscCall(VecRestoreArray(R, NULL)); 47 P = ksp->work[3]; 48 PetscCall(VecGetArray(P, (PetscScalar **)&p)); 49 PetscCall(VecRestoreArray(P, NULL)); 50 V = ksp->work[4]; 51 PetscCall(VecGetArray(V, (PetscScalar **)&v)); 52 PetscCall(VecRestoreArray(V, NULL)); 53 S = ksp->work[5]; 54 PetscCall(VecGetArray(S, (PetscScalar **)&s)); 55 PetscCall(VecRestoreArray(S, NULL)); 56 T = ksp->work[6]; 57 PetscCall(VecGetArray(T, (PetscScalar **)&t)); 58 PetscCall(VecRestoreArray(T, NULL)); 59 S2 = ksp->work[7]; 60 PetscCall(VecGetArray(S2, (PetscScalar **)&s2)); 61 PetscCall(VecRestoreArray(S2, NULL)); 62 63 /* Only supports right preconditioning */ 64 PetscCheck(ksp->pc_side == PC_RIGHT, PetscObjectComm((PetscObject)ksp), PETSC_ERR_SUP, "KSP fbcgsr does not support %s", PCSides[ksp->pc_side]); 65 if (!ksp->guess_zero) { 66 if (!bcgs->guess) PetscCall(VecDuplicate(X, &bcgs->guess)); 67 PetscCall(VecCopy(X, bcgs->guess)); 68 } else { 69 PetscCall(VecSet(X, 0.0)); 70 } 71 72 /* Compute initial residual */ 73 PetscCall(KSPGetPC(ksp, &pc)); 74 PetscCall(PCSetUp(pc)); 75 PetscCall(PCGetOperators(pc, &mat, NULL)); 76 if (!ksp->guess_zero) { 77 PetscCall(KSP_MatMult(ksp, mat, X, P2)); /* P2 is used as temporary storage */ 78 PetscCall(VecCopy(B, R)); 79 PetscCall(VecAXPY(R, -1.0, P2)); 80 } else { 81 PetscCall(VecCopy(B, R)); 82 } 83 84 /* Test for nothing to do */ 85 PetscCall(VecNorm(R, NORM_2, &rho)); 86 PetscCall(PetscObjectSAWsTakeAccess((PetscObject)ksp)); 87 ksp->its = 0; 88 if (ksp->normtype != KSP_NORM_NONE) ksp->rnorm = rho; 89 else ksp->rnorm = 0; 90 PetscCall(PetscObjectSAWsGrantAccess((PetscObject)ksp)); 91 PetscCall(KSPLogResidualHistory(ksp, ksp->rnorm)); 92 PetscCall(KSPMonitor(ksp, 0, ksp->rnorm)); 93 PetscCall((*ksp->converged)(ksp, 0, ksp->rnorm, &ksp->reason, ksp->cnvP)); 94 if (ksp->reason) PetscFunctionReturn(0); 95 96 /* Initialize iterates */ 97 PetscCall(VecCopy(R, RP)); /* rp <- r */ 98 PetscCall(VecCopy(R, P)); /* p <- r */ 99 100 /* Big loop */ 101 for (i = 0; i < ksp->max_it; i++) { 102 /* matmult and pc */ 103 PetscCall(KSP_PCApply(ksp, P, P2)); /* p2 <- K p */ 104 PetscCall(KSP_MatMult(ksp, mat, P2, V)); /* v <- A p2 */ 105 106 /* inner prodcuts */ 107 if (i == 0) { 108 tau = rho * rho; 109 PetscCall(VecDot(V, RP, &sigma)); /* sigma <- (v,rp) */ 110 } else { 111 PetscCall(PetscLogEventBegin(VEC_ReduceArithmetic, 0, 0, 0, 0)); 112 tau = sigma = 0.0; 113 for (j = 0; j < N; j++) { 114 tau += r[j] * rp[j]; /* tau <- (r,rp) */ 115 sigma += v[j] * rp[j]; /* sigma <- (v,rp) */ 116 } 117 PetscCall(PetscLogFlops(4.0 * N)); 118 PetscCall(PetscLogEventEnd(VEC_ReduceArithmetic, 0, 0, 0, 0)); 119 insums[0] = tau; 120 insums[1] = sigma; 121 PetscCall(PetscLogEventBegin(VEC_ReduceCommunication, 0, 0, 0, 0)); 122 PetscCall(MPIU_Allreduce(insums, outsums, 2, MPIU_SCALAR, MPIU_SUM, PetscObjectComm((PetscObject)ksp))); 123 PetscCall(PetscLogEventEnd(VEC_ReduceCommunication, 0, 0, 0, 0)); 124 tau = outsums[0]; 125 sigma = outsums[1]; 126 } 127 128 /* scalar update */ 129 alpha = tau / sigma; 130 131 /* vector update */ 132 PetscCall(VecWAXPY(S, -alpha, V, R)); /* s <- r - alpha v */ 133 134 /* matmult and pc */ 135 PetscCall(KSP_PCApply(ksp, S, S2)); /* s2 <- K s */ 136 PetscCall(KSP_MatMult(ksp, mat, S2, T)); /* t <- A s2 */ 137 138 /* inner prodcuts */ 139 PetscCall(PetscLogEventBegin(VEC_ReduceArithmetic, 0, 0, 0, 0)); 140 xi1 = xi2 = xi3 = xi4 = 0.0; 141 for (j = 0; j < N; j++) { 142 xi1 += s[j] * s[j]; /* xi1 <- (s,s) */ 143 xi2 += t[j] * s[j]; /* xi2 <- (t,s) */ 144 xi3 += t[j] * t[j]; /* xi3 <- (t,t) */ 145 xi4 += t[j] * rp[j]; /* xi4 <- (t,rp) */ 146 } 147 PetscCall(PetscLogFlops(8.0 * N)); 148 PetscCall(PetscLogEventEnd(VEC_ReduceArithmetic, 0, 0, 0, 0)); 149 150 insums[0] = xi1; 151 insums[1] = xi2; 152 insums[2] = xi3; 153 insums[3] = xi4; 154 155 PetscCall(PetscLogEventBegin(VEC_ReduceCommunication, 0, 0, 0, 0)); 156 PetscCall(MPIU_Allreduce(insums, outsums, 4, MPIU_SCALAR, MPIU_SUM, PetscObjectComm((PetscObject)ksp))); 157 PetscCall(PetscLogEventEnd(VEC_ReduceCommunication, 0, 0, 0, 0)); 158 xi1 = outsums[0]; 159 xi2 = outsums[1]; 160 xi3 = outsums[2]; 161 xi4 = outsums[3]; 162 163 /* test denominator */ 164 if ((xi3 == 0.0) || (sigma == 0.0)) { 165 PetscCheck(!ksp->errorifnotconverged, PetscObjectComm((PetscObject)ksp), PETSC_ERR_NOT_CONVERGED, "KSPSolve has failed due to zero inner product"); 166 ksp->reason = KSP_DIVERGED_BREAKDOWN; 167 PetscCall(PetscInfo(ksp, "KSPSolve has failed due to zero inner product\n")); 168 break; 169 } 170 171 /* scalar updates */ 172 omega = xi2 / xi3; 173 beta = -xi4 / sigma; 174 rho = PetscSqrtReal(PetscAbsScalar(xi1 - omega * xi2)); /* residual norm */ 175 176 /* vector updates */ 177 PetscCall(VecAXPBYPCZ(X, alpha, omega, 1.0, P2, S2)); /* x <- alpha * p2 + omega * s2 + x */ 178 179 /* convergence test */ 180 PetscCall(PetscObjectSAWsTakeAccess((PetscObject)ksp)); 181 ksp->its++; 182 if (ksp->normtype != KSP_NORM_NONE) ksp->rnorm = rho; 183 else ksp->rnorm = 0; 184 PetscCall(PetscObjectSAWsGrantAccess((PetscObject)ksp)); 185 PetscCall(KSPLogResidualHistory(ksp, ksp->rnorm)); 186 PetscCall(KSPMonitor(ksp, i + 1, ksp->rnorm)); 187 PetscCall((*ksp->converged)(ksp, i + 1, ksp->rnorm, &ksp->reason, ksp->cnvP)); 188 if (ksp->reason) break; 189 190 /* vector updates */ 191 PetscCall(PetscLogEventBegin(VEC_Ops, 0, 0, 0, 0)); 192 for (j = 0; j < N; j++) { 193 r[j] = s[j] - omega * t[j]; /* r <- s - omega t */ 194 p[j] = r[j] + beta * (p[j] - omega * v[j]); /* p <- r + beta * (p - omega v) */ 195 } 196 PetscCall(PetscLogFlops(6.0 * N)); 197 PetscCall(PetscLogEventEnd(VEC_Ops, 0, 0, 0, 0)); 198 } 199 200 if (i >= ksp->max_it) ksp->reason = KSP_DIVERGED_ITS; 201 PetscFunctionReturn(0); 202 } 203 204 /*MC 205 KSPFBCGSR - Implements a mathematically equivalent variant of flexible bi-CG-stab, `KSPFBCGS`. [](sec_flexibleksp) 206 207 Level: beginner 208 209 Notes: 210 This implementation requires fewer `MPI_Allreduce()` calls than `KSPFBCGS` and may converge faster 211 212 See `KSPPIPEBCGS` for a pipelined version of the algorithm 213 214 Flexible BiCGStab, unlike most Krylov methods, allows the preconditioner to be nonlinear, that is the action of the preconditioner to a vector need not be linear 215 in the vector entries. 216 217 Only supports right preconditioning 218 219 .seealso: [](chapter_ksp), [](sec_flexibleksp), `KSPFBCGSR`, `KSPPIPEBCGS`, `KSPBCGSL`, `KSPBCGS`, `KSPCreate()`, `KSPSetType()`, `KSPType`, `KSP`, `KSPBICG`, `KSPFBCGSL`, `KSPSetPCSide()` 220 M*/ 221 PETSC_EXTERN PetscErrorCode KSPCreate_FBCGSR(KSP ksp) 222 { 223 KSP_BCGS *bcgs; 224 225 PetscFunctionBegin; 226 PetscCall(PetscNew(&bcgs)); 227 228 ksp->data = bcgs; 229 ksp->ops->setup = KSPSetUp_FBCGSR; 230 ksp->ops->solve = KSPSolve_FBCGSR; 231 ksp->ops->destroy = KSPDestroy_BCGS; 232 ksp->ops->reset = KSPReset_BCGS; 233 ksp->ops->buildsolution = KSPBuildSolution_BCGS; 234 ksp->ops->buildresidual = KSPBuildResidualDefault; 235 ksp->ops->setfromoptions = KSPSetFromOptions_BCGS; 236 ksp->pc_side = PC_RIGHT; /* set default PC side */ 237 238 PetscCall(KSPSetSupportedNorm(ksp, KSP_NORM_PRECONDITIONED, PC_LEFT, 3)); 239 PetscCall(KSPSetSupportedNorm(ksp, KSP_NORM_UNPRECONDITIONED, PC_RIGHT, 2)); 240 PetscCall(KSPSetSupportedNorm(ksp, KSP_NORM_NONE, PC_RIGHT, 1)); 241 PetscFunctionReturn(0); 242 } 243