1 /* 2 This file implements FBiCGStab-R. 3 FBiCGStab-R is a mathematically equivalent variant of FBiCGStab. Differences are: 4 (1) There are fewer MPI_Allreduce calls. 5 (2) The convergence occasionally is much faster than that of FBiCGStab. 6 */ 7 #include <../src/ksp/ksp/impls/bcgs/bcgsimpl.h> /*I "petscksp.h" I*/ 8 #include <petsc/private/vecimpl.h> 9 10 static PetscErrorCode KSPSetUp_FBCGSR(KSP ksp) 11 { 12 PetscFunctionBegin; 13 PetscCall(KSPSetWorkVecs(ksp, 8)); 14 PetscFunctionReturn(PETSC_SUCCESS); 15 } 16 17 static PetscErrorCode KSPSolve_FBCGSR(KSP ksp) 18 { 19 PetscInt i, j, N; 20 PetscScalar tau, sigma, alpha, omega, beta; 21 PetscReal rho; 22 PetscScalar xi1, xi2, xi3, xi4; 23 Vec X, B, P, P2, RP, R, V, S, T, S2; 24 PetscScalar *PETSC_RESTRICT rp, *PETSC_RESTRICT r, *PETSC_RESTRICT p; 25 PetscScalar *PETSC_RESTRICT v, *PETSC_RESTRICT s, *PETSC_RESTRICT t, *PETSC_RESTRICT s2; 26 PetscScalar insums[4], outsums[4]; 27 KSP_BCGS *bcgs = (KSP_BCGS *)ksp->data; 28 PC pc; 29 Mat mat; 30 31 PetscFunctionBegin; 32 PetscCheck(ksp->vec_rhs->petscnative, PetscObjectComm((PetscObject)ksp), PETSC_ERR_SUP, "Only coded for PETSc vectors"); 33 PetscCall(VecGetLocalSize(ksp->vec_sol, &N)); 34 35 X = ksp->vec_sol; 36 B = ksp->vec_rhs; 37 P2 = ksp->work[0]; 38 39 /* The following are involved in modified inner product calculations and vector updates */ 40 RP = ksp->work[1]; 41 PetscCall(VecGetArray(RP, (PetscScalar **)&rp)); 42 PetscCall(VecRestoreArray(RP, NULL)); 43 R = ksp->work[2]; 44 PetscCall(VecGetArray(R, (PetscScalar **)&r)); 45 PetscCall(VecRestoreArray(R, NULL)); 46 P = ksp->work[3]; 47 PetscCall(VecGetArray(P, (PetscScalar **)&p)); 48 PetscCall(VecRestoreArray(P, NULL)); 49 V = ksp->work[4]; 50 PetscCall(VecGetArray(V, (PetscScalar **)&v)); 51 PetscCall(VecRestoreArray(V, NULL)); 52 S = ksp->work[5]; 53 PetscCall(VecGetArray(S, (PetscScalar **)&s)); 54 PetscCall(VecRestoreArray(S, NULL)); 55 T = ksp->work[6]; 56 PetscCall(VecGetArray(T, (PetscScalar **)&t)); 57 PetscCall(VecRestoreArray(T, NULL)); 58 S2 = ksp->work[7]; 59 PetscCall(VecGetArray(S2, (PetscScalar **)&s2)); 60 PetscCall(VecRestoreArray(S2, NULL)); 61 62 /* Only supports right preconditioning */ 63 PetscCheck(ksp->pc_side == PC_RIGHT, PetscObjectComm((PetscObject)ksp), PETSC_ERR_SUP, "KSP fbcgsr does not support %s", PCSides[ksp->pc_side]); 64 if (!ksp->guess_zero) { 65 if (!bcgs->guess) PetscCall(VecDuplicate(X, &bcgs->guess)); 66 PetscCall(VecCopy(X, bcgs->guess)); 67 } else { 68 PetscCall(VecSet(X, 0.0)); 69 } 70 71 /* Compute initial residual */ 72 PetscCall(KSPGetPC(ksp, &pc)); 73 PetscCall(PCGetOperators(pc, &mat, NULL)); 74 if (!ksp->guess_zero) { 75 PetscCall(KSP_MatMult(ksp, mat, X, P2)); /* P2 is used as temporary storage */ 76 PetscCall(VecCopy(B, R)); 77 PetscCall(VecAXPY(R, -1.0, P2)); 78 } else { 79 PetscCall(VecCopy(B, R)); 80 } 81 82 /* Test for nothing to do */ 83 PetscCall(VecNorm(R, NORM_2, &rho)); 84 PetscCall(PetscObjectSAWsTakeAccess((PetscObject)ksp)); 85 ksp->its = 0; 86 if (ksp->normtype != KSP_NORM_NONE) ksp->rnorm = rho; 87 else ksp->rnorm = 0; 88 PetscCall(PetscObjectSAWsGrantAccess((PetscObject)ksp)); 89 PetscCall(KSPLogResidualHistory(ksp, ksp->rnorm)); 90 PetscCall(KSPMonitor(ksp, 0, ksp->rnorm)); 91 PetscCall((*ksp->converged)(ksp, 0, ksp->rnorm, &ksp->reason, ksp->cnvP)); 92 if (ksp->reason) PetscFunctionReturn(PETSC_SUCCESS); 93 94 /* Initialize iterates */ 95 PetscCall(VecCopy(R, RP)); /* rp <- r */ 96 PetscCall(VecCopy(R, P)); /* p <- r */ 97 98 /* Big loop */ 99 for (i = 0; i < ksp->max_it; i++) { 100 /* matmult and pc */ 101 PetscCall(KSP_PCApply(ksp, P, P2)); /* p2 <- K p */ 102 PetscCall(KSP_MatMult(ksp, mat, P2, V)); /* v <- A p2 */ 103 104 /* inner products */ 105 if (i == 0) { 106 tau = rho * rho; 107 PetscCall(VecDot(V, RP, &sigma)); /* sigma <- (v,rp) */ 108 } else { 109 PetscCall(PetscLogEventBegin(VEC_ReduceArithmetic, 0, 0, 0, 0)); 110 tau = sigma = 0.0; 111 for (j = 0; j < N; j++) { 112 tau += r[j] * rp[j]; /* tau <- (r,rp) */ 113 sigma += v[j] * rp[j]; /* sigma <- (v,rp) */ 114 } 115 PetscCall(PetscLogFlops(4.0 * N)); 116 PetscCall(PetscLogEventEnd(VEC_ReduceArithmetic, 0, 0, 0, 0)); 117 insums[0] = tau; 118 insums[1] = sigma; 119 PetscCall(PetscLogEventBegin(VEC_ReduceCommunication, 0, 0, 0, 0)); 120 PetscCallMPI(MPIU_Allreduce(insums, outsums, 2, MPIU_SCALAR, MPIU_SUM, PetscObjectComm((PetscObject)ksp))); 121 PetscCall(PetscLogEventEnd(VEC_ReduceCommunication, 0, 0, 0, 0)); 122 tau = outsums[0]; 123 sigma = outsums[1]; 124 } 125 126 /* scalar update */ 127 alpha = tau / sigma; 128 129 /* vector update */ 130 PetscCall(VecWAXPY(S, -alpha, V, R)); /* s <- r - alpha v */ 131 132 /* matmult and pc */ 133 PetscCall(KSP_PCApply(ksp, S, S2)); /* s2 <- K s */ 134 PetscCall(KSP_MatMult(ksp, mat, S2, T)); /* t <- A s2 */ 135 136 /* inner products */ 137 PetscCall(PetscLogEventBegin(VEC_ReduceArithmetic, 0, 0, 0, 0)); 138 xi1 = xi2 = xi3 = xi4 = 0.0; 139 for (j = 0; j < N; j++) { 140 xi1 += s[j] * s[j]; /* xi1 <- (s,s) */ 141 xi2 += t[j] * s[j]; /* xi2 <- (t,s) */ 142 xi3 += t[j] * t[j]; /* xi3 <- (t,t) */ 143 xi4 += t[j] * rp[j]; /* xi4 <- (t,rp) */ 144 } 145 PetscCall(PetscLogFlops(8.0 * N)); 146 PetscCall(PetscLogEventEnd(VEC_ReduceArithmetic, 0, 0, 0, 0)); 147 148 insums[0] = xi1; 149 insums[1] = xi2; 150 insums[2] = xi3; 151 insums[3] = xi4; 152 153 PetscCall(PetscLogEventBegin(VEC_ReduceCommunication, 0, 0, 0, 0)); 154 PetscCallMPI(MPIU_Allreduce(insums, outsums, 4, MPIU_SCALAR, MPIU_SUM, PetscObjectComm((PetscObject)ksp))); 155 PetscCall(PetscLogEventEnd(VEC_ReduceCommunication, 0, 0, 0, 0)); 156 xi1 = outsums[0]; 157 xi2 = outsums[1]; 158 xi3 = outsums[2]; 159 xi4 = outsums[3]; 160 161 /* test denominator */ 162 if ((xi3 == 0.0) || (sigma == 0.0)) { 163 PetscCheck(!ksp->errorifnotconverged, PetscObjectComm((PetscObject)ksp), PETSC_ERR_NOT_CONVERGED, "KSPSolve has failed due to zero inner product"); 164 ksp->reason = KSP_DIVERGED_BREAKDOWN; 165 PetscCall(PetscInfo(ksp, "KSPSolve has failed due to zero inner product\n")); 166 break; 167 } 168 169 /* scalar updates */ 170 omega = xi2 / xi3; 171 beta = -xi4 / sigma; 172 rho = PetscSqrtReal(PetscAbsScalar(xi1 - omega * xi2)); /* residual norm */ 173 174 /* vector updates */ 175 PetscCall(VecAXPBYPCZ(X, alpha, omega, 1.0, P2, S2)); /* x <- alpha * p2 + omega * s2 + x */ 176 177 /* convergence test */ 178 PetscCall(PetscObjectSAWsTakeAccess((PetscObject)ksp)); 179 ksp->its++; 180 if (ksp->normtype != KSP_NORM_NONE) ksp->rnorm = rho; 181 else ksp->rnorm = 0; 182 PetscCall(PetscObjectSAWsGrantAccess((PetscObject)ksp)); 183 PetscCall(KSPLogResidualHistory(ksp, ksp->rnorm)); 184 PetscCall(KSPMonitor(ksp, i + 1, ksp->rnorm)); 185 PetscCall((*ksp->converged)(ksp, i + 1, ksp->rnorm, &ksp->reason, ksp->cnvP)); 186 if (ksp->reason) break; 187 188 /* vector updates */ 189 PetscCall(PetscLogEventBegin(VEC_Ops, 0, 0, 0, 0)); 190 for (j = 0; j < N; j++) { 191 r[j] = s[j] - omega * t[j]; /* r <- s - omega t */ 192 p[j] = r[j] + beta * (p[j] - omega * v[j]); /* p <- r + beta * (p - omega v) */ 193 } 194 PetscCall(PetscLogFlops(6.0 * N)); 195 PetscCall(PetscLogEventEnd(VEC_Ops, 0, 0, 0, 0)); 196 } 197 198 if (i >= ksp->max_it) ksp->reason = KSP_DIVERGED_ITS; 199 PetscFunctionReturn(PETSC_SUCCESS); 200 } 201 202 /*MC 203 KSPFBCGSR - Implements a mathematically equivalent variant of flexible bi-CG-stab, `KSPFBCGS`. [](sec_flexibleksp) 204 205 Level: beginner 206 207 Notes: 208 This implementation requires fewer `MPI_Allreduce()` calls than `KSPFBCGS` and may converge faster 209 210 See `KSPPIPEBCGS` for a pipelined version of the algorithm 211 212 Flexible BiCGStab, unlike most Krylov methods, allows the preconditioner to be nonlinear, that is the action of the preconditioner to a vector need not be linear 213 in the vector entries. 214 215 Only supports right preconditioning 216 217 .seealso: [](ch_ksp), [](sec_flexibleksp), `KSPFBCGSR`, `KSPPIPEBCGS`, `KSPBCGSL`, `KSPBCGS`, `KSPCreate()`, `KSPSetType()`, `KSPType`, `KSP`, `KSPBICG`, `KSPFBCGS`, `KSPSetPCSide()` 218 M*/ 219 PETSC_EXTERN PetscErrorCode KSPCreate_FBCGSR(KSP ksp) 220 { 221 KSP_BCGS *bcgs; 222 223 PetscFunctionBegin; 224 PetscCall(PetscNew(&bcgs)); 225 226 ksp->data = bcgs; 227 ksp->ops->setup = KSPSetUp_FBCGSR; 228 ksp->ops->solve = KSPSolve_FBCGSR; 229 ksp->ops->destroy = KSPDestroy_BCGS; 230 ksp->ops->reset = KSPReset_BCGS; 231 ksp->ops->buildsolution = KSPBuildSolution_BCGS; 232 ksp->ops->buildresidual = KSPBuildResidualDefault; 233 ksp->ops->setfromoptions = KSPSetFromOptions_BCGS; 234 ksp->pc_side = PC_RIGHT; /* set default PC side */ 235 236 PetscCall(KSPSetSupportedNorm(ksp, KSP_NORM_PRECONDITIONED, PC_LEFT, 3)); 237 PetscCall(KSPSetSupportedNorm(ksp, KSP_NORM_UNPRECONDITIONED, PC_RIGHT, 2)); 238 PetscCall(KSPSetSupportedNorm(ksp, KSP_NORM_NONE, PC_RIGHT, 1)); 239 PetscFunctionReturn(PETSC_SUCCESS); 240 } 241