1 2 /* 3 This file implements FBiCGStab-R. 4 Only allow right preconditioning. 5 FBiCGStab-R is a mathematically equivalent variant of FBiCGStab. Differences are: 6 (1) There are fewer MPI_Allreduce calls. 7 (2) The convergence occasionally is much faster than that of FBiCGStab. 8 */ 9 #include <../src/ksp/ksp/impls/bcgs/bcgsimpl.h> /*I "petscksp.h" I*/ 10 #include <petsc/private/vecimpl.h> 11 12 static PetscErrorCode KSPSetUp_FBCGSR(KSP ksp) 13 { 14 PetscFunctionBegin; 15 PetscCall(KSPSetWorkVecs(ksp, 8)); 16 PetscFunctionReturn(0); 17 } 18 19 static PetscErrorCode KSPSolve_FBCGSR(KSP ksp) 20 { 21 PetscInt i, j, N; 22 PetscScalar tau, sigma, alpha, omega, beta; 23 PetscReal rho; 24 PetscScalar xi1, xi2, xi3, xi4; 25 Vec X, B, P, P2, RP, R, V, S, T, S2; 26 PetscScalar *PETSC_RESTRICT rp, *PETSC_RESTRICT r, *PETSC_RESTRICT p; 27 PetscScalar *PETSC_RESTRICT v, *PETSC_RESTRICT s, *PETSC_RESTRICT t, *PETSC_RESTRICT s2; 28 PetscScalar insums[4], outsums[4]; 29 KSP_BCGS *bcgs = (KSP_BCGS *)ksp->data; 30 PC pc; 31 Mat mat; 32 33 PetscFunctionBegin; 34 PetscCheck(ksp->vec_rhs->petscnative, PetscObjectComm((PetscObject)ksp), PETSC_ERR_SUP, "Only coded for PETSc vectors"); 35 PetscCall(VecGetLocalSize(ksp->vec_sol, &N)); 36 37 X = ksp->vec_sol; 38 B = ksp->vec_rhs; 39 P2 = ksp->work[0]; 40 41 /* The followings are involved in modified inner product calculations and vector updates */ 42 RP = ksp->work[1]; 43 PetscCall(VecGetArray(RP, (PetscScalar **)&rp)); 44 PetscCall(VecRestoreArray(RP, NULL)); 45 R = ksp->work[2]; 46 PetscCall(VecGetArray(R, (PetscScalar **)&r)); 47 PetscCall(VecRestoreArray(R, NULL)); 48 P = ksp->work[3]; 49 PetscCall(VecGetArray(P, (PetscScalar **)&p)); 50 PetscCall(VecRestoreArray(P, NULL)); 51 V = ksp->work[4]; 52 PetscCall(VecGetArray(V, (PetscScalar **)&v)); 53 PetscCall(VecRestoreArray(V, NULL)); 54 S = ksp->work[5]; 55 PetscCall(VecGetArray(S, (PetscScalar **)&s)); 56 PetscCall(VecRestoreArray(S, NULL)); 57 T = ksp->work[6]; 58 PetscCall(VecGetArray(T, (PetscScalar **)&t)); 59 PetscCall(VecRestoreArray(T, NULL)); 60 S2 = ksp->work[7]; 61 PetscCall(VecGetArray(S2, (PetscScalar **)&s2)); 62 PetscCall(VecRestoreArray(S2, NULL)); 63 64 /* Only supports right preconditioning */ 65 PetscCheck(ksp->pc_side == PC_RIGHT, PetscObjectComm((PetscObject)ksp), PETSC_ERR_SUP, "KSP fbcgsr does not support %s", PCSides[ksp->pc_side]); 66 if (!ksp->guess_zero) { 67 if (!bcgs->guess) PetscCall(VecDuplicate(X, &bcgs->guess)); 68 PetscCall(VecCopy(X, bcgs->guess)); 69 } else { 70 PetscCall(VecSet(X, 0.0)); 71 } 72 73 /* Compute initial residual */ 74 PetscCall(KSPGetPC(ksp, &pc)); 75 PetscCall(PCSetUp(pc)); 76 PetscCall(PCGetOperators(pc, &mat, NULL)); 77 if (!ksp->guess_zero) { 78 PetscCall(KSP_MatMult(ksp, mat, X, P2)); /* P2 is used as temporary storage */ 79 PetscCall(VecCopy(B, R)); 80 PetscCall(VecAXPY(R, -1.0, P2)); 81 } else { 82 PetscCall(VecCopy(B, R)); 83 } 84 85 /* Test for nothing to do */ 86 PetscCall(VecNorm(R, NORM_2, &rho)); 87 PetscCall(PetscObjectSAWsTakeAccess((PetscObject)ksp)); 88 ksp->its = 0; 89 if (ksp->normtype != KSP_NORM_NONE) ksp->rnorm = rho; 90 else ksp->rnorm = 0; 91 PetscCall(PetscObjectSAWsGrantAccess((PetscObject)ksp)); 92 PetscCall(KSPLogResidualHistory(ksp, ksp->rnorm)); 93 PetscCall(KSPMonitor(ksp, 0, ksp->rnorm)); 94 PetscCall((*ksp->converged)(ksp, 0, ksp->rnorm, &ksp->reason, ksp->cnvP)); 95 if (ksp->reason) PetscFunctionReturn(0); 96 97 /* Initialize iterates */ 98 PetscCall(VecCopy(R, RP)); /* rp <- r */ 99 PetscCall(VecCopy(R, P)); /* p <- r */ 100 101 /* Big loop */ 102 for (i = 0; i < ksp->max_it; i++) { 103 /* matmult and pc */ 104 PetscCall(KSP_PCApply(ksp, P, P2)); /* p2 <- K p */ 105 PetscCall(KSP_MatMult(ksp, mat, P2, V)); /* v <- A p2 */ 106 107 /* inner prodcuts */ 108 if (i == 0) { 109 tau = rho * rho; 110 PetscCall(VecDot(V, RP, &sigma)); /* sigma <- (v,rp) */ 111 } else { 112 PetscCall(PetscLogEventBegin(VEC_ReduceArithmetic, 0, 0, 0, 0)); 113 tau = sigma = 0.0; 114 for (j = 0; j < N; j++) { 115 tau += r[j] * rp[j]; /* tau <- (r,rp) */ 116 sigma += v[j] * rp[j]; /* sigma <- (v,rp) */ 117 } 118 PetscCall(PetscLogFlops(4.0 * N)); 119 PetscCall(PetscLogEventEnd(VEC_ReduceArithmetic, 0, 0, 0, 0)); 120 insums[0] = tau; 121 insums[1] = sigma; 122 PetscCall(PetscLogEventBegin(VEC_ReduceCommunication, 0, 0, 0, 0)); 123 PetscCall(MPIU_Allreduce(insums, outsums, 2, MPIU_SCALAR, MPIU_SUM, PetscObjectComm((PetscObject)ksp))); 124 PetscCall(PetscLogEventEnd(VEC_ReduceCommunication, 0, 0, 0, 0)); 125 tau = outsums[0]; 126 sigma = outsums[1]; 127 } 128 129 /* scalar update */ 130 alpha = tau / sigma; 131 132 /* vector update */ 133 PetscCall(VecWAXPY(S, -alpha, V, R)); /* s <- r - alpha v */ 134 135 /* matmult and pc */ 136 PetscCall(KSP_PCApply(ksp, S, S2)); /* s2 <- K s */ 137 PetscCall(KSP_MatMult(ksp, mat, S2, T)); /* t <- A s2 */ 138 139 /* inner prodcuts */ 140 PetscCall(PetscLogEventBegin(VEC_ReduceArithmetic, 0, 0, 0, 0)); 141 xi1 = xi2 = xi3 = xi4 = 0.0; 142 for (j = 0; j < N; j++) { 143 xi1 += s[j] * s[j]; /* xi1 <- (s,s) */ 144 xi2 += t[j] * s[j]; /* xi2 <- (t,s) */ 145 xi3 += t[j] * t[j]; /* xi3 <- (t,t) */ 146 xi4 += t[j] * rp[j]; /* xi4 <- (t,rp) */ 147 } 148 PetscCall(PetscLogFlops(8.0 * N)); 149 PetscCall(PetscLogEventEnd(VEC_ReduceArithmetic, 0, 0, 0, 0)); 150 151 insums[0] = xi1; 152 insums[1] = xi2; 153 insums[2] = xi3; 154 insums[3] = xi4; 155 156 PetscCall(PetscLogEventBegin(VEC_ReduceCommunication, 0, 0, 0, 0)); 157 PetscCall(MPIU_Allreduce(insums, outsums, 4, MPIU_SCALAR, MPIU_SUM, PetscObjectComm((PetscObject)ksp))); 158 PetscCall(PetscLogEventEnd(VEC_ReduceCommunication, 0, 0, 0, 0)); 159 xi1 = outsums[0]; 160 xi2 = outsums[1]; 161 xi3 = outsums[2]; 162 xi4 = outsums[3]; 163 164 /* test denominator */ 165 if ((xi3 == 0.0) || (sigma == 0.0)) { 166 PetscCheck(!ksp->errorifnotconverged, PetscObjectComm((PetscObject)ksp), PETSC_ERR_NOT_CONVERGED, "KSPSolve has failed due to zero inner product"); 167 ksp->reason = KSP_DIVERGED_BREAKDOWN; 168 PetscCall(PetscInfo(ksp, "KSPSolve has failed due to zero inner product\n")); 169 break; 170 } 171 172 /* scalar updates */ 173 omega = xi2 / xi3; 174 beta = -xi4 / sigma; 175 rho = PetscSqrtReal(PetscAbsScalar(xi1 - omega * xi2)); /* residual norm */ 176 177 /* vector updates */ 178 PetscCall(VecAXPBYPCZ(X, alpha, omega, 1.0, P2, S2)); /* x <- alpha * p2 + omega * s2 + x */ 179 180 /* convergence test */ 181 PetscCall(PetscObjectSAWsTakeAccess((PetscObject)ksp)); 182 ksp->its++; 183 if (ksp->normtype != KSP_NORM_NONE) ksp->rnorm = rho; 184 else ksp->rnorm = 0; 185 PetscCall(PetscObjectSAWsGrantAccess((PetscObject)ksp)); 186 PetscCall(KSPLogResidualHistory(ksp, ksp->rnorm)); 187 PetscCall(KSPMonitor(ksp, i + 1, ksp->rnorm)); 188 PetscCall((*ksp->converged)(ksp, i + 1, ksp->rnorm, &ksp->reason, ksp->cnvP)); 189 if (ksp->reason) break; 190 191 /* vector updates */ 192 PetscCall(PetscLogEventBegin(VEC_Ops, 0, 0, 0, 0)); 193 for (j = 0; j < N; j++) { 194 r[j] = s[j] - omega * t[j]; /* r <- s - omega t */ 195 p[j] = r[j] + beta * (p[j] - omega * v[j]); /* p <- r + beta * (p - omega v) */ 196 } 197 PetscCall(PetscLogFlops(6.0 * N)); 198 PetscCall(PetscLogEventEnd(VEC_Ops, 0, 0, 0, 0)); 199 } 200 201 if (i >= ksp->max_it) ksp->reason = KSP_DIVERGED_ITS; 202 PetscFunctionReturn(0); 203 } 204 205 /*MC 206 KSPFBCGSR - Implements a mathematically equivalent variant of FBiCGSTab. 207 208 Options Database Keys: 209 see KSPSolve() 210 211 Level: beginner 212 213 Notes: 214 Only allow right preconditioning 215 216 .seealso: `KSPCreate()`, `KSPSetType()`, `KSPType`, `KSP`, `KSPBICG`, `KSPFBCGSL`, `KSPSetPCSide()` 217 M*/ 218 PETSC_EXTERN PetscErrorCode KSPCreate_FBCGSR(KSP ksp) 219 { 220 KSP_BCGS *bcgs; 221 222 PetscFunctionBegin; 223 PetscCall(PetscNew(&bcgs)); 224 225 ksp->data = bcgs; 226 ksp->ops->setup = KSPSetUp_FBCGSR; 227 ksp->ops->solve = KSPSolve_FBCGSR; 228 ksp->ops->destroy = KSPDestroy_BCGS; 229 ksp->ops->reset = KSPReset_BCGS; 230 ksp->ops->buildsolution = KSPBuildSolution_BCGS; 231 ksp->ops->buildresidual = KSPBuildResidualDefault; 232 ksp->ops->setfromoptions = KSPSetFromOptions_BCGS; 233 ksp->pc_side = PC_RIGHT; /* set default PC side */ 234 235 PetscCall(KSPSetSupportedNorm(ksp, KSP_NORM_PRECONDITIONED, PC_LEFT, 3)); 236 PetscCall(KSPSetSupportedNorm(ksp, KSP_NORM_UNPRECONDITIONED, PC_RIGHT, 2)); 237 PetscCall(KSPSetSupportedNorm(ksp, KSP_NORM_NONE, PC_RIGHT, 1)); 238 PetscFunctionReturn(0); 239 } 240