1 2 /* 3 This file implements FBiCGStab-R. 4 Only allow right preconditioning. 5 FBiCGStab-R is a mathematically equivalent variant of FBiCGStab. Differences are: 6 (1) There are fewer MPI_Allreduce calls. 7 (2) The convergence occasionally is much faster than that of FBiCGStab. 8 */ 9 #include <../src/ksp/ksp/impls/bcgs/bcgsimpl.h> /*I "petscksp.h" I*/ 10 #include <petsc/private/vecimpl.h> 11 12 static PetscErrorCode KSPSetUp_FBCGSR(KSP ksp) { 13 PetscFunctionBegin; 14 PetscCall(KSPSetWorkVecs(ksp, 8)); 15 PetscFunctionReturn(0); 16 } 17 18 static PetscErrorCode KSPSolve_FBCGSR(KSP ksp) { 19 PetscInt i, j, N; 20 PetscScalar tau, sigma, alpha, omega, beta; 21 PetscReal rho; 22 PetscScalar xi1, xi2, xi3, xi4; 23 Vec X, B, P, P2, RP, R, V, S, T, S2; 24 PetscScalar *PETSC_RESTRICT rp, *PETSC_RESTRICT r, *PETSC_RESTRICT p; 25 PetscScalar *PETSC_RESTRICT v, *PETSC_RESTRICT s, *PETSC_RESTRICT t, *PETSC_RESTRICT s2; 26 PetscScalar insums[4], outsums[4]; 27 KSP_BCGS *bcgs = (KSP_BCGS *)ksp->data; 28 PC pc; 29 Mat mat; 30 31 PetscFunctionBegin; 32 PetscCheck(ksp->vec_rhs->petscnative, PetscObjectComm((PetscObject)ksp), PETSC_ERR_SUP, "Only coded for PETSc vectors"); 33 PetscCall(VecGetLocalSize(ksp->vec_sol, &N)); 34 35 X = ksp->vec_sol; 36 B = ksp->vec_rhs; 37 P2 = ksp->work[0]; 38 39 /* The followings are involved in modified inner product calculations and vector updates */ 40 RP = ksp->work[1]; 41 PetscCall(VecGetArray(RP, (PetscScalar **)&rp)); 42 PetscCall(VecRestoreArray(RP, NULL)); 43 R = ksp->work[2]; 44 PetscCall(VecGetArray(R, (PetscScalar **)&r)); 45 PetscCall(VecRestoreArray(R, NULL)); 46 P = ksp->work[3]; 47 PetscCall(VecGetArray(P, (PetscScalar **)&p)); 48 PetscCall(VecRestoreArray(P, NULL)); 49 V = ksp->work[4]; 50 PetscCall(VecGetArray(V, (PetscScalar **)&v)); 51 PetscCall(VecRestoreArray(V, NULL)); 52 S = ksp->work[5]; 53 PetscCall(VecGetArray(S, (PetscScalar **)&s)); 54 PetscCall(VecRestoreArray(S, NULL)); 55 T = ksp->work[6]; 56 PetscCall(VecGetArray(T, (PetscScalar **)&t)); 57 PetscCall(VecRestoreArray(T, NULL)); 58 S2 = ksp->work[7]; 59 PetscCall(VecGetArray(S2, (PetscScalar **)&s2)); 60 PetscCall(VecRestoreArray(S2, NULL)); 61 62 /* Only supports right preconditioning */ 63 PetscCheck(ksp->pc_side == PC_RIGHT, PetscObjectComm((PetscObject)ksp), PETSC_ERR_SUP, "KSP fbcgsr does not support %s", PCSides[ksp->pc_side]); 64 if (!ksp->guess_zero) { 65 if (!bcgs->guess) PetscCall(VecDuplicate(X, &bcgs->guess)); 66 PetscCall(VecCopy(X, bcgs->guess)); 67 } else { 68 PetscCall(VecSet(X, 0.0)); 69 } 70 71 /* Compute initial residual */ 72 PetscCall(KSPGetPC(ksp, &pc)); 73 PetscCall(PCSetUp(pc)); 74 PetscCall(PCGetOperators(pc, &mat, NULL)); 75 if (!ksp->guess_zero) { 76 PetscCall(KSP_MatMult(ksp, mat, X, P2)); /* P2 is used as temporary storage */ 77 PetscCall(VecCopy(B, R)); 78 PetscCall(VecAXPY(R, -1.0, P2)); 79 } else { 80 PetscCall(VecCopy(B, R)); 81 } 82 83 /* Test for nothing to do */ 84 PetscCall(VecNorm(R, NORM_2, &rho)); 85 PetscCall(PetscObjectSAWsTakeAccess((PetscObject)ksp)); 86 ksp->its = 0; 87 if (ksp->normtype != KSP_NORM_NONE) ksp->rnorm = rho; 88 else ksp->rnorm = 0; 89 PetscCall(PetscObjectSAWsGrantAccess((PetscObject)ksp)); 90 PetscCall(KSPLogResidualHistory(ksp, ksp->rnorm)); 91 PetscCall(KSPMonitor(ksp, 0, ksp->rnorm)); 92 PetscCall((*ksp->converged)(ksp, 0, ksp->rnorm, &ksp->reason, ksp->cnvP)); 93 if (ksp->reason) PetscFunctionReturn(0); 94 95 /* Initialize iterates */ 96 PetscCall(VecCopy(R, RP)); /* rp <- r */ 97 PetscCall(VecCopy(R, P)); /* p <- r */ 98 99 /* Big loop */ 100 for (i = 0; i < ksp->max_it; i++) { 101 /* matmult and pc */ 102 PetscCall(KSP_PCApply(ksp, P, P2)); /* p2 <- K p */ 103 PetscCall(KSP_MatMult(ksp, mat, P2, V)); /* v <- A p2 */ 104 105 /* inner prodcuts */ 106 if (i == 0) { 107 tau = rho * rho; 108 PetscCall(VecDot(V, RP, &sigma)); /* sigma <- (v,rp) */ 109 } else { 110 PetscCall(PetscLogEventBegin(VEC_ReduceArithmetic, 0, 0, 0, 0)); 111 tau = sigma = 0.0; 112 for (j = 0; j < N; j++) { 113 tau += r[j] * rp[j]; /* tau <- (r,rp) */ 114 sigma += v[j] * rp[j]; /* sigma <- (v,rp) */ 115 } 116 PetscCall(PetscLogFlops(4.0 * N)); 117 PetscCall(PetscLogEventEnd(VEC_ReduceArithmetic, 0, 0, 0, 0)); 118 insums[0] = tau; 119 insums[1] = sigma; 120 PetscCall(PetscLogEventBegin(VEC_ReduceCommunication, 0, 0, 0, 0)); 121 PetscCall(MPIU_Allreduce(insums, outsums, 2, MPIU_SCALAR, MPIU_SUM, PetscObjectComm((PetscObject)ksp))); 122 PetscCall(PetscLogEventEnd(VEC_ReduceCommunication, 0, 0, 0, 0)); 123 tau = outsums[0]; 124 sigma = outsums[1]; 125 } 126 127 /* scalar update */ 128 alpha = tau / sigma; 129 130 /* vector update */ 131 PetscCall(VecWAXPY(S, -alpha, V, R)); /* s <- r - alpha v */ 132 133 /* matmult and pc */ 134 PetscCall(KSP_PCApply(ksp, S, S2)); /* s2 <- K s */ 135 PetscCall(KSP_MatMult(ksp, mat, S2, T)); /* t <- A s2 */ 136 137 /* inner prodcuts */ 138 PetscCall(PetscLogEventBegin(VEC_ReduceArithmetic, 0, 0, 0, 0)); 139 xi1 = xi2 = xi3 = xi4 = 0.0; 140 for (j = 0; j < N; j++) { 141 xi1 += s[j] * s[j]; /* xi1 <- (s,s) */ 142 xi2 += t[j] * s[j]; /* xi2 <- (t,s) */ 143 xi3 += t[j] * t[j]; /* xi3 <- (t,t) */ 144 xi4 += t[j] * rp[j]; /* xi4 <- (t,rp) */ 145 } 146 PetscCall(PetscLogFlops(8.0 * N)); 147 PetscCall(PetscLogEventEnd(VEC_ReduceArithmetic, 0, 0, 0, 0)); 148 149 insums[0] = xi1; 150 insums[1] = xi2; 151 insums[2] = xi3; 152 insums[3] = xi4; 153 154 PetscCall(PetscLogEventBegin(VEC_ReduceCommunication, 0, 0, 0, 0)); 155 PetscCall(MPIU_Allreduce(insums, outsums, 4, MPIU_SCALAR, MPIU_SUM, PetscObjectComm((PetscObject)ksp))); 156 PetscCall(PetscLogEventEnd(VEC_ReduceCommunication, 0, 0, 0, 0)); 157 xi1 = outsums[0]; 158 xi2 = outsums[1]; 159 xi3 = outsums[2]; 160 xi4 = outsums[3]; 161 162 /* test denominator */ 163 if ((xi3 == 0.0) || (sigma == 0.0)) { 164 PetscCheck(!ksp->errorifnotconverged, PetscObjectComm((PetscObject)ksp), PETSC_ERR_NOT_CONVERGED, "KSPSolve has failed due to zero inner product"); 165 ksp->reason = KSP_DIVERGED_BREAKDOWN; 166 PetscCall(PetscInfo(ksp, "KSPSolve has failed due to zero inner product\n")); 167 break; 168 } 169 170 /* scalar updates */ 171 omega = xi2 / xi3; 172 beta = -xi4 / sigma; 173 rho = PetscSqrtReal(PetscAbsScalar(xi1 - omega * xi2)); /* residual norm */ 174 175 /* vector updates */ 176 PetscCall(VecAXPBYPCZ(X, alpha, omega, 1.0, P2, S2)); /* x <- alpha * p2 + omega * s2 + x */ 177 178 /* convergence test */ 179 PetscCall(PetscObjectSAWsTakeAccess((PetscObject)ksp)); 180 ksp->its++; 181 if (ksp->normtype != KSP_NORM_NONE) ksp->rnorm = rho; 182 else ksp->rnorm = 0; 183 PetscCall(PetscObjectSAWsGrantAccess((PetscObject)ksp)); 184 PetscCall(KSPLogResidualHistory(ksp, ksp->rnorm)); 185 PetscCall(KSPMonitor(ksp, i + 1, ksp->rnorm)); 186 PetscCall((*ksp->converged)(ksp, i + 1, ksp->rnorm, &ksp->reason, ksp->cnvP)); 187 if (ksp->reason) break; 188 189 /* vector updates */ 190 PetscCall(PetscLogEventBegin(VEC_Ops, 0, 0, 0, 0)); 191 for (j = 0; j < N; j++) { 192 r[j] = s[j] - omega * t[j]; /* r <- s - omega t */ 193 p[j] = r[j] + beta * (p[j] - omega * v[j]); /* p <- r + beta * (p - omega v) */ 194 } 195 PetscCall(PetscLogFlops(6.0 * N)); 196 PetscCall(PetscLogEventEnd(VEC_Ops, 0, 0, 0, 0)); 197 } 198 199 if (i >= ksp->max_it) ksp->reason = KSP_DIVERGED_ITS; 200 PetscFunctionReturn(0); 201 } 202 203 /*MC 204 KSPFBCGSR - Implements a mathematically equivalent variant of FBiCGSTab. 205 206 Options Database Keys: 207 see KSPSolve() 208 209 Level: beginner 210 211 Notes: 212 Only allow right preconditioning 213 214 .seealso: `KSPCreate()`, `KSPSetType()`, `KSPType`, `KSP`, `KSPBICG`, `KSPFBCGSL`, `KSPSetPCSide()` 215 M*/ 216 PETSC_EXTERN PetscErrorCode KSPCreate_FBCGSR(KSP ksp) { 217 KSP_BCGS *bcgs; 218 219 PetscFunctionBegin; 220 PetscCall(PetscNew(&bcgs)); 221 222 ksp->data = bcgs; 223 ksp->ops->setup = KSPSetUp_FBCGSR; 224 ksp->ops->solve = KSPSolve_FBCGSR; 225 ksp->ops->destroy = KSPDestroy_BCGS; 226 ksp->ops->reset = KSPReset_BCGS; 227 ksp->ops->buildsolution = KSPBuildSolution_BCGS; 228 ksp->ops->buildresidual = KSPBuildResidualDefault; 229 ksp->ops->setfromoptions = KSPSetFromOptions_BCGS; 230 ksp->pc_side = PC_RIGHT; /* set default PC side */ 231 232 PetscCall(KSPSetSupportedNorm(ksp, KSP_NORM_PRECONDITIONED, PC_LEFT, 3)); 233 PetscCall(KSPSetSupportedNorm(ksp, KSP_NORM_UNPRECONDITIONED, PC_RIGHT, 2)); 234 PetscCall(KSPSetSupportedNorm(ksp, KSP_NORM_NONE, PC_RIGHT, 1)); 235 PetscFunctionReturn(0); 236 } 237