1 2 /* 3 This file implements FBiCGStab-R. 4 Only allow right preconditioning. 5 FBiCGStab-R is a mathematically equivalent variant of FBiCGStab. Differences are: 6 (1) There are fewer MPI_Allreduce calls. 7 (2) The convergence occasionally is much faster than that of FBiCGStab. 8 */ 9 #include <../src/ksp/ksp/impls/bcgs/bcgsimpl.h> /*I "petscksp.h" I*/ 10 #include <petsc/private/vecimpl.h> 11 12 static PetscErrorCode KSPSetUp_FBCGSR(KSP ksp) 13 { 14 PetscFunctionBegin; 15 PetscCall(KSPSetWorkVecs(ksp,8)); 16 PetscFunctionReturn(0); 17 } 18 19 static PetscErrorCode KSPSolve_FBCGSR(KSP ksp) 20 { 21 PetscInt i,j,N; 22 PetscScalar tau,sigma,alpha,omega,beta; 23 PetscReal rho; 24 PetscScalar xi1,xi2,xi3,xi4; 25 Vec X,B,P,P2,RP,R,V,S,T,S2; 26 PetscScalar *PETSC_RESTRICT rp, *PETSC_RESTRICT r, *PETSC_RESTRICT p; 27 PetscScalar *PETSC_RESTRICT v, *PETSC_RESTRICT s, *PETSC_RESTRICT t, *PETSC_RESTRICT s2; 28 PetscScalar insums[4],outsums[4]; 29 KSP_BCGS *bcgs = (KSP_BCGS*)ksp->data; 30 PC pc; 31 Mat mat; 32 33 PetscFunctionBegin; 34 PetscCheck(ksp->vec_rhs->petscnative,PetscObjectComm((PetscObject)ksp),PETSC_ERR_SUP,"Only coded for PETSc vectors"); 35 PetscCall(VecGetLocalSize(ksp->vec_sol,&N)); 36 37 X = ksp->vec_sol; 38 B = ksp->vec_rhs; 39 P2 = ksp->work[0]; 40 41 /* The followings are involved in modified inner product calculations and vector updates */ 42 RP = ksp->work[1]; PetscCall(VecGetArray(RP,(PetscScalar**)&rp)); PetscCall(VecRestoreArray(RP,NULL)); 43 R = ksp->work[2]; PetscCall(VecGetArray(R,(PetscScalar**)&r)); PetscCall(VecRestoreArray(R,NULL)); 44 P = ksp->work[3]; PetscCall(VecGetArray(P,(PetscScalar**)&p)); PetscCall(VecRestoreArray(P,NULL)); 45 V = ksp->work[4]; PetscCall(VecGetArray(V,(PetscScalar**)&v)); PetscCall(VecRestoreArray(V,NULL)); 46 S = ksp->work[5]; PetscCall(VecGetArray(S,(PetscScalar**)&s)); PetscCall(VecRestoreArray(S,NULL)); 47 T = ksp->work[6]; PetscCall(VecGetArray(T,(PetscScalar**)&t)); PetscCall(VecRestoreArray(T,NULL)); 48 S2 = ksp->work[7]; PetscCall(VecGetArray(S2,(PetscScalar**)&s2)); PetscCall(VecRestoreArray(S2,NULL)); 49 50 /* Only supports right preconditioning */ 51 PetscCheck(ksp->pc_side == PC_RIGHT,PetscObjectComm((PetscObject)ksp),PETSC_ERR_SUP,"KSP fbcgsr does not support %s",PCSides[ksp->pc_side]); 52 if (!ksp->guess_zero) { 53 if (!bcgs->guess) { 54 PetscCall(VecDuplicate(X,&bcgs->guess)); 55 } 56 PetscCall(VecCopy(X,bcgs->guess)); 57 } else { 58 PetscCall(VecSet(X,0.0)); 59 } 60 61 /* Compute initial residual */ 62 PetscCall(KSPGetPC(ksp,&pc)); 63 PetscCall(PCSetUp(pc)); 64 PetscCall(PCGetOperators(pc,&mat,NULL)); 65 if (!ksp->guess_zero) { 66 PetscCall(KSP_MatMult(ksp,mat,X,P2)); /* P2 is used as temporary storage */ 67 PetscCall(VecCopy(B,R)); 68 PetscCall(VecAXPY(R,-1.0,P2)); 69 } else { 70 PetscCall(VecCopy(B,R)); 71 } 72 73 /* Test for nothing to do */ 74 PetscCall(VecNorm(R,NORM_2,&rho)); 75 PetscCall(PetscObjectSAWsTakeAccess((PetscObject)ksp)); 76 ksp->its = 0; 77 if (ksp->normtype != KSP_NORM_NONE) ksp->rnorm = rho; 78 else ksp->rnorm = 0; 79 PetscCall(PetscObjectSAWsGrantAccess((PetscObject)ksp)); 80 PetscCall(KSPLogResidualHistory(ksp,ksp->rnorm)); 81 PetscCall(KSPMonitor(ksp,0,ksp->rnorm)); 82 PetscCall((*ksp->converged)(ksp,0,ksp->rnorm,&ksp->reason,ksp->cnvP)); 83 if (ksp->reason) PetscFunctionReturn(0); 84 85 /* Initialize iterates */ 86 PetscCall(VecCopy(R,RP)); /* rp <- r */ 87 PetscCall(VecCopy(R,P)); /* p <- r */ 88 89 /* Big loop */ 90 for (i=0; i<ksp->max_it; i++) { 91 92 /* matmult and pc */ 93 PetscCall(KSP_PCApply(ksp,P,P2)); /* p2 <- K p */ 94 PetscCall(KSP_MatMult(ksp,mat,P2,V)); /* v <- A p2 */ 95 96 /* inner prodcuts */ 97 if (i==0) { 98 tau = rho*rho; 99 PetscCall(VecDot(V,RP,&sigma)); /* sigma <- (v,rp) */ 100 } else { 101 PetscCall(PetscLogEventBegin(VEC_ReduceArithmetic,0,0,0,0)); 102 tau = sigma = 0.0; 103 for (j=0; j<N; j++) { 104 tau += r[j]*rp[j]; /* tau <- (r,rp) */ 105 sigma += v[j]*rp[j]; /* sigma <- (v,rp) */ 106 } 107 PetscCall(PetscLogFlops(4.0*N)); 108 PetscCall(PetscLogEventEnd(VEC_ReduceArithmetic,0,0,0,0)); 109 insums[0] = tau; 110 insums[1] = sigma; 111 PetscCall(PetscLogEventBegin(VEC_ReduceCommunication,0,0,0,0)); 112 PetscCall(MPIU_Allreduce(insums,outsums,2,MPIU_SCALAR,MPIU_SUM,PetscObjectComm((PetscObject)ksp))); 113 PetscCall(PetscLogEventEnd(VEC_ReduceCommunication,0,0,0,0)); 114 tau = outsums[0]; 115 sigma = outsums[1]; 116 } 117 118 /* scalar update */ 119 alpha = tau / sigma; 120 121 /* vector update */ 122 PetscCall(VecWAXPY(S,-alpha,V,R)); /* s <- r - alpha v */ 123 124 /* matmult and pc */ 125 PetscCall(KSP_PCApply(ksp,S,S2)); /* s2 <- K s */ 126 PetscCall(KSP_MatMult(ksp,mat,S2,T)); /* t <- A s2 */ 127 128 /* inner prodcuts */ 129 PetscCall(PetscLogEventBegin(VEC_ReduceArithmetic,0,0,0,0)); 130 xi1 = xi2 = xi3 = xi4 = 0.0; 131 for (j=0; j<N; j++) { 132 xi1 += s[j]*s[j]; /* xi1 <- (s,s) */ 133 xi2 += t[j]*s[j]; /* xi2 <- (t,s) */ 134 xi3 += t[j]*t[j]; /* xi3 <- (t,t) */ 135 xi4 += t[j]*rp[j]; /* xi4 <- (t,rp) */ 136 } 137 PetscCall(PetscLogFlops(8.0*N)); 138 PetscCall(PetscLogEventEnd(VEC_ReduceArithmetic,0,0,0,0)); 139 140 insums[0] = xi1; 141 insums[1] = xi2; 142 insums[2] = xi3; 143 insums[3] = xi4; 144 145 PetscCall(PetscLogEventBegin(VEC_ReduceCommunication,0,0,0,0)); 146 PetscCall(MPIU_Allreduce(insums,outsums,4,MPIU_SCALAR,MPIU_SUM,PetscObjectComm((PetscObject)ksp))); 147 PetscCall(PetscLogEventEnd(VEC_ReduceCommunication,0,0,0,0)); 148 xi1 = outsums[0]; 149 xi2 = outsums[1]; 150 xi3 = outsums[2]; 151 xi4 = outsums[3]; 152 153 /* test denominator */ 154 if ((xi3 == 0.0) || (sigma == 0.0)) { 155 PetscCheck(!ksp->errorifnotconverged,PetscObjectComm((PetscObject)ksp),PETSC_ERR_NOT_CONVERGED,"KSPSolve has failed due to zero inner product"); 156 ksp->reason = KSP_DIVERGED_BREAKDOWN; 157 PetscCall(PetscInfo(ksp,"KSPSolve has failed due to zero inner product\n")); 158 break; 159 } 160 161 /* scalar updates */ 162 omega = xi2 / xi3; 163 beta = -xi4 / sigma; 164 rho = PetscSqrtReal(PetscAbsScalar(xi1 - omega * xi2)); /* residual norm */ 165 166 /* vector updates */ 167 PetscCall(VecAXPBYPCZ(X,alpha,omega,1.0,P2,S2)); /* x <- alpha * p2 + omega * s2 + x */ 168 169 /* convergence test */ 170 PetscCall(PetscObjectSAWsTakeAccess((PetscObject)ksp)); 171 ksp->its++; 172 if (ksp->normtype != KSP_NORM_NONE) ksp->rnorm = rho; 173 else ksp->rnorm = 0; 174 PetscCall(PetscObjectSAWsGrantAccess((PetscObject)ksp)); 175 PetscCall(KSPLogResidualHistory(ksp,ksp->rnorm)); 176 PetscCall(KSPMonitor(ksp,i+1,ksp->rnorm)); 177 PetscCall((*ksp->converged)(ksp,i+1,ksp->rnorm,&ksp->reason,ksp->cnvP)); 178 if (ksp->reason) break; 179 180 /* vector updates */ 181 PetscCall(PetscLogEventBegin(VEC_Ops,0,0,0,0)); 182 for (j=0; j<N; j++) { 183 r[j] = s[j] - omega * t[j]; /* r <- s - omega t */ 184 p[j] = r[j] + beta * (p[j] - omega * v[j]); /* p <- r + beta * (p - omega v) */ 185 } 186 PetscCall(PetscLogFlops(6.0*N)); 187 PetscCall(PetscLogEventEnd(VEC_Ops,0,0,0,0)); 188 189 } 190 191 if (i >= ksp->max_it) ksp->reason = KSP_DIVERGED_ITS; 192 PetscFunctionReturn(0); 193 } 194 195 /*MC 196 KSPFBCGSR - Implements a mathematically equivalent variant of FBiCGSTab. 197 198 Options Database Keys: 199 see KSPSolve() 200 201 Level: beginner 202 203 Notes: 204 Only allow right preconditioning 205 206 .seealso: `KSPCreate()`, `KSPSetType()`, `KSPType`, `KSP`, `KSPBICG`, `KSPFBCGSL`, `KSPSetPCSide()` 207 M*/ 208 PETSC_EXTERN PetscErrorCode KSPCreate_FBCGSR(KSP ksp) 209 { 210 KSP_BCGS *bcgs; 211 212 PetscFunctionBegin; 213 PetscCall(PetscNewLog(ksp,&bcgs)); 214 215 ksp->data = bcgs; 216 ksp->ops->setup = KSPSetUp_FBCGSR; 217 ksp->ops->solve = KSPSolve_FBCGSR; 218 ksp->ops->destroy = KSPDestroy_BCGS; 219 ksp->ops->reset = KSPReset_BCGS; 220 ksp->ops->buildsolution = KSPBuildSolution_BCGS; 221 ksp->ops->buildresidual = KSPBuildResidualDefault; 222 ksp->ops->setfromoptions = KSPSetFromOptions_BCGS; 223 ksp->pc_side = PC_RIGHT; /* set default PC side */ 224 225 PetscCall(KSPSetSupportedNorm(ksp,KSP_NORM_PRECONDITIONED,PC_LEFT,3)); 226 PetscCall(KSPSetSupportedNorm(ksp,KSP_NORM_UNPRECONDITIONED,PC_RIGHT,2)); 227 PetscCall(KSPSetSupportedNorm(ksp,KSP_NORM_NONE,PC_RIGHT,1)); 228 PetscFunctionReturn(0); 229 } 230