1 2 /* 3 This file implements FBiCGStab-R. 4 Only allow right preconditioning. 5 FBiCGStab-R is a mathematically equivalent variant of FBiCGStab. Differences are: 6 (1) There are fewer MPI_Allreduce calls. 7 (2) The convergence occasionally is much faster than that of FBiCGStab. 8 */ 9 #include <../src/ksp/ksp/impls/bcgs/bcgsimpl.h> /*I "petscksp.h" I*/ 10 11 #undef __FUNCT__ 12 #define __FUNCT__ "KSPSetUp_FBCGSR" 13 PetscErrorCode KSPSetUp_FBCGSR(KSP ksp) 14 { 15 PetscErrorCode ierr; 16 17 PetscFunctionBegin; 18 ierr = KSPDefaultGetWork(ksp,8);CHKERRQ(ierr); 19 PetscFunctionReturn(0); 20 } 21 22 #include <petsc-private/pcimpl.h> /*I "petscksp.h" I*/ 23 #undef __FUNCT__ 24 #define __FUNCT__ "KSPSolve_FBCGSR" 25 PetscErrorCode KSPSolve_FBCGSR(KSP ksp) 26 { 27 PetscErrorCode ierr; 28 PetscInt i,j,N; 29 PetscScalar tau,sigma,alpha,omega,beta; 30 PetscReal rho; 31 PetscScalar xi1,xi2,xi3,xi4; 32 Vec X,B,P,P2,RP,R,V,S,T,S2; 33 PetscScalar *PETSC_RESTRICT rp, *PETSC_RESTRICT r, *PETSC_RESTRICT p; 34 PetscScalar *PETSC_RESTRICT v, *PETSC_RESTRICT s, *PETSC_RESTRICT t, *PETSC_RESTRICT s2; 35 PetscScalar insums[4],outsums[4]; 36 KSP_BCGS *bcgs = (KSP_BCGS*)ksp->data; 37 PC pc; 38 39 PetscFunctionBegin; 40 if (!ksp->vec_rhs->petscnative) SETERRQ(((PetscObject)ksp)->comm,PETSC_ERR_SUP,"Only coded for PETSc vectors"); 41 ierr = VecGetLocalSize(ksp->vec_sol,&N);CHKERRQ(ierr); 42 43 X = ksp->vec_sol; 44 B = ksp->vec_rhs; 45 P2 = ksp->work[0]; 46 47 /* The followings are involved in modified inner product calculations and vector updates */ 48 RP = ksp->work[1]; ierr = VecGetArray(RP,(PetscScalar**)&rp);CHKERRQ(ierr); ierr = VecRestoreArray(RP,PETSC_NULL);CHKERRQ(ierr); 49 R = ksp->work[2]; ierr = VecGetArray(R,(PetscScalar**)&r);CHKERRQ(ierr); ierr = VecRestoreArray(R,PETSC_NULL);CHKERRQ(ierr); 50 P = ksp->work[3]; ierr = VecGetArray(P,(PetscScalar**)&p);CHKERRQ(ierr); ierr = VecRestoreArray(P,PETSC_NULL);CHKERRQ(ierr); 51 V = ksp->work[4]; ierr = VecGetArray(V,(PetscScalar**)&v);CHKERRQ(ierr); ierr = VecRestoreArray(V,PETSC_NULL);CHKERRQ(ierr); 52 S = ksp->work[5]; ierr = VecGetArray(S,(PetscScalar**)&s);CHKERRQ(ierr); ierr = VecRestoreArray(S,PETSC_NULL);CHKERRQ(ierr); 53 T = ksp->work[6]; ierr = VecGetArray(T,(PetscScalar**)&t);CHKERRQ(ierr); ierr = VecRestoreArray(T,PETSC_NULL);CHKERRQ(ierr); 54 S2 = ksp->work[7]; ierr = VecGetArray(S2,(PetscScalar**)&s2);CHKERRQ(ierr); ierr = VecRestoreArray(S2,PETSC_NULL);CHKERRQ(ierr); 55 56 /* Only supports right preconditioning */ 57 if (ksp->pc_side != PC_RIGHT) 58 SETERRQ1(((PetscObject)ksp)->comm,PETSC_ERR_SUP,"KSP fbcgsr does not support %s",PCSides[ksp->pc_side]); 59 if (!ksp->guess_zero) { 60 if (!bcgs->guess) { 61 ierr = VecDuplicate(X,&bcgs->guess);CHKERRQ(ierr); 62 } 63 ierr = VecCopy(X,bcgs->guess);CHKERRQ(ierr); 64 } else { 65 ierr = VecSet(X,0.0);CHKERRQ(ierr); 66 } 67 68 /* Compute initial residual */ 69 ierr = KSPGetPC(ksp,&pc);CHKERRQ(ierr); 70 ierr = PCSetUp(pc);CHKERRQ(ierr); 71 if (!ksp->guess_zero) { 72 ierr = MatMult(pc->mat,X,P2);CHKERRQ(ierr); /* P2 is used as temporary storage */ 73 ierr = VecCopy(B,R);CHKERRQ(ierr); 74 ierr = VecAXPY(R,-1.0,P2);CHKERRQ(ierr); 75 } 76 else { 77 ierr = VecCopy(B,R);CHKERRQ(ierr); 78 } 79 80 /* Test for nothing to do */ 81 if (ksp->normtype != KSP_NORM_NONE) { 82 ierr = VecNorm(R,NORM_2,&rho);CHKERRQ(ierr); 83 } 84 ierr = PetscObjectTakeAccess(ksp);CHKERRQ(ierr); 85 ksp->its = 0; 86 ksp->rnorm = rho; 87 ierr = PetscObjectGrantAccess(ksp);CHKERRQ(ierr); 88 KSPLogResidualHistory(ksp,rho); 89 ierr = KSPMonitor(ksp,0,rho);CHKERRQ(ierr); 90 ierr = (*ksp->converged)(ksp,0,rho,&ksp->reason,ksp->cnvP);CHKERRQ(ierr); 91 if (ksp->reason) PetscFunctionReturn(0); 92 93 /* Initialize iterates */ 94 ierr = VecCopy(R,RP);CHKERRQ(ierr); /* rp <- r */ 95 ierr = VecCopy(R,P);CHKERRQ(ierr); /* p <- r */ 96 97 /* Big loop */ 98 for (i=0; i<ksp->max_it; i++) { 99 100 /* matmult and pc */ 101 ierr = PCApply(pc,P,P2);CHKERRQ(ierr); /* p2 <- K p */ 102 ierr = MatMult(pc->mat,P2,V);CHKERRQ(ierr); /* v <- A p2 */ 103 104 /* inner prodcuts */ 105 if (i==0) { 106 tau = rho*rho; 107 ierr = VecDot(V,RP,&sigma);CHKERRQ(ierr); /* sigma <- (v,rp) */ 108 } 109 else { 110 ierr = PetscLogEventBegin(VEC_ReduceArithmetic,0,0,0,0);CHKERRQ(ierr); 111 tau = sigma = 0.0; 112 for (j=0; j<N; j++) { 113 tau += r[j]*rp[j]; /* tau <- (r,rp) */ 114 sigma += v[j]*rp[j]; /* sigma <- (v,rp) */ 115 } 116 PetscLogFlops(4.0*N); 117 ierr = PetscLogEventEnd(VEC_ReduceArithmetic,0,0,0,0);CHKERRQ(ierr); 118 insums[0] = tau; 119 insums[1] = sigma; 120 ierr = PetscLogEventBarrierBegin(VEC_ReduceBarrier,0,0,0,0,((PetscObject)ksp)->comm);CHKERRQ(ierr); 121 ierr = MPI_Allreduce(insums,outsums,2,MPIU_SCALAR,MPIU_SUM,((PetscObject)ksp)->comm);CHKERRQ(ierr); 122 ierr = PetscLogEventBarrierEnd(VEC_ReduceBarrier,0,0,0,0,((PetscObject)ksp)->comm);CHKERRQ(ierr); 123 tau = outsums[0]; 124 sigma = outsums[1]; 125 } 126 127 /* scalar update */ 128 alpha = tau / sigma; 129 130 /* vector update */ 131 ierr = VecWAXPY(S,-alpha,V,R);CHKERRQ(ierr); /* s <- r - alpha v */ 132 133 /* matmult and pc */ 134 ierr = PCApply(pc,S,S2);CHKERRQ(ierr); /* s2 <- K s */ 135 ierr = MatMult(pc->mat,S2,T);CHKERRQ(ierr); /* t <- A s2 */ 136 137 /* inner prodcuts */ 138 ierr = PetscLogEventBegin(VEC_ReduceArithmetic,0,0,0,0);CHKERRQ(ierr); 139 xi1 = xi2 = xi3 = xi4 = 0.0; 140 for (j=0; j<N; j++) { 141 xi1 += s[j]*s[j]; /* xi1 <- (s,s) */ 142 xi2 += t[j]*s[j]; /* xi2 <- (t,s) */ 143 xi3 += t[j]*t[j]; /* xi3 <- (t,t) */ 144 xi4 += t[j]*rp[j]; /* xi4 <- (t,rp) */ 145 } 146 PetscLogFlops(8.0*N); 147 ierr = PetscLogEventEnd(VEC_ReduceArithmetic,0,0,0,0);CHKERRQ(ierr); 148 insums[0] = xi1; 149 insums[1] = xi2; 150 insums[2] = xi3; 151 insums[3] = xi4; 152 ierr = PetscLogEventBarrierBegin(VEC_ReduceBarrier,0,0,0,0,((PetscObject)ksp)->comm);CHKERRQ(ierr); 153 ierr = MPI_Allreduce(insums,outsums,4,MPIU_SCALAR,MPIU_SUM,((PetscObject)ksp)->comm);CHKERRQ(ierr); 154 ierr = PetscLogEventBarrierEnd(VEC_ReduceBarrier,0,0,0,0,((PetscObject)ksp)->comm);CHKERRQ(ierr); 155 xi1 = outsums[0]; 156 xi2 = outsums[1]; 157 xi3 = outsums[2]; 158 xi4 = outsums[3]; 159 160 /* test denominator */ 161 if (xi3 == 0.0) SETERRQ(((PetscObject)ksp)->comm,PETSC_ERR_PLIB,"Divide by zero"); 162 if (sigma == 0.0) SETERRQ(((PetscObject)ksp)->comm,PETSC_ERR_PLIB,"Divide by zero"); 163 164 /* scalar updates */ 165 omega = xi2 / xi3; 166 beta = - xi4 / sigma; 167 rho = PetscSqrtReal(PetscAbsScalar(xi1 - omega * xi2)); /* residual norm */ 168 169 /* vector updates */ 170 ierr = VecAXPBYPCZ(X,alpha,omega,1.0,P2,S2);CHKERRQ(ierr); /* x <- alpha * p2 + omega * s2 + x */ 171 172 /* convergence test */ 173 ierr = PetscObjectTakeAccess(ksp);CHKERRQ(ierr); 174 ksp->its++; 175 ksp->rnorm = rho; 176 ierr = PetscObjectGrantAccess(ksp);CHKERRQ(ierr); 177 KSPLogResidualHistory(ksp,rho); 178 ierr = KSPMonitor(ksp,i+1,rho);CHKERRQ(ierr); 179 ierr = (*ksp->converged)(ksp,i+1,rho,&ksp->reason,ksp->cnvP);CHKERRQ(ierr); 180 if (ksp->reason) break; 181 182 /* vector updates */ 183 ierr = PetscLogEventBegin(VEC_Ops,0,0,0,0);CHKERRQ(ierr); 184 for (j=0; j<N; j++) { 185 r[j] = s[j] - omega * t[j]; /* r <- s - omega t */ 186 p[j] = r[j] + beta * (p[j] - omega * v[j]); /* p <- r + beta * (p - omega v) */ 187 } 188 PetscLogFlops(6.0*N); 189 ierr = PetscLogEventEnd(VEC_Ops,0,0,0,0);CHKERRQ(ierr); 190 191 } 192 193 if (i >= ksp->max_it) { 194 ksp->reason = KSP_DIVERGED_ITS; 195 } 196 197 PetscFunctionReturn(0); 198 } 199 200 /*MC 201 KSPFBCGSR - Implements a mathematically equivalent variant of FBiCGSTab. 202 203 Options Database Keys: 204 . see KSPSolve() 205 206 Level: beginner 207 208 Notes: Only allow right preconditioning 209 210 .seealso: KSPCreate(), KSPSetType(), KSPType (for list of available types), KSP, KSPBICG, KSPFBCGSL, KSPSetPCSide() 211 M*/ 212 EXTERN_C_BEGIN 213 #undef __FUNCT__ 214 #define __FUNCT__ "KSPCreate_FBCGSR" 215 PetscErrorCode KSPCreate_FBCGSR(KSP ksp) 216 { 217 PetscErrorCode ierr; 218 KSP_BCGS *bcgs; 219 220 PetscFunctionBegin; 221 ierr = PetscNewLog(ksp,KSP_BCGS,&bcgs);CHKERRQ(ierr); 222 ksp->data = bcgs; 223 ksp->ops->setup = KSPSetUp_FBCGSR; 224 ksp->ops->solve = KSPSolve_FBCGSR; 225 ksp->ops->destroy = KSPDestroy_BCGS; 226 ksp->ops->reset = KSPReset_BCGS; 227 ksp->ops->buildsolution = KSPBuildSolution_BCGS; 228 ksp->ops->buildresidual = KSPDefaultBuildResidual; 229 ksp->ops->setfromoptions = KSPSetFromOptions_BCGS; 230 ksp->ops->view = KSPView_BCGS; 231 ksp->pc_side = PC_RIGHT; /* set default PC side */ 232 233 ierr = KSPSetSupportedNorm(ksp,KSP_NORM_PRECONDITIONED,PC_LEFT,2);CHKERRQ(ierr); 234 ierr = KSPSetSupportedNorm(ksp,KSP_NORM_UNPRECONDITIONED,PC_RIGHT,1);CHKERRQ(ierr); 235 PetscFunctionReturn(0); 236 } 237 EXTERN_C_END 238