xref: /petsc/src/ksp/ksp/impls/bcgs/fbcgsr/fbcgsr.c (revision 0ec63f53104f4b8d70d10e7c493a96759e264eea)
1 
2 /*
3     This file implements FBiCGStab-R.
4     Only allow right preconditioning.
5     FBiCGStab-R is a mathematically equivalent variant of FBiCGStab. Differences are:
6       (1) There are fewer MPI_Allreduce calls.
7       (2) The convergence occasionally is much faster than that of FBiCGStab.
8 */
9 #include <../src/ksp/ksp/impls/bcgs/bcgsimpl.h>       /*I  "petscksp.h"  I*/
10 
11 #undef __FUNCT__
12 #define __FUNCT__ "KSPSetUp_FBCGSR"
13 PetscErrorCode KSPSetUp_FBCGSR(KSP ksp)
14 {
15   PetscErrorCode ierr;
16 
17   PetscFunctionBegin;
18   ierr = KSPDefaultGetWork(ksp,8);CHKERRQ(ierr);
19   PetscFunctionReturn(0);
20 }
21 
22 #include <petsc-private/pcimpl.h>            /*I "petscksp.h" I*/
23 #undef __FUNCT__
24 #define __FUNCT__ "KSPSolve_FBCGSR"
25 PetscErrorCode  KSPSolve_FBCGSR(KSP ksp)
26 {
27   PetscErrorCode    ierr;
28   PetscInt          i,j,N;
29   PetscScalar       tau,sigma,alpha,omega,beta;
30   PetscReal         rho;
31   PetscScalar       xi1,xi2,xi3,xi4;
32   Vec               X,B,P,P2,RP,R,V,S,T,S2;
33   PetscScalar       *PETSC_RESTRICT rp, *PETSC_RESTRICT r, *PETSC_RESTRICT p;
34   PetscScalar       *PETSC_RESTRICT v, *PETSC_RESTRICT s, *PETSC_RESTRICT t, *PETSC_RESTRICT s2;
35   PetscScalar       insums[4],outsums[4];
36   KSP_BCGS          *bcgs = (KSP_BCGS*)ksp->data;
37   PC                pc;
38 
39   PetscFunctionBegin;
40   if (!ksp->vec_rhs->petscnative) SETERRQ(((PetscObject)ksp)->comm,PETSC_ERR_SUP,"Only coded for PETSc vectors");
41   ierr = VecGetLocalSize(ksp->vec_sol,&N);CHKERRQ(ierr);
42 
43   X   = ksp->vec_sol;
44   B   = ksp->vec_rhs;
45   P2  = ksp->work[0];
46 
47   /* The followings are involved in modified inner product calculations and vector updates */
48   RP  = ksp->work[1]; ierr = VecGetArray(RP,(PetscScalar**)&rp);CHKERRQ(ierr); ierr = VecRestoreArray(RP,PETSC_NULL);CHKERRQ(ierr);
49   R   = ksp->work[2]; ierr = VecGetArray(R,(PetscScalar**)&r);CHKERRQ(ierr);   ierr = VecRestoreArray(R,PETSC_NULL);CHKERRQ(ierr);
50   P   = ksp->work[3]; ierr = VecGetArray(P,(PetscScalar**)&p);CHKERRQ(ierr);   ierr = VecRestoreArray(P,PETSC_NULL);CHKERRQ(ierr);
51   V   = ksp->work[4]; ierr = VecGetArray(V,(PetscScalar**)&v);CHKERRQ(ierr);   ierr = VecRestoreArray(V,PETSC_NULL);CHKERRQ(ierr);
52   S   = ksp->work[5]; ierr = VecGetArray(S,(PetscScalar**)&s);CHKERRQ(ierr);   ierr = VecRestoreArray(S,PETSC_NULL);CHKERRQ(ierr);
53   T   = ksp->work[6]; ierr = VecGetArray(T,(PetscScalar**)&t);CHKERRQ(ierr);   ierr = VecRestoreArray(T,PETSC_NULL);CHKERRQ(ierr);
54   S2  = ksp->work[7]; ierr = VecGetArray(S2,(PetscScalar**)&s2);CHKERRQ(ierr); ierr = VecRestoreArray(S2,PETSC_NULL);CHKERRQ(ierr);
55 
56   /* Only supports right preconditioning */
57   if (ksp->pc_side != PC_RIGHT)
58     SETERRQ1(((PetscObject)ksp)->comm,PETSC_ERR_SUP,"KSP fbcgsr does not support %s",PCSides[ksp->pc_side]);
59   if (!ksp->guess_zero) {
60     if (!bcgs->guess) {
61       ierr = VecDuplicate(X,&bcgs->guess);CHKERRQ(ierr);
62     }
63     ierr = VecCopy(X,bcgs->guess);CHKERRQ(ierr);
64   } else {
65     ierr = VecSet(X,0.0);CHKERRQ(ierr);
66   }
67 
68   /* Compute initial residual */
69   ierr = KSPGetPC(ksp,&pc);CHKERRQ(ierr);
70   ierr = PCSetUp(pc);CHKERRQ(ierr);
71   if (!ksp->guess_zero) {
72     ierr = MatMult(pc->mat,X,P2);CHKERRQ(ierr); /* P2 is used as temporary storage */
73     ierr = VecCopy(B,R);CHKERRQ(ierr);
74     ierr = VecAXPY(R,-1.0,P2);CHKERRQ(ierr);
75   }
76   else {
77     ierr = VecCopy(B,R);CHKERRQ(ierr);
78   }
79 
80   /* Test for nothing to do */
81   if (ksp->normtype != KSP_NORM_NONE) {
82     ierr = VecNorm(R,NORM_2,&rho);CHKERRQ(ierr);
83   }
84   ierr = PetscObjectTakeAccess(ksp);CHKERRQ(ierr);
85   ksp->its   = 0;
86   ksp->rnorm = rho;
87   ierr = PetscObjectGrantAccess(ksp);CHKERRQ(ierr);
88   KSPLogResidualHistory(ksp,rho);
89   ierr = KSPMonitor(ksp,0,rho);CHKERRQ(ierr);
90   ierr = (*ksp->converged)(ksp,0,rho,&ksp->reason,ksp->cnvP);CHKERRQ(ierr);
91   if (ksp->reason) PetscFunctionReturn(0);
92 
93   /* Initialize iterates */
94   ierr = VecCopy(R,RP);CHKERRQ(ierr); /* rp <- r */
95   ierr = VecCopy(R,P);CHKERRQ(ierr); /* p <- r */
96 
97   /* Big loop */
98   for (i=0; i<ksp->max_it; i++) {
99 
100     /* matmult and pc */
101     ierr = PCApply(pc,P,P2);CHKERRQ(ierr); /* p2 <- K p */
102     ierr = MatMult(pc->mat,P2,V);CHKERRQ(ierr); /* v <- A p2 */
103 
104     /* inner prodcuts */
105     if (i==0) {
106       tau = rho*rho;
107       ierr = VecDot(V,RP,&sigma);CHKERRQ(ierr); /* sigma <- (v,rp) */
108     }
109     else {
110       ierr = PetscLogEventBegin(VEC_ReduceArithmetic,0,0,0,0);CHKERRQ(ierr);
111       tau = sigma = 0.0;
112       for (j=0; j<N; j++) {
113         tau += r[j]*rp[j]; /* tau <- (r,rp) */
114         sigma += v[j]*rp[j]; /* sigma <- (v,rp) */
115       }
116       PetscLogFlops(4.0*N);
117       ierr = PetscLogEventEnd(VEC_ReduceArithmetic,0,0,0,0);CHKERRQ(ierr);
118       insums[0] = tau;
119       insums[1] = sigma;
120       ierr = PetscLogEventBarrierBegin(VEC_ReduceBarrier,0,0,0,0,((PetscObject)ksp)->comm);CHKERRQ(ierr);
121       ierr = MPI_Allreduce(insums,outsums,2,MPIU_SCALAR,MPIU_SUM,((PetscObject)ksp)->comm);CHKERRQ(ierr);
122       ierr = PetscLogEventBarrierEnd(VEC_ReduceBarrier,0,0,0,0,((PetscObject)ksp)->comm);CHKERRQ(ierr);
123       tau = outsums[0];
124       sigma = outsums[1];
125     }
126 
127     /* scalar update */
128     alpha = tau / sigma;
129 
130     /* vector update */
131     ierr = VecWAXPY(S,-alpha,V,R);CHKERRQ(ierr);  /* s <- r - alpha v */
132 
133     /* matmult and pc */
134     ierr = PCApply(pc,S,S2);CHKERRQ(ierr); /* s2 <- K s */
135     ierr = MatMult(pc->mat,S2,T);CHKERRQ(ierr); /* t <- A s2 */
136 
137     /* inner prodcuts */
138     ierr = PetscLogEventBegin(VEC_ReduceArithmetic,0,0,0,0);CHKERRQ(ierr);
139     xi1 = xi2 = xi3 = xi4 = 0.0;
140     for (j=0; j<N; j++) {
141       xi1 += s[j]*s[j]; /* xi1 <- (s,s) */
142       xi2 += t[j]*s[j]; /* xi2 <- (t,s) */
143       xi3 += t[j]*t[j]; /* xi3 <- (t,t) */
144       xi4 += t[j]*rp[j]; /* xi4 <- (t,rp) */
145     }
146     PetscLogFlops(8.0*N);
147     ierr = PetscLogEventEnd(VEC_ReduceArithmetic,0,0,0,0);CHKERRQ(ierr);
148     insums[0] = xi1;
149     insums[1] = xi2;
150     insums[2] = xi3;
151     insums[3] = xi4;
152     ierr = PetscLogEventBarrierBegin(VEC_ReduceBarrier,0,0,0,0,((PetscObject)ksp)->comm);CHKERRQ(ierr);
153     ierr = MPI_Allreduce(insums,outsums,4,MPIU_SCALAR,MPIU_SUM,((PetscObject)ksp)->comm);CHKERRQ(ierr);
154     ierr = PetscLogEventBarrierEnd(VEC_ReduceBarrier,0,0,0,0,((PetscObject)ksp)->comm);CHKERRQ(ierr);
155     xi1 = outsums[0];
156     xi2 = outsums[1];
157     xi3 = outsums[2];
158     xi4 = outsums[3];
159 
160     /* test denominator */
161     if (xi3 == 0.0) SETERRQ(((PetscObject)ksp)->comm,PETSC_ERR_PLIB,"Divide by zero");
162     if (sigma == 0.0) SETERRQ(((PetscObject)ksp)->comm,PETSC_ERR_PLIB,"Divide by zero");
163 
164     /* scalar updates */
165     omega = xi2 / xi3;
166     beta = - xi4 / sigma;
167     rho = PetscSqrtReal(PetscAbsScalar(xi1 - omega * xi2)); /* residual norm */
168 
169     /* vector updates */
170     ierr = VecAXPBYPCZ(X,alpha,omega,1.0,P2,S2);CHKERRQ(ierr); /* x <- alpha * p2 + omega * s2 + x */
171 
172     /* convergence test */
173     ierr = PetscObjectTakeAccess(ksp);CHKERRQ(ierr);
174     ksp->its++;
175     ksp->rnorm = rho;
176     ierr = PetscObjectGrantAccess(ksp);CHKERRQ(ierr);
177     KSPLogResidualHistory(ksp,rho);
178     ierr = KSPMonitor(ksp,i+1,rho);CHKERRQ(ierr);
179     ierr = (*ksp->converged)(ksp,i+1,rho,&ksp->reason,ksp->cnvP);CHKERRQ(ierr);
180     if (ksp->reason) break;
181 
182     /* vector updates */
183     ierr = PetscLogEventBegin(VEC_Ops,0,0,0,0);CHKERRQ(ierr);
184     for (j=0; j<N; j++) {
185       r[j] = s[j] - omega * t[j]; /* r <- s - omega t */
186       p[j] = r[j] + beta * (p[j] - omega * v[j]); /* p <- r + beta * (p - omega v) */
187     }
188     PetscLogFlops(6.0*N);
189     ierr = PetscLogEventEnd(VEC_Ops,0,0,0,0);CHKERRQ(ierr);
190 
191   }
192 
193   if (i >= ksp->max_it) {
194     ksp->reason = KSP_DIVERGED_ITS;
195   }
196 
197   PetscFunctionReturn(0);
198 }
199 
200 /*MC
201      KSPFBCGSR - Implements a mathematically equivalent variant of FBiCGSTab.
202 
203    Options Database Keys:
204 .   see KSPSolve()
205 
206    Level: beginner
207 
208    Notes: Only allow right preconditioning
209 
210 .seealso:  KSPCreate(), KSPSetType(), KSPType (for list of available types), KSP, KSPBICG, KSPFBCGSL, KSPSetPCSide()
211 M*/
212 EXTERN_C_BEGIN
213 #undef __FUNCT__
214 #define __FUNCT__ "KSPCreate_FBCGSR"
215 PetscErrorCode  KSPCreate_FBCGSR(KSP ksp)
216 {
217   PetscErrorCode ierr;
218   KSP_BCGS       *bcgs;
219 
220   PetscFunctionBegin;
221   ierr = PetscNewLog(ksp,KSP_BCGS,&bcgs);CHKERRQ(ierr);
222   ksp->data                 = bcgs;
223   ksp->ops->setup           = KSPSetUp_FBCGSR;
224   ksp->ops->solve           = KSPSolve_FBCGSR;
225   ksp->ops->destroy         = KSPDestroy_BCGS;
226   ksp->ops->reset           = KSPReset_BCGS;
227   ksp->ops->buildsolution   = KSPBuildSolution_BCGS;
228   ksp->ops->buildresidual   = KSPDefaultBuildResidual;
229   ksp->ops->setfromoptions  = KSPSetFromOptions_BCGS;
230   ksp->ops->view            = KSPView_BCGS;
231   ksp->pc_side              = PC_RIGHT; /* set default PC side */
232 
233   ierr = KSPSetSupportedNorm(ksp,KSP_NORM_PRECONDITIONED,PC_LEFT,2);CHKERRQ(ierr);
234   ierr = KSPSetSupportedNorm(ksp,KSP_NORM_UNPRECONDITIONED,PC_RIGHT,1);CHKERRQ(ierr);
235   PetscFunctionReturn(0);
236 }
237 EXTERN_C_END
238