xref: /petsc/src/ksp/ksp/impls/bcgs/fbcgsr/fbcgsr.c (revision 9d13fa56c5c6523e02c36edc0e4e22bf2d0334a8)
1 
2 /*
3     This file implements FBiCGStab-R.
4     FBiCGStab-R is a mathematically equivalent variant of FBiCGStab. Differences are:
5       (1) There are fewer MPI_Allreduce calls.
6       (2) The convergence occasionally is much faster than that of FBiCGStab.
7 */
8 #include <../src/ksp/ksp/impls/bcgs/bcgsimpl.h> /*I  "petscksp.h"  I*/
9 #include <petsc/private/vecimpl.h>
10 
11 static PetscErrorCode KSPSetUp_FBCGSR(KSP ksp)
12 {
13   PetscFunctionBegin;
14   PetscCall(KSPSetWorkVecs(ksp, 8));
15   PetscFunctionReturn(PETSC_SUCCESS);
16 }
17 
18 static PetscErrorCode KSPSolve_FBCGSR(KSP ksp)
19 {
20   PetscInt                    i, j, N;
21   PetscScalar                 tau, sigma, alpha, omega, beta;
22   PetscReal                   rho;
23   PetscScalar                 xi1, xi2, xi3, xi4;
24   Vec                         X, B, P, P2, RP, R, V, S, T, S2;
25   PetscScalar *PETSC_RESTRICT rp, *PETSC_RESTRICT r, *PETSC_RESTRICT p;
26   PetscScalar *PETSC_RESTRICT v, *PETSC_RESTRICT s, *PETSC_RESTRICT t, *PETSC_RESTRICT s2;
27   PetscScalar insums[4], outsums[4];
28   KSP_BCGS   *bcgs = (KSP_BCGS *)ksp->data;
29   PC          pc;
30   Mat         mat;
31 
32   PetscFunctionBegin;
33   PetscCheck(ksp->vec_rhs->petscnative, PetscObjectComm((PetscObject)ksp), PETSC_ERR_SUP, "Only coded for PETSc vectors");
34   PetscCall(VecGetLocalSize(ksp->vec_sol, &N));
35 
36   X  = ksp->vec_sol;
37   B  = ksp->vec_rhs;
38   P2 = ksp->work[0];
39 
40   /* The following are involved in modified inner product calculations and vector updates */
41   RP = ksp->work[1];
42   PetscCall(VecGetArray(RP, (PetscScalar **)&rp));
43   PetscCall(VecRestoreArray(RP, NULL));
44   R = ksp->work[2];
45   PetscCall(VecGetArray(R, (PetscScalar **)&r));
46   PetscCall(VecRestoreArray(R, NULL));
47   P = ksp->work[3];
48   PetscCall(VecGetArray(P, (PetscScalar **)&p));
49   PetscCall(VecRestoreArray(P, NULL));
50   V = ksp->work[4];
51   PetscCall(VecGetArray(V, (PetscScalar **)&v));
52   PetscCall(VecRestoreArray(V, NULL));
53   S = ksp->work[5];
54   PetscCall(VecGetArray(S, (PetscScalar **)&s));
55   PetscCall(VecRestoreArray(S, NULL));
56   T = ksp->work[6];
57   PetscCall(VecGetArray(T, (PetscScalar **)&t));
58   PetscCall(VecRestoreArray(T, NULL));
59   S2 = ksp->work[7];
60   PetscCall(VecGetArray(S2, (PetscScalar **)&s2));
61   PetscCall(VecRestoreArray(S2, NULL));
62 
63   /* Only supports right preconditioning */
64   PetscCheck(ksp->pc_side == PC_RIGHT, PetscObjectComm((PetscObject)ksp), PETSC_ERR_SUP, "KSP fbcgsr does not support %s", PCSides[ksp->pc_side]);
65   if (!ksp->guess_zero) {
66     if (!bcgs->guess) PetscCall(VecDuplicate(X, &bcgs->guess));
67     PetscCall(VecCopy(X, bcgs->guess));
68   } else {
69     PetscCall(VecSet(X, 0.0));
70   }
71 
72   /* Compute initial residual */
73   PetscCall(KSPGetPC(ksp, &pc));
74   PetscCall(PCGetOperators(pc, &mat, NULL));
75   if (!ksp->guess_zero) {
76     PetscCall(KSP_MatMult(ksp, mat, X, P2)); /* P2 is used as temporary storage */
77     PetscCall(VecCopy(B, R));
78     PetscCall(VecAXPY(R, -1.0, P2));
79   } else {
80     PetscCall(VecCopy(B, R));
81   }
82 
83   /* Test for nothing to do */
84   PetscCall(VecNorm(R, NORM_2, &rho));
85   PetscCall(PetscObjectSAWsTakeAccess((PetscObject)ksp));
86   ksp->its = 0;
87   if (ksp->normtype != KSP_NORM_NONE) ksp->rnorm = rho;
88   else ksp->rnorm = 0;
89   PetscCall(PetscObjectSAWsGrantAccess((PetscObject)ksp));
90   PetscCall(KSPLogResidualHistory(ksp, ksp->rnorm));
91   PetscCall(KSPMonitor(ksp, 0, ksp->rnorm));
92   PetscCall((*ksp->converged)(ksp, 0, ksp->rnorm, &ksp->reason, ksp->cnvP));
93   if (ksp->reason) PetscFunctionReturn(PETSC_SUCCESS);
94 
95   /* Initialize iterates */
96   PetscCall(VecCopy(R, RP)); /* rp <- r */
97   PetscCall(VecCopy(R, P));  /* p <- r */
98 
99   /* Big loop */
100   for (i = 0; i < ksp->max_it; i++) {
101     /* matmult and pc */
102     PetscCall(KSP_PCApply(ksp, P, P2));      /* p2 <- K p */
103     PetscCall(KSP_MatMult(ksp, mat, P2, V)); /* v <- A p2 */
104 
105     /* inner prodcuts */
106     if (i == 0) {
107       tau = rho * rho;
108       PetscCall(VecDot(V, RP, &sigma)); /* sigma <- (v,rp) */
109     } else {
110       PetscCall(PetscLogEventBegin(VEC_ReduceArithmetic, 0, 0, 0, 0));
111       tau = sigma = 0.0;
112       for (j = 0; j < N; j++) {
113         tau += r[j] * rp[j];   /* tau <- (r,rp) */
114         sigma += v[j] * rp[j]; /* sigma <- (v,rp) */
115       }
116       PetscCall(PetscLogFlops(4.0 * N));
117       PetscCall(PetscLogEventEnd(VEC_ReduceArithmetic, 0, 0, 0, 0));
118       insums[0] = tau;
119       insums[1] = sigma;
120       PetscCall(PetscLogEventBegin(VEC_ReduceCommunication, 0, 0, 0, 0));
121       PetscCall(MPIU_Allreduce(insums, outsums, 2, MPIU_SCALAR, MPIU_SUM, PetscObjectComm((PetscObject)ksp)));
122       PetscCall(PetscLogEventEnd(VEC_ReduceCommunication, 0, 0, 0, 0));
123       tau   = outsums[0];
124       sigma = outsums[1];
125     }
126 
127     /* scalar update */
128     alpha = tau / sigma;
129 
130     /* vector update */
131     PetscCall(VecWAXPY(S, -alpha, V, R)); /* s <- r - alpha v */
132 
133     /* matmult and pc */
134     PetscCall(KSP_PCApply(ksp, S, S2));      /* s2 <- K s */
135     PetscCall(KSP_MatMult(ksp, mat, S2, T)); /* t <- A s2 */
136 
137     /* inner prodcuts */
138     PetscCall(PetscLogEventBegin(VEC_ReduceArithmetic, 0, 0, 0, 0));
139     xi1 = xi2 = xi3 = xi4 = 0.0;
140     for (j = 0; j < N; j++) {
141       xi1 += s[j] * s[j];  /* xi1 <- (s,s) */
142       xi2 += t[j] * s[j];  /* xi2 <- (t,s) */
143       xi3 += t[j] * t[j];  /* xi3 <- (t,t) */
144       xi4 += t[j] * rp[j]; /* xi4 <- (t,rp) */
145     }
146     PetscCall(PetscLogFlops(8.0 * N));
147     PetscCall(PetscLogEventEnd(VEC_ReduceArithmetic, 0, 0, 0, 0));
148 
149     insums[0] = xi1;
150     insums[1] = xi2;
151     insums[2] = xi3;
152     insums[3] = xi4;
153 
154     PetscCall(PetscLogEventBegin(VEC_ReduceCommunication, 0, 0, 0, 0));
155     PetscCall(MPIU_Allreduce(insums, outsums, 4, MPIU_SCALAR, MPIU_SUM, PetscObjectComm((PetscObject)ksp)));
156     PetscCall(PetscLogEventEnd(VEC_ReduceCommunication, 0, 0, 0, 0));
157     xi1 = outsums[0];
158     xi2 = outsums[1];
159     xi3 = outsums[2];
160     xi4 = outsums[3];
161 
162     /* test denominator */
163     if ((xi3 == 0.0) || (sigma == 0.0)) {
164       PetscCheck(!ksp->errorifnotconverged, PetscObjectComm((PetscObject)ksp), PETSC_ERR_NOT_CONVERGED, "KSPSolve has failed due to zero inner product");
165       ksp->reason = KSP_DIVERGED_BREAKDOWN;
166       PetscCall(PetscInfo(ksp, "KSPSolve has failed due to zero inner product\n"));
167       break;
168     }
169 
170     /* scalar updates */
171     omega = xi2 / xi3;
172     beta  = -xi4 / sigma;
173     rho   = PetscSqrtReal(PetscAbsScalar(xi1 - omega * xi2)); /* residual norm */
174 
175     /* vector updates */
176     PetscCall(VecAXPBYPCZ(X, alpha, omega, 1.0, P2, S2)); /* x <- alpha * p2 + omega * s2 + x */
177 
178     /* convergence test */
179     PetscCall(PetscObjectSAWsTakeAccess((PetscObject)ksp));
180     ksp->its++;
181     if (ksp->normtype != KSP_NORM_NONE) ksp->rnorm = rho;
182     else ksp->rnorm = 0;
183     PetscCall(PetscObjectSAWsGrantAccess((PetscObject)ksp));
184     PetscCall(KSPLogResidualHistory(ksp, ksp->rnorm));
185     PetscCall(KSPMonitor(ksp, i + 1, ksp->rnorm));
186     PetscCall((*ksp->converged)(ksp, i + 1, ksp->rnorm, &ksp->reason, ksp->cnvP));
187     if (ksp->reason) break;
188 
189     /* vector updates */
190     PetscCall(PetscLogEventBegin(VEC_Ops, 0, 0, 0, 0));
191     for (j = 0; j < N; j++) {
192       r[j] = s[j] - omega * t[j];                 /* r <- s - omega t */
193       p[j] = r[j] + beta * (p[j] - omega * v[j]); /* p <- r + beta * (p - omega v) */
194     }
195     PetscCall(PetscLogFlops(6.0 * N));
196     PetscCall(PetscLogEventEnd(VEC_Ops, 0, 0, 0, 0));
197   }
198 
199   if (i >= ksp->max_it) ksp->reason = KSP_DIVERGED_ITS;
200   PetscFunctionReturn(PETSC_SUCCESS);
201 }
202 
203 /*MC
204      KSPFBCGSR - Implements a mathematically equivalent variant of flexible bi-CG-stab, `KSPFBCGS`. [](sec_flexibleksp)
205 
206    Level: beginner
207 
208    Notes:
209    This implementation requires fewer `MPI_Allreduce()` calls than `KSPFBCGS` and may converge faster
210 
211    See `KSPPIPEBCGS` for a pipelined version of the algorithm
212 
213    Flexible BiCGStab, unlike most Krylov methods, allows the preconditioner to be nonlinear, that is the action of the preconditioner to a vector need not be linear
214    in the vector entries.
215 
216    Only supports right preconditioning
217 
218 .seealso: [](ch_ksp),  [](sec_flexibleksp), `KSPFBCGSR`, `KSPPIPEBCGS`, `KSPBCGSL`, `KSPBCGS`, `KSPCreate()`, `KSPSetType()`, `KSPType`, `KSP`, `KSPBICG`, `KSPFBCGS`, `KSPSetPCSide()`
219 M*/
220 PETSC_EXTERN PetscErrorCode KSPCreate_FBCGSR(KSP ksp)
221 {
222   KSP_BCGS *bcgs;
223 
224   PetscFunctionBegin;
225   PetscCall(PetscNew(&bcgs));
226 
227   ksp->data                = bcgs;
228   ksp->ops->setup          = KSPSetUp_FBCGSR;
229   ksp->ops->solve          = KSPSolve_FBCGSR;
230   ksp->ops->destroy        = KSPDestroy_BCGS;
231   ksp->ops->reset          = KSPReset_BCGS;
232   ksp->ops->buildsolution  = KSPBuildSolution_BCGS;
233   ksp->ops->buildresidual  = KSPBuildResidualDefault;
234   ksp->ops->setfromoptions = KSPSetFromOptions_BCGS;
235   ksp->pc_side             = PC_RIGHT; /* set default PC side */
236 
237   PetscCall(KSPSetSupportedNorm(ksp, KSP_NORM_PRECONDITIONED, PC_LEFT, 3));
238   PetscCall(KSPSetSupportedNorm(ksp, KSP_NORM_UNPRECONDITIONED, PC_RIGHT, 2));
239   PetscCall(KSPSetSupportedNorm(ksp, KSP_NORM_NONE, PC_RIGHT, 1));
240   PetscFunctionReturn(PETSC_SUCCESS);
241 }
242