xref: /petsc/src/ksp/pc/impls/sor/sor.tex (revision 1c6d4a299348c3e3fbd4bbe907a6e5e502df3fdf)
1*1c6d4a29SBarry Smith\documentclass[11pt,english,pdftex]{article}
2*1c6d4a29SBarry Smith\usepackage{hanging} % added DRE
3*1c6d4a29SBarry Smith\usepackage{times}
4*1c6d4a29SBarry Smith\usepackage{url}
5*1c6d4a29SBarry Smith\usepackage[T1]{fontenc}
6*1c6d4a29SBarry Smith\usepackage[latin1]{inputenc}
7*1c6d4a29SBarry Smith\usepackage{geometry}
8*1c6d4a29SBarry Smith\geometry{verbose,letterpaper,tmargin=1.in,bmargin=1.in,lmargin=1.0in,rmargin=1.0in}
9*1c6d4a29SBarry Smith
10*1c6d4a29SBarry Smith\begin{document}
11*1c6d4a29SBarry SmithConsider the matrix problem $ A x = b$, where $A = L + U + D$.
12*1c6d4a29SBarry Smith
13*1c6d4a29SBarry Smith\section*{Notes on SOR Implementation}
14*1c6d4a29SBarry Smith
15*1c6d4a29SBarry Smith
16*1c6d4a29SBarry SmithSymmetric successive over-relaxation as a simple iterative solver can be written as the two-step process
17*1c6d4a29SBarry Smith\[
18*1c6d4a29SBarry Smithx_i^{n+1/2} =  x_i^n + \omega A_{ii}^{-1}( b_i - \sum_{j < i} A_{ij} x_j^{n+1/2} - \sum_{j \ge i} A_{ij} x_j^{n}) = (1 - \omega) x_i^n + \omega A_{ii}^{-1}( b_i - \sum_{j < i} A_{ij} x_j^{n+1/2} - \sum_{j > i} A_{ij} x_j^{n})
19*1c6d4a29SBarry Smith\]
20*1c6d4a29SBarry Smithfor $ i=1,2,...n$. Followed by
21*1c6d4a29SBarry Smith\[
22*1c6d4a29SBarry Smithx_i^{n+1} = x_i^{n+1/2} + \omega A_{ii}^{-1}( b_i - \sum_{j \le i} A_{ij} x_j^{n+1/2}  - \sum_{j > i} A_{ij} x_j^{n+1}) = (1 - \omega) x_i^{n+1/2} + \omega A_{ii}^{-1}( b_i - \sum_{j < i} A_{ij} x_j^{n+1/2}  - \sum_{j > i} A_{ij} x_j^{n+1})
23*1c6d4a29SBarry Smith\]
24*1c6d4a29SBarry Smithfor $ i=n,n-1,....1$. It is called over-relaxation because generally $ \omega $ is greater then one, though on occasion underrelaxation with $ \omega < 1$  has the fastest convergence.
25*1c6d4a29SBarry Smith
26*1c6d4a29SBarry SmithTo use this as a preconditioner, just start with $x^0 = $ to obtain
27*1c6d4a29SBarry Smith\[
28*1c6d4a29SBarry Smithx_i^{1/2} =  \omega A_{ii}^{-1}( b_i - \sum_{j < i} A_{ij} x_j^{1/2})
29*1c6d4a29SBarry Smith\]
30*1c6d4a29SBarry Smithfor $ i=1,2,...n$. Followed by
31*1c6d4a29SBarry Smith\[
32*1c6d4a29SBarry Smithx_i = (1 - \omega) x_i^{1/2} + \omega A_{ii}^{-1}( b_i - \sum_{j < i} A_{ij} x_j^{1/2} - \sum_{j > i} A_{ij} x_j)
33*1c6d4a29SBarry Smith\]
34*1c6d4a29SBarry Smithfor $ i=n,n-1,....1$.
35*1c6d4a29SBarry Smith
36*1c6d4a29SBarry SmithRewriting in matrix form
37*1c6d4a29SBarry Smith\[
38*1c6d4a29SBarry Smithx^{1/2} = \omega (L + D)^{-1} b
39*1c6d4a29SBarry Smith\]
40*1c6d4a29SBarry Smith\[
41*1c6d4a29SBarry Smithx = (1 - \omega) x^{1/2} + \omega (U + D)^{-1}(b - L x^{1/2}) = x^{1/2} + \omega (U+D)^{-1}(b - A x^{1/2}).
42*1c6d4a29SBarry Smith\]
43*1c6d4a29SBarry Smith
44*1c6d4a29SBarry SmithFor the SBAIJ matrix format
45*1c6d4a29SBarry Smith\begin{verbatim}
46*1c6d4a29SBarry Smithv  = aa + 1;
47*1c6d4a29SBarry Smith      vj = aj + 1;
48*1c6d4a29SBarry Smith      for (i=0; i<m; i++){
49*1c6d4a29SBarry Smith        nz = ai[i+1] - ai[i] - 1;
50*1c6d4a29SBarry Smith        tmp = - (x[i] = omega*t[i]*aidiag[i]);
51*1c6d4a29SBarry Smith        for (j=0; j<nz; j++) {
52*1c6d4a29SBarry Smith          t[vj[j]] += tmp*v[j];
53*1c6d4a29SBarry Smith        }
54*1c6d4a29SBarry Smith        v  += nz + 1;
55*1c6d4a29SBarry Smith        vj += nz + 1;
56*1c6d4a29SBarry Smith      }
57*1c6d4a29SBarry Smith\end{verbatim}
58*1c6d4a29SBarry Smiththe array $t$ starts with the value of $b $ and is updated a column of the matrix at at time to contain the value of $ (b - L x^{1/2})$ that
59*1c6d4a29SBarry Smithare then needed in the upper triangular solve
60*1c6d4a29SBarry Smith\begin{verbatim}
61*1c6d4a29SBarry Smith      v  = aa + ai[m-1] + 1;
62*1c6d4a29SBarry Smith      vj = aj + ai[m-1] + 1;
63*1c6d4a29SBarry Smith      nz = 0;
64*1c6d4a29SBarry Smith      for (i=m-1; i>=0; i--){
65*1c6d4a29SBarry Smith        sum = 0.0;
66*1c6d4a29SBarry Smith        nz2 = ai[i] - ai[i-1] - 1;
67*1c6d4a29SBarry Smith	PETSC_Prefetch(v-nz2-1,0,1);
68*1c6d4a29SBarry Smith	PETSC_Prefetch(vj-nz2-1,0,1);
69*1c6d4a29SBarry Smith        PetscSparseDensePlusDot(sum,x,v,vj,nz);
70*1c6d4a29SBarry Smith        sum = t[i] - sum;
71*1c6d4a29SBarry Smith        x[i] =   (1-omega)*x[i] + omega*sum*aidiag[i];
72*1c6d4a29SBarry Smith        nz  = nz2;
73*1c6d4a29SBarry Smith        v  -= nz + 1;
74*1c6d4a29SBarry Smith        vj -= nz + 1;
75*1c6d4a29SBarry Smith      }
76*1c6d4a29SBarry Smith\end{verbatim}
77*1c6d4a29SBarry SmithSince the values in $ aa[]$ and $ aj[]$ are visited ``backwards'', the prefetch is used to load the needed previous row of matrix values and column indices into cache before they are needed.
78*1c6d4a29SBarry Smith
79*1c6d4a29SBarry SmithFor the AIJ format $t$ is updated a row at a time to contain $ (b - Lx^{1/2}).$
80*1c6d4a29SBarry Smith
81*1c6d4a29SBarry Smith
82*1c6d4a29SBarry Smith\section*{Notes on Sequential Eisenstat Implementation}
83*1c6d4a29SBarry Smith
84*1c6d4a29SBarry Smith
85*1c6d4a29SBarry Smith\[
86*1c6d4a29SBarry Smith  x = \omega (L + D)^{-1}b
87*1c6d4a29SBarry Smith\]
88*1c6d4a29SBarry Smithis the same as
89*1c6d4a29SBarry Smith\[
90*1c6d4a29SBarry Smith   x_i = \omega D_{ii}^{-1}(b_i - \sum_{j<i} A_{ij} x_j)
91*1c6d4a29SBarry Smith\]
92*1c6d4a29SBarry Smith\[
93*1c6d4a29SBarry Smith   x_i = (D_{ii}/\omega)^{-1}(b_i - \sum_{j<i} A_{ij} x_j)
94*1c6d4a29SBarry Smith\]
95*1c6d4a29SBarry Smithresulting in
96*1c6d4a29SBarry Smith\[
97*1c6d4a29SBarry Smith  x =  (L + D/\omega)^{-1}b
98*1c6d4a29SBarry Smith\]
99*1c6d4a29SBarry Smith
100*1c6d4a29SBarry SmithRather than applying the left preconditioner obtained by apply the two step process $ (L + D/\omega)^{-1} $ and then $ (U + D/\omega)^{-1} $
101*1c6d4a29SBarry Smithone can apply the two ``halves'' of the preconditioner symmetrically to the system resulting in
102*1c6d4a29SBarry Smith\[
103*1c6d4a29SBarry Smith (L + D/\omega)^{-1} A (U + D/\omega)^{-1} y = (L + D/\omega)^{-1} b.
104*1c6d4a29SBarry Smith\]
105*1c6d4a29SBarry SmithThen after this system is solved, $ x = (U + D/\omega)^{-1} y$. If an initial guess that is nonzero is supplied then the
106*1c6d4a29SBarry Smithinitial guess for $ y$ must be computed via $ y = (U + D/\omega) x$.
107*1c6d4a29SBarry Smith\begin{eqnarray*}
108*1c6d4a29SBarry Smith (L + D/\omega)^{-1} A (U + D/\omega)^{-1} & =  & (L + D/\omega)^{-1} (L + D + U) (U + D/\omega)^{-1} \\
109*1c6d4a29SBarry Smith  & = &  (L + D/\omega)^{-1} (L + D/\omega + U + D/\omega + D - 2D/\omega) (U + D/\omega)^{-1} \\
110*1c6d4a29SBarry Smith  & = &  (U + D/\omega)^{-1} + (L+D/\omega)^{-1}(I + \frac{\omega - 2}{\omega}D(U + D/\omega)^{-1}).
111*1c6d4a29SBarry Smith\end{eqnarray*}
112*1c6d4a29SBarry Smith
113*1c6d4a29SBarry Smith\end{document}
114