xref: /petsc/src/mat/impls/baij/seq/baijsolvnat2.c (revision 503c0ea9b45bcfbcebbb1ea5341243bbc69f0bea)
1 #include <../src/mat/impls/baij/seq/baij.h>
2 #include <petsc/private/kernels/blockinvert.h>
3 
4 /*
5       Special case where the matrix was ILU(0) factored in the natural
6    ordering. This eliminates the need for the column and row permutation.
7 */
8 PetscErrorCode MatSolve_SeqBAIJ_2_NaturalOrdering_inplace(Mat A,Vec bb,Vec xx)
9 {
10   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
11   const PetscInt    n  =a->mbs,*vi,*ai=a->i,*aj=a->j,*diag=a->diag;
12   const MatScalar   *aa=a->a,*v;
13   PetscScalar       *x,s1,s2,x1,x2;
14   const PetscScalar *b;
15   PetscInt          jdx,idt,idx,nz,i;
16 
17   PetscFunctionBegin;
18   PetscCall(VecGetArrayRead(bb,&b));
19   PetscCall(VecGetArray(xx,&x));
20 
21   /* forward solve the lower triangular */
22   idx  = 0;
23   x[0] = b[0]; x[1] = b[1];
24   for (i=1; i<n; i++) {
25     v    =  aa      + 4*ai[i];
26     vi   =  aj      + ai[i];
27     nz   =  diag[i] - ai[i];
28     idx +=  2;
29     s1   =  b[idx];s2 = b[1+idx];
30     while (nz--) {
31       jdx = 2*(*vi++);
32       x1  = x[jdx];x2 = x[1+jdx];
33       s1 -= v[0]*x1 + v[2]*x2;
34       s2 -= v[1]*x1 + v[3]*x2;
35       v  += 4;
36     }
37     x[idx]   = s1;
38     x[1+idx] = s2;
39   }
40   /* backward solve the upper triangular */
41   for (i=n-1; i>=0; i--) {
42     v   = aa + 4*diag[i] + 4;
43     vi  = aj + diag[i] + 1;
44     nz  = ai[i+1] - diag[i] - 1;
45     idt = 2*i;
46     s1  = x[idt];  s2 = x[1+idt];
47     while (nz--) {
48       idx = 2*(*vi++);
49       x1  = x[idx];   x2 = x[1+idx];
50       s1 -= v[0]*x1 + v[2]*x2;
51       s2 -= v[1]*x1 + v[3]*x2;
52       v  += 4;
53     }
54     v        = aa +  4*diag[i];
55     x[idt]   = v[0]*s1 + v[2]*s2;
56     x[1+idt] = v[1]*s1 + v[3]*s2;
57   }
58 
59   PetscCall(VecRestoreArrayRead(bb,&b));
60   PetscCall(VecRestoreArray(xx,&x));
61   PetscCall(PetscLogFlops(2.0*4*(a->nz) - 2.0*A->cmap->n));
62   PetscFunctionReturn(0);
63 }
64 
65 PetscErrorCode MatSolve_SeqBAIJ_2_NaturalOrdering(Mat A,Vec bb,Vec xx)
66 {
67   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
68   const PetscInt    n  = a->mbs,*vi,*ai=a->i,*aj=a->j,*adiag=a->diag;
69   PetscInt          i,k,nz,idx,idt,jdx;
70   const MatScalar   *aa=a->a,*v;
71   PetscScalar       *x,s1,s2,x1,x2;
72   const PetscScalar *b;
73 
74   PetscFunctionBegin;
75   PetscCall(VecGetArrayRead(bb,&b));
76   PetscCall(VecGetArray(xx,&x));
77   /* forward solve the lower triangular */
78   idx  = 0;
79   x[0] = b[idx]; x[1] = b[1+idx];
80   for (i=1; i<n; i++) {
81     v   = aa + 4*ai[i];
82     vi  = aj + ai[i];
83     nz  = ai[i+1] - ai[i];
84     idx = 2*i;
85     s1  = b[idx];s2 = b[1+idx];
86     PetscPrefetchBlock(vi+nz,nz,0,PETSC_PREFETCH_HINT_NTA);
87     PetscPrefetchBlock(v+4*nz,4*nz,0,PETSC_PREFETCH_HINT_NTA);
88     for (k=0; k<nz; k++) {
89       jdx = 2*vi[k];
90       x1  = x[jdx];x2 = x[1+jdx];
91       s1 -= v[0]*x1 + v[2]*x2;
92       s2 -= v[1]*x1 + v[3]*x2;
93       v  +=  4;
94     }
95     x[idx]   = s1;
96     x[1+idx] = s2;
97   }
98 
99   /* backward solve the upper triangular */
100   for (i=n-1; i>=0; i--) {
101     v   = aa + 4*(adiag[i+1]+1);
102     vi  = aj + adiag[i+1]+1;
103     nz  = adiag[i] - adiag[i+1]-1;
104     idt = 2*i;
105     s1  = x[idt];  s2 = x[1+idt];
106     PetscPrefetchBlock(vi+nz,nz,0,PETSC_PREFETCH_HINT_NTA);
107     PetscPrefetchBlock(v+4*nz,4*nz,0,PETSC_PREFETCH_HINT_NTA);
108     for (k=0; k<nz; k++) {
109       idx = 2*vi[k];
110       x1  = x[idx];   x2 = x[1+idx];
111       s1 -= v[0]*x1 + v[2]*x2;
112       s2 -= v[1]*x1 + v[3]*x2;
113       v  += 4;
114     }
115     /* x = inv_diagonal*x */
116     x[idt]   = v[0]*s1 + v[2]*s2;
117     x[1+idt] = v[1]*s1 + v[3]*s2;
118   }
119 
120   PetscCall(VecRestoreArrayRead(bb,&b));
121   PetscCall(VecRestoreArray(xx,&x));
122   PetscCall(PetscLogFlops(2.0*4*(a->nz) - 2.0*A->cmap->n));
123   PetscFunctionReturn(0);
124 }
125 
126 PetscErrorCode MatForwardSolve_SeqBAIJ_2_NaturalOrdering(Mat A,Vec bb,Vec xx)
127 {
128   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
129   const PetscInt    n  = a->mbs,*vi,*ai=a->i,*aj=a->j;
130   PetscInt          i,k,nz,idx,jdx;
131   const MatScalar   *aa=a->a,*v;
132   PetscScalar       *x,s1,s2,x1,x2;
133   const PetscScalar *b;
134 
135   PetscFunctionBegin;
136   PetscCall(VecGetArrayRead(bb,&b));
137   PetscCall(VecGetArray(xx,&x));
138   /* forward solve the lower triangular */
139   idx  = 0;
140   x[0] = b[idx]; x[1] = b[1+idx];
141   for (i=1; i<n; i++) {
142     v   = aa + 4*ai[i];
143     vi  = aj + ai[i];
144     nz  = ai[i+1] - ai[i];
145     idx = 2*i;
146     s1  = b[idx];s2 = b[1+idx];
147     PetscPrefetchBlock(vi+nz,nz,0,PETSC_PREFETCH_HINT_NTA);
148     PetscPrefetchBlock(v+4*nz,4*nz,0,PETSC_PREFETCH_HINT_NTA);
149     for (k=0; k<nz; k++) {
150       jdx = 2*vi[k];
151       x1  = x[jdx];x2 = x[1+jdx];
152       s1 -= v[0]*x1 + v[2]*x2;
153       s2 -= v[1]*x1 + v[3]*x2;
154       v  +=  4;
155     }
156     x[idx]   = s1;
157     x[1+idx] = s2;
158   }
159 
160   PetscCall(VecRestoreArrayRead(bb,&b));
161   PetscCall(VecRestoreArray(xx,&x));
162   PetscCall(PetscLogFlops(4.0*(a->nz) - A->cmap->n));
163   PetscFunctionReturn(0);
164 }
165 
166 PetscErrorCode MatBackwardSolve_SeqBAIJ_2_NaturalOrdering(Mat A,Vec bb,Vec xx)
167 {
168   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
169   const PetscInt    n  = a->mbs,*vi,*aj=a->j,*adiag=a->diag;
170   PetscInt          i,k,nz,idx,idt;
171   const MatScalar   *aa=a->a,*v;
172   PetscScalar       *x,s1,s2,x1,x2;
173   const PetscScalar *b;
174 
175   PetscFunctionBegin;
176   PetscCall(VecGetArrayRead(bb,&b));
177   PetscCall(VecGetArray(xx,&x));
178 
179   /* backward solve the upper triangular */
180   for (i=n-1; i>=0; i--) {
181     v   = aa + 4*(adiag[i+1]+1);
182     vi  = aj + adiag[i+1]+1;
183     nz  = adiag[i] - adiag[i+1]-1;
184     idt = 2*i;
185     s1  = b[idt];  s2 = b[1+idt];
186     PetscPrefetchBlock(vi+nz,nz,0,PETSC_PREFETCH_HINT_NTA);
187     PetscPrefetchBlock(v+4*nz,4*nz,0,PETSC_PREFETCH_HINT_NTA);
188     for (k=0; k<nz; k++) {
189       idx = 2*vi[k];
190       x1  = x[idx];   x2 = x[1+idx];
191       s1 -= v[0]*x1 + v[2]*x2;
192       s2 -= v[1]*x1 + v[3]*x2;
193       v  += 4;
194     }
195     /* x = inv_diagonal*x */
196     x[idt]   = v[0]*s1 + v[2]*s2;
197     x[1+idt] = v[1]*s1 + v[3]*s2;
198   }
199 
200   PetscCall(VecRestoreArrayRead(bb,&b));
201   PetscCall(VecRestoreArray(xx,&x));
202   PetscCall(PetscLogFlops(4.0*a->nz - A->cmap->n));
203   PetscFunctionReturn(0);
204 }
205