xref: /petsc/src/mat/impls/baij/seq/baijsolvnat5.c (revision 503c0ea9b45bcfbcebbb1ea5341243bbc69f0bea)
1 #include <../src/mat/impls/baij/seq/baij.h>
2 #include <petsc/private/kernels/blockinvert.h>
3 
4 PetscErrorCode MatSolve_SeqBAIJ_5_NaturalOrdering_inplace(Mat A,Vec bb,Vec xx)
5 {
6   Mat_SeqBAIJ       *a   = (Mat_SeqBAIJ*)A->data;
7   const PetscInt    *diag=a->diag,n=a->mbs,*vi,*ai=a->i,*aj=a->j;
8   PetscInt          i,nz,idx,idt,jdx;
9   const MatScalar   *aa=a->a,*v;
10   PetscScalar       *x,s1,s2,s3,s4,s5,x1,x2,x3,x4,x5;
11   const PetscScalar *b;
12 
13   PetscFunctionBegin;
14   PetscCall(VecGetArrayRead(bb,&b));
15   PetscCall(VecGetArray(xx,&x));
16   /* forward solve the lower triangular */
17   idx  = 0;
18   x[0] = b[idx]; x[1] = b[1+idx]; x[2] = b[2+idx]; x[3] = b[3+idx];x[4] = b[4+idx];
19   for (i=1; i<n; i++) {
20     v   =  aa + 25*ai[i];
21     vi  =  aj + ai[i];
22     nz  =  diag[i] - ai[i];
23     idx =  5*i;
24     s1  =  b[idx];s2 = b[1+idx];s3 = b[2+idx];s4 = b[3+idx];s5 = b[4+idx];
25     while (nz--) {
26       jdx = 5*(*vi++);
27       x1  = x[jdx];x2 = x[1+jdx];x3 = x[2+jdx];x4 = x[3+jdx];x5 = x[4+jdx];
28       s1 -= v[0]*x1 + v[5]*x2 + v[10]*x3  + v[15]*x4 + v[20]*x5;
29       s2 -= v[1]*x1 + v[6]*x2 + v[11]*x3  + v[16]*x4 + v[21]*x5;
30       s3 -= v[2]*x1 + v[7]*x2 + v[12]*x3  + v[17]*x4 + v[22]*x5;
31       s4 -= v[3]*x1 + v[8]*x2 + v[13]*x3  + v[18]*x4 + v[23]*x5;
32       s5 -= v[4]*x1 + v[9]*x2 + v[14]*x3  + v[19]*x4 + v[24]*x5;
33       v  += 25;
34     }
35     x[idx]   = s1;
36     x[1+idx] = s2;
37     x[2+idx] = s3;
38     x[3+idx] = s4;
39     x[4+idx] = s5;
40   }
41   /* backward solve the upper triangular */
42   for (i=n-1; i>=0; i--) {
43     v   = aa + 25*diag[i] + 25;
44     vi  = aj + diag[i] + 1;
45     nz  = ai[i+1] - diag[i] - 1;
46     idt = 5*i;
47     s1  = x[idt];  s2 = x[1+idt];
48     s3  = x[2+idt];s4 = x[3+idt]; s5 = x[4+idt];
49     while (nz--) {
50       idx = 5*(*vi++);
51       x1  = x[idx];   x2 = x[1+idx];x3    = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx];
52       s1 -= v[0]*x1 + v[5]*x2 + v[10]*x3  + v[15]*x4 + v[20]*x5;
53       s2 -= v[1]*x1 + v[6]*x2 + v[11]*x3  + v[16]*x4 + v[21]*x5;
54       s3 -= v[2]*x1 + v[7]*x2 + v[12]*x3  + v[17]*x4 + v[22]*x5;
55       s4 -= v[3]*x1 + v[8]*x2 + v[13]*x3  + v[18]*x4 + v[23]*x5;
56       s5 -= v[4]*x1 + v[9]*x2 + v[14]*x3  + v[19]*x4 + v[24]*x5;
57       v  += 25;
58     }
59     v        = aa + 25*diag[i];
60     x[idt]   = v[0]*s1 + v[5]*s2 + v[10]*s3  + v[15]*s4 + v[20]*s5;
61     x[1+idt] = v[1]*s1 + v[6]*s2 + v[11]*s3  + v[16]*s4 + v[21]*s5;
62     x[2+idt] = v[2]*s1 + v[7]*s2 + v[12]*s3  + v[17]*s4 + v[22]*s5;
63     x[3+idt] = v[3]*s1 + v[8]*s2 + v[13]*s3  + v[18]*s4 + v[23]*s5;
64     x[4+idt] = v[4]*s1 + v[9]*s2 + v[14]*s3  + v[19]*s4 + v[24]*s5;
65   }
66 
67   PetscCall(VecRestoreArrayRead(bb,&b));
68   PetscCall(VecRestoreArray(xx,&x));
69   PetscCall(PetscLogFlops(2.0*25*(a->nz) - 5.0*A->cmap->n));
70   PetscFunctionReturn(0);
71 }
72 
73 PetscErrorCode MatSolve_SeqBAIJ_5_NaturalOrdering(Mat A,Vec bb,Vec xx)
74 {
75   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
76   const PetscInt    n  = a->mbs,*vi,*ai=a->i,*aj=a->j,*adiag=a->diag;
77   PetscInt          i,k,nz,idx,idt,jdx;
78   const MatScalar   *aa=a->a,*v;
79   PetscScalar       *x,s1,s2,s3,s4,s5,x1,x2,x3,x4,x5;
80   const PetscScalar *b;
81 
82   PetscFunctionBegin;
83   PetscCall(VecGetArrayRead(bb,&b));
84   PetscCall(VecGetArray(xx,&x));
85   /* forward solve the lower triangular */
86   idx  = 0;
87   x[0] = b[idx]; x[1] = b[1+idx]; x[2] = b[2+idx]; x[3] = b[3+idx];x[4] = b[4+idx];
88   for (i=1; i<n; i++) {
89     v   = aa + 25*ai[i];
90     vi  = aj + ai[i];
91     nz  = ai[i+1] - ai[i];
92     idx = 5*i;
93     s1  = b[idx];s2 = b[1+idx];s3 = b[2+idx];s4 = b[3+idx];s5 = b[4+idx];
94     for (k=0; k<nz; k++) {
95       jdx = 5*vi[k];
96       x1  = x[jdx];x2 = x[1+jdx];x3 = x[2+jdx];x4 = x[3+jdx];x5 = x[4+jdx];
97       s1 -= v[0]*x1 + v[5]*x2 + v[10]*x3  + v[15]*x4 + v[20]*x5;
98       s2 -= v[1]*x1 + v[6]*x2 + v[11]*x3  + v[16]*x4 + v[21]*x5;
99       s3 -= v[2]*x1 + v[7]*x2 + v[12]*x3  + v[17]*x4 + v[22]*x5;
100       s4 -= v[3]*x1 + v[8]*x2 + v[13]*x3  + v[18]*x4 + v[23]*x5;
101       s5 -= v[4]*x1 + v[9]*x2 + v[14]*x3  + v[19]*x4 + v[24]*x5;
102       v  += 25;
103     }
104     x[idx]   = s1;
105     x[1+idx] = s2;
106     x[2+idx] = s3;
107     x[3+idx] = s4;
108     x[4+idx] = s5;
109   }
110 
111   /* backward solve the upper triangular */
112   for (i=n-1; i>=0; i--) {
113     v   = aa + 25*(adiag[i+1]+1);
114     vi  = aj + adiag[i+1]+1;
115     nz  = adiag[i] - adiag[i+1]-1;
116     idt = 5*i;
117     s1  = x[idt];  s2 = x[1+idt];
118     s3  = x[2+idt];s4 = x[3+idt]; s5 = x[4+idt];
119     for (k=0; k<nz; k++) {
120       idx = 5*vi[k];
121       x1  = x[idx];   x2 = x[1+idx];x3    = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx];
122       s1 -= v[0]*x1 + v[5]*x2 + v[10]*x3  + v[15]*x4 + v[20]*x5;
123       s2 -= v[1]*x1 + v[6]*x2 + v[11]*x3  + v[16]*x4 + v[21]*x5;
124       s3 -= v[2]*x1 + v[7]*x2 + v[12]*x3  + v[17]*x4 + v[22]*x5;
125       s4 -= v[3]*x1 + v[8]*x2 + v[13]*x3  + v[18]*x4 + v[23]*x5;
126       s5 -= v[4]*x1 + v[9]*x2 + v[14]*x3  + v[19]*x4 + v[24]*x5;
127       v  += 25;
128     }
129     /* x = inv_diagonal*x */
130     x[idt]   = v[0]*s1 + v[5]*s2 + v[10]*s3  + v[15]*s4 + v[20]*s5;
131     x[1+idt] = v[1]*s1 + v[6]*s2 + v[11]*s3  + v[16]*s4 + v[21]*s5;
132     x[2+idt] = v[2]*s1 + v[7]*s2 + v[12]*s3  + v[17]*s4 + v[22]*s5;
133     x[3+idt] = v[3]*s1 + v[8]*s2 + v[13]*s3  + v[18]*s4 + v[23]*s5;
134     x[4+idt] = v[4]*s1 + v[9]*s2 + v[14]*s3  + v[19]*s4 + v[24]*s5;
135   }
136 
137   PetscCall(VecRestoreArrayRead(bb,&b));
138   PetscCall(VecRestoreArray(xx,&x));
139   PetscCall(PetscLogFlops(2.0*25*(a->nz) - 5.0*A->cmap->n));
140   PetscFunctionReturn(0);
141 }
142