xref: /petsc/src/mat/impls/baij/seq/baijsolvnat2.c (revision 9371c9d470a9602b6d10a8bf50c9b2280a79e45a)
12c733ed4SBarry Smith #include <../src/mat/impls/baij/seq/baij.h>
22c733ed4SBarry Smith #include <petsc/private/kernels/blockinvert.h>
32c733ed4SBarry Smith 
42c733ed4SBarry Smith /*
52c733ed4SBarry Smith       Special case where the matrix was ILU(0) factored in the natural
62c733ed4SBarry Smith    ordering. This eliminates the need for the column and row permutation.
72c733ed4SBarry Smith */
8*9371c9d4SSatish Balay PetscErrorCode MatSolve_SeqBAIJ_2_NaturalOrdering_inplace(Mat A, Vec bb, Vec xx) {
92c733ed4SBarry Smith   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ *)A->data;
102c733ed4SBarry Smith   const PetscInt     n = a->mbs, *vi, *ai = a->i, *aj = a->j, *diag = a->diag;
112c733ed4SBarry Smith   const MatScalar   *aa = a->a, *v;
122c733ed4SBarry Smith   PetscScalar       *x, s1, s2, x1, x2;
132c733ed4SBarry Smith   const PetscScalar *b;
142c733ed4SBarry Smith   PetscInt           jdx, idt, idx, nz, i;
152c733ed4SBarry Smith 
162c733ed4SBarry Smith   PetscFunctionBegin;
179566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(bb, &b));
189566063dSJacob Faibussowitsch   PetscCall(VecGetArray(xx, &x));
192c733ed4SBarry Smith 
202c733ed4SBarry Smith   /* forward solve the lower triangular */
212c733ed4SBarry Smith   idx  = 0;
22*9371c9d4SSatish Balay   x[0] = b[0];
23*9371c9d4SSatish Balay   x[1] = b[1];
242c733ed4SBarry Smith   for (i = 1; i < n; i++) {
252c733ed4SBarry Smith     v  = aa + 4 * ai[i];
262c733ed4SBarry Smith     vi = aj + ai[i];
272c733ed4SBarry Smith     nz = diag[i] - ai[i];
282c733ed4SBarry Smith     idx += 2;
29*9371c9d4SSatish Balay     s1 = b[idx];
30*9371c9d4SSatish Balay     s2 = b[1 + idx];
312c733ed4SBarry Smith     while (nz--) {
322c733ed4SBarry Smith       jdx = 2 * (*vi++);
33*9371c9d4SSatish Balay       x1  = x[jdx];
34*9371c9d4SSatish Balay       x2  = x[1 + jdx];
352c733ed4SBarry Smith       s1 -= v[0] * x1 + v[2] * x2;
362c733ed4SBarry Smith       s2 -= v[1] * x1 + v[3] * x2;
372c733ed4SBarry Smith       v += 4;
382c733ed4SBarry Smith     }
392c733ed4SBarry Smith     x[idx]     = s1;
402c733ed4SBarry Smith     x[1 + idx] = s2;
412c733ed4SBarry Smith   }
422c733ed4SBarry Smith   /* backward solve the upper triangular */
432c733ed4SBarry Smith   for (i = n - 1; i >= 0; i--) {
442c733ed4SBarry Smith     v   = aa + 4 * diag[i] + 4;
452c733ed4SBarry Smith     vi  = aj + diag[i] + 1;
462c733ed4SBarry Smith     nz  = ai[i + 1] - diag[i] - 1;
472c733ed4SBarry Smith     idt = 2 * i;
48*9371c9d4SSatish Balay     s1  = x[idt];
49*9371c9d4SSatish Balay     s2  = x[1 + idt];
502c733ed4SBarry Smith     while (nz--) {
512c733ed4SBarry Smith       idx = 2 * (*vi++);
52*9371c9d4SSatish Balay       x1  = x[idx];
53*9371c9d4SSatish Balay       x2  = x[1 + idx];
542c733ed4SBarry Smith       s1 -= v[0] * x1 + v[2] * x2;
552c733ed4SBarry Smith       s2 -= v[1] * x1 + v[3] * x2;
562c733ed4SBarry Smith       v += 4;
572c733ed4SBarry Smith     }
582c733ed4SBarry Smith     v          = aa + 4 * diag[i];
592c733ed4SBarry Smith     x[idt]     = v[0] * s1 + v[2] * s2;
602c733ed4SBarry Smith     x[1 + idt] = v[1] * s1 + v[3] * s2;
612c733ed4SBarry Smith   }
622c733ed4SBarry Smith 
639566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(bb, &b));
649566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(xx, &x));
659566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(2.0 * 4 * (a->nz) - 2.0 * A->cmap->n));
662c733ed4SBarry Smith   PetscFunctionReturn(0);
672c733ed4SBarry Smith }
682c733ed4SBarry Smith 
69*9371c9d4SSatish Balay PetscErrorCode MatSolve_SeqBAIJ_2_NaturalOrdering(Mat A, Vec bb, Vec xx) {
702c733ed4SBarry Smith   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ *)A->data;
712c733ed4SBarry Smith   const PetscInt     n = a->mbs, *vi, *ai = a->i, *aj = a->j, *adiag = a->diag;
722c733ed4SBarry Smith   PetscInt           i, k, nz, idx, idt, jdx;
732c733ed4SBarry Smith   const MatScalar   *aa = a->a, *v;
742c733ed4SBarry Smith   PetscScalar       *x, s1, s2, x1, x2;
752c733ed4SBarry Smith   const PetscScalar *b;
762c733ed4SBarry Smith 
772c733ed4SBarry Smith   PetscFunctionBegin;
789566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(bb, &b));
799566063dSJacob Faibussowitsch   PetscCall(VecGetArray(xx, &x));
802c733ed4SBarry Smith   /* forward solve the lower triangular */
812c733ed4SBarry Smith   idx  = 0;
82*9371c9d4SSatish Balay   x[0] = b[idx];
83*9371c9d4SSatish Balay   x[1] = b[1 + idx];
842c733ed4SBarry Smith   for (i = 1; i < n; i++) {
852c733ed4SBarry Smith     v   = aa + 4 * ai[i];
862c733ed4SBarry Smith     vi  = aj + ai[i];
872c733ed4SBarry Smith     nz  = ai[i + 1] - ai[i];
882c733ed4SBarry Smith     idx = 2 * i;
89*9371c9d4SSatish Balay     s1  = b[idx];
90*9371c9d4SSatish Balay     s2  = b[1 + idx];
912c733ed4SBarry Smith     PetscPrefetchBlock(vi + nz, nz, 0, PETSC_PREFETCH_HINT_NTA);
922c733ed4SBarry Smith     PetscPrefetchBlock(v + 4 * nz, 4 * nz, 0, PETSC_PREFETCH_HINT_NTA);
932c733ed4SBarry Smith     for (k = 0; k < nz; k++) {
942c733ed4SBarry Smith       jdx = 2 * vi[k];
95*9371c9d4SSatish Balay       x1  = x[jdx];
96*9371c9d4SSatish Balay       x2  = x[1 + jdx];
972c733ed4SBarry Smith       s1 -= v[0] * x1 + v[2] * x2;
982c733ed4SBarry Smith       s2 -= v[1] * x1 + v[3] * x2;
992c733ed4SBarry Smith       v += 4;
1002c733ed4SBarry Smith     }
1012c733ed4SBarry Smith     x[idx]     = s1;
1022c733ed4SBarry Smith     x[1 + idx] = s2;
1032c733ed4SBarry Smith   }
1042c733ed4SBarry Smith 
1052c733ed4SBarry Smith   /* backward solve the upper triangular */
1062c733ed4SBarry Smith   for (i = n - 1; i >= 0; i--) {
1072c733ed4SBarry Smith     v   = aa + 4 * (adiag[i + 1] + 1);
1082c733ed4SBarry Smith     vi  = aj + adiag[i + 1] + 1;
1092c733ed4SBarry Smith     nz  = adiag[i] - adiag[i + 1] - 1;
1102c733ed4SBarry Smith     idt = 2 * i;
111*9371c9d4SSatish Balay     s1  = x[idt];
112*9371c9d4SSatish Balay     s2  = x[1 + idt];
1132c733ed4SBarry Smith     PetscPrefetchBlock(vi + nz, nz, 0, PETSC_PREFETCH_HINT_NTA);
1142c733ed4SBarry Smith     PetscPrefetchBlock(v + 4 * nz, 4 * nz, 0, PETSC_PREFETCH_HINT_NTA);
1152c733ed4SBarry Smith     for (k = 0; k < nz; k++) {
1162c733ed4SBarry Smith       idx = 2 * vi[k];
117*9371c9d4SSatish Balay       x1  = x[idx];
118*9371c9d4SSatish Balay       x2  = x[1 + idx];
1192c733ed4SBarry Smith       s1 -= v[0] * x1 + v[2] * x2;
1202c733ed4SBarry Smith       s2 -= v[1] * x1 + v[3] * x2;
1212c733ed4SBarry Smith       v += 4;
1222c733ed4SBarry Smith     }
1232c733ed4SBarry Smith     /* x = inv_diagonal*x */
1242c733ed4SBarry Smith     x[idt]     = v[0] * s1 + v[2] * s2;
1252c733ed4SBarry Smith     x[1 + idt] = v[1] * s1 + v[3] * s2;
1262c733ed4SBarry Smith   }
1272c733ed4SBarry Smith 
1289566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(bb, &b));
1299566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(xx, &x));
1309566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(2.0 * 4 * (a->nz) - 2.0 * A->cmap->n));
1312c733ed4SBarry Smith   PetscFunctionReturn(0);
1322c733ed4SBarry Smith }
1332c733ed4SBarry Smith 
134*9371c9d4SSatish Balay PetscErrorCode MatForwardSolve_SeqBAIJ_2_NaturalOrdering(Mat A, Vec bb, Vec xx) {
1352c733ed4SBarry Smith   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ *)A->data;
1362c733ed4SBarry Smith   const PetscInt     n = a->mbs, *vi, *ai = a->i, *aj = a->j;
1372c733ed4SBarry Smith   PetscInt           i, k, nz, idx, jdx;
1382c733ed4SBarry Smith   const MatScalar   *aa = a->a, *v;
1392c733ed4SBarry Smith   PetscScalar       *x, s1, s2, x1, x2;
1402c733ed4SBarry Smith   const PetscScalar *b;
1412c733ed4SBarry Smith 
1422c733ed4SBarry Smith   PetscFunctionBegin;
1439566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(bb, &b));
1449566063dSJacob Faibussowitsch   PetscCall(VecGetArray(xx, &x));
1452c733ed4SBarry Smith   /* forward solve the lower triangular */
1462c733ed4SBarry Smith   idx  = 0;
147*9371c9d4SSatish Balay   x[0] = b[idx];
148*9371c9d4SSatish Balay   x[1] = b[1 + idx];
1492c733ed4SBarry Smith   for (i = 1; i < n; i++) {
1502c733ed4SBarry Smith     v   = aa + 4 * ai[i];
1512c733ed4SBarry Smith     vi  = aj + ai[i];
1522c733ed4SBarry Smith     nz  = ai[i + 1] - ai[i];
1532c733ed4SBarry Smith     idx = 2 * i;
154*9371c9d4SSatish Balay     s1  = b[idx];
155*9371c9d4SSatish Balay     s2  = b[1 + idx];
1562c733ed4SBarry Smith     PetscPrefetchBlock(vi + nz, nz, 0, PETSC_PREFETCH_HINT_NTA);
1572c733ed4SBarry Smith     PetscPrefetchBlock(v + 4 * nz, 4 * nz, 0, PETSC_PREFETCH_HINT_NTA);
1582c733ed4SBarry Smith     for (k = 0; k < nz; k++) {
1592c733ed4SBarry Smith       jdx = 2 * vi[k];
160*9371c9d4SSatish Balay       x1  = x[jdx];
161*9371c9d4SSatish Balay       x2  = x[1 + jdx];
1622c733ed4SBarry Smith       s1 -= v[0] * x1 + v[2] * x2;
1632c733ed4SBarry Smith       s2 -= v[1] * x1 + v[3] * x2;
1642c733ed4SBarry Smith       v += 4;
1652c733ed4SBarry Smith     }
1662c733ed4SBarry Smith     x[idx]     = s1;
1672c733ed4SBarry Smith     x[1 + idx] = s2;
1682c733ed4SBarry Smith   }
1692c733ed4SBarry Smith 
1709566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(bb, &b));
1719566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(xx, &x));
1729566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(4.0 * (a->nz) - A->cmap->n));
1732c733ed4SBarry Smith   PetscFunctionReturn(0);
1742c733ed4SBarry Smith }
1752c733ed4SBarry Smith 
176*9371c9d4SSatish Balay PetscErrorCode MatBackwardSolve_SeqBAIJ_2_NaturalOrdering(Mat A, Vec bb, Vec xx) {
1772c733ed4SBarry Smith   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ *)A->data;
1782c733ed4SBarry Smith   const PetscInt     n = a->mbs, *vi, *aj = a->j, *adiag = a->diag;
1792c733ed4SBarry Smith   PetscInt           i, k, nz, idx, idt;
1802c733ed4SBarry Smith   const MatScalar   *aa = a->a, *v;
1812c733ed4SBarry Smith   PetscScalar       *x, s1, s2, x1, x2;
1822c733ed4SBarry Smith   const PetscScalar *b;
1832c733ed4SBarry Smith 
1842c733ed4SBarry Smith   PetscFunctionBegin;
1859566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(bb, &b));
1869566063dSJacob Faibussowitsch   PetscCall(VecGetArray(xx, &x));
1872c733ed4SBarry Smith 
1882c733ed4SBarry Smith   /* backward solve the upper triangular */
1892c733ed4SBarry Smith   for (i = n - 1; i >= 0; i--) {
1902c733ed4SBarry Smith     v   = aa + 4 * (adiag[i + 1] + 1);
1912c733ed4SBarry Smith     vi  = aj + adiag[i + 1] + 1;
1922c733ed4SBarry Smith     nz  = adiag[i] - adiag[i + 1] - 1;
1932c733ed4SBarry Smith     idt = 2 * i;
194*9371c9d4SSatish Balay     s1  = b[idt];
195*9371c9d4SSatish Balay     s2  = b[1 + idt];
1962c733ed4SBarry Smith     PetscPrefetchBlock(vi + nz, nz, 0, PETSC_PREFETCH_HINT_NTA);
1972c733ed4SBarry Smith     PetscPrefetchBlock(v + 4 * nz, 4 * nz, 0, PETSC_PREFETCH_HINT_NTA);
1982c733ed4SBarry Smith     for (k = 0; k < nz; k++) {
1992c733ed4SBarry Smith       idx = 2 * vi[k];
200*9371c9d4SSatish Balay       x1  = x[idx];
201*9371c9d4SSatish Balay       x2  = x[1 + idx];
2022c733ed4SBarry Smith       s1 -= v[0] * x1 + v[2] * x2;
2032c733ed4SBarry Smith       s2 -= v[1] * x1 + v[3] * x2;
2042c733ed4SBarry Smith       v += 4;
2052c733ed4SBarry Smith     }
2062c733ed4SBarry Smith     /* x = inv_diagonal*x */
2072c733ed4SBarry Smith     x[idt]     = v[0] * s1 + v[2] * s2;
2082c733ed4SBarry Smith     x[1 + idt] = v[1] * s1 + v[3] * s2;
2092c733ed4SBarry Smith   }
2102c733ed4SBarry Smith 
2119566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(bb, &b));
2129566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(xx, &x));
2139566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(4.0 * a->nz - A->cmap->n));
2142c733ed4SBarry Smith   PetscFunctionReturn(0);
2152c733ed4SBarry Smith }
216