1 #include <../src/mat/impls/baij/seq/baij.h> 2 #include <petsc/private/kernels/blockinvert.h> 3 4 /* 5 Special case where the matrix was ILU(0) factored in the natural 6 ordering. This eliminates the need for the column and row permutation. 7 */ 8 PetscErrorCode MatSolve_SeqBAIJ_2_NaturalOrdering_inplace(Mat A, Vec bb, Vec xx) 9 { 10 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 11 const PetscInt n = a->mbs, *vi, *ai = a->i, *aj = a->j, *diag = a->diag; 12 const MatScalar *aa = a->a, *v; 13 PetscScalar *x, s1, s2, x1, x2; 14 const PetscScalar *b; 15 PetscInt jdx, idt, idx, nz, i; 16 17 PetscFunctionBegin; 18 PetscCall(VecGetArrayRead(bb, &b)); 19 PetscCall(VecGetArray(xx, &x)); 20 21 /* forward solve the lower triangular */ 22 idx = 0; 23 x[0] = b[0]; 24 x[1] = b[1]; 25 for (i = 1; i < n; i++) { 26 v = aa + 4 * ai[i]; 27 vi = aj + ai[i]; 28 nz = diag[i] - ai[i]; 29 idx += 2; 30 s1 = b[idx]; 31 s2 = b[1 + idx]; 32 while (nz--) { 33 jdx = 2 * (*vi++); 34 x1 = x[jdx]; 35 x2 = x[1 + jdx]; 36 s1 -= v[0] * x1 + v[2] * x2; 37 s2 -= v[1] * x1 + v[3] * x2; 38 v += 4; 39 } 40 x[idx] = s1; 41 x[1 + idx] = s2; 42 } 43 /* backward solve the upper triangular */ 44 for (i = n - 1; i >= 0; i--) { 45 v = aa + 4 * diag[i] + 4; 46 vi = aj + diag[i] + 1; 47 nz = ai[i + 1] - diag[i] - 1; 48 idt = 2 * i; 49 s1 = x[idt]; 50 s2 = x[1 + idt]; 51 while (nz--) { 52 idx = 2 * (*vi++); 53 x1 = x[idx]; 54 x2 = x[1 + idx]; 55 s1 -= v[0] * x1 + v[2] * x2; 56 s2 -= v[1] * x1 + v[3] * x2; 57 v += 4; 58 } 59 v = aa + 4 * diag[i]; 60 x[idt] = v[0] * s1 + v[2] * s2; 61 x[1 + idt] = v[1] * s1 + v[3] * s2; 62 } 63 64 PetscCall(VecRestoreArrayRead(bb, &b)); 65 PetscCall(VecRestoreArray(xx, &x)); 66 PetscCall(PetscLogFlops(2.0 * 4 * (a->nz) - 2.0 * A->cmap->n)); 67 PetscFunctionReturn(0); 68 } 69 70 PetscErrorCode MatSolve_SeqBAIJ_2_NaturalOrdering(Mat A, Vec bb, Vec xx) 71 { 72 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 73 const PetscInt n = a->mbs, *vi, *ai = a->i, *aj = a->j, *adiag = a->diag; 74 PetscInt i, k, nz, idx, idt, jdx; 75 const MatScalar *aa = a->a, *v; 76 PetscScalar *x, s1, s2, x1, x2; 77 const PetscScalar *b; 78 79 PetscFunctionBegin; 80 PetscCall(VecGetArrayRead(bb, &b)); 81 PetscCall(VecGetArray(xx, &x)); 82 /* forward solve the lower triangular */ 83 idx = 0; 84 x[0] = b[idx]; 85 x[1] = b[1 + idx]; 86 for (i = 1; i < n; i++) { 87 v = aa + 4 * ai[i]; 88 vi = aj + ai[i]; 89 nz = ai[i + 1] - ai[i]; 90 idx = 2 * i; 91 s1 = b[idx]; 92 s2 = b[1 + idx]; 93 PetscPrefetchBlock(vi + nz, nz, 0, PETSC_PREFETCH_HINT_NTA); 94 PetscPrefetchBlock(v + 4 * nz, 4 * nz, 0, PETSC_PREFETCH_HINT_NTA); 95 for (k = 0; k < nz; k++) { 96 jdx = 2 * vi[k]; 97 x1 = x[jdx]; 98 x2 = x[1 + jdx]; 99 s1 -= v[0] * x1 + v[2] * x2; 100 s2 -= v[1] * x1 + v[3] * x2; 101 v += 4; 102 } 103 x[idx] = s1; 104 x[1 + idx] = s2; 105 } 106 107 /* backward solve the upper triangular */ 108 for (i = n - 1; i >= 0; i--) { 109 v = aa + 4 * (adiag[i + 1] + 1); 110 vi = aj + adiag[i + 1] + 1; 111 nz = adiag[i] - adiag[i + 1] - 1; 112 idt = 2 * i; 113 s1 = x[idt]; 114 s2 = x[1 + idt]; 115 PetscPrefetchBlock(vi + nz, nz, 0, PETSC_PREFETCH_HINT_NTA); 116 PetscPrefetchBlock(v + 4 * nz, 4 * nz, 0, PETSC_PREFETCH_HINT_NTA); 117 for (k = 0; k < nz; k++) { 118 idx = 2 * vi[k]; 119 x1 = x[idx]; 120 x2 = x[1 + idx]; 121 s1 -= v[0] * x1 + v[2] * x2; 122 s2 -= v[1] * x1 + v[3] * x2; 123 v += 4; 124 } 125 /* x = inv_diagonal*x */ 126 x[idt] = v[0] * s1 + v[2] * s2; 127 x[1 + idt] = v[1] * s1 + v[3] * s2; 128 } 129 130 PetscCall(VecRestoreArrayRead(bb, &b)); 131 PetscCall(VecRestoreArray(xx, &x)); 132 PetscCall(PetscLogFlops(2.0 * 4 * (a->nz) - 2.0 * A->cmap->n)); 133 PetscFunctionReturn(0); 134 } 135 136 PetscErrorCode MatForwardSolve_SeqBAIJ_2_NaturalOrdering(Mat A, Vec bb, Vec xx) 137 { 138 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 139 const PetscInt n = a->mbs, *vi, *ai = a->i, *aj = a->j; 140 PetscInt i, k, nz, idx, jdx; 141 const MatScalar *aa = a->a, *v; 142 PetscScalar *x, s1, s2, x1, x2; 143 const PetscScalar *b; 144 145 PetscFunctionBegin; 146 PetscCall(VecGetArrayRead(bb, &b)); 147 PetscCall(VecGetArray(xx, &x)); 148 /* forward solve the lower triangular */ 149 idx = 0; 150 x[0] = b[idx]; 151 x[1] = b[1 + idx]; 152 for (i = 1; i < n; i++) { 153 v = aa + 4 * ai[i]; 154 vi = aj + ai[i]; 155 nz = ai[i + 1] - ai[i]; 156 idx = 2 * i; 157 s1 = b[idx]; 158 s2 = b[1 + idx]; 159 PetscPrefetchBlock(vi + nz, nz, 0, PETSC_PREFETCH_HINT_NTA); 160 PetscPrefetchBlock(v + 4 * nz, 4 * nz, 0, PETSC_PREFETCH_HINT_NTA); 161 for (k = 0; k < nz; k++) { 162 jdx = 2 * vi[k]; 163 x1 = x[jdx]; 164 x2 = x[1 + jdx]; 165 s1 -= v[0] * x1 + v[2] * x2; 166 s2 -= v[1] * x1 + v[3] * x2; 167 v += 4; 168 } 169 x[idx] = s1; 170 x[1 + idx] = s2; 171 } 172 173 PetscCall(VecRestoreArrayRead(bb, &b)); 174 PetscCall(VecRestoreArray(xx, &x)); 175 PetscCall(PetscLogFlops(4.0 * (a->nz) - A->cmap->n)); 176 PetscFunctionReturn(0); 177 } 178 179 PetscErrorCode MatBackwardSolve_SeqBAIJ_2_NaturalOrdering(Mat A, Vec bb, Vec xx) 180 { 181 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 182 const PetscInt n = a->mbs, *vi, *aj = a->j, *adiag = a->diag; 183 PetscInt i, k, nz, idx, idt; 184 const MatScalar *aa = a->a, *v; 185 PetscScalar *x, s1, s2, x1, x2; 186 const PetscScalar *b; 187 188 PetscFunctionBegin; 189 PetscCall(VecGetArrayRead(bb, &b)); 190 PetscCall(VecGetArray(xx, &x)); 191 192 /* backward solve the upper triangular */ 193 for (i = n - 1; i >= 0; i--) { 194 v = aa + 4 * (adiag[i + 1] + 1); 195 vi = aj + adiag[i + 1] + 1; 196 nz = adiag[i] - adiag[i + 1] - 1; 197 idt = 2 * i; 198 s1 = b[idt]; 199 s2 = b[1 + idt]; 200 PetscPrefetchBlock(vi + nz, nz, 0, PETSC_PREFETCH_HINT_NTA); 201 PetscPrefetchBlock(v + 4 * nz, 4 * nz, 0, PETSC_PREFETCH_HINT_NTA); 202 for (k = 0; k < nz; k++) { 203 idx = 2 * vi[k]; 204 x1 = x[idx]; 205 x2 = x[1 + idx]; 206 s1 -= v[0] * x1 + v[2] * x2; 207 s2 -= v[1] * x1 + v[3] * x2; 208 v += 4; 209 } 210 /* x = inv_diagonal*x */ 211 x[idt] = v[0] * s1 + v[2] * s2; 212 x[1 + idt] = v[1] * s1 + v[3] * s2; 213 } 214 215 PetscCall(VecRestoreArrayRead(bb, &b)); 216 PetscCall(VecRestoreArray(xx, &x)); 217 PetscCall(PetscLogFlops(4.0 * a->nz - A->cmap->n)); 218 PetscFunctionReturn(0); 219 } 220