1 #include <../src/mat/impls/baij/seq/baij.h> 2 #include <petsc/private/kernels/blockinvert.h> 3 4 /* 5 Special case where the matrix was ILU(0) factored in the natural 6 ordering. This eliminates the need for the column and row permutation. 7 */ 8 PetscErrorCode MatSolve_SeqBAIJ_1_NaturalOrdering_inplace(Mat A, Vec bb, Vec xx) { 9 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 10 const PetscInt n = a->mbs, *vi, *ai = a->i, *aj = a->j, *diag = a->diag; 11 const MatScalar *aa = a->a, *v; 12 PetscScalar *x; 13 const PetscScalar *b; 14 PetscScalar s1, x1; 15 PetscInt jdx, idt, idx, nz, i; 16 17 PetscFunctionBegin; 18 PetscCall(VecGetArrayRead(bb, &b)); 19 PetscCall(VecGetArray(xx, &x)); 20 21 /* forward solve the lower triangular */ 22 idx = 0; 23 x[0] = b[0]; 24 for (i = 1; i < n; i++) { 25 v = aa + ai[i]; 26 vi = aj + ai[i]; 27 nz = diag[i] - ai[i]; 28 idx += 1; 29 s1 = b[idx]; 30 while (nz--) { 31 jdx = *vi++; 32 x1 = x[jdx]; 33 s1 -= v[0] * x1; 34 v += 1; 35 } 36 x[idx] = s1; 37 } 38 /* backward solve the upper triangular */ 39 for (i = n - 1; i >= 0; i--) { 40 v = aa + diag[i] + 1; 41 vi = aj + diag[i] + 1; 42 nz = ai[i + 1] - diag[i] - 1; 43 idt = i; 44 s1 = x[idt]; 45 while (nz--) { 46 idx = *vi++; 47 x1 = x[idx]; 48 s1 -= v[0] * x1; 49 v += 1; 50 } 51 v = aa + diag[i]; 52 x[idt] = v[0] * s1; 53 } 54 PetscCall(VecRestoreArrayRead(bb, &b)); 55 PetscCall(VecRestoreArray(xx, &x)); 56 PetscCall(PetscLogFlops(2.0 * (a->nz) - A->cmap->n)); 57 PetscFunctionReturn(0); 58 } 59 60 PetscErrorCode MatForwardSolve_SeqBAIJ_1_NaturalOrdering(Mat A, Vec bb, Vec xx) { 61 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 62 const PetscInt n = a->mbs, *ai = a->i, *aj = a->j, *vi; 63 PetscScalar *x, sum; 64 const PetscScalar *b; 65 const MatScalar *aa = a->a, *v; 66 PetscInt i, nz; 67 68 PetscFunctionBegin; 69 if (!n) PetscFunctionReturn(0); 70 71 PetscCall(VecGetArrayRead(bb, &b)); 72 PetscCall(VecGetArray(xx, &x)); 73 74 /* forward solve the lower triangular */ 75 x[0] = b[0]; 76 v = aa; 77 vi = aj; 78 for (i = 1; i < n; i++) { 79 nz = ai[i + 1] - ai[i]; 80 sum = b[i]; 81 PetscSparseDenseMinusDot(sum, x, v, vi, nz); 82 v += nz; 83 vi += nz; 84 x[i] = sum; 85 } 86 PetscCall(PetscLogFlops(a->nz - A->cmap->n)); 87 PetscCall(VecRestoreArrayRead(bb, &b)); 88 PetscCall(VecRestoreArray(xx, &x)); 89 PetscFunctionReturn(0); 90 } 91 92 PetscErrorCode MatBackwardSolve_SeqBAIJ_1_NaturalOrdering(Mat A, Vec bb, Vec xx) { 93 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 94 const PetscInt n = a->mbs, *aj = a->j, *adiag = a->diag, *vi; 95 PetscScalar *x, sum; 96 const PetscScalar *b; 97 const MatScalar *aa = a->a, *v; 98 PetscInt i, nz; 99 100 PetscFunctionBegin; 101 if (!n) PetscFunctionReturn(0); 102 103 PetscCall(VecGetArrayRead(bb, &b)); 104 PetscCall(VecGetArray(xx, &x)); 105 106 /* backward solve the upper triangular */ 107 for (i = n - 1; i >= 0; i--) { 108 v = aa + adiag[i + 1] + 1; 109 vi = aj + adiag[i + 1] + 1; 110 nz = adiag[i] - adiag[i + 1] - 1; 111 sum = b[i]; 112 PetscSparseDenseMinusDot(sum, x, v, vi, nz); 113 x[i] = sum * v[nz]; /* x[i]=aa[adiag[i]]*sum; v++; */ 114 } 115 116 PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n)); 117 PetscCall(VecRestoreArrayRead(bb, &b)); 118 PetscCall(VecRestoreArray(xx, &x)); 119 PetscFunctionReturn(0); 120 } 121 122 PetscErrorCode MatSolve_SeqBAIJ_1_NaturalOrdering(Mat A, Vec bb, Vec xx) { 123 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 124 const PetscInt n = a->mbs, *ai = a->i, *aj = a->j, *adiag = a->diag, *vi; 125 PetscScalar *x, sum; 126 const PetscScalar *b; 127 const MatScalar *aa = a->a, *v; 128 PetscInt i, nz; 129 130 PetscFunctionBegin; 131 if (!n) PetscFunctionReturn(0); 132 133 PetscCall(VecGetArrayRead(bb, &b)); 134 PetscCall(VecGetArray(xx, &x)); 135 136 /* forward solve the lower triangular */ 137 x[0] = b[0]; 138 v = aa; 139 vi = aj; 140 for (i = 1; i < n; i++) { 141 nz = ai[i + 1] - ai[i]; 142 sum = b[i]; 143 PetscSparseDenseMinusDot(sum, x, v, vi, nz); 144 v += nz; 145 vi += nz; 146 x[i] = sum; 147 } 148 149 /* backward solve the upper triangular */ 150 for (i = n - 1; i >= 0; i--) { 151 v = aa + adiag[i + 1] + 1; 152 vi = aj + adiag[i + 1] + 1; 153 nz = adiag[i] - adiag[i + 1] - 1; 154 sum = x[i]; 155 PetscSparseDenseMinusDot(sum, x, v, vi, nz); 156 x[i] = sum * v[nz]; /* x[i]=aa[adiag[i]]*sum; v++; */ 157 } 158 159 PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n)); 160 PetscCall(VecRestoreArrayRead(bb, &b)); 161 PetscCall(VecRestoreArray(xx, &x)); 162 PetscFunctionReturn(0); 163 } 164