1 #include <../src/mat/impls/baij/seq/baij.h> 2 #include <petsc/private/kernels/blockinvert.h> 3 4 PetscErrorCode MatSolve_SeqBAIJ_5_NaturalOrdering_inplace(Mat A, Vec bb, Vec xx) 5 { 6 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 7 const PetscInt *diag = a->diag, n = a->mbs, *vi, *ai = a->i, *aj = a->j; 8 PetscInt i, nz, idx, idt, jdx; 9 const MatScalar *aa = a->a, *v; 10 PetscScalar *x, s1, s2, s3, s4, s5, x1, x2, x3, x4, x5; 11 const PetscScalar *b; 12 13 PetscFunctionBegin; 14 PetscCall(VecGetArrayRead(bb, &b)); 15 PetscCall(VecGetArray(xx, &x)); 16 /* forward solve the lower triangular */ 17 idx = 0; 18 x[0] = b[idx]; 19 x[1] = b[1 + idx]; 20 x[2] = b[2 + idx]; 21 x[3] = b[3 + idx]; 22 x[4] = b[4 + idx]; 23 for (i = 1; i < n; i++) { 24 v = aa + 25 * ai[i]; 25 vi = aj + ai[i]; 26 nz = diag[i] - ai[i]; 27 idx = 5 * i; 28 s1 = b[idx]; 29 s2 = b[1 + idx]; 30 s3 = b[2 + idx]; 31 s4 = b[3 + idx]; 32 s5 = b[4 + idx]; 33 while (nz--) { 34 jdx = 5 * (*vi++); 35 x1 = x[jdx]; 36 x2 = x[1 + jdx]; 37 x3 = x[2 + jdx]; 38 x4 = x[3 + jdx]; 39 x5 = x[4 + jdx]; 40 s1 -= v[0] * x1 + v[5] * x2 + v[10] * x3 + v[15] * x4 + v[20] * x5; 41 s2 -= v[1] * x1 + v[6] * x2 + v[11] * x3 + v[16] * x4 + v[21] * x5; 42 s3 -= v[2] * x1 + v[7] * x2 + v[12] * x3 + v[17] * x4 + v[22] * x5; 43 s4 -= v[3] * x1 + v[8] * x2 + v[13] * x3 + v[18] * x4 + v[23] * x5; 44 s5 -= v[4] * x1 + v[9] * x2 + v[14] * x3 + v[19] * x4 + v[24] * x5; 45 v += 25; 46 } 47 x[idx] = s1; 48 x[1 + idx] = s2; 49 x[2 + idx] = s3; 50 x[3 + idx] = s4; 51 x[4 + idx] = s5; 52 } 53 /* backward solve the upper triangular */ 54 for (i = n - 1; i >= 0; i--) { 55 v = aa + 25 * diag[i] + 25; 56 vi = aj + diag[i] + 1; 57 nz = ai[i + 1] - diag[i] - 1; 58 idt = 5 * i; 59 s1 = x[idt]; 60 s2 = x[1 + idt]; 61 s3 = x[2 + idt]; 62 s4 = x[3 + idt]; 63 s5 = x[4 + idt]; 64 while (nz--) { 65 idx = 5 * (*vi++); 66 x1 = x[idx]; 67 x2 = x[1 + idx]; 68 x3 = x[2 + idx]; 69 x4 = x[3 + idx]; 70 x5 = x[4 + idx]; 71 s1 -= v[0] * x1 + v[5] * x2 + v[10] * x3 + v[15] * x4 + v[20] * x5; 72 s2 -= v[1] * x1 + v[6] * x2 + v[11] * x3 + v[16] * x4 + v[21] * x5; 73 s3 -= v[2] * x1 + v[7] * x2 + v[12] * x3 + v[17] * x4 + v[22] * x5; 74 s4 -= v[3] * x1 + v[8] * x2 + v[13] * x3 + v[18] * x4 + v[23] * x5; 75 s5 -= v[4] * x1 + v[9] * x2 + v[14] * x3 + v[19] * x4 + v[24] * x5; 76 v += 25; 77 } 78 v = aa + 25 * diag[i]; 79 x[idt] = v[0] * s1 + v[5] * s2 + v[10] * s3 + v[15] * s4 + v[20] * s5; 80 x[1 + idt] = v[1] * s1 + v[6] * s2 + v[11] * s3 + v[16] * s4 + v[21] * s5; 81 x[2 + idt] = v[2] * s1 + v[7] * s2 + v[12] * s3 + v[17] * s4 + v[22] * s5; 82 x[3 + idt] = v[3] * s1 + v[8] * s2 + v[13] * s3 + v[18] * s4 + v[23] * s5; 83 x[4 + idt] = v[4] * s1 + v[9] * s2 + v[14] * s3 + v[19] * s4 + v[24] * s5; 84 } 85 86 PetscCall(VecRestoreArrayRead(bb, &b)); 87 PetscCall(VecRestoreArray(xx, &x)); 88 PetscCall(PetscLogFlops(2.0 * 25 * (a->nz) - 5.0 * A->cmap->n)); 89 PetscFunctionReturn(PETSC_SUCCESS); 90 } 91 92 PetscErrorCode MatSolve_SeqBAIJ_5_NaturalOrdering(Mat A, Vec bb, Vec xx) 93 { 94 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 95 const PetscInt n = a->mbs, *vi, *ai = a->i, *aj = a->j, *adiag = a->diag; 96 PetscInt i, k, nz, idx, idt, jdx; 97 const MatScalar *aa = a->a, *v; 98 PetscScalar *x, s1, s2, s3, s4, s5, x1, x2, x3, x4, x5; 99 const PetscScalar *b; 100 101 PetscFunctionBegin; 102 PetscCall(VecGetArrayRead(bb, &b)); 103 PetscCall(VecGetArray(xx, &x)); 104 /* forward solve the lower triangular */ 105 idx = 0; 106 x[0] = b[idx]; 107 x[1] = b[1 + idx]; 108 x[2] = b[2 + idx]; 109 x[3] = b[3 + idx]; 110 x[4] = b[4 + idx]; 111 for (i = 1; i < n; i++) { 112 v = aa + 25 * ai[i]; 113 vi = aj + ai[i]; 114 nz = ai[i + 1] - ai[i]; 115 idx = 5 * i; 116 s1 = b[idx]; 117 s2 = b[1 + idx]; 118 s3 = b[2 + idx]; 119 s4 = b[3 + idx]; 120 s5 = b[4 + idx]; 121 for (k = 0; k < nz; k++) { 122 jdx = 5 * vi[k]; 123 x1 = x[jdx]; 124 x2 = x[1 + jdx]; 125 x3 = x[2 + jdx]; 126 x4 = x[3 + jdx]; 127 x5 = x[4 + jdx]; 128 s1 -= v[0] * x1 + v[5] * x2 + v[10] * x3 + v[15] * x4 + v[20] * x5; 129 s2 -= v[1] * x1 + v[6] * x2 + v[11] * x3 + v[16] * x4 + v[21] * x5; 130 s3 -= v[2] * x1 + v[7] * x2 + v[12] * x3 + v[17] * x4 + v[22] * x5; 131 s4 -= v[3] * x1 + v[8] * x2 + v[13] * x3 + v[18] * x4 + v[23] * x5; 132 s5 -= v[4] * x1 + v[9] * x2 + v[14] * x3 + v[19] * x4 + v[24] * x5; 133 v += 25; 134 } 135 x[idx] = s1; 136 x[1 + idx] = s2; 137 x[2 + idx] = s3; 138 x[3 + idx] = s4; 139 x[4 + idx] = s5; 140 } 141 142 /* backward solve the upper triangular */ 143 for (i = n - 1; i >= 0; i--) { 144 v = aa + 25 * (adiag[i + 1] + 1); 145 vi = aj + adiag[i + 1] + 1; 146 nz = adiag[i] - adiag[i + 1] - 1; 147 idt = 5 * i; 148 s1 = x[idt]; 149 s2 = x[1 + idt]; 150 s3 = x[2 + idt]; 151 s4 = x[3 + idt]; 152 s5 = x[4 + idt]; 153 for (k = 0; k < nz; k++) { 154 idx = 5 * vi[k]; 155 x1 = x[idx]; 156 x2 = x[1 + idx]; 157 x3 = x[2 + idx]; 158 x4 = x[3 + idx]; 159 x5 = x[4 + idx]; 160 s1 -= v[0] * x1 + v[5] * x2 + v[10] * x3 + v[15] * x4 + v[20] * x5; 161 s2 -= v[1] * x1 + v[6] * x2 + v[11] * x3 + v[16] * x4 + v[21] * x5; 162 s3 -= v[2] * x1 + v[7] * x2 + v[12] * x3 + v[17] * x4 + v[22] * x5; 163 s4 -= v[3] * x1 + v[8] * x2 + v[13] * x3 + v[18] * x4 + v[23] * x5; 164 s5 -= v[4] * x1 + v[9] * x2 + v[14] * x3 + v[19] * x4 + v[24] * x5; 165 v += 25; 166 } 167 /* x = inv_diagonal*x */ 168 x[idt] = v[0] * s1 + v[5] * s2 + v[10] * s3 + v[15] * s4 + v[20] * s5; 169 x[1 + idt] = v[1] * s1 + v[6] * s2 + v[11] * s3 + v[16] * s4 + v[21] * s5; 170 x[2 + idt] = v[2] * s1 + v[7] * s2 + v[12] * s3 + v[17] * s4 + v[22] * s5; 171 x[3 + idt] = v[3] * s1 + v[8] * s2 + v[13] * s3 + v[18] * s4 + v[23] * s5; 172 x[4 + idt] = v[4] * s1 + v[9] * s2 + v[14] * s3 + v[19] * s4 + v[24] * s5; 173 } 174 175 PetscCall(VecRestoreArrayRead(bb, &b)); 176 PetscCall(VecRestoreArray(xx, &x)); 177 PetscCall(PetscLogFlops(2.0 * 25 * (a->nz) - 5.0 * A->cmap->n)); 178 PetscFunctionReturn(PETSC_SUCCESS); 179 } 180