1 #include <../src/mat/impls/baij/seq/baij.h> 2 3 PetscErrorCode MatSolveTranspose_SeqBAIJ_2_NaturalOrdering_inplace(Mat A, Vec bb, Vec xx) { 4 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 5 PetscInt i, nz, idx, idt, oidx; 6 const PetscInt *diag = a->diag, *vi, n = a->mbs, *ai = a->i, *aj = a->j; 7 const MatScalar *aa = a->a, *v; 8 PetscScalar s1, s2, x1, x2, *x; 9 10 PetscFunctionBegin; 11 PetscCall(VecCopy(bb, xx)); 12 PetscCall(VecGetArray(xx, &x)); 13 14 /* forward solve the U^T */ 15 idx = 0; 16 for (i = 0; i < n; i++) { 17 v = aa + 4 * diag[i]; 18 /* multiply by the inverse of the block diagonal */ 19 x1 = x[idx]; 20 x2 = x[1 + idx]; 21 s1 = v[0] * x1 + v[1] * x2; 22 s2 = v[2] * x1 + v[3] * x2; 23 v += 4; 24 25 vi = aj + diag[i] + 1; 26 nz = ai[i + 1] - diag[i] - 1; 27 while (nz--) { 28 oidx = 2 * (*vi++); 29 x[oidx] -= v[0] * s1 + v[1] * s2; 30 x[oidx + 1] -= v[2] * s1 + v[3] * s2; 31 v += 4; 32 } 33 x[idx] = s1; 34 x[1 + idx] = s2; 35 idx += 2; 36 } 37 /* backward solve the L^T */ 38 for (i = n - 1; i >= 0; i--) { 39 v = aa + 4 * diag[i] - 4; 40 vi = aj + diag[i] - 1; 41 nz = diag[i] - ai[i]; 42 idt = 2 * i; 43 s1 = x[idt]; 44 s2 = x[1 + idt]; 45 while (nz--) { 46 idx = 2 * (*vi--); 47 x[idx] -= v[0] * s1 + v[1] * s2; 48 x[idx + 1] -= v[2] * s1 + v[3] * s2; 49 v -= 4; 50 } 51 } 52 PetscCall(VecRestoreArray(xx, &x)); 53 PetscCall(PetscLogFlops(2.0 * 4.0 * (a->nz) - 2.0 * A->cmap->n)); 54 PetscFunctionReturn(0); 55 } 56 57 PetscErrorCode MatSolveTranspose_SeqBAIJ_2_NaturalOrdering(Mat A, Vec bb, Vec xx) { 58 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 59 const PetscInt n = a->mbs, *vi, *ai = a->i, *aj = a->j, *diag = a->diag; 60 PetscInt nz, idx, idt, j, i, oidx; 61 const PetscInt bs = A->rmap->bs, bs2 = a->bs2; 62 const MatScalar *aa = a->a, *v; 63 PetscScalar s1, s2, x1, x2, *x; 64 65 PetscFunctionBegin; 66 PetscCall(VecCopy(bb, xx)); 67 PetscCall(VecGetArray(xx, &x)); 68 69 /* forward solve the U^T */ 70 idx = 0; 71 for (i = 0; i < n; i++) { 72 v = aa + bs2 * diag[i]; 73 /* multiply by the inverse of the block diagonal */ 74 x1 = x[idx]; 75 x2 = x[1 + idx]; 76 s1 = v[0] * x1 + v[1] * x2; 77 s2 = v[2] * x1 + v[3] * x2; 78 v -= bs2; 79 80 vi = aj + diag[i] - 1; 81 nz = diag[i] - diag[i + 1] - 1; 82 for (j = 0; j > -nz; j--) { 83 oidx = bs * vi[j]; 84 x[oidx] -= v[0] * s1 + v[1] * s2; 85 x[oidx + 1] -= v[2] * s1 + v[3] * s2; 86 v -= bs2; 87 } 88 x[idx] = s1; 89 x[1 + idx] = s2; 90 idx += bs; 91 } 92 /* backward solve the L^T */ 93 for (i = n - 1; i >= 0; i--) { 94 v = aa + bs2 * ai[i]; 95 vi = aj + ai[i]; 96 nz = ai[i + 1] - ai[i]; 97 idt = bs * i; 98 s1 = x[idt]; 99 s2 = x[1 + idt]; 100 for (j = 0; j < nz; j++) { 101 idx = bs * vi[j]; 102 x[idx] -= v[0] * s1 + v[1] * s2; 103 x[idx + 1] -= v[2] * s1 + v[3] * s2; 104 v += bs2; 105 } 106 } 107 PetscCall(VecRestoreArray(xx, &x)); 108 PetscCall(PetscLogFlops(2.0 * bs2 * (a->nz) - bs * A->cmap->n)); 109 PetscFunctionReturn(0); 110 } 111