1 #include <../src/mat/impls/baij/seq/baij.h> 2 #include <petsc/private/kernels/blockinvert.h> 3 4 /* Block operations are done by accessing one column at at time */ 5 /* Default MatSolve for block size 11 */ 6 7 PetscErrorCode MatSolve_SeqBAIJ_11_NaturalOrdering(Mat A, Vec bb, Vec xx) { 8 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 9 const PetscInt n = a->mbs, *ai = a->i, *aj = a->j, *adiag = a->diag, *vi, bs = A->rmap->bs, bs2 = a->bs2; 10 PetscInt i, k, nz, idx, idt, m; 11 const MatScalar *aa = a->a, *v; 12 PetscScalar s[11]; 13 PetscScalar *x, xv; 14 const PetscScalar *b; 15 16 PetscFunctionBegin; 17 PetscCall(VecGetArrayRead(bb, &b)); 18 PetscCall(VecGetArray(xx, &x)); 19 20 /* forward solve the lower triangular */ 21 for (i = 0; i < n; i++) { 22 v = aa + bs2 * ai[i]; 23 vi = aj + ai[i]; 24 nz = ai[i + 1] - ai[i]; 25 idt = bs * i; 26 x[idt] = b[idt]; 27 x[1 + idt] = b[1 + idt]; 28 x[2 + idt] = b[2 + idt]; 29 x[3 + idt] = b[3 + idt]; 30 x[4 + idt] = b[4 + idt]; 31 x[5 + idt] = b[5 + idt]; 32 x[6 + idt] = b[6 + idt]; 33 x[7 + idt] = b[7 + idt]; 34 x[8 + idt] = b[8 + idt]; 35 x[9 + idt] = b[9 + idt]; 36 x[10 + idt] = b[10 + idt]; 37 for (m = 0; m < nz; m++) { 38 idx = bs * vi[m]; 39 for (k = 0; k < 11; k++) { 40 xv = x[k + idx]; 41 x[idt] -= v[0] * xv; 42 x[1 + idt] -= v[1] * xv; 43 x[2 + idt] -= v[2] * xv; 44 x[3 + idt] -= v[3] * xv; 45 x[4 + idt] -= v[4] * xv; 46 x[5 + idt] -= v[5] * xv; 47 x[6 + idt] -= v[6] * xv; 48 x[7 + idt] -= v[7] * xv; 49 x[8 + idt] -= v[8] * xv; 50 x[9 + idt] -= v[9] * xv; 51 x[10 + idt] -= v[10] * xv; 52 v += 11; 53 } 54 } 55 } 56 /* backward solve the upper triangular */ 57 for (i = n - 1; i >= 0; i--) { 58 v = aa + bs2 * (adiag[i + 1] + 1); 59 vi = aj + adiag[i + 1] + 1; 60 nz = adiag[i] - adiag[i + 1] - 1; 61 idt = bs * i; 62 s[0] = x[idt]; 63 s[1] = x[1 + idt]; 64 s[2] = x[2 + idt]; 65 s[3] = x[3 + idt]; 66 s[4] = x[4 + idt]; 67 s[5] = x[5 + idt]; 68 s[6] = x[6 + idt]; 69 s[7] = x[7 + idt]; 70 s[8] = x[8 + idt]; 71 s[9] = x[9 + idt]; 72 s[10] = x[10 + idt]; 73 74 for (m = 0; m < nz; m++) { 75 idx = bs * vi[m]; 76 for (k = 0; k < 11; k++) { 77 xv = x[k + idx]; 78 s[0] -= v[0] * xv; 79 s[1] -= v[1] * xv; 80 s[2] -= v[2] * xv; 81 s[3] -= v[3] * xv; 82 s[4] -= v[4] * xv; 83 s[5] -= v[5] * xv; 84 s[6] -= v[6] * xv; 85 s[7] -= v[7] * xv; 86 s[8] -= v[8] * xv; 87 s[9] -= v[9] * xv; 88 s[10] -= v[10] * xv; 89 v += 11; 90 } 91 } 92 PetscCall(PetscArrayzero(x + idt, bs)); 93 for (k = 0; k < 11; k++) { 94 x[idt] += v[0] * s[k]; 95 x[1 + idt] += v[1] * s[k]; 96 x[2 + idt] += v[2] * s[k]; 97 x[3 + idt] += v[3] * s[k]; 98 x[4 + idt] += v[4] * s[k]; 99 x[5 + idt] += v[5] * s[k]; 100 x[6 + idt] += v[6] * s[k]; 101 x[7 + idt] += v[7] * s[k]; 102 x[8 + idt] += v[8] * s[k]; 103 x[9 + idt] += v[9] * s[k]; 104 x[10 + idt] += v[10] * s[k]; 105 v += 11; 106 } 107 } 108 PetscCall(VecRestoreArrayRead(bb, &b)); 109 PetscCall(VecRestoreArray(xx, &x)); 110 PetscCall(PetscLogFlops(2.0 * bs2 * (a->nz) - bs * A->cmap->n)); 111 PetscFunctionReturn(0); 112 } 113