1 #include <../src/mat/impls/baij/seq/baij.h> 2 #include <petsc/private/kernels/blockinvert.h> 3 4 /* Block operations are done by accessing one column at at time */ 5 /* Default MatSolve for block size 11 */ 6 7 PetscErrorCode MatSolve_SeqBAIJ_11_NaturalOrdering(Mat A,Vec bb,Vec xx) 8 { 9 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data; 10 const PetscInt n=a->mbs,*ai=a->i,*aj=a->j,*adiag=a->diag,*vi,bs=A->rmap->bs,bs2=a->bs2; 11 PetscInt i,k,nz,idx,idt,m; 12 const MatScalar *aa=a->a,*v; 13 PetscScalar s[11]; 14 PetscScalar *x,xv; 15 const PetscScalar *b; 16 17 PetscFunctionBegin; 18 PetscCall(VecGetArrayRead(bb,&b)); 19 PetscCall(VecGetArray(xx,&x)); 20 21 /* forward solve the lower triangular */ 22 for (i=0; i<n; i++) { 23 v = aa + bs2*ai[i]; 24 vi = aj + ai[i]; 25 nz = ai[i+1] - ai[i]; 26 idt = bs*i; 27 x[idt] = b[idt]; x[1+idt] = b[1+idt]; x[2+idt] = b[2+idt]; x[3+idt] = b[3+idt]; x[4+idt] = b[4+idt]; 28 x[5+idt] = b[5+idt]; x[6+idt] = b[6+idt]; x[7+idt] = b[7+idt]; x[8+idt] = b[8+idt]; x[9+idt] = b[9+idt]; 29 x[10+idt] = b[10+idt]; 30 for (m=0; m<nz; m++) { 31 idx = bs*vi[m]; 32 for (k=0; k<11; k++) { 33 xv = x[k + idx]; 34 x[idt] -= v[0]*xv; 35 x[1+idt] -= v[1]*xv; 36 x[2+idt] -= v[2]*xv; 37 x[3+idt] -= v[3]*xv; 38 x[4+idt] -= v[4]*xv; 39 x[5+idt] -= v[5]*xv; 40 x[6+idt] -= v[6]*xv; 41 x[7+idt] -= v[7]*xv; 42 x[8+idt] -= v[8]*xv; 43 x[9+idt] -= v[9]*xv; 44 x[10+idt] -= v[10]*xv; 45 v += 11; 46 } 47 } 48 } 49 /* backward solve the upper triangular */ 50 for (i=n-1; i>=0; i--) { 51 v = aa + bs2*(adiag[i+1]+1); 52 vi = aj + adiag[i+1]+1; 53 nz = adiag[i] - adiag[i+1] - 1; 54 idt = bs*i; 55 s[0] = x[idt]; s[1] = x[1+idt]; s[2] = x[2+idt]; s[3] = x[3+idt]; s[4] = x[4+idt]; 56 s[5] = x[5+idt]; s[6] = x[6+idt]; s[7] = x[7+idt]; s[8] = x[8+idt]; s[9] = x[9+idt]; 57 s[10] = x[10+idt]; 58 59 for (m=0; m<nz; m++) { 60 idx = bs*vi[m]; 61 for (k=0; k<11; k++) { 62 xv = x[k + idx]; 63 s[0] -= v[0]*xv; 64 s[1] -= v[1]*xv; 65 s[2] -= v[2]*xv; 66 s[3] -= v[3]*xv; 67 s[4] -= v[4]*xv; 68 s[5] -= v[5]*xv; 69 s[6] -= v[6]*xv; 70 s[7] -= v[7]*xv; 71 s[8] -= v[8]*xv; 72 s[9] -= v[9]*xv; 73 s[10] -= v[10]*xv; 74 v += 11; 75 } 76 } 77 PetscCall(PetscArrayzero(x+idt,bs)); 78 for (k=0; k<11; k++) { 79 x[idt] += v[0]*s[k]; 80 x[1+idt] += v[1]*s[k]; 81 x[2+idt] += v[2]*s[k]; 82 x[3+idt] += v[3]*s[k]; 83 x[4+idt] += v[4]*s[k]; 84 x[5+idt] += v[5]*s[k]; 85 x[6+idt] += v[6]*s[k]; 86 x[7+idt] += v[7]*s[k]; 87 x[8+idt] += v[8]*s[k]; 88 x[9+idt] += v[9]*s[k]; 89 x[10+idt] += v[10]*s[k]; 90 v += 11; 91 } 92 } 93 PetscCall(VecRestoreArrayRead(bb,&b)); 94 PetscCall(VecRestoreArray(xx,&x)); 95 PetscCall(PetscLogFlops(2.0*bs2*(a->nz) - bs*A->cmap->n)); 96 PetscFunctionReturn(0); 97 } 98