1 #include <../src/mat/impls/baij/seq/baij.h> 2 #include <petsc/private/kernels/blockinvert.h> 3 4 PetscErrorCode MatSolve_SeqBAIJ_5_NaturalOrdering_inplace(Mat A,Vec bb,Vec xx) 5 { 6 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 7 const PetscInt *diag=a->diag,n=a->mbs,*vi,*ai=a->i,*aj=a->j; 8 PetscInt i,nz,idx,idt,jdx; 9 const MatScalar *aa=a->a,*v; 10 PetscScalar *x,s1,s2,s3,s4,s5,x1,x2,x3,x4,x5; 11 const PetscScalar *b; 12 13 PetscFunctionBegin; 14 PetscCall(VecGetArrayRead(bb,&b)); 15 PetscCall(VecGetArray(xx,&x)); 16 /* forward solve the lower triangular */ 17 idx = 0; 18 x[0] = b[idx]; x[1] = b[1+idx]; x[2] = b[2+idx]; x[3] = b[3+idx];x[4] = b[4+idx]; 19 for (i=1; i<n; i++) { 20 v = aa + 25*ai[i]; 21 vi = aj + ai[i]; 22 nz = diag[i] - ai[i]; 23 idx = 5*i; 24 s1 = b[idx];s2 = b[1+idx];s3 = b[2+idx];s4 = b[3+idx];s5 = b[4+idx]; 25 while (nz--) { 26 jdx = 5*(*vi++); 27 x1 = x[jdx];x2 = x[1+jdx];x3 = x[2+jdx];x4 = x[3+jdx];x5 = x[4+jdx]; 28 s1 -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 29 s2 -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 30 s3 -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 31 s4 -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 32 s5 -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 33 v += 25; 34 } 35 x[idx] = s1; 36 x[1+idx] = s2; 37 x[2+idx] = s3; 38 x[3+idx] = s4; 39 x[4+idx] = s5; 40 } 41 /* backward solve the upper triangular */ 42 for (i=n-1; i>=0; i--) { 43 v = aa + 25*diag[i] + 25; 44 vi = aj + diag[i] + 1; 45 nz = ai[i+1] - diag[i] - 1; 46 idt = 5*i; 47 s1 = x[idt]; s2 = x[1+idt]; 48 s3 = x[2+idt];s4 = x[3+idt]; s5 = x[4+idt]; 49 while (nz--) { 50 idx = 5*(*vi++); 51 x1 = x[idx]; x2 = x[1+idx];x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; 52 s1 -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 53 s2 -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 54 s3 -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 55 s4 -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 56 s5 -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 57 v += 25; 58 } 59 v = aa + 25*diag[i]; 60 x[idt] = v[0]*s1 + v[5]*s2 + v[10]*s3 + v[15]*s4 + v[20]*s5; 61 x[1+idt] = v[1]*s1 + v[6]*s2 + v[11]*s3 + v[16]*s4 + v[21]*s5; 62 x[2+idt] = v[2]*s1 + v[7]*s2 + v[12]*s3 + v[17]*s4 + v[22]*s5; 63 x[3+idt] = v[3]*s1 + v[8]*s2 + v[13]*s3 + v[18]*s4 + v[23]*s5; 64 x[4+idt] = v[4]*s1 + v[9]*s2 + v[14]*s3 + v[19]*s4 + v[24]*s5; 65 } 66 67 PetscCall(VecRestoreArrayRead(bb,&b)); 68 PetscCall(VecRestoreArray(xx,&x)); 69 PetscCall(PetscLogFlops(2.0*25*(a->nz) - 5.0*A->cmap->n)); 70 PetscFunctionReturn(0); 71 } 72 73 PetscErrorCode MatSolve_SeqBAIJ_5_NaturalOrdering(Mat A,Vec bb,Vec xx) 74 { 75 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 76 const PetscInt n = a->mbs,*vi,*ai=a->i,*aj=a->j,*adiag=a->diag; 77 PetscInt i,k,nz,idx,idt,jdx; 78 const MatScalar *aa=a->a,*v; 79 PetscScalar *x,s1,s2,s3,s4,s5,x1,x2,x3,x4,x5; 80 const PetscScalar *b; 81 82 PetscFunctionBegin; 83 PetscCall(VecGetArrayRead(bb,&b)); 84 PetscCall(VecGetArray(xx,&x)); 85 /* forward solve the lower triangular */ 86 idx = 0; 87 x[0] = b[idx]; x[1] = b[1+idx]; x[2] = b[2+idx]; x[3] = b[3+idx];x[4] = b[4+idx]; 88 for (i=1; i<n; i++) { 89 v = aa + 25*ai[i]; 90 vi = aj + ai[i]; 91 nz = ai[i+1] - ai[i]; 92 idx = 5*i; 93 s1 = b[idx];s2 = b[1+idx];s3 = b[2+idx];s4 = b[3+idx];s5 = b[4+idx]; 94 for (k=0; k<nz; k++) { 95 jdx = 5*vi[k]; 96 x1 = x[jdx];x2 = x[1+jdx];x3 = x[2+jdx];x4 = x[3+jdx];x5 = x[4+jdx]; 97 s1 -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 98 s2 -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 99 s3 -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 100 s4 -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 101 s5 -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 102 v += 25; 103 } 104 x[idx] = s1; 105 x[1+idx] = s2; 106 x[2+idx] = s3; 107 x[3+idx] = s4; 108 x[4+idx] = s5; 109 } 110 111 /* backward solve the upper triangular */ 112 for (i=n-1; i>=0; i--) { 113 v = aa + 25*(adiag[i+1]+1); 114 vi = aj + adiag[i+1]+1; 115 nz = adiag[i] - adiag[i+1]-1; 116 idt = 5*i; 117 s1 = x[idt]; s2 = x[1+idt]; 118 s3 = x[2+idt];s4 = x[3+idt]; s5 = x[4+idt]; 119 for (k=0; k<nz; k++) { 120 idx = 5*vi[k]; 121 x1 = x[idx]; x2 = x[1+idx];x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; 122 s1 -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 123 s2 -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 124 s3 -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 125 s4 -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 126 s5 -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 127 v += 25; 128 } 129 /* x = inv_diagonal*x */ 130 x[idt] = v[0]*s1 + v[5]*s2 + v[10]*s3 + v[15]*s4 + v[20]*s5; 131 x[1+idt] = v[1]*s1 + v[6]*s2 + v[11]*s3 + v[16]*s4 + v[21]*s5; 132 x[2+idt] = v[2]*s1 + v[7]*s2 + v[12]*s3 + v[17]*s4 + v[22]*s5; 133 x[3+idt] = v[3]*s1 + v[8]*s2 + v[13]*s3 + v[18]*s4 + v[23]*s5; 134 x[4+idt] = v[4]*s1 + v[9]*s2 + v[14]*s3 + v[19]*s4 + v[24]*s5; 135 } 136 137 PetscCall(VecRestoreArrayRead(bb,&b)); 138 PetscCall(VecRestoreArray(xx,&x)); 139 PetscCall(PetscLogFlops(2.0*25*(a->nz) - 5.0*A->cmap->n)); 140 PetscFunctionReturn(0); 141 } 142