xref: /petsc/src/mat/impls/baij/seq/baijsolvnat3.c (revision 9566063d113dddea24716c546802770db7481bc0)
12c733ed4SBarry Smith #include <../src/mat/impls/baij/seq/baij.h>
22c733ed4SBarry Smith #include <petsc/private/kernels/blockinvert.h>
32c733ed4SBarry Smith 
42c733ed4SBarry Smith /*
52c733ed4SBarry Smith       Special case where the matrix was ILU(0) factored in the natural
62c733ed4SBarry Smith    ordering. This eliminates the need for the column and row permutation.
72c733ed4SBarry Smith */
82c733ed4SBarry Smith PetscErrorCode MatSolve_SeqBAIJ_3_NaturalOrdering_inplace(Mat A,Vec bb,Vec xx)
92c733ed4SBarry Smith {
102c733ed4SBarry Smith   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
112c733ed4SBarry Smith   const PetscInt    n  =a->mbs,*ai=a->i,*aj=a->j;
122c733ed4SBarry Smith   const PetscInt    *diag = a->diag,*vi;
132c733ed4SBarry Smith   const MatScalar   *aa   =a->a,*v;
142c733ed4SBarry Smith   PetscScalar       *x,s1,s2,s3,x1,x2,x3;
152c733ed4SBarry Smith   const PetscScalar *b;
162c733ed4SBarry Smith   PetscInt          jdx,idt,idx,nz,i;
172c733ed4SBarry Smith 
182c733ed4SBarry Smith   PetscFunctionBegin;
19*9566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(bb,&b));
20*9566063dSJacob Faibussowitsch   PetscCall(VecGetArray(xx,&x));
212c733ed4SBarry Smith 
222c733ed4SBarry Smith   /* forward solve the lower triangular */
232c733ed4SBarry Smith   idx  = 0;
242c733ed4SBarry Smith   x[0] = b[0]; x[1] = b[1]; x[2] = b[2];
252c733ed4SBarry Smith   for (i=1; i<n; i++) {
262c733ed4SBarry Smith     v    =  aa      + 9*ai[i];
272c733ed4SBarry Smith     vi   =  aj      + ai[i];
282c733ed4SBarry Smith     nz   =  diag[i] - ai[i];
292c733ed4SBarry Smith     idx +=  3;
302c733ed4SBarry Smith     s1   =  b[idx];s2 = b[1+idx];s3 = b[2+idx];
312c733ed4SBarry Smith     while (nz--) {
322c733ed4SBarry Smith       jdx = 3*(*vi++);
332c733ed4SBarry Smith       x1  = x[jdx];x2 = x[1+jdx];x3 = x[2+jdx];
342c733ed4SBarry Smith       s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3;
352c733ed4SBarry Smith       s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3;
362c733ed4SBarry Smith       s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3;
372c733ed4SBarry Smith       v  += 9;
382c733ed4SBarry Smith     }
392c733ed4SBarry Smith     x[idx]   = s1;
402c733ed4SBarry Smith     x[1+idx] = s2;
412c733ed4SBarry Smith     x[2+idx] = s3;
422c733ed4SBarry Smith   }
432c733ed4SBarry Smith   /* backward solve the upper triangular */
442c733ed4SBarry Smith   for (i=n-1; i>=0; i--) {
452c733ed4SBarry Smith     v   = aa + 9*diag[i] + 9;
462c733ed4SBarry Smith     vi  = aj + diag[i] + 1;
472c733ed4SBarry Smith     nz  = ai[i+1] - diag[i] - 1;
482c733ed4SBarry Smith     idt = 3*i;
492c733ed4SBarry Smith     s1  = x[idt];  s2 = x[1+idt];
502c733ed4SBarry Smith     s3  = x[2+idt];
512c733ed4SBarry Smith     while (nz--) {
522c733ed4SBarry Smith       idx = 3*(*vi++);
532c733ed4SBarry Smith       x1  = x[idx];   x2 = x[1+idx];x3    = x[2+idx];
542c733ed4SBarry Smith       s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3;
552c733ed4SBarry Smith       s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3;
562c733ed4SBarry Smith       s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3;
572c733ed4SBarry Smith       v  += 9;
582c733ed4SBarry Smith     }
592c733ed4SBarry Smith     v        = aa +  9*diag[i];
602c733ed4SBarry Smith     x[idt]   = v[0]*s1 + v[3]*s2 + v[6]*s3;
612c733ed4SBarry Smith     x[1+idt] = v[1]*s1 + v[4]*s2 + v[7]*s3;
622c733ed4SBarry Smith     x[2+idt] = v[2]*s1 + v[5]*s2 + v[8]*s3;
632c733ed4SBarry Smith   }
642c733ed4SBarry Smith 
65*9566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(bb,&b));
66*9566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(xx,&x));
67*9566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(2.0*9*(a->nz) - 3.0*A->cmap->n));
682c733ed4SBarry Smith   PetscFunctionReturn(0);
692c733ed4SBarry Smith }
702c733ed4SBarry Smith 
712c733ed4SBarry Smith PetscErrorCode MatSolve_SeqBAIJ_3_NaturalOrdering(Mat A,Vec bb,Vec xx)
722c733ed4SBarry Smith {
732c733ed4SBarry Smith   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
742c733ed4SBarry Smith   const PetscInt    n  =a->mbs,*vi,*ai=a->i,*aj=a->j,*adiag=a->diag;
752c733ed4SBarry Smith   PetscInt          i,k,nz,idx,jdx,idt;
762c733ed4SBarry Smith   const PetscInt    bs = A->rmap->bs,bs2 = a->bs2;
772c733ed4SBarry Smith   const MatScalar   *aa=a->a,*v;
782c733ed4SBarry Smith   PetscScalar       *x;
792c733ed4SBarry Smith   const PetscScalar *b;
802c733ed4SBarry Smith   PetscScalar       s1,s2,s3,x1,x2,x3;
812c733ed4SBarry Smith 
822c733ed4SBarry Smith   PetscFunctionBegin;
83*9566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(bb,&b));
84*9566063dSJacob Faibussowitsch   PetscCall(VecGetArray(xx,&x));
852c733ed4SBarry Smith   /* forward solve the lower triangular */
862c733ed4SBarry Smith   idx  = 0;
872c733ed4SBarry Smith   x[0] = b[idx]; x[1] = b[1+idx];x[2] = b[2+idx];
882c733ed4SBarry Smith   for (i=1; i<n; i++) {
892c733ed4SBarry Smith     v   = aa + bs2*ai[i];
902c733ed4SBarry Smith     vi  = aj + ai[i];
912c733ed4SBarry Smith     nz  = ai[i+1] - ai[i];
922c733ed4SBarry Smith     idx = bs*i;
932c733ed4SBarry Smith     s1  = b[idx];s2 = b[1+idx];s3 = b[2+idx];
942c733ed4SBarry Smith     for (k=0; k<nz; k++) {
952c733ed4SBarry Smith       jdx = bs*vi[k];
962c733ed4SBarry Smith       x1  = x[jdx];x2 = x[1+jdx]; x3 =x[2+jdx];
972c733ed4SBarry Smith       s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3;
982c733ed4SBarry Smith       s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3;
992c733ed4SBarry Smith       s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3;
1002c733ed4SBarry Smith 
1012c733ed4SBarry Smith       v +=  bs2;
1022c733ed4SBarry Smith     }
1032c733ed4SBarry Smith 
1042c733ed4SBarry Smith     x[idx]   = s1;
1052c733ed4SBarry Smith     x[1+idx] = s2;
1062c733ed4SBarry Smith     x[2+idx] = s3;
1072c733ed4SBarry Smith   }
1082c733ed4SBarry Smith 
1092c733ed4SBarry Smith   /* backward solve the upper triangular */
1102c733ed4SBarry Smith   for (i=n-1; i>=0; i--) {
1112c733ed4SBarry Smith     v   = aa + bs2*(adiag[i+1]+1);
1122c733ed4SBarry Smith     vi  = aj + adiag[i+1]+1;
1132c733ed4SBarry Smith     nz  = adiag[i] - adiag[i+1]-1;
1142c733ed4SBarry Smith     idt = bs*i;
1152c733ed4SBarry Smith     s1  = x[idt];  s2 = x[1+idt];s3 = x[2+idt];
1162c733ed4SBarry Smith 
1172c733ed4SBarry Smith     for (k=0; k<nz; k++) {
1182c733ed4SBarry Smith       idx = bs*vi[k];
1192c733ed4SBarry Smith       x1  = x[idx];   x2 = x[1+idx]; x3 = x[2+idx];
1202c733ed4SBarry Smith       s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3;
1212c733ed4SBarry Smith       s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3;
1222c733ed4SBarry Smith       s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3;
1232c733ed4SBarry Smith 
1242c733ed4SBarry Smith       v +=  bs2;
1252c733ed4SBarry Smith     }
1262c733ed4SBarry Smith     /* x = inv_diagonal*x */
1272c733ed4SBarry Smith     x[idt]   = v[0]*s1 + v[3]*s2 + v[6]*s3;
1282c733ed4SBarry Smith     x[1+idt] = v[1]*s1 + v[4]*s2 + v[7]*s3;
1292c733ed4SBarry Smith     x[2+idt] = v[2]*s1 + v[5]*s2 + v[8]*s3;
1302c733ed4SBarry Smith 
1312c733ed4SBarry Smith   }
1322c733ed4SBarry Smith 
133*9566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(bb,&b));
134*9566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(xx,&x));
135*9566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(2.0*bs2*(a->nz) - bs*A->cmap->n));
1362c733ed4SBarry Smith   PetscFunctionReturn(0);
1372c733ed4SBarry Smith }
1382c733ed4SBarry Smith 
1392c733ed4SBarry Smith PetscErrorCode MatForwardSolve_SeqBAIJ_3_NaturalOrdering(Mat A,Vec bb,Vec xx)
1402c733ed4SBarry Smith {
1412c733ed4SBarry Smith   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1422c733ed4SBarry Smith   const PetscInt    n  =a->mbs,*vi,*ai=a->i,*aj=a->j;
1432c733ed4SBarry Smith   PetscInt          i,k,nz,idx,jdx;
1442c733ed4SBarry Smith   const PetscInt    bs = A->rmap->bs,bs2 = a->bs2;
1452c733ed4SBarry Smith   const MatScalar   *aa=a->a,*v;
1462c733ed4SBarry Smith   PetscScalar       *x;
1472c733ed4SBarry Smith   const PetscScalar *b;
1482c733ed4SBarry Smith   PetscScalar       s1,s2,s3,x1,x2,x3;
1492c733ed4SBarry Smith 
1502c733ed4SBarry Smith   PetscFunctionBegin;
151*9566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(bb,&b));
152*9566063dSJacob Faibussowitsch   PetscCall(VecGetArray(xx,&x));
1532c733ed4SBarry Smith   /* forward solve the lower triangular */
1542c733ed4SBarry Smith   idx  = 0;
1552c733ed4SBarry Smith   x[0] = b[idx]; x[1] = b[1+idx];x[2] = b[2+idx];
1562c733ed4SBarry Smith   for (i=1; i<n; i++) {
1572c733ed4SBarry Smith     v   = aa + bs2*ai[i];
1582c733ed4SBarry Smith     vi  = aj + ai[i];
1592c733ed4SBarry Smith     nz  = ai[i+1] - ai[i];
1602c733ed4SBarry Smith     idx = bs*i;
1612c733ed4SBarry Smith     s1  = b[idx];s2 = b[1+idx];s3 = b[2+idx];
1622c733ed4SBarry Smith     for (k=0; k<nz; k++) {
1632c733ed4SBarry Smith       jdx = bs*vi[k];
1642c733ed4SBarry Smith       x1  = x[jdx];x2 = x[1+jdx]; x3 =x[2+jdx];
1652c733ed4SBarry Smith       s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3;
1662c733ed4SBarry Smith       s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3;
1672c733ed4SBarry Smith       s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3;
1682c733ed4SBarry Smith 
1692c733ed4SBarry Smith       v +=  bs2;
1702c733ed4SBarry Smith     }
1712c733ed4SBarry Smith 
1722c733ed4SBarry Smith     x[idx]   = s1;
1732c733ed4SBarry Smith     x[1+idx] = s2;
1742c733ed4SBarry Smith     x[2+idx] = s3;
1752c733ed4SBarry Smith   }
1762c733ed4SBarry Smith 
177*9566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(bb,&b));
178*9566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(xx,&x));
179*9566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(1.0*bs2*(a->nz) - bs*A->cmap->n));
1802c733ed4SBarry Smith   PetscFunctionReturn(0);
1812c733ed4SBarry Smith }
1822c733ed4SBarry Smith 
1832c733ed4SBarry Smith PetscErrorCode MatBackwardSolve_SeqBAIJ_3_NaturalOrdering(Mat A,Vec bb,Vec xx)
1842c733ed4SBarry Smith {
1852c733ed4SBarry Smith   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1862c733ed4SBarry Smith   const PetscInt    n  =a->mbs,*vi,*aj=a->j,*adiag=a->diag;
1872c733ed4SBarry Smith   PetscInt          i,k,nz,idx,idt;
1882c733ed4SBarry Smith   const PetscInt    bs = A->rmap->bs,bs2 = a->bs2;
1892c733ed4SBarry Smith   const MatScalar   *aa=a->a,*v;
1902c733ed4SBarry Smith   PetscScalar       *x;
1912c733ed4SBarry Smith   const PetscScalar *b;
1922c733ed4SBarry Smith   PetscScalar       s1,s2,s3,x1,x2,x3;
1932c733ed4SBarry Smith 
1942c733ed4SBarry Smith   PetscFunctionBegin;
195*9566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(bb,&b));
196*9566063dSJacob Faibussowitsch   PetscCall(VecGetArray(xx,&x));
1972c733ed4SBarry Smith 
1982c733ed4SBarry Smith   /* backward solve the upper triangular */
1992c733ed4SBarry Smith   for (i=n-1; i>=0; i--) {
2002c733ed4SBarry Smith     v   = aa + bs2*(adiag[i+1]+1);
2012c733ed4SBarry Smith     vi  = aj + adiag[i+1]+1;
2022c733ed4SBarry Smith     nz  = adiag[i] - adiag[i+1]-1;
2032c733ed4SBarry Smith     idt = bs*i;
2042c733ed4SBarry Smith     s1  = b[idt];  s2 = b[1+idt];s3 = b[2+idt];
2052c733ed4SBarry Smith 
2062c733ed4SBarry Smith     for (k=0; k<nz; k++) {
2072c733ed4SBarry Smith       idx = bs*vi[k];
2082c733ed4SBarry Smith       x1  = x[idx];   x2 = x[1+idx]; x3 = x[2+idx];
2092c733ed4SBarry Smith       s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3;
2102c733ed4SBarry Smith       s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3;
2112c733ed4SBarry Smith       s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3;
2122c733ed4SBarry Smith 
2132c733ed4SBarry Smith       v +=  bs2;
2142c733ed4SBarry Smith     }
2152c733ed4SBarry Smith     /* x = inv_diagonal*x */
2162c733ed4SBarry Smith     x[idt]   = v[0]*s1 + v[3]*s2 + v[6]*s3;
2172c733ed4SBarry Smith     x[1+idt] = v[1]*s1 + v[4]*s2 + v[7]*s3;
2182c733ed4SBarry Smith     x[2+idt] = v[2]*s1 + v[5]*s2 + v[8]*s3;
2192c733ed4SBarry Smith 
2202c733ed4SBarry Smith   }
2212c733ed4SBarry Smith 
222*9566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(bb,&b));
223*9566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(xx,&x));
224*9566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(2.0*bs2*(a->nz) - bs*A->cmap->n));
2252c733ed4SBarry Smith   PetscFunctionReturn(0);
2262c733ed4SBarry Smith }
227