xref: /petsc/src/mat/impls/baij/seq/baijsolvnat2.c (revision 9566063d113dddea24716c546802770db7481bc0)
12c733ed4SBarry Smith #include <../src/mat/impls/baij/seq/baij.h>
22c733ed4SBarry Smith #include <petsc/private/kernels/blockinvert.h>
32c733ed4SBarry Smith 
42c733ed4SBarry Smith /*
52c733ed4SBarry Smith       Special case where the matrix was ILU(0) factored in the natural
62c733ed4SBarry Smith    ordering. This eliminates the need for the column and row permutation.
72c733ed4SBarry Smith */
82c733ed4SBarry Smith PetscErrorCode MatSolve_SeqBAIJ_2_NaturalOrdering_inplace(Mat A,Vec bb,Vec xx)
92c733ed4SBarry Smith {
102c733ed4SBarry Smith   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
112c733ed4SBarry Smith   const PetscInt    n  =a->mbs,*vi,*ai=a->i,*aj=a->j,*diag=a->diag;
122c733ed4SBarry Smith   const MatScalar   *aa=a->a,*v;
132c733ed4SBarry Smith   PetscScalar       *x,s1,s2,x1,x2;
142c733ed4SBarry Smith   const PetscScalar *b;
152c733ed4SBarry Smith   PetscInt          jdx,idt,idx,nz,i;
162c733ed4SBarry Smith 
172c733ed4SBarry Smith   PetscFunctionBegin;
18*9566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(bb,&b));
19*9566063dSJacob Faibussowitsch   PetscCall(VecGetArray(xx,&x));
202c733ed4SBarry Smith 
212c733ed4SBarry Smith   /* forward solve the lower triangular */
222c733ed4SBarry Smith   idx  = 0;
232c733ed4SBarry Smith   x[0] = b[0]; x[1] = b[1];
242c733ed4SBarry Smith   for (i=1; i<n; i++) {
252c733ed4SBarry Smith     v    =  aa      + 4*ai[i];
262c733ed4SBarry Smith     vi   =  aj      + ai[i];
272c733ed4SBarry Smith     nz   =  diag[i] - ai[i];
282c733ed4SBarry Smith     idx +=  2;
292c733ed4SBarry Smith     s1   =  b[idx];s2 = b[1+idx];
302c733ed4SBarry Smith     while (nz--) {
312c733ed4SBarry Smith       jdx = 2*(*vi++);
322c733ed4SBarry Smith       x1  = x[jdx];x2 = x[1+jdx];
332c733ed4SBarry Smith       s1 -= v[0]*x1 + v[2]*x2;
342c733ed4SBarry Smith       s2 -= v[1]*x1 + v[3]*x2;
352c733ed4SBarry Smith       v  += 4;
362c733ed4SBarry Smith     }
372c733ed4SBarry Smith     x[idx]   = s1;
382c733ed4SBarry Smith     x[1+idx] = s2;
392c733ed4SBarry Smith   }
402c733ed4SBarry Smith   /* backward solve the upper triangular */
412c733ed4SBarry Smith   for (i=n-1; i>=0; i--) {
422c733ed4SBarry Smith     v   = aa + 4*diag[i] + 4;
432c733ed4SBarry Smith     vi  = aj + diag[i] + 1;
442c733ed4SBarry Smith     nz  = ai[i+1] - diag[i] - 1;
452c733ed4SBarry Smith     idt = 2*i;
462c733ed4SBarry Smith     s1  = x[idt];  s2 = x[1+idt];
472c733ed4SBarry Smith     while (nz--) {
482c733ed4SBarry Smith       idx = 2*(*vi++);
492c733ed4SBarry Smith       x1  = x[idx];   x2 = x[1+idx];
502c733ed4SBarry Smith       s1 -= v[0]*x1 + v[2]*x2;
512c733ed4SBarry Smith       s2 -= v[1]*x1 + v[3]*x2;
522c733ed4SBarry Smith       v  += 4;
532c733ed4SBarry Smith     }
542c733ed4SBarry Smith     v        = aa +  4*diag[i];
552c733ed4SBarry Smith     x[idt]   = v[0]*s1 + v[2]*s2;
562c733ed4SBarry Smith     x[1+idt] = v[1]*s1 + v[3]*s2;
572c733ed4SBarry Smith   }
582c733ed4SBarry Smith 
59*9566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(bb,&b));
60*9566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(xx,&x));
61*9566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(2.0*4*(a->nz) - 2.0*A->cmap->n));
622c733ed4SBarry Smith   PetscFunctionReturn(0);
632c733ed4SBarry Smith }
642c733ed4SBarry Smith 
652c733ed4SBarry Smith PetscErrorCode MatSolve_SeqBAIJ_2_NaturalOrdering(Mat A,Vec bb,Vec xx)
662c733ed4SBarry Smith {
672c733ed4SBarry Smith   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
682c733ed4SBarry Smith   const PetscInt    n  = a->mbs,*vi,*ai=a->i,*aj=a->j,*adiag=a->diag;
692c733ed4SBarry Smith   PetscInt          i,k,nz,idx,idt,jdx;
702c733ed4SBarry Smith   const MatScalar   *aa=a->a,*v;
712c733ed4SBarry Smith   PetscScalar       *x,s1,s2,x1,x2;
722c733ed4SBarry Smith   const PetscScalar *b;
732c733ed4SBarry Smith 
742c733ed4SBarry Smith   PetscFunctionBegin;
75*9566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(bb,&b));
76*9566063dSJacob Faibussowitsch   PetscCall(VecGetArray(xx,&x));
772c733ed4SBarry Smith   /* forward solve the lower triangular */
782c733ed4SBarry Smith   idx  = 0;
792c733ed4SBarry Smith   x[0] = b[idx]; x[1] = b[1+idx];
802c733ed4SBarry Smith   for (i=1; i<n; i++) {
812c733ed4SBarry Smith     v   = aa + 4*ai[i];
822c733ed4SBarry Smith     vi  = aj + ai[i];
832c733ed4SBarry Smith     nz  = ai[i+1] - ai[i];
842c733ed4SBarry Smith     idx = 2*i;
852c733ed4SBarry Smith     s1  = b[idx];s2 = b[1+idx];
862c733ed4SBarry Smith     PetscPrefetchBlock(vi+nz,nz,0,PETSC_PREFETCH_HINT_NTA);
872c733ed4SBarry Smith     PetscPrefetchBlock(v+4*nz,4*nz,0,PETSC_PREFETCH_HINT_NTA);
882c733ed4SBarry Smith     for (k=0; k<nz; k++) {
892c733ed4SBarry Smith       jdx = 2*vi[k];
902c733ed4SBarry Smith       x1  = x[jdx];x2 = x[1+jdx];
912c733ed4SBarry Smith       s1 -= v[0]*x1 + v[2]*x2;
922c733ed4SBarry Smith       s2 -= v[1]*x1 + v[3]*x2;
932c733ed4SBarry Smith       v  +=  4;
942c733ed4SBarry Smith     }
952c733ed4SBarry Smith     x[idx]   = s1;
962c733ed4SBarry Smith     x[1+idx] = s2;
972c733ed4SBarry Smith   }
982c733ed4SBarry Smith 
992c733ed4SBarry Smith   /* backward solve the upper triangular */
1002c733ed4SBarry Smith   for (i=n-1; i>=0; i--) {
1012c733ed4SBarry Smith     v   = aa + 4*(adiag[i+1]+1);
1022c733ed4SBarry Smith     vi  = aj + adiag[i+1]+1;
1032c733ed4SBarry Smith     nz  = adiag[i] - adiag[i+1]-1;
1042c733ed4SBarry Smith     idt = 2*i;
1052c733ed4SBarry Smith     s1  = x[idt];  s2 = x[1+idt];
1062c733ed4SBarry Smith     PetscPrefetchBlock(vi+nz,nz,0,PETSC_PREFETCH_HINT_NTA);
1072c733ed4SBarry Smith     PetscPrefetchBlock(v+4*nz,4*nz,0,PETSC_PREFETCH_HINT_NTA);
1082c733ed4SBarry Smith     for (k=0; k<nz; k++) {
1092c733ed4SBarry Smith       idx = 2*vi[k];
1102c733ed4SBarry Smith       x1  = x[idx];   x2 = x[1+idx];
1112c733ed4SBarry Smith       s1 -= v[0]*x1 + v[2]*x2;
1122c733ed4SBarry Smith       s2 -= v[1]*x1 + v[3]*x2;
1132c733ed4SBarry Smith       v  += 4;
1142c733ed4SBarry Smith     }
1152c733ed4SBarry Smith     /* x = inv_diagonal*x */
1162c733ed4SBarry Smith     x[idt]   = v[0]*s1 + v[2]*s2;
1172c733ed4SBarry Smith     x[1+idt] = v[1]*s1 + v[3]*s2;
1182c733ed4SBarry Smith   }
1192c733ed4SBarry Smith 
120*9566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(bb,&b));
121*9566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(xx,&x));
122*9566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(2.0*4*(a->nz) - 2.0*A->cmap->n));
1232c733ed4SBarry Smith   PetscFunctionReturn(0);
1242c733ed4SBarry Smith }
1252c733ed4SBarry Smith 
1262c733ed4SBarry Smith PetscErrorCode MatForwardSolve_SeqBAIJ_2_NaturalOrdering(Mat A,Vec bb,Vec xx)
1272c733ed4SBarry Smith {
1282c733ed4SBarry Smith   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1292c733ed4SBarry Smith   const PetscInt    n  = a->mbs,*vi,*ai=a->i,*aj=a->j;
1302c733ed4SBarry Smith   PetscInt          i,k,nz,idx,jdx;
1312c733ed4SBarry Smith   const MatScalar   *aa=a->a,*v;
1322c733ed4SBarry Smith   PetscScalar       *x,s1,s2,x1,x2;
1332c733ed4SBarry Smith   const PetscScalar *b;
1342c733ed4SBarry Smith 
1352c733ed4SBarry Smith   PetscFunctionBegin;
136*9566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(bb,&b));
137*9566063dSJacob Faibussowitsch   PetscCall(VecGetArray(xx,&x));
1382c733ed4SBarry Smith   /* forward solve the lower triangular */
1392c733ed4SBarry Smith   idx  = 0;
1402c733ed4SBarry Smith   x[0] = b[idx]; x[1] = b[1+idx];
1412c733ed4SBarry Smith   for (i=1; i<n; i++) {
1422c733ed4SBarry Smith     v   = aa + 4*ai[i];
1432c733ed4SBarry Smith     vi  = aj + ai[i];
1442c733ed4SBarry Smith     nz  = ai[i+1] - ai[i];
1452c733ed4SBarry Smith     idx = 2*i;
1462c733ed4SBarry Smith     s1  = b[idx];s2 = b[1+idx];
1472c733ed4SBarry Smith     PetscPrefetchBlock(vi+nz,nz,0,PETSC_PREFETCH_HINT_NTA);
1482c733ed4SBarry Smith     PetscPrefetchBlock(v+4*nz,4*nz,0,PETSC_PREFETCH_HINT_NTA);
1492c733ed4SBarry Smith     for (k=0; k<nz; k++) {
1502c733ed4SBarry Smith       jdx = 2*vi[k];
1512c733ed4SBarry Smith       x1  = x[jdx];x2 = x[1+jdx];
1522c733ed4SBarry Smith       s1 -= v[0]*x1 + v[2]*x2;
1532c733ed4SBarry Smith       s2 -= v[1]*x1 + v[3]*x2;
1542c733ed4SBarry Smith       v  +=  4;
1552c733ed4SBarry Smith     }
1562c733ed4SBarry Smith     x[idx]   = s1;
1572c733ed4SBarry Smith     x[1+idx] = s2;
1582c733ed4SBarry Smith   }
1592c733ed4SBarry Smith 
160*9566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(bb,&b));
161*9566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(xx,&x));
162*9566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(4.0*(a->nz) - A->cmap->n));
1632c733ed4SBarry Smith   PetscFunctionReturn(0);
1642c733ed4SBarry Smith }
1652c733ed4SBarry Smith 
1662c733ed4SBarry Smith PetscErrorCode MatBackwardSolve_SeqBAIJ_2_NaturalOrdering(Mat A,Vec bb,Vec xx)
1672c733ed4SBarry Smith {
1682c733ed4SBarry Smith   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1692c733ed4SBarry Smith   const PetscInt    n  = a->mbs,*vi,*aj=a->j,*adiag=a->diag;
1702c733ed4SBarry Smith   PetscInt          i,k,nz,idx,idt;
1712c733ed4SBarry Smith   const MatScalar   *aa=a->a,*v;
1722c733ed4SBarry Smith   PetscScalar       *x,s1,s2,x1,x2;
1732c733ed4SBarry Smith   const PetscScalar *b;
1742c733ed4SBarry Smith 
1752c733ed4SBarry Smith   PetscFunctionBegin;
176*9566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(bb,&b));
177*9566063dSJacob Faibussowitsch   PetscCall(VecGetArray(xx,&x));
1782c733ed4SBarry Smith 
1792c733ed4SBarry Smith   /* backward solve the upper triangular */
1802c733ed4SBarry Smith   for (i=n-1; i>=0; i--) {
1812c733ed4SBarry Smith     v   = aa + 4*(adiag[i+1]+1);
1822c733ed4SBarry Smith     vi  = aj + adiag[i+1]+1;
1832c733ed4SBarry Smith     nz  = adiag[i] - adiag[i+1]-1;
1842c733ed4SBarry Smith     idt = 2*i;
1852c733ed4SBarry Smith     s1  = b[idt];  s2 = b[1+idt];
1862c733ed4SBarry Smith     PetscPrefetchBlock(vi+nz,nz,0,PETSC_PREFETCH_HINT_NTA);
1872c733ed4SBarry Smith     PetscPrefetchBlock(v+4*nz,4*nz,0,PETSC_PREFETCH_HINT_NTA);
1882c733ed4SBarry Smith     for (k=0; k<nz; k++) {
1892c733ed4SBarry Smith       idx = 2*vi[k];
1902c733ed4SBarry Smith       x1  = x[idx];   x2 = x[1+idx];
1912c733ed4SBarry Smith       s1 -= v[0]*x1 + v[2]*x2;
1922c733ed4SBarry Smith       s2 -= v[1]*x1 + v[3]*x2;
1932c733ed4SBarry Smith       v  += 4;
1942c733ed4SBarry Smith     }
1952c733ed4SBarry Smith     /* x = inv_diagonal*x */
1962c733ed4SBarry Smith     x[idt]   = v[0]*s1 + v[2]*s2;
1972c733ed4SBarry Smith     x[1+idt] = v[1]*s1 + v[3]*s2;
1982c733ed4SBarry Smith   }
1992c733ed4SBarry Smith 
200*9566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(bb,&b));
201*9566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(xx,&x));
202*9566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(4.0*a->nz - A->cmap->n));
2032c733ed4SBarry Smith   PetscFunctionReturn(0);
2042c733ed4SBarry Smith }
205