xref: /petsc/src/mat/impls/baij/seq/ftn-kernels/fsolvebaij.F90 (revision d66e387e1b17bf1eafe50b0bb7df00ccc9053b5a)
1c96caaccSSatish Balay!
2c96caaccSSatish Balay!
3c96caaccSSatish Balay!    Fortran kernel for sparse triangular solve in the BAIJ matrix format
4c96caaccSSatish Balay! This ONLY works for factorizations in the NATURAL ORDERING, i.e.
5c96caaccSSatish Balay! with MatSolve_SeqBAIJ_4_NaturalOrdering()
6c96caaccSSatish Balay!
7c96caaccSSatish Balay#include <petsc/finclude/petscsys.h>
8c96caaccSSatish Balay!
9c96caaccSSatish Balay
100ccf82acSMartin Diehlpure subroutine FortranSolveBAIJ4Unroll(n,x,ai,aj,adiag,a,b)
110ccf82acSMartin Diehl  implicit none (type, external)
120ccf82acSMartin Diehl  MatScalar, intent(in) :: a(0:*)
130ccf82acSMartin Diehl  PetscScalar, intent(inout) :: x(0:*)
140ccf82acSMartin Diehl  PetscScalar, intent(in) :: b(0:*)
150ccf82acSMartin Diehl  PetscInt, intent(in) :: n
160ccf82acSMartin Diehl  PetscInt, intent(in) :: ai(0:*), aj(0:*), adiag(0:*)
17c96caaccSSatish Balay
180ccf82acSMartin Diehl  PetscInt :: i,j,jstart,jend
190ccf82acSMartin Diehl  PetscInt :: idx,ax,jdx
200ccf82acSMartin Diehl  PetscScalar :: s1,s2,s3,s4
210ccf82acSMartin Diehl  PetscScalar :: x1,x2,x3,x4
220ccf82acSMartin Diehl
23c96caaccSSatish Balay  PETSC_AssertAlignx(16,a(1))
24c96caaccSSatish Balay  PETSC_AssertAlignx(16,x(1))
25c96caaccSSatish Balay  PETSC_AssertAlignx(16,b(1))
26c96caaccSSatish Balay  PETSC_AssertAlignx(16,ai(1))
27c96caaccSSatish Balay  PETSC_AssertAlignx(16,aj(1))
28c96caaccSSatish Balay  PETSC_AssertAlignx(16,adiag(1))
29c96caaccSSatish Balay
300ccf82acSMartin Diehl  !
310ccf82acSMartin Diehl  ! Forward Solve
320ccf82acSMartin Diehl  !
33*d66e387eSMartin Diehl  x(0:3) = b(0:3)
34c96caaccSSatish Balay  idx  = 0
350113e719SMartin Diehl  do i=1,n-1
36c96caaccSSatish Balay    jstart = ai(i)
37c96caaccSSatish Balay    jend   = adiag(i) - 1
38c96caaccSSatish Balay    ax     = 16*jstart
39c96caaccSSatish Balay    idx    = idx + 4
40*d66e387eSMartin Diehl    s1     = b(idx+0)
41c96caaccSSatish Balay    s2     = b(idx+1)
42c96caaccSSatish Balay    s3     = b(idx+2)
43c96caaccSSatish Balay    s4     = b(idx+3)
440113e719SMartin Diehl    do j=jstart,jend
45c96caaccSSatish Balay      jdx = 4*aj(j)
46c96caaccSSatish Balay
47*d66e387eSMartin Diehl      x1  = x(jdx+0)
48c96caaccSSatish Balay      x2  = x(jdx+1)
49c96caaccSSatish Balay      x3  = x(jdx+2)
50c96caaccSSatish Balay      x4  = x(jdx+3)
51*d66e387eSMartin Diehl      s1  = s1-(a(ax+0)*x1+a(ax+4)*x2+a(ax+ 8)*x3+a(ax+12)*x4)
52c96caaccSSatish Balay      s2  = s2-(a(ax+1)*x1+a(ax+5)*x2+a(ax+9)*x3 +a(ax+13)*x4)
53c96caaccSSatish Balay      s3  = s3-(a(ax+2)*x1+a(ax+6)*x2+a(ax+10)*x3+a(ax+14)*x4)
54c96caaccSSatish Balay      s4  = s4-(a(ax+3)*x1+a(ax+7)*x2+a(ax+11)*x3+a(ax+15)*x4)
55c96caaccSSatish Balay      ax  = ax + 16
560113e719SMartin Diehl    end do
57*d66e387eSMartin Diehl    x(idx+0) = s1
58c96caaccSSatish Balay    x(idx+1) = s2
59c96caaccSSatish Balay    x(idx+2) = s3
60c96caaccSSatish Balay    x(idx+3) = s4
610113e719SMartin Diehl  end do
62c96caaccSSatish Balay
63c96caaccSSatish Balay  !
64c96caaccSSatish Balay  ! Backward solve the upper triangular
65c96caaccSSatish Balay  !
660113e719SMartin Diehl  do i=n-1,0,-1
67c96caaccSSatish Balay    jstart = adiag(i) + 1
68c96caaccSSatish Balay    jend   = ai(i+1) - 1
69c96caaccSSatish Balay    ax     = 16*jstart
70*d66e387eSMartin Diehl    s1     = x(idx+0)
71c96caaccSSatish Balay    s2     = x(idx+1)
72c96caaccSSatish Balay    s3     = x(idx+2)
73c96caaccSSatish Balay    s4     = x(idx+3)
740113e719SMartin Diehl    do j=jstart,jend
75c96caaccSSatish Balay      jdx   = 4*aj(j)
76*d66e387eSMartin Diehl      x1    = x(jdx+0)
77c96caaccSSatish Balay      x2    = x(jdx+1)
78c96caaccSSatish Balay      x3    = x(jdx+2)
79c96caaccSSatish Balay      x4    = x(jdx+3)
80*d66e387eSMartin Diehl      s1 = s1-(a(ax+0)*x1+a(ax+4)*x2+a(ax+ 8)*x3+a(ax+12)*x4)
81c96caaccSSatish Balay      s2 = s2-(a(ax+1)*x1+a(ax+5)*x2+a(ax+9)*x3 +a(ax+13)*x4)
82c96caaccSSatish Balay      s3 = s3-(a(ax+2)*x1+a(ax+6)*x2+a(ax+10)*x3+a(ax+14)*x4)
83c96caaccSSatish Balay      s4 = s4-(a(ax+3)*x1+a(ax+7)*x2+a(ax+11)*x3+a(ax+15)*x4)
84c96caaccSSatish Balay      ax = ax + 16
850113e719SMartin Diehl    end do
86c96caaccSSatish Balay    ax      = 16*adiag(i)
87*d66e387eSMartin Diehl    x(idx+0) = a(ax+0)*s1+a(ax+4)*s2+a(ax+ 8)*s3+a(ax+12)*s4
88c96caaccSSatish Balay    x(idx+1) = a(ax+1)*s1+a(ax+5)*s2+a(ax+9)*s3 +a(ax+13)*s4
89c96caaccSSatish Balay    x(idx+2) = a(ax+2)*s1+a(ax+6)*s2+a(ax+10)*s3+a(ax+14)*s4
90c96caaccSSatish Balay    x(idx+3) = a(ax+3)*s1+a(ax+7)*s2+a(ax+11)*s3+a(ax+15)*s4
91c96caaccSSatish Balay    idx      = idx - 4
920113e719SMartin Diehl  end do
930113e719SMartin Diehlend subroutine FortranSolveBAIJ4Unroll
94c96caaccSSatish Balay
95c96caaccSSatish Balay!   version that does not call BLAS 2 operation for each row block
96c96caaccSSatish Balay!
97c96caaccSSatish Balaysubroutine FortranSolveBAIJ4(n,x,ai,aj,adiag,a,b,w)
98c96caaccSSatish Balay  implicit none
990ccf82acSMartin Diehl  MatScalar, intent(in) :: a(0:*)
1000ccf82acSMartin Diehl  PetscScalar, intent(inout) :: x(0:*),w(0:*)
1010ccf82acSMartin Diehl  PetscScalar, intent(in) :: b(0:*)
1020ccf82acSMartin Diehl  PetscInt, intent(in) :: n
1030ccf82acSMartin Diehl  PetscInt, intent(in) :: ai(0:*), aj(0:*), adiag(0:*)
104c96caaccSSatish Balay
1050ccf82acSMartin Diehl  PetscInt :: ii,jj,i,j
1060ccf82acSMartin Diehl  PetscInt :: jstart,jend,idx,ax,jdx,kdx,nn
1070ccf82acSMartin Diehl  PetscScalar :: s(0:3)
108c96caaccSSatish Balay
109c96caaccSSatish Balay  PETSC_AssertAlignx(16,a(1))
110c96caaccSSatish Balay  PETSC_AssertAlignx(16,w(1))
111c96caaccSSatish Balay  PETSC_AssertAlignx(16,x(1))
112c96caaccSSatish Balay  PETSC_AssertAlignx(16,b(1))
113c96caaccSSatish Balay  PETSC_AssertAlignx(16,ai(1))
114c96caaccSSatish Balay  PETSC_AssertAlignx(16,aj(1))
115c96caaccSSatish Balay  PETSC_AssertAlignx(16,adiag(1))
1160113e719SMartin Diehl  !
1170113e719SMartin Diehl  !     Forward Solve
1180113e719SMartin Diehl  !
119*d66e387eSMartin Diehl  x(0:3) = b(0:3)
120c96caaccSSatish Balay  idx  = 0
1210113e719SMartin Diehl  do i=1,n-1
122c96caaccSSatish Balay    !
123c96caaccSSatish Balay    ! Pack required part of vector into work array
124c96caaccSSatish Balay    !
125c96caaccSSatish Balay    kdx    = 0
126c96caaccSSatish Balay    jstart = ai(i)
127c96caaccSSatish Balay    jend   = adiag(i) - 1
128*d66e387eSMartin Diehl
129*d66e387eSMartin Diehl    if (jend - jstart >= 500) write(6,*) 'Overflowing vector FortranSolveBAIJ4()'
130*d66e387eSMartin Diehl
1310113e719SMartin Diehl    do j=jstart,jend
132c96caaccSSatish Balay
133c96caaccSSatish Balay      jdx       = 4*aj(j)
134*d66e387eSMartin Diehl      w(kdx:kdx+3) = x(jdx:jdx+3)
135c96caaccSSatish Balay      kdx       = kdx + 4
1360113e719SMartin Diehl    end do
137c96caaccSSatish Balay
138c96caaccSSatish Balay    ax       = 16*jstart
139c96caaccSSatish Balay    idx      = idx + 4
140*d66e387eSMartin Diehl    s(0:3) = b(idx:idx+3)
141c96caaccSSatish Balay    !
142c96caaccSSatish Balay    !    s = s - a(ax:)*w
143c96caaccSSatish Balay    !
144c96caaccSSatish Balay    nn = 4*(jend - jstart + 1) - 1
1450113e719SMartin Diehl    do ii=0,3
1460113e719SMartin Diehl      do jj=0,nn
147c96caaccSSatish Balay        s(ii) = s(ii) - a(ax+4*jj+ii)*w(jj)
1480113e719SMartin Diehl      end do
1490113e719SMartin Diehl    end do
150c96caaccSSatish Balay
151*d66e387eSMartin Diehl    x(idx:idx+3) = s(0:3)
1520113e719SMartin Diehl  end do
153c96caaccSSatish Balay  !
154c96caaccSSatish Balay  ! Backward solve the upper triangular
155c96caaccSSatish Balay  !
1560113e719SMartin Diehl  do i=n-1,0,-1
157c96caaccSSatish Balay     jstart    = adiag(i) + 1
158c96caaccSSatish Balay     jend      = ai(i+1) - 1
159c96caaccSSatish Balay     ax        = 16*jstart
160*d66e387eSMartin Diehl     s(0:3) = x(idx:idx+3)
161c96caaccSSatish Balay     !
162c96caaccSSatish Balay     !   Pack each chunk of vector needed
163c96caaccSSatish Balay     !
164c96caaccSSatish Balay     kdx = 0
165*d66e387eSMartin Diehl     if (jend - jstart >= 500) write(6,*) 'Overflowing vector FortranSolveBAIJ4()'
166*d66e387eSMartin Diehl
1670113e719SMartin Diehl     do j=jstart,jend
168c96caaccSSatish Balay       jdx      = 4*aj(j)
169*d66e387eSMartin Diehl       w(kdx:kdx+3) = x(jdx:jdx+3)
170c96caaccSSatish Balay       kdx      = kdx + 4
1710113e719SMartin Diehl     end do
172c96caaccSSatish Balay     nn = 4*(jend - jstart + 1) - 1
1730113e719SMartin Diehl     do ii=0,3
1740113e719SMartin Diehl       do jj=0,nn
175c96caaccSSatish Balay         s(ii) = s(ii) - a(ax+4*jj+ii)*w(jj)
1760113e719SMartin Diehl       end do
1770113e719SMartin Diehl     end do
178c96caaccSSatish Balay
179c96caaccSSatish Balay     ax      = 16*adiag(i)
180*d66e387eSMartin Diehl     x(idx)  = a(ax+0)*s(0)+a(ax+4)*s(1)+a(ax+ 8)*s(2)+a(ax+12)*s(3)
181c96caaccSSatish Balay     x(idx+1)= a(ax+1)*s(0)+a(ax+5)*s(1)+a(ax+9)*s(2) +a(ax+13)*s(3)
182c96caaccSSatish Balay     x(idx+2)= a(ax+2)*s(0)+a(ax+6)*s(1)+a(ax+10)*s(2)+a(ax+14)*s(3)
183c96caaccSSatish Balay     x(idx+3)= a(ax+3)*s(0)+a(ax+7)*s(1)+a(ax+11)*s(2)+a(ax+15)*s(3)
184c96caaccSSatish Balay     idx     = idx - 4
1850113e719SMartin Diehl  end do
1860113e719SMartin Diehlend subroutine FortranSolveBAIJ4
187