xref: /petsc/src/mat/impls/baij/seq/ftn-kernels/fsolvebaij.F90 (revision ff45ff59e5033a47c65b211fde43097dd9cb5a3c)
1c96caaccSSatish Balay!
2c96caaccSSatish Balay!
3c96caaccSSatish Balay!    Fortran kernel for sparse triangular solve in the BAIJ matrix format
4c96caaccSSatish Balay! This ONLY works for factorizations in the NATURAL ORDERING, i.e.
5c96caaccSSatish Balay! with MatSolve_SeqBAIJ_4_NaturalOrdering()
6c96caaccSSatish Balay!
7c96caaccSSatish Balay#include <petsc/finclude/petscsys.h>
8c96caaccSSatish Balay!
9c96caaccSSatish Balay
100ccf82acSMartin Diehlpure subroutine FortranSolveBAIJ4Unroll(n,x,ai,aj,adiag,a,b)
110ccf82acSMartin Diehl  implicit none (type, external)
120ccf82acSMartin Diehl  MatScalar, intent(in) :: a(0:*)
130ccf82acSMartin Diehl  PetscScalar, intent(inout) :: x(0:*)
140ccf82acSMartin Diehl  PetscScalar, intent(in) :: b(0:*)
150ccf82acSMartin Diehl  PetscInt, intent(in) :: n
160ccf82acSMartin Diehl  PetscInt, intent(in) :: ai(0:*), aj(0:*), adiag(0:*)
17c96caaccSSatish Balay
180ccf82acSMartin Diehl  PetscInt :: i,j,jstart,jend
190ccf82acSMartin Diehl  PetscInt :: idx,ax,jdx
20*ff45ff59SMartin Diehl  PetscScalar :: s(0:3)
210ccf82acSMartin Diehl
22c96caaccSSatish Balay  PETSC_AssertAlignx(16,a(1))
23c96caaccSSatish Balay  PETSC_AssertAlignx(16,x(1))
24c96caaccSSatish Balay  PETSC_AssertAlignx(16,b(1))
25c96caaccSSatish Balay  PETSC_AssertAlignx(16,ai(1))
26c96caaccSSatish Balay  PETSC_AssertAlignx(16,aj(1))
27c96caaccSSatish Balay  PETSC_AssertAlignx(16,adiag(1))
28c96caaccSSatish Balay
290ccf82acSMartin Diehl  !
300ccf82acSMartin Diehl  ! Forward Solve
310ccf82acSMartin Diehl  !
32d66e387eSMartin Diehl  x(0:3) = b(0:3)
33c96caaccSSatish Balay  idx  = 0
340113e719SMartin Diehl  do i=1,n-1
35c96caaccSSatish Balay    jstart = ai(i)
36c96caaccSSatish Balay    jend   = adiag(i) - 1
37c96caaccSSatish Balay    ax     = 16*jstart
38c96caaccSSatish Balay    idx    = idx + 4
39*ff45ff59SMartin Diehl    s(0:3) = b(idx+0:idx+3)
400113e719SMartin Diehl    do j=jstart,jend
41c96caaccSSatish Balay      jdx = 4*aj(j)
42c96caaccSSatish Balay
43*ff45ff59SMartin Diehl      s(0) = s(0)-(a(ax+0)*x(jdx+0)+a(ax+4)*x(jdx+1)+a(ax+ 8)*x(jdx+2)+a(ax+12)*x(jdx+3))
44*ff45ff59SMartin Diehl      s(1) = s(1)-(a(ax+1)*x(jdx+0)+a(ax+5)*x(jdx+1)+a(ax+ 9)*x(jdx+2)+a(ax+13)*x(jdx+3))
45*ff45ff59SMartin Diehl      s(2) = s(2)-(a(ax+2)*x(jdx+0)+a(ax+6)*x(jdx+1)+a(ax+10)*x(jdx+2)+a(ax+14)*x(jdx+3))
46*ff45ff59SMartin Diehl      s(3) = s(3)-(a(ax+3)*x(jdx+0)+a(ax+7)*x(jdx+1)+a(ax+11)*x(jdx+2)+a(ax+15)*x(jdx+3))
47c96caaccSSatish Balay      ax = ax + 16
480113e719SMartin Diehl    end do
49*ff45ff59SMartin Diehl    x(idx+0:idx+3) = s(0:3)
500113e719SMartin Diehl  end do
51c96caaccSSatish Balay
52c96caaccSSatish Balay  !
53c96caaccSSatish Balay  ! Backward solve the upper triangular
54c96caaccSSatish Balay  !
550113e719SMartin Diehl  do i=n-1,0,-1
56c96caaccSSatish Balay    jstart = adiag(i) + 1
57c96caaccSSatish Balay    jend   = ai(i+1) - 1
58c96caaccSSatish Balay    ax     = 16*jstart
59*ff45ff59SMartin Diehl    s(0:3) = x(idx+0:idx+3)
600113e719SMartin Diehl    do j=jstart,jend
61c96caaccSSatish Balay      jdx   = 4*aj(j)
62*ff45ff59SMartin Diehl      s(0) = s(0)-(a(ax+0)*x(jdx+0)+a(ax+4)*x(jdx+1)+a(ax+ 8)*x(jdx+2)+a(ax+12)*x(jdx+3))
63*ff45ff59SMartin Diehl      s(1) = s(1)-(a(ax+1)*x(jdx+0)+a(ax+5)*x(jdx+1)+a(ax+ 9)*x(jdx+2)+a(ax+13)*x(jdx+3))
64*ff45ff59SMartin Diehl      s(2) = s(2)-(a(ax+2)*x(jdx+0)+a(ax+6)*x(jdx+1)+a(ax+10)*x(jdx+2)+a(ax+14)*x(jdx+3))
65*ff45ff59SMartin Diehl      s(3) = s(3)-(a(ax+3)*x(jdx+0)+a(ax+7)*x(jdx+1)+a(ax+11)*x(jdx+2)+a(ax+15)*x(jdx+3))
66c96caaccSSatish Balay      ax = ax + 16
670113e719SMartin Diehl    end do
68c96caaccSSatish Balay    ax      = 16*adiag(i)
69*ff45ff59SMartin Diehl    x(idx+0) = a(ax+0)*s(0)+a(ax+4)*s(1)+a(ax+ 8)*s(2)+a(ax+12)*s(3)
70*ff45ff59SMartin Diehl    x(idx+1) = a(ax+1)*s(0)+a(ax+5)*s(1)+a(ax+ 9)*s(2)+a(ax+13)*s(3)
71*ff45ff59SMartin Diehl    x(idx+2) = a(ax+2)*s(0)+a(ax+6)*s(1)+a(ax+10)*s(2)+a(ax+14)*s(3)
72*ff45ff59SMartin Diehl    x(idx+3) = a(ax+3)*s(0)+a(ax+7)*s(1)+a(ax+11)*s(2)+a(ax+15)*s(3)
73c96caaccSSatish Balay    idx      = idx - 4
740113e719SMartin Diehl  end do
750113e719SMartin Diehlend subroutine FortranSolveBAIJ4Unroll
76c96caaccSSatish Balay
77c96caaccSSatish Balay!   version that does not call BLAS 2 operation for each row block
78c96caaccSSatish Balay!
79*ff45ff59SMartin Diehlpure subroutine FortranSolveBAIJ4(n,x,ai,aj,adiag,a,b,w)
80c96caaccSSatish Balay  implicit none
810ccf82acSMartin Diehl  MatScalar, intent(in) :: a(0:*)
820ccf82acSMartin Diehl  PetscScalar, intent(inout) :: x(0:*),w(0:*)
830ccf82acSMartin Diehl  PetscScalar, intent(in) :: b(0:*)
840ccf82acSMartin Diehl  PetscInt, intent(in) :: n
850ccf82acSMartin Diehl  PetscInt, intent(in) :: ai(0:*), aj(0:*), adiag(0:*)
86c96caaccSSatish Balay
870ccf82acSMartin Diehl  PetscInt :: ii,jj,i,j
880ccf82acSMartin Diehl  PetscInt :: jstart,jend,idx,ax,jdx,kdx,nn
890ccf82acSMartin Diehl  PetscScalar :: s(0:3)
90c96caaccSSatish Balay
91c96caaccSSatish Balay  PETSC_AssertAlignx(16,a(1))
92c96caaccSSatish Balay  PETSC_AssertAlignx(16,w(1))
93c96caaccSSatish Balay  PETSC_AssertAlignx(16,x(1))
94c96caaccSSatish Balay  PETSC_AssertAlignx(16,b(1))
95c96caaccSSatish Balay  PETSC_AssertAlignx(16,ai(1))
96c96caaccSSatish Balay  PETSC_AssertAlignx(16,aj(1))
97c96caaccSSatish Balay  PETSC_AssertAlignx(16,adiag(1))
980113e719SMartin Diehl  !
990113e719SMartin Diehl  !     Forward Solve
1000113e719SMartin Diehl  !
101d66e387eSMartin Diehl  x(0:3) = b(0:3)
102c96caaccSSatish Balay  idx  = 0
1030113e719SMartin Diehl  do i=1,n-1
104c96caaccSSatish Balay    !
105c96caaccSSatish Balay    ! Pack required part of vector into work array
106c96caaccSSatish Balay    !
107c96caaccSSatish Balay    kdx    = 0
108c96caaccSSatish Balay    jstart = ai(i)
109c96caaccSSatish Balay    jend   = adiag(i) - 1
110d66e387eSMartin Diehl
111*ff45ff59SMartin Diehl    if (jend - jstart >= 500) error stop 'Overflowing vector FortranSolveBAIJ4()'
112d66e387eSMartin Diehl
1130113e719SMartin Diehl    do j=jstart,jend
114c96caaccSSatish Balay      jdx       = 4*aj(j)
115d66e387eSMartin Diehl      w(kdx:kdx+3) = x(jdx:jdx+3)
116c96caaccSSatish Balay      kdx       = kdx + 4
1170113e719SMartin Diehl    end do
118c96caaccSSatish Balay
119c96caaccSSatish Balay    ax     = 16*jstart
120c96caaccSSatish Balay    idx    = idx + 4
121d66e387eSMartin Diehl    s(0:3) = b(idx:idx+3)
122c96caaccSSatish Balay    !
123c96caaccSSatish Balay    !    s = s - a(ax:)*w
124c96caaccSSatish Balay    !
125c96caaccSSatish Balay    nn = 4*(jend - jstart + 1) - 1
1260113e719SMartin Diehl    do ii=0,3
1270113e719SMartin Diehl      do jj=0,nn
128c96caaccSSatish Balay        s(ii) = s(ii) - a(ax+4*jj+ii)*w(jj)
1290113e719SMartin Diehl      end do
1300113e719SMartin Diehl    end do
131c96caaccSSatish Balay
132d66e387eSMartin Diehl    x(idx:idx+3) = s(0:3)
1330113e719SMartin Diehl  end do
134c96caaccSSatish Balay  !
135c96caaccSSatish Balay  ! Backward solve the upper triangular
136c96caaccSSatish Balay  !
1370113e719SMartin Diehl  do i=n-1,0,-1
138c96caaccSSatish Balay     jstart = adiag(i) + 1
139c96caaccSSatish Balay     jend   = ai(i+1) - 1
140c96caaccSSatish Balay     ax     = 16*jstart
141d66e387eSMartin Diehl     s(0:3) = x(idx:idx+3)
142c96caaccSSatish Balay     !
143c96caaccSSatish Balay     !   Pack each chunk of vector needed
144c96caaccSSatish Balay     !
145c96caaccSSatish Balay     kdx = 0
146*ff45ff59SMartin Diehl     if (jend - jstart >= 500) error stop 'Overflowing vector FortranSolveBAIJ4()'
147d66e387eSMartin Diehl
1480113e719SMartin Diehl     do j=jstart,jend
149c96caaccSSatish Balay       jdx = 4*aj(j)
150d66e387eSMartin Diehl       w(kdx:kdx+3) = x(jdx:jdx+3)
151c96caaccSSatish Balay       kdx = kdx + 4
1520113e719SMartin Diehl     end do
153c96caaccSSatish Balay     nn = 4*(jend - jstart + 1) - 1
1540113e719SMartin Diehl     do ii=0,3
1550113e719SMartin Diehl       do jj=0,nn
156c96caaccSSatish Balay         s(ii) = s(ii) - a(ax+4*jj+ii)*w(jj)
1570113e719SMartin Diehl       end do
1580113e719SMartin Diehl     end do
159c96caaccSSatish Balay
160c96caaccSSatish Balay     ax      = 16*adiag(i)
161d66e387eSMartin Diehl     x(idx)  = a(ax+0)*s(0)+a(ax+4)*s(1)+a(ax+ 8)*s(2)+a(ax+12)*s(3)
162c96caaccSSatish Balay     x(idx+1)= a(ax+1)*s(0)+a(ax+5)*s(1)+a(ax+ 9)*s(2)+a(ax+13)*s(3)
163c96caaccSSatish Balay     x(idx+2)= a(ax+2)*s(0)+a(ax+6)*s(1)+a(ax+10)*s(2)+a(ax+14)*s(3)
164c96caaccSSatish Balay     x(idx+3)= a(ax+3)*s(0)+a(ax+7)*s(1)+a(ax+11)*s(2)+a(ax+15)*s(3)
165c96caaccSSatish Balay     idx     = idx - 4
1660113e719SMartin Diehl  end do
1670113e719SMartin Diehlend subroutine FortranSolveBAIJ4
168