! xref: /petsc/src/ksp/ksp/tutorials/ex61f.F90 (revision 9b88ac225e01f016352a5f4cd90e158abe5f5675)
!
!        Demonstrates having each OpenMP thread manage its own PETSc objects and solvers
!           - each thread is ONLY allowed to access objects that IT created,
!                  that is, threads cannot share objects
!
!        Run with "export OMP_NUM_THREADS=16; ./ex61f"
!           to use 16 independent threads
!
!        Requires PETSc configured with: ./configure --with-threadsafety --with-openmp
!
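!        Note: PetscInitialize() and PetscFinalize() are called once, from the
!           initial thread, outside of any OpenMP parallel region
!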
#include <petsc/finclude/petsc.h>
module ex61fmodule
  implicit none
contains
  subroutine split_indices(total, num_pieces, ibeg, iend)
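    ! Split the range 1..total into num_pieces contiguous chunks: chunk i covers
    ! ibeg(i)..iend(i); the first mod(total,num_pieces) chunks get one extra element,
    ! and any left-over chunks are returned empty with iend(i) < ibeg(i)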
    implicit none

    integer :: total
    integer :: num_pieces
    integer :: ibeg(num_pieces), iend(num_pieces)
    integer :: itmp1, itmp2, ioffset, i

    itmp1 = total/num_pieces
    itmp2 = mod(total, num_pieces)
    ioffset = 0
    do i = 1, itmp2
      ibeg(i) = ioffset + 1
      iend(i) = ioffset + (itmp1 + 1)
      ioffset = iend(i)
    end do
    do i = itmp2 + 1, num_pieces
      ibeg(i) = ioffset + 1
      if (ibeg(i) > total) then
        iend(i) = ibeg(i) - 1
      else
        iend(i) = ioffset + itmp1
        ioffset = iend(i)
      end if
    end do

  end subroutine split_indices
end module ex61fmodule

program tpetsc

  use ex61fmodule
  use petsc
!$ use OMP_LIB
  implicit none
!     ----------------------------
!     test concurrent PETSc solver
!     ----------------------------

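!     NCASES independent linear systems, each with n*n unknowns, are solved,
!     distributed across at most MAXTHREADS OpenMP threads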
  integer, parameter :: NCASES = 100
  integer, parameter :: MAXTHREADS = 64
  real(8), parameter :: tol = 1.0d-6

  integer, dimension(MAXTHREADS) :: ibeg, iend

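!     per-thread PETSc objects: slot ith is created, used, and destroyed only by thread ith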
  Mat, target :: Mcol_f_mat(MAXTHREADS)
  Vec, target :: Mcol_f_vecb(MAXTHREADS)
  Vec, target :: Mcol_f_vecx(MAXTHREADS)
  KSP, target :: Mcol_f_ksp(MAXTHREADS)
  PC, target :: MPC(MAXTHREADS)

  Mat, pointer :: col_f_mat
  Vec, pointer :: col_f_vecb
  Vec, pointer :: col_f_vecx
  KSP, pointer :: col_f_ksp
  PC, pointer :: pc

  PetscInt :: ith, nthreads
  PetscErrorCode ierr

  integer, parameter :: nz_per_row = 9
  integer, parameter :: n = 100
  integer :: i, j, ij, ij2, ii, jj, nz, ip, dx, dy, icase
  integer, allocatable :: ilist(:), jlist(:)
  PetscScalar :: aij
  PetscScalar, allocatable :: alist(:)
  logical :: isvalid_ii, isvalid_jj, is_diag

  PetscInt nrow
  PetscInt ncol
  PetscScalar, allocatable :: x(:), b(:)
  real(8) :: err(NCASES)

  integer :: t1, t2, count_rate
  real :: ttime

  PetscCallA(PetscInitialize(ierr))

  allocate (ilist(n*n*nz_per_row), jlist(n*n*nz_per_row), alist(n*n*nz_per_row))
  allocate (x(0:(n*n - 1)), b(0:(n*n - 1)))
  nrow = n*n
  ncol = nrow

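!     omp_get_num_threads() reports the team size only inside a parallel region,
!     so query it from a parallel/master block; the default of 1 covers non-OpenMP builds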
  nthreads = 1
!$omp parallel
!$omp master
!$ nthreads = omp_get_num_threads()
!$omp end master
!$omp end parallel
  print *, 'nthreads = ', nthreads, ' NCASES = ', NCASES

  call split_indices(NCASES, nthreads, ibeg, iend)

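!      each thread creates its own sequential Mat, Vecs, and KSP on PETSC_COMM_SELF;
!      no PETSc object is shared between threads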
!$omp parallel do                                                        &
!$omp private(ith,ierr)                                                  &
!$omp private(col_f_mat,col_f_vecb,col_f_vecx,col_f_ksp)
  do ith = 1, nthreads
    col_f_mat => Mcol_f_mat(ith)
    col_f_vecb => Mcol_f_vecb(ith)
    col_f_vecx => Mcol_f_vecx(ith)
    col_f_ksp => Mcol_f_ksp(ith)

    PetscCallA(MatCreateSeqAIJ(PETSC_COMM_SELF, nrow, ncol, nz_per_row, PETSC_NULL_INTEGER_ARRAY, col_f_mat, ierr))
    PetscCallA(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, nrow, PETSC_NULL_SCALAR_ARRAY, col_f_vecb, ierr))
    PetscCallA(VecDuplicate(col_f_vecb, col_f_vecx, ierr))
    PetscCallA(KSPCreate(PETSC_COMM_SELF, col_f_ksp, ierr))
  end do

!      -----------------------
!      setup sparsity pattern
!      -----------------------
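!      9-point stencil on an n x n grid: 4 on the diagonal, -1 for each neighbour inside the domain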
  nz = 0
  do j = 1, n
  do i = 1, n
    ij = i + (j - 1)*n
    do dx = -1, 1
    do dy = -1, 1
      ii = i + dx
      jj = j + dy

      ij2 = ii + (jj - 1)*n
      isvalid_ii = (1 <= ii) .and. (ii <= n)
      isvalid_jj = (1 <= jj) .and. (jj <= n)
      if (isvalid_ii .and. isvalid_jj) then
        is_diag = (ij == ij2)
        nz = nz + 1
        ilist(nz) = ij
        jlist(nz) = ij2
        if (is_diag) then
          aij = 4.0
        else
          aij = -1.0
        end if
        alist(nz) = aij
      end if
    end do
    end do
  end do
  end do

  print *, 'nz = ', nz

!      ----------------------------------
!      convert from Fortran to C indexing
!      ----------------------------------
  ilist(1:nz) = ilist(1:nz) - 1
  jlist(1:nz) = jlist(1:nz) - 1

!      --------------
!      setup matrices
!      --------------
  call system_clock(t1, count_rate)

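!      each thread assembles and solves its share of the NCASES systems,
!      touching only its own (thread-private) PETSc objects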
!$omp  parallel do                                                      &
!$omp& private(ith,icase,ip,i,j,ii,jj,aij,ierr,x,b)                      &
!$omp& private(col_f_mat,col_f_vecb,col_f_vecx,col_f_ksp,pc)
  do ith = 1, nthreads
    col_f_mat => Mcol_f_mat(ith)
    col_f_vecb => Mcol_f_vecb(ith)
    col_f_vecx => Mcol_f_vecx(ith)
    col_f_ksp => Mcol_f_ksp(ith)
    pc => MPC(ith)

    do icase = ibeg(ith), iend(ith)

      do ip = 1, nz
        ii = ilist(ip)
        jj = jlist(ip)
        aij = alist(ip)
        PetscCallA(MatSetValue(col_f_mat, ii, jj, aij, INSERT_VALUES, ierr))
      end do
      PetscCallA(MatAssemblyBegin(col_f_mat, MAT_FINAL_ASSEMBLY, ierr))
      PetscCallA(MatAssemblyEnd(col_f_mat, MAT_FINAL_ASSEMBLY, ierr))
      PetscCallA(KSPSetOperators(col_f_ksp, col_f_mat, col_f_mat, ierr))

      ! set linear solver
      PetscCallA(KSPGetPC(col_f_ksp, pc, ierr))
      PetscCallA(PCSetType(pc, PCLU, ierr))

      ! associate PETSc vector with allocated array
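      ! (VecPlaceArray/VecResetArray calls below are wrapped in OMP critical sections)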
      x(:) = 1
!$omp    critical
      PetscCallA(VecPlaceArray(col_f_vecx, x, ierr))
!$omp    end critical

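      ! build b = A*x for the exact solution x = 1, so the solver should recover x = 1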
      b(:) = 0
      do ip = 1, nz
        i = ilist(ip)
        j = jlist(ip)
        aij = alist(ip)
        b(i) = b(i) + aij*x(j)
      end do
      x = 0
!$omp    critical
      PetscCallA(VecPlaceArray(col_f_vecb, b, ierr))
!$omp    end critical

!  -----------------------------------------------------------
!  key test, need to solve multiple linear systems in parallel
!  -----------------------------------------------------------
      PetscCallA(KSPSetFromOptions(col_f_ksp, ierr))

      PetscCallA(KSPSetUp(col_f_ksp, ierr))
      PetscCallA(KSPSolve(col_f_ksp, col_f_vecb, col_f_vecx, ierr))

!        ------------
!        check answer
!        ------------
      err(icase) = maxval(abs(x(:) - 1))

!$omp    critical
      PetscCallA(VecResetArray(col_f_vecx, ierr))
!$omp    end critical

!$omp    critical
      PetscCallA(VecResetArray(col_f_vecb, ierr))
!$omp    end critical

    end do
  end do

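!      each thread destroys only the objects it created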
!$omp parallel do                                                        &
!$omp private(ith,ierr)                                                  &
!$omp private(col_f_mat,col_f_vecb,col_f_vecx,col_f_ksp)
  do ith = 1, nthreads
    col_f_mat => Mcol_f_mat(ith)
    col_f_vecb => Mcol_f_vecb(ith)
    col_f_vecx => Mcol_f_vecx(ith)
    col_f_ksp => Mcol_f_ksp(ith)

    PetscCallA(MatDestroy(col_f_mat, ierr))
    PetscCallA(VecDestroy(col_f_vecb, ierr))
    PetscCallA(VecDestroy(col_f_vecx, ierr))

    PetscCallA(KSPDestroy(col_f_ksp, ierr))

  end do

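!      report the wall-clock time and flag any case whose error exceeds tol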
  call system_clock(t2, count_rate)
  ttime = real(t2 - t1)/real(count_rate)
  write (*, *) 'total time is ', ttime

  write (*, *) 'maxval(err) ', maxval(err)
  do icase = 1, NCASES
    if (err(icase) > tol) then
      write (*, *) 'icase,err(icase) ', icase, err(icase)
    end if
  end do

  deallocate (ilist, jlist, alist)
  deallocate (x, b)
  PetscCallA(PetscFinalize(ierr))
end program tpetsc

!/*TEST
!
!   build:
!      requires: double !complex !defined(PETSC_USE_64BIT_INDICES)
!
!   test:
!      output_file: output/ex61f_1.out
!      TODO: Need to determine how to test OpenMP code
!
!TEST*/