!
!   Demonstrates having each OpenMP thread manage its own PETSc objects
!   and solve its own linear systems
!     - each thread is ONLY allowed to access objects that IT created,
!       that is, threads cannot share objects
!
!   Run with, for example,
!       OMP_NUM_THREADS=16 ./ex61f
!   to use 16 independent threads
!
!   Requires PETSc configured with
!       ./configure --with-threadsafety --with-log=0 --with-openmp
!
 module omp_module
 implicit none
 contains
! split_indices partitions 1..total into num_pieces nearly equal
! contiguous ranges [ibeg(i),iend(i)]
 subroutine split_indices(total,num_pieces,ibeg,iend)
   implicit none

   integer, intent(in) :: total
   integer, intent(in) :: num_pieces
   integer, intent(out) :: ibeg(num_pieces), iend(num_pieces)
   integer :: itmp1, itmp2, ioffset, i

!  the first mod(total,num_pieces) pieces get itmp1+1 indices, the rest itmp1
   itmp1 = total/num_pieces
   itmp2 = mod(total,num_pieces)
   ioffset = 0
   do i=1,itmp2
     ibeg(i) = ioffset + 1
     iend(i) = ioffset + (itmp1+1)
     ioffset = iend(i)
   enddo
   do i=itmp2+1,num_pieces
     ibeg(i) = ioffset + 1
     if (ibeg(i) > total) then
!      empty piece: more pieces than indices
       iend(i) = ibeg(i) - 1
     else
       iend(i) = ioffset + itmp1
       ioffset = iend(i)
     endif
   enddo

 end subroutine split_indices
 end module omp_module
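!
! worked example: split_indices(10,3,ibeg,iend) yields ibeg = (/1,5,8/)
! and iend = (/4,7,10/); the first mod(total,num_pieces) pieces each get
! one extra index, so piece sizes differ by at most one
!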

 module assert_mod
 implicit none
 contains
 subroutine assert(lcond,msg,icase)
   logical, intent(in) :: lcond
   character(len=*), intent(in) :: msg
   integer, intent(in) :: icase

   if (.not.lcond) then
     write(*,*) msg, icase
     stop 'assertion error '
   endif
   return
 end subroutine assert
 end module assert_mod

 program tpetsc

#include <petsc/finclude/petsc.h>
 use assert_mod
 use omp_module
 use petsc
 implicit none
! ----------------------------
! test concurrent petsc solver
! ----------------------------

 integer, parameter :: NCASES=100
 integer, parameter :: MAXTHREADS=64
 real(8), parameter :: tol = 1.0d-6

 integer, dimension(MAXTHREADS) :: ibeg,iend

!$ integer, external :: omp_get_num_threads

! arrays of petsc objects: thread ith touches only entry ith
 Mat, target :: Mcol_f_mat(MAXTHREADS)
 Vec, target :: Mcol_f_vecb(MAXTHREADS)
 Vec, target :: Mcol_f_vecx(MAXTHREADS)
 KSP, target :: Mcol_f_ksp(MAXTHREADS)
 PC, target :: MPC(MAXTHREADS)

 Mat, pointer :: col_f_mat
 Vec, pointer :: col_f_vecb
 Vec, pointer :: col_f_vecx
 KSP, pointer :: col_f_ksp
 PC, pointer :: pc

 PetscInt :: ith, nthreads
 PetscErrorCode ierr

 integer, parameter :: nz_per_row = 9
 integer, parameter :: n = 100
 integer :: i,j,ij,ij2,ii,jj,nz,ip,dx,dy,icase
 integer, allocatable :: ilist(:),jlist(:)
 PetscScalar :: aij
 PetscScalar, allocatable :: alist(:)
 logical :: isvalid_ii, isvalid_jj, is_diag

 PetscInt nrow
 PetscInt ncol
 PetscScalar, allocatable :: x(:), b(:)
! err is shared; each thread writes only its own icase entries
 real(8) :: err(NCASES)

 integer :: t1,t2,count_rate
 real :: ttime

 call PetscInitialize(PETSC_NULL_CHARACTER,ierr)
 if (ierr .ne. 0) then
   print*,'Unable to initialize PETSc'
   stop
 endif

 allocate(ilist(n*n*nz_per_row),jlist(n*n*nz_per_row),alist(n*n*nz_per_row))
 allocate(x(0:(n*n-1)),b(0:(n*n-1)))
 nrow = n*n
 ncol = nrow

 nthreads = 1
!$omp parallel
!$omp master
!$ nthreads = omp_get_num_threads()
!$omp end master
!$omp end parallel
 print*,'nthreads = ', nthreads,' NCASES = ', NCASES

 call split_indices(NCASES,nthreads,ibeg,iend)

! each thread creates, and later uses, only its own petsc objects
!$omp parallel do &
!$omp private(ith,ierr) &
!$omp private(col_f_mat,col_f_vecb,col_f_vecx,col_f_ksp)
 do ith=1,nthreads
   col_f_mat => Mcol_f_mat(ith)
   col_f_vecb => Mcol_f_vecb(ith)
   col_f_vecx => Mcol_f_vecx(ith)
   col_f_ksp => Mcol_f_ksp(ith)

   call MatCreateSeqAIJ(PETSC_COMM_SELF,nrow,ncol,nz_per_row,PETSC_NULL_INTEGER,col_f_mat,ierr)
   call assert(ierr.eq.0,'matcreateseqaij return ',ierr)

!  create b (and x) without backing storage; the arrays are attached
!  later with VecPlaceArray
   call VecCreateSeqWithArray(PETSC_COMM_SELF,1,nrow,PETSC_NULL_SCALAR,col_f_vecb,ierr)
   call assert(ierr.eq.0,'veccreateseqwitharray return ierr',ierr)

   call VecDuplicate(col_f_vecb,col_f_vecx,ierr)
   call assert(ierr.eq.0,'vecduplicate return ierr',ierr)

   call KSPCreate(PETSC_COMM_SELF,col_f_ksp,ierr)
   call assert(ierr.eq.0,'kspcreate return ierr',ierr)

 enddo

! ---------------------------------------------------------
! set up sparsity pattern: 9-point stencil on an n x n grid,
! 4 on the diagonal and -1 on each valid neighbor
! ---------------------------------------------------------
 nz = 0
 do j=1,n
   do i=1,n
     ij = i + (j-1)*n
     do dx=-1,1
       do dy=-1,1
         ii = i + dx
         jj = j + dy

         ij2 = ii + (jj-1)*n
         isvalid_ii = (1 <= ii).and.(ii <= n)
         isvalid_jj = (1 <= jj).and.(jj <= n)
         if (isvalid_ii.and.isvalid_jj) then
           is_diag = (ij .eq. ij2)
           nz = nz + 1
           ilist(nz) = ij
           jlist(nz) = ij2
           if (is_diag) then
             aij = 4.0
           else
             aij = -1.0
           endif
           alist(nz) = aij
         endif
       enddo
     enddo
   enddo
 enddo

 print*,'nz = ', nz

! -----------------------------------------------
! convert from fortran to c (0-based) indexing
! -----------------------------------------------
 ilist(1:nz) = ilist(1:nz) - 1
 jlist(1:nz) = jlist(1:nz) - 1

! ---------------
! set up matrices
! ---------------
 call system_clock(t1,count_rate)

!$omp parallel do &
!$omp& private(ith,icase,ip,i,j,ii,jj,aij,ierr,x,b) &
!$omp& private(col_f_mat,col_f_vecb,col_f_vecx,col_f_ksp,pc)
 do ith=1,nthreads
   col_f_mat => Mcol_f_mat(ith)
   col_f_vecb => Mcol_f_vecb(ith)
   col_f_vecx => Mcol_f_vecx(ith)
   col_f_ksp => Mcol_f_ksp(ith)
   pc => MPC(ith)

   do icase=ibeg(ith),iend(ith)

     do ip=1,nz
       ii = ilist(ip)
       jj = jlist(ip)
       aij = alist(ip)
       call MatSetValue(col_f_mat,ii,jj,aij,INSERT_VALUES,ierr)
       call assert(ierr.eq.0,'matsetvalue return ierr',ierr)
     enddo
     call MatAssemblyBegin(col_f_mat,MAT_FINAL_ASSEMBLY,ierr)
     call assert(ierr.eq.0,'matassemblybegin return ierr',ierr)

     call MatAssemblyEnd(col_f_mat,MAT_FINAL_ASSEMBLY,ierr)
     call assert(ierr.eq.0,'matassemblyend return ierr',ierr)

     call KSPSetOperators(col_f_ksp,col_f_mat,col_f_mat,ierr)
     call assert(ierr.eq.0,'KSPSetOperators return ierr',ierr)

!    set linear solver
     call KSPGetPC(col_f_ksp,pc,ierr)
     call assert(ierr.eq.0,'KSPGetPC return ierr ',ierr)

     call PCSetType(pc,PCLU,ierr)
     call assert(ierr.eq.0,'PCSetType return ierr ',ierr)
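
!    --------------------------------------------------------------
!    each case uses a manufactured solution: x is set to 1 and the
!    right-hand side is accumulated as b = A*x from the coordinate
!    triplets (ilist,jlist,alist), so the solution returned by
!    KSPSolve can be checked against 1
!    --------------------------------------------------------------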
!    associate petsc vectors with the allocated arrays
     x(:) = 1
!$omp critical
     call VecPlaceArray(col_f_vecx,x,ierr)
!$omp end critical
     call assert(ierr.eq.0,'VecPlaceArray col_f_vecx return ',ierr)

     b(:) = 0
     do ip=1,nz
       i = ilist(ip)
       j = jlist(ip)
       aij = alist(ip)
       b(i) = b(i) + aij*x(j)
     enddo
     x = 0
!$omp critical
     call VecPlaceArray(col_f_vecb,b,ierr)
!$omp end critical
     call assert(ierr.eq.0,'VecPlaceArray col_f_vecb return ',ierr)

!    -----------------------------------------------------------
!    key test, need to solve multiple linear systems in parallel
!    -----------------------------------------------------------
     call KSPSetFromOptions(col_f_ksp,ierr)
     call assert(ierr.eq.0,'KSPSetFromOptions return ierr ',ierr)

     call KSPSetUp(col_f_ksp,ierr)
     call assert(ierr.eq.0,'KSPSetUp return ierr ',ierr)

     call KSPSolve(col_f_ksp,col_f_vecb,col_f_vecx,ierr)
     call assert(ierr.eq.0,'KSPSolve return ierr ',ierr)

!    ------------
!    check answer
!    ------------
     err(icase) = maxval(abs(x(:)-1))

!$omp critical
     call VecResetArray(col_f_vecx,ierr)
!$omp end critical
     call assert(ierr.eq.0,'VecResetArray return ierr ',ierr)

!$omp critical
     call VecResetArray(col_f_vecb,ierr)
!$omp end critical
     call assert(ierr.eq.0,'VecResetArray return ierr ',ierr)

   enddo
 enddo

!$omp parallel do &
!$omp private(ith,ierr) &
!$omp private(col_f_mat,col_f_vecb,col_f_vecx,col_f_ksp)
 do ith=1,nthreads
   col_f_mat => Mcol_f_mat(ith)
   col_f_vecb => Mcol_f_vecb(ith)
   col_f_vecx => Mcol_f_vecx(ith)
   col_f_ksp => Mcol_f_ksp(ith)

   call MatDestroy(col_f_mat,ierr)
   call assert(ierr.eq.0,'matdestroy return ',ierr)

   call VecDestroy(col_f_vecb,ierr)
   call assert(ierr.eq.0,'vecdestroy return ierr',ierr)

   call VecDestroy(col_f_vecx,ierr)
   call assert(ierr.eq.0,'vecdestroy return ierr',ierr)

   call KSPDestroy(col_f_ksp,ierr)
   call assert(ierr.eq.0,'kspdestroy return ierr',ierr)

 enddo

 call system_clock(t2,count_rate)
 ttime = real(t2-t1)/real(count_rate)
 write(*,*) 'total time is ',ttime

 write(*,*) 'maxval(err) ', maxval(err)
 do icase=1,NCASES
   if (err(icase) > tol) then
     write(*,*) 'icase,err(icase) ',icase,err(icase)
   endif
 enddo

 deallocate(ilist,jlist,alist)
 deallocate(x,b)
 call PetscFinalize(ierr)
 call assert(ierr.eq.0,'petscfinalize return ierr',ierr)

 end program tpetsc

!/*TEST
!
!   build:
!     requires: double !complex !define(PETSC_USE_64BIT_INDICES)
!
!   test:
!     output_file: output/ex61f_1.out
!     TODO: Need to determine how to test OpenMP code
!
!TEST*/
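
!
! build/run sketch (assumes a PETSc build configured with the
! thread-safety options shown in the header comment; the thread
! count below is illustrative):
!   make ex61f
!   OMP_NUM_THREADS=4 ./ex61f
!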