!
!   Demonstrates having each OpenMP thread manage its own PETSc objects and perform its own solves
!       - each thread is ONLY allowed to access objects that IT created
!         that is, threads cannot share objects
!
!   Run with "export OMP_NUM_THREADS=16; ./ex61f"
!   to use 16 independent threads
!
!   Requires a PETSc built with ./configure --with-threadsafety --with-log=0 --with-openmp
!
      module omp_module
      implicit none
      contains
      subroutine split_indices(total,num_pieces,ibeg,iend)
        implicit none

        integer :: total
        integer :: num_pieces
        integer :: ibeg(num_pieces), iend(num_pieces)
        integer :: itmp1, itmp2, ioffset, i

        itmp1 = total/num_pieces
        itmp2 = mod(total,num_pieces)
        ioffset = 0
        do i=1,itmp2
          ibeg(i) = ioffset + 1
          iend(i) = ioffset + (itmp1+1)
          ioffset = iend(i)
        enddo
        do i=itmp2+1,num_pieces
          ibeg(i) = ioffset + 1
          if (ibeg(i) > total) then
            iend(i) = ibeg(i) - 1
          else
            iend(i) = ioffset + itmp1
            ioffset = iend(i)
          endif
        enddo

      end subroutine split_indices
      end module omp_module

      program tpetsc

#include <petsc/finclude/petsc.h>
      use omp_module
      use petsc
      implicit none
! ----------------------------
! test concurrent PETSc solver
! ----------------------------

      integer, parameter :: NCASES=100
      integer, parameter :: MAXTHREADS=64
      real(8), parameter :: tol = 1.0d-6

      integer, dimension(MAXTHREADS) :: ibeg,iend

!$    integer, external :: omp_get_num_threads

      Mat, target :: Mcol_f_mat(MAXTHREADS)
      Vec, target :: Mcol_f_vecb(MAXTHREADS)
      Vec, target :: Mcol_f_vecx(MAXTHREADS)
      KSP, target :: Mcol_f_ksp(MAXTHREADS)
      PC,  target :: MPC(MAXTHREADS)

      Mat, pointer :: col_f_mat
      Vec, pointer :: col_f_vecb
      Vec, pointer :: col_f_vecx
      KSP, pointer :: col_f_ksp
      PC,  pointer :: pc

      PetscInt :: ith, nthreads
      PetscErrorCode :: ierr

      integer, parameter :: nz_per_row = 9
      integer, parameter :: n = 100
      integer :: i,j,ij,ij2,ii,jj,nz,ip,dx,dy,icase
      integer, allocatable :: ilist(:),jlist(:)
      PetscScalar :: aij
      PetscScalar, allocatable :: alist(:)
      logical :: isvalid_ii, isvalid_jj, is_diag

      PetscInt :: nrow
      PetscInt :: ncol
      PetscScalar, allocatable :: x(:), b(:)
      real(8) :: err(NCASES)

      integer :: t1,t2,count_rate
      real :: ttime

      PetscCallA(PetscInitialize(ierr))

      allocate(ilist(n*n*nz_per_row),jlist(n*n*nz_per_row),alist(n*n*nz_per_row))
      allocate(x(0:(n*n-1)),b(0:(n*n-1)))
      nrow = n*n
      ncol = nrow

      nthreads = 1
!$omp parallel
!$omp master
!$    nthreads = omp_get_num_threads()
!$omp end master
!$omp end parallel
      print*,'nthreads = ', nthreads,' NCASES = ', NCASES

      call split_indices(NCASES,nthreads,ibeg,iend)

!$omp parallel do                                                      &
!$omp&  private(ith,ierr)                                              &
!$omp&  private(col_f_mat,col_f_vecb,col_f_vecx,col_f_ksp)
      do ith=1,nthreads
        col_f_mat  => Mcol_f_mat(ith)
        col_f_vecb => Mcol_f_vecb(ith)
        col_f_vecx => Mcol_f_vecx(ith)
        col_f_ksp  => Mcol_f_ksp(ith)

        PetscCallA(MatCreateSeqAIJ(PETSC_COMM_SELF,nrow,ncol,nz_per_row,PETSC_NULL_INTEGER,col_f_mat,ierr))
        PetscCallA(VecCreateSeqWithArray(PETSC_COMM_SELF,1,nrow,PETSC_NULL_SCALAR,col_f_vecb,ierr))
        PetscCallA(VecDuplicate(col_f_vecb,col_f_vecx,ierr))
        PetscCallA(KSPCreate(PETSC_COMM_SELF,col_f_ksp,ierr))
      enddo

! -----------------------
! setup sparsity pattern
! -----------------------
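! each of the nz entries built below is one (row, column, value) triple of a
! 9-point stencil on the n x n grid: 4.0 on the diagonal and -1.0 for each of
! the (up to 8) neighbouring grid points; every thread reuses this single
! pattern when assembling its own matrix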
      nz = 0
      do j=1,n
      do i=1,n
        ij = i + (j-1)*n
        do dx=-1,1
        do dy=-1,1
          ii = i + dx
          jj = j + dy

          ij2 = ii + (jj-1)*n
          isvalid_ii = (1 <= ii).and.(ii <= n)
          isvalid_jj = (1 <= jj).and.(jj <= n)
          if (isvalid_ii.and.isvalid_jj) then
            is_diag = (ij .eq. ij2)
            nz = nz + 1
            ilist(nz) = ij
            jlist(nz) = ij2
            if (is_diag) then
              aij = 4.0
            else
              aij = -1.0
            endif
            alist(nz) = aij
          endif
        enddo
        enddo
      enddo
      enddo

      print*,'nz = ', nz

! ----------------------------------
! convert from Fortran to C indexing
! ----------------------------------
      ilist(1:nz) = ilist(1:nz) - 1
      jlist(1:nz) = jlist(1:nz) - 1

! --------------
! setup matrices
! --------------
      call system_clock(t1,count_rate)

!$omp parallel do                                                      &
!$omp&  private(ith,icase,ip,i,j,ii,jj,aij,ierr,x,b)                   &
!$omp&  private(col_f_mat,col_f_vecb,col_f_vecx,col_f_ksp,pc)
      do ith=1,nthreads
        col_f_mat  => Mcol_f_mat(ith)
        col_f_vecb => Mcol_f_vecb(ith)
        col_f_vecx => Mcol_f_vecx(ith)
        col_f_ksp  => Mcol_f_ksp(ith)
        pc         => MPC(ith)

        do icase=ibeg(ith),iend(ith)

          do ip=1,nz
            ii = ilist(ip)
            jj = jlist(ip)
            aij = alist(ip)
            PetscCallA(MatSetValue(col_f_mat,ii,jj,aij,INSERT_VALUES,ierr))
          enddo
          PetscCallA(MatAssemblyBegin(col_f_mat,MAT_FINAL_ASSEMBLY,ierr))
          PetscCallA(MatAssemblyEnd(col_f_mat,MAT_FINAL_ASSEMBLY,ierr))
          PetscCallA(KSPSetOperators(col_f_ksp,col_f_mat,col_f_mat,ierr))

          ! use a direct LU factorization as the linear solver
          PetscCallA(KSPGetPC(col_f_ksp,pc,ierr))
          PetscCallA(PCSetType(pc,PCLU,ierr))

          ! associate the PETSc vectors with the thread-private arrays
          x(:) = 1
!$omp critical
          PetscCallA(VecPlaceArray(col_f_vecx,x,ierr))
!$omp end critical

          b(:) = 0
          do ip=1,nz
            i = ilist(ip)
            j = jlist(ip)
            aij = alist(ip)
            b(i) = b(i) + aij*x(j)
          enddo
          x = 0
!$omp critical
          PetscCallA(VecPlaceArray(col_f_vecb,b,ierr))
!$omp end critical

! -----------------------------------------------------------
! key test: need to solve multiple linear systems in parallel
! -----------------------------------------------------------
          PetscCallA(KSPSetFromOptions(col_f_ksp,ierr))

          PetscCallA(KSPSetUp(col_f_ksp,ierr))
          PetscCallA(KSPSolve(col_f_ksp,col_f_vecb,col_f_vecx,ierr))

! ------------
! check answer
! ------------
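! b was assembled above as b = A*x with x(:) = 1, so the exact solution of
! A x = b is x(:) = 1; err(icase) records the largest deviation from 1
! returned by this thread's solve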
          err(icase) = maxval(abs(x(:)-1))

!$omp critical
          PetscCallA(VecResetArray(col_f_vecx,ierr))
!$omp end critical

!$omp critical
          PetscCallA(VecResetArray(col_f_vecb,ierr))
!$omp end critical

        enddo
      enddo

!$omp parallel do                                                      &
!$omp&  private(ith,ierr)                                              &
!$omp&  private(col_f_mat,col_f_vecb,col_f_vecx,col_f_ksp)
      do ith=1,nthreads
        col_f_mat  => Mcol_f_mat(ith)
        col_f_vecb => Mcol_f_vecb(ith)
        col_f_vecx => Mcol_f_vecx(ith)
        col_f_ksp  => Mcol_f_ksp(ith)

        PetscCallA(MatDestroy(col_f_mat,ierr))
        PetscCallA(VecDestroy(col_f_vecb,ierr))
        PetscCallA(VecDestroy(col_f_vecx,ierr))

        PetscCallA(KSPDestroy(col_f_ksp,ierr))

      enddo

      call system_clock(t2,count_rate)
      ttime = real(t2-t1)/real(count_rate)
      write(*,*) 'total time is ',ttime

      write(*,*) 'maxval(err) ', maxval(err)
      do icase=1,NCASES
        if (err(icase) > tol) then
          write(*,*) 'icase,err(icase) ',icase,err(icase)
        endif
      enddo

      deallocate(ilist,jlist,alist)
      deallocate(x,b)
      PetscCallA(PetscFinalize(ierr))
      end program tpetsc

!/*TEST
!
!   build:
!     requires: double !complex !defined(PETSC_USE_64BIT_INDICES)
!
!   test:
!     output_file: output/ex61f_1.out
!     TODO: Need to determine how to test OpenMP code
!
!TEST*/