1c4762a1bSJed Brown static char help[] = "Solves -Laplacian u - exp(u) = 0, 0 < x < 1 using GPU\n\n"; 2c4762a1bSJed Brown /* 3c4762a1bSJed Brown Same as ex47.c except it also uses the GPU to evaluate the function 4c4762a1bSJed Brown */ 5c4762a1bSJed Brown 6c4762a1bSJed Brown #include <petscdm.h> 7c4762a1bSJed Brown #include <petscdmda.h> 8c4762a1bSJed Brown #include <petscsnes.h> 9c4762a1bSJed Brown 10c4762a1bSJed Brown #include <thrust/device_ptr.h> 11c4762a1bSJed Brown #include <thrust/for_each.h> 12c4762a1bSJed Brown #include <thrust/tuple.h> 13c4762a1bSJed Brown #include <thrust/iterator/constant_iterator.h> 14c4762a1bSJed Brown #include <thrust/iterator/counting_iterator.h> 15c4762a1bSJed Brown #include <thrust/iterator/zip_iterator.h> 16c4762a1bSJed Brown 17c4762a1bSJed Brown extern PetscErrorCode ComputeFunction(SNES,Vec,Vec,void*), ComputeJacobian(SNES,Vec,Mat,Mat,void*); 18c4762a1bSJed Brown PetscBool useCUDA = PETSC_FALSE; 19c4762a1bSJed Brown 20c4762a1bSJed Brown int main(int argc,char **argv) 21c4762a1bSJed Brown { 22c4762a1bSJed Brown SNES snes; 23c4762a1bSJed Brown Vec x,f; 24c4762a1bSJed Brown Mat J; 25c4762a1bSJed Brown DM da; 26c4762a1bSJed Brown char *tmp,typeName[256]; 27c4762a1bSJed Brown PetscBool flg; 28c4762a1bSJed Brown 29*b122ec5aSJacob Faibussowitsch CHKERRQ(PetscInitialize(&argc,&argv,(char*)0,help)); 305f80ce2aSJacob Faibussowitsch CHKERRQ(PetscOptionsGetString(NULL,NULL,"-dm_vec_type",typeName,sizeof(typeName),&flg)); 31c4762a1bSJed Brown if (flg) { 325f80ce2aSJacob Faibussowitsch CHKERRQ(PetscStrstr(typeName,"cuda",&tmp)); 33c4762a1bSJed Brown if (tmp) useCUDA = PETSC_TRUE; 34c4762a1bSJed Brown } 35c4762a1bSJed Brown 365f80ce2aSJacob Faibussowitsch CHKERRQ(DMDACreate1d(PETSC_COMM_WORLD,DM_BOUNDARY_NONE,8,1,1,NULL,&da)); 375f80ce2aSJacob Faibussowitsch CHKERRQ(DMSetFromOptions(da)); 385f80ce2aSJacob Faibussowitsch CHKERRQ(DMSetUp(da)); 395f80ce2aSJacob Faibussowitsch CHKERRQ(DMCreateGlobalVector(da,&x); VecDuplicate(x,&f)); 405f80ce2aSJacob Faibussowitsch CHKERRQ(DMCreateMatrix(da,&J)); 41c4762a1bSJed Brown 425f80ce2aSJacob Faibussowitsch CHKERRQ(SNESCreate(PETSC_COMM_WORLD,&snes)); 435f80ce2aSJacob Faibussowitsch CHKERRQ(SNESSetFunction(snes,f,ComputeFunction,da)); 445f80ce2aSJacob Faibussowitsch CHKERRQ(SNESSetJacobian(snes,J,J,ComputeJacobian,da)); 455f80ce2aSJacob Faibussowitsch CHKERRQ(SNESSetFromOptions(snes)); 465f80ce2aSJacob Faibussowitsch CHKERRQ(SNESSolve(snes,NULL,x)); 47c4762a1bSJed Brown 485f80ce2aSJacob Faibussowitsch CHKERRQ(MatDestroy(&J)); 495f80ce2aSJacob Faibussowitsch CHKERRQ(VecDestroy(&x)); 505f80ce2aSJacob Faibussowitsch CHKERRQ(VecDestroy(&f)); 515f80ce2aSJacob Faibussowitsch CHKERRQ(SNESDestroy(&snes)); 525f80ce2aSJacob Faibussowitsch CHKERRQ(DMDestroy(&da)); 53c4762a1bSJed Brown 54*b122ec5aSJacob Faibussowitsch CHKERRQ(PetscFinalize()); 55*b122ec5aSJacob Faibussowitsch return 0; 56c4762a1bSJed Brown } 57c4762a1bSJed Brown 58c4762a1bSJed Brown struct ApplyStencil 59c4762a1bSJed Brown { 60c4762a1bSJed Brown template <typename Tuple> 61c4762a1bSJed Brown __host__ __device__ 62c4762a1bSJed Brown void operator()(Tuple t) 63c4762a1bSJed Brown { 64c4762a1bSJed Brown /* f = (2*x_i - x_(i+1) - x_(i-1))/h - h*exp(x_i) */ 65c4762a1bSJed Brown thrust::get<0>(t) = 1; 66c4762a1bSJed Brown if ((thrust::get<4>(t) > 0) && (thrust::get<4>(t) < thrust::get<5>(t)-1)) { 6788e3df94SJunchao Zhang thrust::get<0>(t) = (((PetscScalar)2.0)*thrust::get<1>(t) - thrust::get<2>(t) - thrust::get<3>(t)) / (thrust::get<6>(t)) - (thrust::get<6>(t))*exp(thrust::get<1>(t)); 68c4762a1bSJed Brown } else if (thrust::get<4>(t) == 0) { 69c4762a1bSJed Brown thrust::get<0>(t) = thrust::get<1>(t) / (thrust::get<6>(t)); 70c4762a1bSJed Brown } else if (thrust::get<4>(t) == thrust::get<5>(t)-1) { 71c4762a1bSJed Brown thrust::get<0>(t) = thrust::get<1>(t) / (thrust::get<6>(t)); 72c4762a1bSJed Brown } 73c4762a1bSJed Brown } 74c4762a1bSJed Brown }; 75c4762a1bSJed Brown 76c4762a1bSJed Brown PetscErrorCode ComputeFunction(SNES snes,Vec x,Vec f,void *ctx) 77c4762a1bSJed Brown { 78c4762a1bSJed Brown PetscInt i,Mx,xs,xm,xstartshift,xendshift,fstart,lsize; 79c4762a1bSJed Brown PetscScalar *xx,*ff,hx; 80c4762a1bSJed Brown DM da = (DM) ctx; 81c4762a1bSJed Brown Vec xlocal; 82c4762a1bSJed Brown PetscMPIInt rank,size; 83c4762a1bSJed Brown MPI_Comm comm; 84c4762a1bSJed Brown PetscScalar const *xarray; 85c4762a1bSJed Brown PetscScalar *farray; 86c4762a1bSJed Brown 875f80ce2aSJacob Faibussowitsch CHKERRQ(DMDAGetInfo(da,PETSC_IGNORE,&Mx,PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE)); 88c4762a1bSJed Brown hx = 1.0/(PetscReal)(Mx-1); 895f80ce2aSJacob Faibussowitsch CHKERRQ(DMGetLocalVector(da,&xlocal)); 905f80ce2aSJacob Faibussowitsch CHKERRQ(DMGlobalToLocalBegin(da,x,INSERT_VALUES,xlocal)); 915f80ce2aSJacob Faibussowitsch CHKERRQ(DMGlobalToLocalEnd(da,x,INSERT_VALUES,xlocal)); 92c4762a1bSJed Brown 93c4762a1bSJed Brown if (useCUDA) { 945f80ce2aSJacob Faibussowitsch CHKERRQ(VecCUDAGetArrayRead(xlocal,&xarray)); 955f80ce2aSJacob Faibussowitsch CHKERRQ(VecCUDAGetArrayWrite(f,&farray)); 965f80ce2aSJacob Faibussowitsch CHKERRQ(PetscObjectGetComm((PetscObject)da,&comm)); 975f80ce2aSJacob Faibussowitsch CHKERRMPI(MPI_Comm_size(comm,&size)); 985f80ce2aSJacob Faibussowitsch CHKERRMPI(MPI_Comm_rank(comm,&rank)); 99c4762a1bSJed Brown if (rank) xstartshift = 1; 100c4762a1bSJed Brown else xstartshift = 0; 101c4762a1bSJed Brown if (rank != size-1) xendshift = 1; 102c4762a1bSJed Brown else xendshift = 0; 1035f80ce2aSJacob Faibussowitsch CHKERRQ(VecGetOwnershipRange(f,&fstart,NULL)); 1045f80ce2aSJacob Faibussowitsch CHKERRQ(VecGetLocalSize(x,&lsize)); 105c4762a1bSJed Brown try { 106c4762a1bSJed Brown thrust::for_each( 107c4762a1bSJed Brown thrust::make_zip_iterator( 108c4762a1bSJed Brown thrust::make_tuple( 109c4762a1bSJed Brown thrust::device_ptr<PetscScalar>(farray), 110c4762a1bSJed Brown thrust::device_ptr<const PetscScalar>(xarray + xstartshift), 111c4762a1bSJed Brown thrust::device_ptr<const PetscScalar>(xarray + xstartshift + 1), 112c4762a1bSJed Brown thrust::device_ptr<const PetscScalar>(xarray + xstartshift - 1), 113c4762a1bSJed Brown thrust::counting_iterator<int>(fstart), 114c4762a1bSJed Brown thrust::constant_iterator<int>(Mx), 115c4762a1bSJed Brown thrust::constant_iterator<PetscScalar>(hx))), 116c4762a1bSJed Brown thrust::make_zip_iterator( 117c4762a1bSJed Brown thrust::make_tuple( 118c4762a1bSJed Brown thrust::device_ptr<PetscScalar>(farray + lsize), 119c4762a1bSJed Brown thrust::device_ptr<const PetscScalar>(xarray + lsize - xendshift), 120c4762a1bSJed Brown thrust::device_ptr<const PetscScalar>(xarray + lsize - xendshift + 1), 121c4762a1bSJed Brown thrust::device_ptr<const PetscScalar>(xarray + lsize - xendshift - 1), 122c4762a1bSJed Brown thrust::counting_iterator<int>(fstart) + lsize, 123c4762a1bSJed Brown thrust::constant_iterator<int>(Mx), 124c4762a1bSJed Brown thrust::constant_iterator<PetscScalar>(hx))), 125c4762a1bSJed Brown ApplyStencil()); 126c4762a1bSJed Brown } 127c4762a1bSJed Brown catch (char *all) { 1285f80ce2aSJacob Faibussowitsch CHKERRQ(PetscPrintf(PETSC_COMM_WORLD, "Thrust is not working\n")); 129c4762a1bSJed Brown } 1305f80ce2aSJacob Faibussowitsch CHKERRQ(VecCUDARestoreArrayRead(xlocal,&xarray)); 1315f80ce2aSJacob Faibussowitsch CHKERRQ(VecCUDARestoreArrayWrite(f,&farray)); 132c4762a1bSJed Brown } else { 1335f80ce2aSJacob Faibussowitsch CHKERRQ(DMDAVecGetArray(da,xlocal,&xx)); 1345f80ce2aSJacob Faibussowitsch CHKERRQ(DMDAVecGetArray(da,f,&ff)); 1355f80ce2aSJacob Faibussowitsch CHKERRQ(DMDAGetCorners(da,&xs,NULL,NULL,&xm,NULL,NULL)); 136c4762a1bSJed Brown 137c4762a1bSJed Brown for (i=xs; i<xs+xm; i++) { 138c4762a1bSJed Brown if (i == 0 || i == Mx-1) ff[i] = xx[i]/hx; 139c4762a1bSJed Brown else ff[i] = (2.0*xx[i] - xx[i-1] - xx[i+1])/hx - hx*PetscExpScalar(xx[i]); 140c4762a1bSJed Brown } 1415f80ce2aSJacob Faibussowitsch CHKERRQ(DMDAVecRestoreArray(da,xlocal,&xx)); 1425f80ce2aSJacob Faibussowitsch CHKERRQ(DMDAVecRestoreArray(da,f,&ff)); 143c4762a1bSJed Brown } 1445f80ce2aSJacob Faibussowitsch CHKERRQ(DMRestoreLocalVector(da,&xlocal)); 145c4762a1bSJed Brown return 0; 146c4762a1bSJed Brown 147c4762a1bSJed Brown } 148c4762a1bSJed Brown PetscErrorCode ComputeJacobian(SNES snes,Vec x,Mat J,Mat B,void *ctx) 149c4762a1bSJed Brown { 150c4762a1bSJed Brown DM da = (DM) ctx; 151c4762a1bSJed Brown PetscInt i,Mx,xm,xs; 152c4762a1bSJed Brown PetscScalar hx,*xx; 153c4762a1bSJed Brown Vec xlocal; 154c4762a1bSJed Brown 1555f80ce2aSJacob Faibussowitsch CHKERRQ(DMDAGetInfo(da,PETSC_IGNORE,&Mx,PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE)); 156c4762a1bSJed Brown hx = 1.0/(PetscReal)(Mx-1); 1575f80ce2aSJacob Faibussowitsch CHKERRQ(DMGetLocalVector(da,&xlocal)); 1585f80ce2aSJacob Faibussowitsch CHKERRQ(DMGlobalToLocalBegin(da,x,INSERT_VALUES,xlocal)); 1595f80ce2aSJacob Faibussowitsch CHKERRQ(DMGlobalToLocalEnd(da,x,INSERT_VALUES,xlocal)); 1605f80ce2aSJacob Faibussowitsch CHKERRQ(DMDAVecGetArray(da,xlocal,&xx)); 1615f80ce2aSJacob Faibussowitsch CHKERRQ(DMDAGetCorners(da,&xs,NULL,NULL,&xm,NULL,NULL)); 162c4762a1bSJed Brown 163c4762a1bSJed Brown for (i=xs; i<xs+xm; i++) { 164c4762a1bSJed Brown if (i == 0 || i == Mx-1) { 1655f80ce2aSJacob Faibussowitsch CHKERRQ(MatSetValue(J,i,i,1.0/hx,INSERT_VALUES)); 166c4762a1bSJed Brown } else { 1675f80ce2aSJacob Faibussowitsch CHKERRQ(MatSetValue(J,i,i-1,-1.0/hx,INSERT_VALUES)); 1685f80ce2aSJacob Faibussowitsch CHKERRQ(MatSetValue(J,i,i,2.0/hx - hx*PetscExpScalar(xx[i]),INSERT_VALUES)); 1695f80ce2aSJacob Faibussowitsch CHKERRQ(MatSetValue(J,i,i+1,-1.0/hx,INSERT_VALUES)); 170c4762a1bSJed Brown } 171c4762a1bSJed Brown } 1725f80ce2aSJacob Faibussowitsch CHKERRQ(MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY)); 1735f80ce2aSJacob Faibussowitsch CHKERRQ(MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY)); 1745f80ce2aSJacob Faibussowitsch CHKERRQ(DMDAVecRestoreArray(da,xlocal,&xx)); 1755f80ce2aSJacob Faibussowitsch CHKERRQ(DMRestoreLocalVector(da,&xlocal)); 176c4762a1bSJed Brown return 0; 177c4762a1bSJed Brown } 178c4762a1bSJed Brown 179c4762a1bSJed Brown /*TEST 180c4762a1bSJed Brown 181c4762a1bSJed Brown build: 182c4762a1bSJed Brown requires: cuda 183c4762a1bSJed Brown 184499ce956SJunchao Zhang testset: 185499ce956SJunchao Zhang args: -snes_monitor_short -dm_mat_type aijcusparse -dm_vec_type cuda 186499ce956SJunchao Zhang output_file: output/ex47cu_1.out 187c4762a1bSJed Brown test: 188499ce956SJunchao Zhang suffix: 1 189499ce956SJunchao Zhang nsize: 1 190499ce956SJunchao Zhang test: 191499ce956SJunchao Zhang suffix: 2 192499ce956SJunchao Zhang nsize: 2 193c4762a1bSJed Brown 194c4762a1bSJed Brown TEST*/ 195