1*c4762a1bSJed Brown static char help[] = "Solves -Laplacian u - exp(u) = 0, 0 < x < 1 using GPU\n\n"; 2*c4762a1bSJed Brown /* 3*c4762a1bSJed Brown Same as ex47.c except it also uses the GPU to evaluate the function 4*c4762a1bSJed Brown */ 5*c4762a1bSJed Brown 6*c4762a1bSJed Brown #include <petscdm.h> 7*c4762a1bSJed Brown #include <petscdmda.h> 8*c4762a1bSJed Brown #include <petscsnes.h> 9*c4762a1bSJed Brown 10*c4762a1bSJed Brown #include <thrust/device_ptr.h> 11*c4762a1bSJed Brown #include <thrust/for_each.h> 12*c4762a1bSJed Brown #include <thrust/tuple.h> 13*c4762a1bSJed Brown #include <thrust/iterator/constant_iterator.h> 14*c4762a1bSJed Brown #include <thrust/iterator/counting_iterator.h> 15*c4762a1bSJed Brown #include <thrust/iterator/zip_iterator.h> 16*c4762a1bSJed Brown 17*c4762a1bSJed Brown extern PetscErrorCode ComputeFunction(SNES,Vec,Vec,void*), ComputeJacobian(SNES,Vec,Mat,Mat,void*); 18*c4762a1bSJed Brown PetscBool useCUDA = PETSC_FALSE; 19*c4762a1bSJed Brown 20*c4762a1bSJed Brown int main(int argc,char **argv) 21*c4762a1bSJed Brown { 22*c4762a1bSJed Brown SNES snes; 23*c4762a1bSJed Brown Vec x,f; 24*c4762a1bSJed Brown Mat J; 25*c4762a1bSJed Brown DM da; 26*c4762a1bSJed Brown PetscErrorCode ierr; 27*c4762a1bSJed Brown char *tmp,typeName[256]; 28*c4762a1bSJed Brown PetscBool flg; 29*c4762a1bSJed Brown 30*c4762a1bSJed Brown ierr = PetscInitialize(&argc,&argv,(char*)0,help);if (ierr) return ierr; 31*c4762a1bSJed Brown ierr = PetscOptionsGetString(NULL,NULL,"-dm_vec_type",typeName,256,&flg);CHKERRQ(ierr); 32*c4762a1bSJed Brown if (flg) { 33*c4762a1bSJed Brown ierr = PetscStrstr(typeName,"cuda",&tmp);CHKERRQ(ierr); 34*c4762a1bSJed Brown if (tmp) useCUDA = PETSC_TRUE; 35*c4762a1bSJed Brown } 36*c4762a1bSJed Brown 37*c4762a1bSJed Brown ierr = DMDACreate1d(PETSC_COMM_WORLD,DM_BOUNDARY_NONE,8,1,1,NULL,&da);CHKERRQ(ierr); 38*c4762a1bSJed Brown ierr = DMSetFromOptions(da);CHKERRQ(ierr); 39*c4762a1bSJed Brown ierr = DMSetUp(da);CHKERRQ(ierr); 40*c4762a1bSJed Brown ierr = DMCreateGlobalVector(da,&x); VecDuplicate(x,&f);CHKERRQ(ierr); 41*c4762a1bSJed Brown ierr = DMSetMatType(da,MATAIJ);CHKERRQ(ierr); 42*c4762a1bSJed Brown ierr = DMCreateMatrix(da,&J);CHKERRQ(ierr); 43*c4762a1bSJed Brown 44*c4762a1bSJed Brown ierr = SNESCreate(PETSC_COMM_WORLD,&snes);CHKERRQ(ierr); 45*c4762a1bSJed Brown ierr = SNESSetFunction(snes,f,ComputeFunction,da);CHKERRQ(ierr); 46*c4762a1bSJed Brown ierr = SNESSetJacobian(snes,J,J,ComputeJacobian,da);CHKERRQ(ierr); 47*c4762a1bSJed Brown ierr = SNESSetFromOptions(snes);CHKERRQ(ierr); 48*c4762a1bSJed Brown ierr = SNESSolve(snes,NULL,x);CHKERRQ(ierr); 49*c4762a1bSJed Brown 50*c4762a1bSJed Brown ierr = MatDestroy(&J);CHKERRQ(ierr); 51*c4762a1bSJed Brown ierr = VecDestroy(&x);CHKERRQ(ierr); 52*c4762a1bSJed Brown ierr = VecDestroy(&f);CHKERRQ(ierr); 53*c4762a1bSJed Brown ierr = SNESDestroy(&snes);CHKERRQ(ierr); 54*c4762a1bSJed Brown ierr = DMDestroy(&da);CHKERRQ(ierr); 55*c4762a1bSJed Brown 56*c4762a1bSJed Brown ierr = PetscFinalize(); 57*c4762a1bSJed Brown return ierr; 58*c4762a1bSJed Brown } 59*c4762a1bSJed Brown 60*c4762a1bSJed Brown struct ApplyStencil 61*c4762a1bSJed Brown { 62*c4762a1bSJed Brown template <typename Tuple> 63*c4762a1bSJed Brown __host__ __device__ 64*c4762a1bSJed Brown void operator()(Tuple t) 65*c4762a1bSJed Brown { 66*c4762a1bSJed Brown /* f = (2*x_i - x_(i+1) - x_(i-1))/h - h*exp(x_i) */ 67*c4762a1bSJed Brown thrust::get<0>(t) = 1; 68*c4762a1bSJed Brown if ((thrust::get<4>(t) > 0) && (thrust::get<4>(t) < thrust::get<5>(t)-1)) { 69*c4762a1bSJed Brown thrust::get<0>(t) = (2.0*thrust::get<1>(t) - thrust::get<2>(t) - thrust::get<3>(t)) / (thrust::get<6>(t)) - (thrust::get<6>(t))*exp(thrust::get<1>(t)); 70*c4762a1bSJed Brown } else if (thrust::get<4>(t) == 0) { 71*c4762a1bSJed Brown thrust::get<0>(t) = thrust::get<1>(t) / (thrust::get<6>(t)); 72*c4762a1bSJed Brown } else if (thrust::get<4>(t) == thrust::get<5>(t)-1) { 73*c4762a1bSJed Brown thrust::get<0>(t) = thrust::get<1>(t) / (thrust::get<6>(t)); 74*c4762a1bSJed Brown } 75*c4762a1bSJed Brown } 76*c4762a1bSJed Brown }; 77*c4762a1bSJed Brown 78*c4762a1bSJed Brown PetscErrorCode ComputeFunction(SNES snes,Vec x,Vec f,void *ctx) 79*c4762a1bSJed Brown { 80*c4762a1bSJed Brown PetscInt i,Mx,xs,xm,xstartshift,xendshift,fstart,lsize; 81*c4762a1bSJed Brown PetscScalar *xx,*ff,hx; 82*c4762a1bSJed Brown DM da = (DM) ctx; 83*c4762a1bSJed Brown Vec xlocal; 84*c4762a1bSJed Brown PetscErrorCode ierr; 85*c4762a1bSJed Brown PetscMPIInt rank,size; 86*c4762a1bSJed Brown MPI_Comm comm; 87*c4762a1bSJed Brown PetscScalar const *xarray; 88*c4762a1bSJed Brown PetscScalar *farray; 89*c4762a1bSJed Brown 90*c4762a1bSJed Brown ierr = DMDAGetInfo(da,PETSC_IGNORE,&Mx,PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE);CHKERRQ(ierr); 91*c4762a1bSJed Brown hx = 1.0/(PetscReal)(Mx-1); 92*c4762a1bSJed Brown ierr = DMGetLocalVector(da,&xlocal);CHKERRQ(ierr); 93*c4762a1bSJed Brown ierr = DMGlobalToLocalBegin(da,x,INSERT_VALUES,xlocal);CHKERRQ(ierr); 94*c4762a1bSJed Brown ierr = DMGlobalToLocalEnd(da,x,INSERT_VALUES,xlocal);CHKERRQ(ierr); 95*c4762a1bSJed Brown 96*c4762a1bSJed Brown if (useCUDA) { 97*c4762a1bSJed Brown ierr = VecCUDAGetArrayRead(xlocal,&xarray);CHKERRQ(ierr); 98*c4762a1bSJed Brown ierr = VecCUDAGetArrayWrite(f,&farray);CHKERRQ(ierr); 99*c4762a1bSJed Brown ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr); 100*c4762a1bSJed Brown ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 101*c4762a1bSJed Brown ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 102*c4762a1bSJed Brown if (rank) xstartshift = 1; 103*c4762a1bSJed Brown else xstartshift = 0; 104*c4762a1bSJed Brown if (rank != size-1) xendshift = 1; 105*c4762a1bSJed Brown else xendshift = 0; 106*c4762a1bSJed Brown ierr = VecGetOwnershipRange(f,&fstart,NULL);CHKERRQ(ierr); 107*c4762a1bSJed Brown ierr = VecGetLocalSize(x,&lsize);CHKERRQ(ierr); 108*c4762a1bSJed Brown try { 109*c4762a1bSJed Brown thrust::for_each( 110*c4762a1bSJed Brown thrust::make_zip_iterator( 111*c4762a1bSJed Brown thrust::make_tuple( 112*c4762a1bSJed Brown thrust::device_ptr<PetscScalar>(farray), 113*c4762a1bSJed Brown thrust::device_ptr<const PetscScalar>(xarray + xstartshift), 114*c4762a1bSJed Brown thrust::device_ptr<const PetscScalar>(xarray + xstartshift + 1), 115*c4762a1bSJed Brown thrust::device_ptr<const PetscScalar>(xarray + xstartshift - 1), 116*c4762a1bSJed Brown thrust::counting_iterator<int>(fstart), 117*c4762a1bSJed Brown thrust::constant_iterator<int>(Mx), 118*c4762a1bSJed Brown thrust::constant_iterator<PetscScalar>(hx))), 119*c4762a1bSJed Brown thrust::make_zip_iterator( 120*c4762a1bSJed Brown thrust::make_tuple( 121*c4762a1bSJed Brown thrust::device_ptr<PetscScalar>(farray + lsize), 122*c4762a1bSJed Brown thrust::device_ptr<const PetscScalar>(xarray + lsize - xendshift), 123*c4762a1bSJed Brown thrust::device_ptr<const PetscScalar>(xarray + lsize - xendshift + 1), 124*c4762a1bSJed Brown thrust::device_ptr<const PetscScalar>(xarray + lsize - xendshift - 1), 125*c4762a1bSJed Brown thrust::counting_iterator<int>(fstart) + lsize, 126*c4762a1bSJed Brown thrust::constant_iterator<int>(Mx), 127*c4762a1bSJed Brown thrust::constant_iterator<PetscScalar>(hx))), 128*c4762a1bSJed Brown ApplyStencil()); 129*c4762a1bSJed Brown } 130*c4762a1bSJed Brown catch (char *all) { 131*c4762a1bSJed Brown ierr = PetscPrintf(PETSC_COMM_WORLD, "Thrust is not working\n");CHKERRQ(ierr); 132*c4762a1bSJed Brown } 133*c4762a1bSJed Brown ierr = VecCUDARestoreArrayRead(xlocal,&xarray);CHKERRQ(ierr); 134*c4762a1bSJed Brown ierr = VecCUDARestoreArrayWrite(f,&farray);CHKERRQ(ierr); 135*c4762a1bSJed Brown } else { 136*c4762a1bSJed Brown ierr = DMDAVecGetArray(da,xlocal,&xx);CHKERRQ(ierr); 137*c4762a1bSJed Brown ierr = DMDAVecGetArray(da,f,&ff);CHKERRQ(ierr); 138*c4762a1bSJed Brown ierr = DMDAGetCorners(da,&xs,NULL,NULL,&xm,NULL,NULL);CHKERRQ(ierr); 139*c4762a1bSJed Brown 140*c4762a1bSJed Brown for (i=xs; i<xs+xm; i++) { 141*c4762a1bSJed Brown if (i == 0 || i == Mx-1) ff[i] = xx[i]/hx; 142*c4762a1bSJed Brown else ff[i] = (2.0*xx[i] - xx[i-1] - xx[i+1])/hx - hx*PetscExpScalar(xx[i]); 143*c4762a1bSJed Brown } 144*c4762a1bSJed Brown ierr = DMDAVecRestoreArray(da,xlocal,&xx);CHKERRQ(ierr); 145*c4762a1bSJed Brown ierr = DMDAVecRestoreArray(da,f,&ff);CHKERRQ(ierr); 146*c4762a1bSJed Brown } 147*c4762a1bSJed Brown ierr = DMRestoreLocalVector(da,&xlocal);CHKERRQ(ierr); 148*c4762a1bSJed Brown // VecView(x,0);printf("f\n"); 149*c4762a1bSJed Brown // VecView(f,0); 150*c4762a1bSJed Brown return 0; 151*c4762a1bSJed Brown 152*c4762a1bSJed Brown } 153*c4762a1bSJed Brown PetscErrorCode ComputeJacobian(SNES snes,Vec x,Mat J,Mat B,void *ctx) 154*c4762a1bSJed Brown { 155*c4762a1bSJed Brown DM da = (DM) ctx; 156*c4762a1bSJed Brown PetscInt i,Mx,xm,xs; 157*c4762a1bSJed Brown PetscScalar hx,*xx; 158*c4762a1bSJed Brown Vec xlocal; 159*c4762a1bSJed Brown PetscErrorCode ierr; 160*c4762a1bSJed Brown 161*c4762a1bSJed Brown ierr = DMDAGetInfo(da,PETSC_IGNORE,&Mx,PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE);CHKERRQ(ierr); 162*c4762a1bSJed Brown hx = 1.0/(PetscReal)(Mx-1); 163*c4762a1bSJed Brown ierr = DMGetLocalVector(da,&xlocal);DMGlobalToLocalBegin(da,x,INSERT_VALUES,xlocal);CHKERRQ(ierr); 164*c4762a1bSJed Brown ierr = DMGlobalToLocalEnd(da,x,INSERT_VALUES,xlocal);CHKERRQ(ierr); 165*c4762a1bSJed Brown ierr = DMDAVecGetArray(da,xlocal,&xx);CHKERRQ(ierr); 166*c4762a1bSJed Brown ierr = DMDAGetCorners(da,&xs,NULL,NULL,&xm,NULL,NULL);CHKERRQ(ierr); 167*c4762a1bSJed Brown 168*c4762a1bSJed Brown for (i=xs; i<xs+xm; i++) { 169*c4762a1bSJed Brown if (i == 0 || i == Mx-1) { 170*c4762a1bSJed Brown ierr = MatSetValue(J,i,i,1.0/hx,INSERT_VALUES);CHKERRQ(ierr); 171*c4762a1bSJed Brown } else { 172*c4762a1bSJed Brown ierr = MatSetValue(J,i,i-1,-1.0/hx,INSERT_VALUES);CHKERRQ(ierr); 173*c4762a1bSJed Brown ierr = MatSetValue(J,i,i,2.0/hx - hx*PetscExpScalar(xx[i]),INSERT_VALUES);CHKERRQ(ierr); 174*c4762a1bSJed Brown ierr = MatSetValue(J,i,i+1,-1.0/hx,INSERT_VALUES);CHKERRQ(ierr); 175*c4762a1bSJed Brown } 176*c4762a1bSJed Brown } 177*c4762a1bSJed Brown ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 178*c4762a1bSJed Brown ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 179*c4762a1bSJed Brown ierr = DMDAVecRestoreArray(da,xlocal,&xx);CHKERRQ(ierr); 180*c4762a1bSJed Brown ierr = DMRestoreLocalVector(da,&xlocal);CHKERRQ(ierr); 181*c4762a1bSJed Brown return 0; 182*c4762a1bSJed Brown } 183*c4762a1bSJed Brown 184*c4762a1bSJed Brown 185*c4762a1bSJed Brown 186*c4762a1bSJed Brown /*TEST 187*c4762a1bSJed Brown 188*c4762a1bSJed Brown build: 189*c4762a1bSJed Brown requires: cuda 190*c4762a1bSJed Brown 191*c4762a1bSJed Brown test: 192*c4762a1bSJed Brown args: -snes_monitor_short -dm_vec_type cuda 193*c4762a1bSJed Brown 194*c4762a1bSJed Brown TEST*/ 195