xref: /petsc/src/snes/tutorials/ex47cu.cu (revision 5f80ce2ab25dff0f4601e710601cbbcecf323266)
1c4762a1bSJed Brown static char help[] = "Solves -Laplacian u - exp(u) = 0,  0 < x < 1 using GPU\n\n";
2c4762a1bSJed Brown /*
3c4762a1bSJed Brown    Same as ex47.c except it also uses the GPU to evaluate the function
4c4762a1bSJed Brown */
5c4762a1bSJed Brown 
6c4762a1bSJed Brown #include <petscdm.h>
7c4762a1bSJed Brown #include <petscdmda.h>
8c4762a1bSJed Brown #include <petscsnes.h>
9c4762a1bSJed Brown 
10c4762a1bSJed Brown #include <thrust/device_ptr.h>
11c4762a1bSJed Brown #include <thrust/for_each.h>
12c4762a1bSJed Brown #include <thrust/tuple.h>
13c4762a1bSJed Brown #include <thrust/iterator/constant_iterator.h>
14c4762a1bSJed Brown #include <thrust/iterator/counting_iterator.h>
15c4762a1bSJed Brown #include <thrust/iterator/zip_iterator.h>
16c4762a1bSJed Brown 
17c4762a1bSJed Brown extern PetscErrorCode ComputeFunction(SNES,Vec,Vec,void*), ComputeJacobian(SNES,Vec,Mat,Mat,void*);
18c4762a1bSJed Brown PetscBool useCUDA = PETSC_FALSE;
19c4762a1bSJed Brown 
20c4762a1bSJed Brown int main(int argc,char **argv)
21c4762a1bSJed Brown {
22c4762a1bSJed Brown   SNES           snes;
23c4762a1bSJed Brown   Vec            x,f;
24c4762a1bSJed Brown   Mat            J;
25c4762a1bSJed Brown   DM             da;
26c4762a1bSJed Brown   char           *tmp,typeName[256];
27c4762a1bSJed Brown   PetscBool      flg;
28c4762a1bSJed Brown 
29c4762a1bSJed Brown   ierr = PetscInitialize(&argc,&argv,(char*)0,help);if (ierr) return ierr;
30*5f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscOptionsGetString(NULL,NULL,"-dm_vec_type",typeName,sizeof(typeName),&flg));
31c4762a1bSJed Brown   if (flg) {
32*5f80ce2aSJacob Faibussowitsch     CHKERRQ(PetscStrstr(typeName,"cuda",&tmp));
33c4762a1bSJed Brown     if (tmp) useCUDA = PETSC_TRUE;
34c4762a1bSJed Brown   }
35c4762a1bSJed Brown 
36*5f80ce2aSJacob Faibussowitsch   CHKERRQ(DMDACreate1d(PETSC_COMM_WORLD,DM_BOUNDARY_NONE,8,1,1,NULL,&da));
37*5f80ce2aSJacob Faibussowitsch   CHKERRQ(DMSetFromOptions(da));
38*5f80ce2aSJacob Faibussowitsch   CHKERRQ(DMSetUp(da));
39*5f80ce2aSJacob Faibussowitsch   CHKERRQ(DMCreateGlobalVector(da,&x); VecDuplicate(x,&f));
40*5f80ce2aSJacob Faibussowitsch   CHKERRQ(DMCreateMatrix(da,&J));
41c4762a1bSJed Brown 
42*5f80ce2aSJacob Faibussowitsch   CHKERRQ(SNESCreate(PETSC_COMM_WORLD,&snes));
43*5f80ce2aSJacob Faibussowitsch   CHKERRQ(SNESSetFunction(snes,f,ComputeFunction,da));
44*5f80ce2aSJacob Faibussowitsch   CHKERRQ(SNESSetJacobian(snes,J,J,ComputeJacobian,da));
45*5f80ce2aSJacob Faibussowitsch   CHKERRQ(SNESSetFromOptions(snes));
46*5f80ce2aSJacob Faibussowitsch   CHKERRQ(SNESSolve(snes,NULL,x));
47c4762a1bSJed Brown 
48*5f80ce2aSJacob Faibussowitsch   CHKERRQ(MatDestroy(&J));
49*5f80ce2aSJacob Faibussowitsch   CHKERRQ(VecDestroy(&x));
50*5f80ce2aSJacob Faibussowitsch   CHKERRQ(VecDestroy(&f));
51*5f80ce2aSJacob Faibussowitsch   CHKERRQ(SNESDestroy(&snes));
52*5f80ce2aSJacob Faibussowitsch   CHKERRQ(DMDestroy(&da));
53c4762a1bSJed Brown 
54c4762a1bSJed Brown   ierr = PetscFinalize();
55c4762a1bSJed Brown   return ierr;
56c4762a1bSJed Brown }
57c4762a1bSJed Brown 
58c4762a1bSJed Brown struct ApplyStencil
59c4762a1bSJed Brown {
60c4762a1bSJed Brown   template <typename Tuple>
61c4762a1bSJed Brown   __host__ __device__
62c4762a1bSJed Brown   void operator()(Tuple t)
63c4762a1bSJed Brown   {
64c4762a1bSJed Brown     /* f = (2*x_i - x_(i+1) - x_(i-1))/h - h*exp(x_i) */
65c4762a1bSJed Brown     thrust::get<0>(t) = 1;
66c4762a1bSJed Brown     if ((thrust::get<4>(t) > 0) && (thrust::get<4>(t) < thrust::get<5>(t)-1)) {
6788e3df94SJunchao Zhang       thrust::get<0>(t) = (((PetscScalar)2.0)*thrust::get<1>(t) - thrust::get<2>(t) - thrust::get<3>(t)) / (thrust::get<6>(t)) - (thrust::get<6>(t))*exp(thrust::get<1>(t));
68c4762a1bSJed Brown     } else if (thrust::get<4>(t) == 0) {
69c4762a1bSJed Brown       thrust::get<0>(t) = thrust::get<1>(t) / (thrust::get<6>(t));
70c4762a1bSJed Brown     } else if (thrust::get<4>(t) == thrust::get<5>(t)-1) {
71c4762a1bSJed Brown       thrust::get<0>(t) = thrust::get<1>(t) / (thrust::get<6>(t));
72c4762a1bSJed Brown     }
73c4762a1bSJed Brown   }
74c4762a1bSJed Brown };
75c4762a1bSJed Brown 
76c4762a1bSJed Brown PetscErrorCode ComputeFunction(SNES snes,Vec x,Vec f,void *ctx)
77c4762a1bSJed Brown {
78c4762a1bSJed Brown   PetscInt          i,Mx,xs,xm,xstartshift,xendshift,fstart,lsize;
79c4762a1bSJed Brown   PetscScalar       *xx,*ff,hx;
80c4762a1bSJed Brown   DM                da = (DM) ctx;
81c4762a1bSJed Brown   Vec               xlocal;
82c4762a1bSJed Brown   PetscMPIInt       rank,size;
83c4762a1bSJed Brown   MPI_Comm          comm;
84c4762a1bSJed Brown   PetscScalar const *xarray;
85c4762a1bSJed Brown   PetscScalar       *farray;
86c4762a1bSJed Brown 
87*5f80ce2aSJacob Faibussowitsch   CHKERRQ(DMDAGetInfo(da,PETSC_IGNORE,&Mx,PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE));
88c4762a1bSJed Brown   hx   = 1.0/(PetscReal)(Mx-1);
89*5f80ce2aSJacob Faibussowitsch   CHKERRQ(DMGetLocalVector(da,&xlocal));
90*5f80ce2aSJacob Faibussowitsch   CHKERRQ(DMGlobalToLocalBegin(da,x,INSERT_VALUES,xlocal));
91*5f80ce2aSJacob Faibussowitsch   CHKERRQ(DMGlobalToLocalEnd(da,x,INSERT_VALUES,xlocal));
92c4762a1bSJed Brown 
93c4762a1bSJed Brown   if (useCUDA) {
94*5f80ce2aSJacob Faibussowitsch     CHKERRQ(VecCUDAGetArrayRead(xlocal,&xarray));
95*5f80ce2aSJacob Faibussowitsch     CHKERRQ(VecCUDAGetArrayWrite(f,&farray));
96*5f80ce2aSJacob Faibussowitsch     CHKERRQ(PetscObjectGetComm((PetscObject)da,&comm));
97*5f80ce2aSJacob Faibussowitsch     CHKERRMPI(MPI_Comm_size(comm,&size));
98*5f80ce2aSJacob Faibussowitsch     CHKERRMPI(MPI_Comm_rank(comm,&rank));
99c4762a1bSJed Brown     if (rank) xstartshift = 1;
100c4762a1bSJed Brown     else xstartshift = 0;
101c4762a1bSJed Brown     if (rank != size-1) xendshift = 1;
102c4762a1bSJed Brown     else xendshift = 0;
103*5f80ce2aSJacob Faibussowitsch     CHKERRQ(VecGetOwnershipRange(f,&fstart,NULL));
104*5f80ce2aSJacob Faibussowitsch     CHKERRQ(VecGetLocalSize(x,&lsize));
105c4762a1bSJed Brown     try {
106c4762a1bSJed Brown       thrust::for_each(
107c4762a1bSJed Brown         thrust::make_zip_iterator(
108c4762a1bSJed Brown           thrust::make_tuple(
109c4762a1bSJed Brown             thrust::device_ptr<PetscScalar>(farray),
110c4762a1bSJed Brown             thrust::device_ptr<const PetscScalar>(xarray + xstartshift),
111c4762a1bSJed Brown             thrust::device_ptr<const PetscScalar>(xarray + xstartshift + 1),
112c4762a1bSJed Brown             thrust::device_ptr<const PetscScalar>(xarray + xstartshift - 1),
113c4762a1bSJed Brown             thrust::counting_iterator<int>(fstart),
114c4762a1bSJed Brown             thrust::constant_iterator<int>(Mx),
115c4762a1bSJed Brown             thrust::constant_iterator<PetscScalar>(hx))),
116c4762a1bSJed Brown         thrust::make_zip_iterator(
117c4762a1bSJed Brown           thrust::make_tuple(
118c4762a1bSJed Brown             thrust::device_ptr<PetscScalar>(farray + lsize),
119c4762a1bSJed Brown             thrust::device_ptr<const PetscScalar>(xarray + lsize - xendshift),
120c4762a1bSJed Brown             thrust::device_ptr<const PetscScalar>(xarray + lsize - xendshift + 1),
121c4762a1bSJed Brown             thrust::device_ptr<const PetscScalar>(xarray + lsize - xendshift - 1),
122c4762a1bSJed Brown             thrust::counting_iterator<int>(fstart) + lsize,
123c4762a1bSJed Brown             thrust::constant_iterator<int>(Mx),
124c4762a1bSJed Brown             thrust::constant_iterator<PetscScalar>(hx))),
125c4762a1bSJed Brown         ApplyStencil());
126c4762a1bSJed Brown     }
127c4762a1bSJed Brown     catch (char *all) {
128*5f80ce2aSJacob Faibussowitsch       CHKERRQ(PetscPrintf(PETSC_COMM_WORLD, "Thrust is not working\n"));
129c4762a1bSJed Brown     }
130*5f80ce2aSJacob Faibussowitsch     CHKERRQ(VecCUDARestoreArrayRead(xlocal,&xarray));
131*5f80ce2aSJacob Faibussowitsch     CHKERRQ(VecCUDARestoreArrayWrite(f,&farray));
132c4762a1bSJed Brown   } else {
133*5f80ce2aSJacob Faibussowitsch     CHKERRQ(DMDAVecGetArray(da,xlocal,&xx));
134*5f80ce2aSJacob Faibussowitsch     CHKERRQ(DMDAVecGetArray(da,f,&ff));
135*5f80ce2aSJacob Faibussowitsch     CHKERRQ(DMDAGetCorners(da,&xs,NULL,NULL,&xm,NULL,NULL));
136c4762a1bSJed Brown 
137c4762a1bSJed Brown     for (i=xs; i<xs+xm; i++) {
138c4762a1bSJed Brown       if (i == 0 || i == Mx-1) ff[i] = xx[i]/hx;
139c4762a1bSJed Brown       else ff[i] =  (2.0*xx[i] - xx[i-1] - xx[i+1])/hx - hx*PetscExpScalar(xx[i]);
140c4762a1bSJed Brown     }
141*5f80ce2aSJacob Faibussowitsch     CHKERRQ(DMDAVecRestoreArray(da,xlocal,&xx));
142*5f80ce2aSJacob Faibussowitsch     CHKERRQ(DMDAVecRestoreArray(da,f,&ff));
143c4762a1bSJed Brown   }
144*5f80ce2aSJacob Faibussowitsch   CHKERRQ(DMRestoreLocalVector(da,&xlocal));
145c4762a1bSJed Brown   return 0;
146c4762a1bSJed Brown 
147c4762a1bSJed Brown }
148c4762a1bSJed Brown PetscErrorCode ComputeJacobian(SNES snes,Vec x,Mat J,Mat B,void *ctx)
149c4762a1bSJed Brown {
150c4762a1bSJed Brown   DM             da = (DM) ctx;
151c4762a1bSJed Brown   PetscInt       i,Mx,xm,xs;
152c4762a1bSJed Brown   PetscScalar    hx,*xx;
153c4762a1bSJed Brown   Vec            xlocal;
154c4762a1bSJed Brown 
155*5f80ce2aSJacob Faibussowitsch   CHKERRQ(DMDAGetInfo(da,PETSC_IGNORE,&Mx,PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE));
156c4762a1bSJed Brown   hx   = 1.0/(PetscReal)(Mx-1);
157*5f80ce2aSJacob Faibussowitsch   CHKERRQ(DMGetLocalVector(da,&xlocal));
158*5f80ce2aSJacob Faibussowitsch   CHKERRQ(DMGlobalToLocalBegin(da,x,INSERT_VALUES,xlocal));
159*5f80ce2aSJacob Faibussowitsch   CHKERRQ(DMGlobalToLocalEnd(da,x,INSERT_VALUES,xlocal));
160*5f80ce2aSJacob Faibussowitsch   CHKERRQ(DMDAVecGetArray(da,xlocal,&xx));
161*5f80ce2aSJacob Faibussowitsch   CHKERRQ(DMDAGetCorners(da,&xs,NULL,NULL,&xm,NULL,NULL));
162c4762a1bSJed Brown 
163c4762a1bSJed Brown   for (i=xs; i<xs+xm; i++) {
164c4762a1bSJed Brown     if (i == 0 || i == Mx-1) {
165*5f80ce2aSJacob Faibussowitsch       CHKERRQ(MatSetValue(J,i,i,1.0/hx,INSERT_VALUES));
166c4762a1bSJed Brown     } else {
167*5f80ce2aSJacob Faibussowitsch       CHKERRQ(MatSetValue(J,i,i-1,-1.0/hx,INSERT_VALUES));
168*5f80ce2aSJacob Faibussowitsch       CHKERRQ(MatSetValue(J,i,i,2.0/hx - hx*PetscExpScalar(xx[i]),INSERT_VALUES));
169*5f80ce2aSJacob Faibussowitsch       CHKERRQ(MatSetValue(J,i,i+1,-1.0/hx,INSERT_VALUES));
170c4762a1bSJed Brown     }
171c4762a1bSJed Brown   }
172*5f80ce2aSJacob Faibussowitsch   CHKERRQ(MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY));
173*5f80ce2aSJacob Faibussowitsch   CHKERRQ(MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY));
174*5f80ce2aSJacob Faibussowitsch   CHKERRQ(DMDAVecRestoreArray(da,xlocal,&xx));
175*5f80ce2aSJacob Faibussowitsch   CHKERRQ(DMRestoreLocalVector(da,&xlocal));
176c4762a1bSJed Brown   return 0;
177c4762a1bSJed Brown }
178c4762a1bSJed Brown 
179c4762a1bSJed Brown /*TEST
180c4762a1bSJed Brown 
181c4762a1bSJed Brown    build:
182c4762a1bSJed Brown       requires: cuda
183c4762a1bSJed Brown 
184499ce956SJunchao Zhang    testset:
185499ce956SJunchao Zhang       args: -snes_monitor_short -dm_mat_type aijcusparse -dm_vec_type cuda
186499ce956SJunchao Zhang       output_file: output/ex47cu_1.out
187c4762a1bSJed Brown       test:
188499ce956SJunchao Zhang         suffix: 1
189499ce956SJunchao Zhang         nsize:  1
190499ce956SJunchao Zhang       test:
191499ce956SJunchao Zhang         suffix: 2
192499ce956SJunchao Zhang         nsize:  2
193c4762a1bSJed Brown 
194c4762a1bSJed Brown TEST*/
195