xref: /petsc/src/snes/tutorials/ex47cu.cu (revision c4762a1b19cd2af06abeed90e8f9d34fb975dd94)
1*c4762a1bSJed Brown static char help[] = "Solves -Laplacian u - exp(u) = 0,  0 < x < 1 using GPU\n\n";
2*c4762a1bSJed Brown /*
3*c4762a1bSJed Brown    Same as ex47.c except it also uses the GPU to evaluate the function
4*c4762a1bSJed Brown */
5*c4762a1bSJed Brown 
6*c4762a1bSJed Brown #include <petscdm.h>
7*c4762a1bSJed Brown #include <petscdmda.h>
8*c4762a1bSJed Brown #include <petscsnes.h>
9*c4762a1bSJed Brown 
10*c4762a1bSJed Brown #include <thrust/device_ptr.h>
11*c4762a1bSJed Brown #include <thrust/for_each.h>
12*c4762a1bSJed Brown #include <thrust/tuple.h>
13*c4762a1bSJed Brown #include <thrust/iterator/constant_iterator.h>
14*c4762a1bSJed Brown #include <thrust/iterator/counting_iterator.h>
15*c4762a1bSJed Brown #include <thrust/iterator/zip_iterator.h>
16*c4762a1bSJed Brown 
17*c4762a1bSJed Brown extern PetscErrorCode ComputeFunction(SNES,Vec,Vec,void*), ComputeJacobian(SNES,Vec,Mat,Mat,void*);
18*c4762a1bSJed Brown PetscBool useCUDA = PETSC_FALSE;
19*c4762a1bSJed Brown 
20*c4762a1bSJed Brown int main(int argc,char **argv)
21*c4762a1bSJed Brown {
22*c4762a1bSJed Brown   SNES           snes;
23*c4762a1bSJed Brown   Vec            x,f;
24*c4762a1bSJed Brown   Mat            J;
25*c4762a1bSJed Brown   DM             da;
26*c4762a1bSJed Brown   PetscErrorCode ierr;
27*c4762a1bSJed Brown   char           *tmp,typeName[256];
28*c4762a1bSJed Brown   PetscBool      flg;
29*c4762a1bSJed Brown 
30*c4762a1bSJed Brown   ierr = PetscInitialize(&argc,&argv,(char*)0,help);if (ierr) return ierr;
31*c4762a1bSJed Brown   ierr = PetscOptionsGetString(NULL,NULL,"-dm_vec_type",typeName,256,&flg);CHKERRQ(ierr);
32*c4762a1bSJed Brown   if (flg) {
33*c4762a1bSJed Brown     ierr = PetscStrstr(typeName,"cuda",&tmp);CHKERRQ(ierr);
34*c4762a1bSJed Brown     if (tmp) useCUDA = PETSC_TRUE;
35*c4762a1bSJed Brown   }
36*c4762a1bSJed Brown 
37*c4762a1bSJed Brown   ierr = DMDACreate1d(PETSC_COMM_WORLD,DM_BOUNDARY_NONE,8,1,1,NULL,&da);CHKERRQ(ierr);
38*c4762a1bSJed Brown   ierr = DMSetFromOptions(da);CHKERRQ(ierr);
39*c4762a1bSJed Brown   ierr = DMSetUp(da);CHKERRQ(ierr);
40*c4762a1bSJed Brown   ierr = DMCreateGlobalVector(da,&x); VecDuplicate(x,&f);CHKERRQ(ierr);
41*c4762a1bSJed Brown   ierr = DMSetMatType(da,MATAIJ);CHKERRQ(ierr);
42*c4762a1bSJed Brown   ierr = DMCreateMatrix(da,&J);CHKERRQ(ierr);
43*c4762a1bSJed Brown 
44*c4762a1bSJed Brown   ierr = SNESCreate(PETSC_COMM_WORLD,&snes);CHKERRQ(ierr);
45*c4762a1bSJed Brown   ierr = SNESSetFunction(snes,f,ComputeFunction,da);CHKERRQ(ierr);
46*c4762a1bSJed Brown   ierr = SNESSetJacobian(snes,J,J,ComputeJacobian,da);CHKERRQ(ierr);
47*c4762a1bSJed Brown   ierr = SNESSetFromOptions(snes);CHKERRQ(ierr);
48*c4762a1bSJed Brown   ierr = SNESSolve(snes,NULL,x);CHKERRQ(ierr);
49*c4762a1bSJed Brown 
50*c4762a1bSJed Brown   ierr = MatDestroy(&J);CHKERRQ(ierr);
51*c4762a1bSJed Brown   ierr = VecDestroy(&x);CHKERRQ(ierr);
52*c4762a1bSJed Brown   ierr = VecDestroy(&f);CHKERRQ(ierr);
53*c4762a1bSJed Brown   ierr = SNESDestroy(&snes);CHKERRQ(ierr);
54*c4762a1bSJed Brown   ierr = DMDestroy(&da);CHKERRQ(ierr);
55*c4762a1bSJed Brown 
56*c4762a1bSJed Brown   ierr = PetscFinalize();
57*c4762a1bSJed Brown   return ierr;
58*c4762a1bSJed Brown }
59*c4762a1bSJed Brown 
60*c4762a1bSJed Brown struct ApplyStencil
61*c4762a1bSJed Brown {
62*c4762a1bSJed Brown   template <typename Tuple>
63*c4762a1bSJed Brown   __host__ __device__
64*c4762a1bSJed Brown   void operator()(Tuple t)
65*c4762a1bSJed Brown   {
66*c4762a1bSJed Brown     /* f = (2*x_i - x_(i+1) - x_(i-1))/h - h*exp(x_i) */
67*c4762a1bSJed Brown     thrust::get<0>(t) = 1;
68*c4762a1bSJed Brown     if ((thrust::get<4>(t) > 0) && (thrust::get<4>(t) < thrust::get<5>(t)-1)) {
69*c4762a1bSJed Brown       thrust::get<0>(t) = (2.0*thrust::get<1>(t) - thrust::get<2>(t) - thrust::get<3>(t)) / (thrust::get<6>(t)) - (thrust::get<6>(t))*exp(thrust::get<1>(t));
70*c4762a1bSJed Brown     } else if (thrust::get<4>(t) == 0) {
71*c4762a1bSJed Brown       thrust::get<0>(t) = thrust::get<1>(t) / (thrust::get<6>(t));
72*c4762a1bSJed Brown     } else if (thrust::get<4>(t) == thrust::get<5>(t)-1) {
73*c4762a1bSJed Brown       thrust::get<0>(t) = thrust::get<1>(t) / (thrust::get<6>(t));
74*c4762a1bSJed Brown     }
75*c4762a1bSJed Brown   }
76*c4762a1bSJed Brown };
77*c4762a1bSJed Brown 
78*c4762a1bSJed Brown PetscErrorCode ComputeFunction(SNES snes,Vec x,Vec f,void *ctx)
79*c4762a1bSJed Brown {
80*c4762a1bSJed Brown   PetscInt          i,Mx,xs,xm,xstartshift,xendshift,fstart,lsize;
81*c4762a1bSJed Brown   PetscScalar       *xx,*ff,hx;
82*c4762a1bSJed Brown   DM                da = (DM) ctx;
83*c4762a1bSJed Brown   Vec               xlocal;
84*c4762a1bSJed Brown   PetscErrorCode    ierr;
85*c4762a1bSJed Brown   PetscMPIInt       rank,size;
86*c4762a1bSJed Brown   MPI_Comm          comm;
87*c4762a1bSJed Brown   PetscScalar const *xarray;
88*c4762a1bSJed Brown   PetscScalar       *farray;
89*c4762a1bSJed Brown 
90*c4762a1bSJed Brown   ierr = DMDAGetInfo(da,PETSC_IGNORE,&Mx,PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE);CHKERRQ(ierr);
91*c4762a1bSJed Brown   hx   = 1.0/(PetscReal)(Mx-1);
92*c4762a1bSJed Brown   ierr = DMGetLocalVector(da,&xlocal);CHKERRQ(ierr);
93*c4762a1bSJed Brown   ierr = DMGlobalToLocalBegin(da,x,INSERT_VALUES,xlocal);CHKERRQ(ierr);
94*c4762a1bSJed Brown   ierr = DMGlobalToLocalEnd(da,x,INSERT_VALUES,xlocal);CHKERRQ(ierr);
95*c4762a1bSJed Brown 
96*c4762a1bSJed Brown   if (useCUDA) {
97*c4762a1bSJed Brown     ierr = VecCUDAGetArrayRead(xlocal,&xarray);CHKERRQ(ierr);
98*c4762a1bSJed Brown     ierr = VecCUDAGetArrayWrite(f,&farray);CHKERRQ(ierr);
99*c4762a1bSJed Brown     ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
100*c4762a1bSJed Brown     ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
101*c4762a1bSJed Brown     ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
102*c4762a1bSJed Brown     if (rank) xstartshift = 1;
103*c4762a1bSJed Brown     else xstartshift = 0;
104*c4762a1bSJed Brown     if (rank != size-1) xendshift = 1;
105*c4762a1bSJed Brown     else xendshift = 0;
106*c4762a1bSJed Brown     ierr = VecGetOwnershipRange(f,&fstart,NULL);CHKERRQ(ierr);
107*c4762a1bSJed Brown     ierr = VecGetLocalSize(x,&lsize);CHKERRQ(ierr);
108*c4762a1bSJed Brown     try {
109*c4762a1bSJed Brown       thrust::for_each(
110*c4762a1bSJed Brown         thrust::make_zip_iterator(
111*c4762a1bSJed Brown           thrust::make_tuple(
112*c4762a1bSJed Brown             thrust::device_ptr<PetscScalar>(farray),
113*c4762a1bSJed Brown             thrust::device_ptr<const PetscScalar>(xarray + xstartshift),
114*c4762a1bSJed Brown             thrust::device_ptr<const PetscScalar>(xarray + xstartshift + 1),
115*c4762a1bSJed Brown             thrust::device_ptr<const PetscScalar>(xarray + xstartshift - 1),
116*c4762a1bSJed Brown             thrust::counting_iterator<int>(fstart),
117*c4762a1bSJed Brown             thrust::constant_iterator<int>(Mx),
118*c4762a1bSJed Brown             thrust::constant_iterator<PetscScalar>(hx))),
119*c4762a1bSJed Brown         thrust::make_zip_iterator(
120*c4762a1bSJed Brown           thrust::make_tuple(
121*c4762a1bSJed Brown             thrust::device_ptr<PetscScalar>(farray + lsize),
122*c4762a1bSJed Brown             thrust::device_ptr<const PetscScalar>(xarray + lsize - xendshift),
123*c4762a1bSJed Brown             thrust::device_ptr<const PetscScalar>(xarray + lsize - xendshift + 1),
124*c4762a1bSJed Brown             thrust::device_ptr<const PetscScalar>(xarray + lsize - xendshift - 1),
125*c4762a1bSJed Brown             thrust::counting_iterator<int>(fstart) + lsize,
126*c4762a1bSJed Brown             thrust::constant_iterator<int>(Mx),
127*c4762a1bSJed Brown             thrust::constant_iterator<PetscScalar>(hx))),
128*c4762a1bSJed Brown         ApplyStencil());
129*c4762a1bSJed Brown     }
130*c4762a1bSJed Brown     catch (char *all) {
131*c4762a1bSJed Brown       ierr = PetscPrintf(PETSC_COMM_WORLD, "Thrust is not working\n");CHKERRQ(ierr);
132*c4762a1bSJed Brown     }
133*c4762a1bSJed Brown     ierr = VecCUDARestoreArrayRead(xlocal,&xarray);CHKERRQ(ierr);
134*c4762a1bSJed Brown     ierr = VecCUDARestoreArrayWrite(f,&farray);CHKERRQ(ierr);
135*c4762a1bSJed Brown   } else {
136*c4762a1bSJed Brown     ierr = DMDAVecGetArray(da,xlocal,&xx);CHKERRQ(ierr);
137*c4762a1bSJed Brown     ierr = DMDAVecGetArray(da,f,&ff);CHKERRQ(ierr);
138*c4762a1bSJed Brown     ierr = DMDAGetCorners(da,&xs,NULL,NULL,&xm,NULL,NULL);CHKERRQ(ierr);
139*c4762a1bSJed Brown 
140*c4762a1bSJed Brown     for (i=xs; i<xs+xm; i++) {
141*c4762a1bSJed Brown       if (i == 0 || i == Mx-1) ff[i] = xx[i]/hx;
142*c4762a1bSJed Brown       else ff[i] =  (2.0*xx[i] - xx[i-1] - xx[i+1])/hx - hx*PetscExpScalar(xx[i]);
143*c4762a1bSJed Brown     }
144*c4762a1bSJed Brown     ierr = DMDAVecRestoreArray(da,xlocal,&xx);CHKERRQ(ierr);
145*c4762a1bSJed Brown     ierr = DMDAVecRestoreArray(da,f,&ff);CHKERRQ(ierr);
146*c4762a1bSJed Brown   }
147*c4762a1bSJed Brown   ierr = DMRestoreLocalVector(da,&xlocal);CHKERRQ(ierr);
148*c4762a1bSJed Brown   //  VecView(x,0);printf("f\n");
149*c4762a1bSJed Brown   //  VecView(f,0);
150*c4762a1bSJed Brown   return 0;
151*c4762a1bSJed Brown 
152*c4762a1bSJed Brown }
153*c4762a1bSJed Brown PetscErrorCode ComputeJacobian(SNES snes,Vec x,Mat J,Mat B,void *ctx)
154*c4762a1bSJed Brown {
155*c4762a1bSJed Brown   DM             da = (DM) ctx;
156*c4762a1bSJed Brown   PetscInt       i,Mx,xm,xs;
157*c4762a1bSJed Brown   PetscScalar    hx,*xx;
158*c4762a1bSJed Brown   Vec            xlocal;
159*c4762a1bSJed Brown   PetscErrorCode ierr;
160*c4762a1bSJed Brown 
161*c4762a1bSJed Brown   ierr = DMDAGetInfo(da,PETSC_IGNORE,&Mx,PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE);CHKERRQ(ierr);
162*c4762a1bSJed Brown   hx   = 1.0/(PetscReal)(Mx-1);
163*c4762a1bSJed Brown   ierr = DMGetLocalVector(da,&xlocal);DMGlobalToLocalBegin(da,x,INSERT_VALUES,xlocal);CHKERRQ(ierr);
164*c4762a1bSJed Brown   ierr = DMGlobalToLocalEnd(da,x,INSERT_VALUES,xlocal);CHKERRQ(ierr);
165*c4762a1bSJed Brown   ierr = DMDAVecGetArray(da,xlocal,&xx);CHKERRQ(ierr);
166*c4762a1bSJed Brown   ierr = DMDAGetCorners(da,&xs,NULL,NULL,&xm,NULL,NULL);CHKERRQ(ierr);
167*c4762a1bSJed Brown 
168*c4762a1bSJed Brown   for (i=xs; i<xs+xm; i++) {
169*c4762a1bSJed Brown     if (i == 0 || i == Mx-1) {
170*c4762a1bSJed Brown       ierr = MatSetValue(J,i,i,1.0/hx,INSERT_VALUES);CHKERRQ(ierr);
171*c4762a1bSJed Brown     } else {
172*c4762a1bSJed Brown       ierr = MatSetValue(J,i,i-1,-1.0/hx,INSERT_VALUES);CHKERRQ(ierr);
173*c4762a1bSJed Brown       ierr = MatSetValue(J,i,i,2.0/hx - hx*PetscExpScalar(xx[i]),INSERT_VALUES);CHKERRQ(ierr);
174*c4762a1bSJed Brown       ierr = MatSetValue(J,i,i+1,-1.0/hx,INSERT_VALUES);CHKERRQ(ierr);
175*c4762a1bSJed Brown     }
176*c4762a1bSJed Brown   }
177*c4762a1bSJed Brown   ierr  = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
178*c4762a1bSJed Brown   ierr  = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
179*c4762a1bSJed Brown   ierr  = DMDAVecRestoreArray(da,xlocal,&xx);CHKERRQ(ierr);
180*c4762a1bSJed Brown   ierr  = DMRestoreLocalVector(da,&xlocal);CHKERRQ(ierr);
181*c4762a1bSJed Brown   return 0;
182*c4762a1bSJed Brown }
183*c4762a1bSJed Brown 
184*c4762a1bSJed Brown 
185*c4762a1bSJed Brown 
186*c4762a1bSJed Brown /*TEST
187*c4762a1bSJed Brown 
188*c4762a1bSJed Brown    build:
189*c4762a1bSJed Brown       requires: cuda
190*c4762a1bSJed Brown 
191*c4762a1bSJed Brown    test:
192*c4762a1bSJed Brown       args: -snes_monitor_short -dm_vec_type cuda
193*c4762a1bSJed Brown 
194*c4762a1bSJed Brown TEST*/
195