xref: /petsc/src/snes/tutorials/ex47cu.cu (revision 9371c9d470a9602b6d10a8bf50c9b2280a79e45a)
1c4762a1bSJed Brown static char help[] = "Solves -Laplacian u - exp(u) = 0,  0 < x < 1 using GPU\n\n";
2c4762a1bSJed Brown /*
3c4762a1bSJed Brown    Same as ex47.c except it also uses the GPU to evaluate the function
4c4762a1bSJed Brown */
5c4762a1bSJed Brown 
6c4762a1bSJed Brown #include <petscdm.h>
7c4762a1bSJed Brown #include <petscdmda.h>
8c4762a1bSJed Brown #include <petscsnes.h>
9c4762a1bSJed Brown 
10c4762a1bSJed Brown #include <thrust/device_ptr.h>
11c4762a1bSJed Brown #include <thrust/for_each.h>
12c4762a1bSJed Brown #include <thrust/tuple.h>
13c4762a1bSJed Brown #include <thrust/iterator/constant_iterator.h>
14c4762a1bSJed Brown #include <thrust/iterator/counting_iterator.h>
15c4762a1bSJed Brown #include <thrust/iterator/zip_iterator.h>
16c4762a1bSJed Brown 
17c4762a1bSJed Brown extern PetscErrorCode ComputeFunction(SNES, Vec, Vec, void *), ComputeJacobian(SNES, Vec, Mat, Mat, void *);
18c4762a1bSJed Brown PetscBool             useCUDA = PETSC_FALSE;
19c4762a1bSJed Brown 
20*9371c9d4SSatish Balay int main(int argc, char **argv) {
21c4762a1bSJed Brown   SNES      snes;
22c4762a1bSJed Brown   Vec       x, f;
23c4762a1bSJed Brown   Mat       J;
24c4762a1bSJed Brown   DM        da;
25c4762a1bSJed Brown   char     *tmp, typeName[256];
26c4762a1bSJed Brown   PetscBool flg;
27c4762a1bSJed Brown 
28327415f7SBarry Smith   PetscFunctionBeginUser;
299566063dSJacob Faibussowitsch   PetscCall(PetscInitialize(&argc, &argv, (char *)0, help));
309566063dSJacob Faibussowitsch   PetscCall(PetscOptionsGetString(NULL, NULL, "-dm_vec_type", typeName, sizeof(typeName), &flg));
31c4762a1bSJed Brown   if (flg) {
329566063dSJacob Faibussowitsch     PetscCall(PetscStrstr(typeName, "cuda", &tmp));
33c4762a1bSJed Brown     if (tmp) useCUDA = PETSC_TRUE;
34c4762a1bSJed Brown   }
35c4762a1bSJed Brown 
369566063dSJacob Faibussowitsch   PetscCall(DMDACreate1d(PETSC_COMM_WORLD, DM_BOUNDARY_NONE, 8, 1, 1, NULL, &da));
379566063dSJacob Faibussowitsch   PetscCall(DMSetFromOptions(da));
389566063dSJacob Faibussowitsch   PetscCall(DMSetUp(da));
39e09e07f2SBarry Smith   PetscCall(DMCreateGlobalVector(da, &x));
40e09e07f2SBarry Smith   PetscCall(VecDuplicate(x, &f));
419566063dSJacob Faibussowitsch   PetscCall(DMCreateMatrix(da, &J));
42c4762a1bSJed Brown 
439566063dSJacob Faibussowitsch   PetscCall(SNESCreate(PETSC_COMM_WORLD, &snes));
449566063dSJacob Faibussowitsch   PetscCall(SNESSetFunction(snes, f, ComputeFunction, da));
459566063dSJacob Faibussowitsch   PetscCall(SNESSetJacobian(snes, J, J, ComputeJacobian, da));
469566063dSJacob Faibussowitsch   PetscCall(SNESSetFromOptions(snes));
479566063dSJacob Faibussowitsch   PetscCall(SNESSolve(snes, NULL, x));
48c4762a1bSJed Brown 
499566063dSJacob Faibussowitsch   PetscCall(MatDestroy(&J));
509566063dSJacob Faibussowitsch   PetscCall(VecDestroy(&x));
519566063dSJacob Faibussowitsch   PetscCall(VecDestroy(&f));
529566063dSJacob Faibussowitsch   PetscCall(SNESDestroy(&snes));
539566063dSJacob Faibussowitsch   PetscCall(DMDestroy(&da));
54c4762a1bSJed Brown 
559566063dSJacob Faibussowitsch   PetscCall(PetscFinalize());
56b122ec5aSJacob Faibussowitsch   return 0;
57c4762a1bSJed Brown }
58c4762a1bSJed Brown 
59*9371c9d4SSatish Balay struct ApplyStencil {
60c4762a1bSJed Brown   template <typename Tuple>
61*9371c9d4SSatish Balay   __host__ __device__ void operator()(Tuple t) {
62c4762a1bSJed Brown     /* f = (2*x_i - x_(i+1) - x_(i-1))/h - h*exp(x_i) */
63c4762a1bSJed Brown     thrust::get<0>(t) = 1;
64c4762a1bSJed Brown     if ((thrust::get<4>(t) > 0) && (thrust::get<4>(t) < thrust::get<5>(t) - 1)) {
6588e3df94SJunchao Zhang       thrust::get<0>(t) = (((PetscScalar)2.0) * thrust::get<1>(t) - thrust::get<2>(t) - thrust::get<3>(t)) / (thrust::get<6>(t)) - (thrust::get<6>(t)) * exp(thrust::get<1>(t));
66c4762a1bSJed Brown     } else if (thrust::get<4>(t) == 0) {
67c4762a1bSJed Brown       thrust::get<0>(t) = thrust::get<1>(t) / (thrust::get<6>(t));
68c4762a1bSJed Brown     } else if (thrust::get<4>(t) == thrust::get<5>(t) - 1) {
69c4762a1bSJed Brown       thrust::get<0>(t) = thrust::get<1>(t) / (thrust::get<6>(t));
70c4762a1bSJed Brown     }
71c4762a1bSJed Brown   }
72c4762a1bSJed Brown };
73c4762a1bSJed Brown 
74*9371c9d4SSatish Balay PetscErrorCode ComputeFunction(SNES snes, Vec x, Vec f, void *ctx) {
75c4762a1bSJed Brown   PetscInt           i, Mx, xs, xm, xstartshift, xendshift, fstart, lsize;
76c4762a1bSJed Brown   PetscScalar       *xx, *ff, hx;
77c4762a1bSJed Brown   DM                 da = (DM)ctx;
78c4762a1bSJed Brown   Vec                xlocal;
79c4762a1bSJed Brown   PetscMPIInt        rank, size;
80c4762a1bSJed Brown   MPI_Comm           comm;
81c4762a1bSJed Brown   PetscScalar const *xarray;
82c4762a1bSJed Brown   PetscScalar       *farray;
83c4762a1bSJed Brown 
849566063dSJacob Faibussowitsch   PetscCall(DMDAGetInfo(da, PETSC_IGNORE, &Mx, PETSC_IGNORE, PETSC_IGNORE, PETSC_IGNORE, PETSC_IGNORE, PETSC_IGNORE, PETSC_IGNORE, PETSC_IGNORE, PETSC_IGNORE, PETSC_IGNORE, PETSC_IGNORE, PETSC_IGNORE));
85c4762a1bSJed Brown   hx = 1.0 / (PetscReal)(Mx - 1);
869566063dSJacob Faibussowitsch   PetscCall(DMGetLocalVector(da, &xlocal));
879566063dSJacob Faibussowitsch   PetscCall(DMGlobalToLocalBegin(da, x, INSERT_VALUES, xlocal));
889566063dSJacob Faibussowitsch   PetscCall(DMGlobalToLocalEnd(da, x, INSERT_VALUES, xlocal));
89c4762a1bSJed Brown 
90c4762a1bSJed Brown   if (useCUDA) {
919566063dSJacob Faibussowitsch     PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
929566063dSJacob Faibussowitsch     PetscCallMPI(MPI_Comm_size(comm, &size));
939566063dSJacob Faibussowitsch     PetscCallMPI(MPI_Comm_rank(comm, &rank));
9406cf5b03SBarry Smith     PetscCall(VecCUDAGetArrayRead(xlocal, &xarray));
9506cf5b03SBarry Smith     PetscCall(VecCUDAGetArrayWrite(f, &farray));
96c4762a1bSJed Brown     if (rank) xstartshift = 1;
97c4762a1bSJed Brown     else xstartshift = 0;
98c4762a1bSJed Brown     if (rank != size - 1) xendshift = 1;
99c4762a1bSJed Brown     else xendshift = 0;
1009566063dSJacob Faibussowitsch     PetscCall(VecGetOwnershipRange(f, &fstart, NULL));
1019566063dSJacob Faibussowitsch     PetscCall(VecGetLocalSize(x, &lsize));
10211486bccSBarry Smith     // clang-format off
103c4762a1bSJed Brown     try {
104c4762a1bSJed Brown       thrust::for_each(
105c4762a1bSJed Brown         thrust::make_zip_iterator(
106c4762a1bSJed Brown           thrust::make_tuple(
107c4762a1bSJed Brown             thrust::device_ptr<PetscScalar>(farray),
108c4762a1bSJed Brown             thrust::device_ptr<const PetscScalar>(xarray + xstartshift),
109c4762a1bSJed Brown             thrust::device_ptr<const PetscScalar>(xarray + xstartshift + 1),
110c4762a1bSJed Brown             thrust::device_ptr<const PetscScalar>(xarray + xstartshift - 1),
111c4762a1bSJed Brown             thrust::counting_iterator<int>(fstart),
112c4762a1bSJed Brown             thrust::constant_iterator<int>(Mx),
113c4762a1bSJed Brown             thrust::constant_iterator<PetscScalar>(hx))),
114c4762a1bSJed Brown         thrust::make_zip_iterator(
115c4762a1bSJed Brown           thrust::make_tuple(
116c4762a1bSJed Brown             thrust::device_ptr<PetscScalar>(farray + lsize),
117c4762a1bSJed Brown             thrust::device_ptr<const PetscScalar>(xarray + lsize - xendshift),
118c4762a1bSJed Brown             thrust::device_ptr<const PetscScalar>(xarray + lsize - xendshift + 1),
119c4762a1bSJed Brown             thrust::device_ptr<const PetscScalar>(xarray + lsize - xendshift - 1),
120c4762a1bSJed Brown             thrust::counting_iterator<int>(fstart) + lsize,
121c4762a1bSJed Brown             thrust::constant_iterator<int>(Mx),
122c4762a1bSJed Brown             thrust::constant_iterator<PetscScalar>(hx))),
123c4762a1bSJed Brown         ApplyStencil());
124c4762a1bSJed Brown     }
12511486bccSBarry Smith     // clang-format on
126c4762a1bSJed Brown     catch (char *all) {
1279566063dSJacob Faibussowitsch       PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Thrust is not working\n"));
128c4762a1bSJed Brown     }
1299566063dSJacob Faibussowitsch     PetscCall(VecCUDARestoreArrayRead(xlocal, &xarray));
1309566063dSJacob Faibussowitsch     PetscCall(VecCUDARestoreArrayWrite(f, &farray));
131c4762a1bSJed Brown   } else {
1329566063dSJacob Faibussowitsch     PetscCall(DMDAVecGetArray(da, xlocal, &xx));
1339566063dSJacob Faibussowitsch     PetscCall(DMDAVecGetArray(da, f, &ff));
1349566063dSJacob Faibussowitsch     PetscCall(DMDAGetCorners(da, &xs, NULL, NULL, &xm, NULL, NULL));
135c4762a1bSJed Brown 
136c4762a1bSJed Brown     for (i = xs; i < xs + xm; i++) {
137c4762a1bSJed Brown       if (i == 0 || i == Mx - 1) ff[i] = xx[i] / hx;
138c4762a1bSJed Brown       else ff[i] = (2.0 * xx[i] - xx[i - 1] - xx[i + 1]) / hx - hx * PetscExpScalar(xx[i]);
139c4762a1bSJed Brown     }
1409566063dSJacob Faibussowitsch     PetscCall(DMDAVecRestoreArray(da, xlocal, &xx));
1419566063dSJacob Faibussowitsch     PetscCall(DMDAVecRestoreArray(da, f, &ff));
142c4762a1bSJed Brown   }
1439566063dSJacob Faibussowitsch   PetscCall(DMRestoreLocalVector(da, &xlocal));
144c4762a1bSJed Brown   return 0;
145c4762a1bSJed Brown }
146*9371c9d4SSatish Balay PetscErrorCode ComputeJacobian(SNES snes, Vec x, Mat J, Mat B, void *ctx) {
147c4762a1bSJed Brown   DM          da = (DM)ctx;
148c4762a1bSJed Brown   PetscInt    i, Mx, xm, xs;
149c4762a1bSJed Brown   PetscScalar hx, *xx;
150c4762a1bSJed Brown   Vec         xlocal;
151c4762a1bSJed Brown 
1529566063dSJacob Faibussowitsch   PetscCall(DMDAGetInfo(da, PETSC_IGNORE, &Mx, PETSC_IGNORE, PETSC_IGNORE, PETSC_IGNORE, PETSC_IGNORE, PETSC_IGNORE, PETSC_IGNORE, PETSC_IGNORE, PETSC_IGNORE, PETSC_IGNORE, PETSC_IGNORE, PETSC_IGNORE));
153c4762a1bSJed Brown   hx = 1.0 / (PetscReal)(Mx - 1);
1549566063dSJacob Faibussowitsch   PetscCall(DMGetLocalVector(da, &xlocal));
1559566063dSJacob Faibussowitsch   PetscCall(DMGlobalToLocalBegin(da, x, INSERT_VALUES, xlocal));
1569566063dSJacob Faibussowitsch   PetscCall(DMGlobalToLocalEnd(da, x, INSERT_VALUES, xlocal));
1579566063dSJacob Faibussowitsch   PetscCall(DMDAVecGetArray(da, xlocal, &xx));
1589566063dSJacob Faibussowitsch   PetscCall(DMDAGetCorners(da, &xs, NULL, NULL, &xm, NULL, NULL));
159c4762a1bSJed Brown 
160c4762a1bSJed Brown   for (i = xs; i < xs + xm; i++) {
161c4762a1bSJed Brown     if (i == 0 || i == Mx - 1) {
1629566063dSJacob Faibussowitsch       PetscCall(MatSetValue(J, i, i, 1.0 / hx, INSERT_VALUES));
163c4762a1bSJed Brown     } else {
1649566063dSJacob Faibussowitsch       PetscCall(MatSetValue(J, i, i - 1, -1.0 / hx, INSERT_VALUES));
1659566063dSJacob Faibussowitsch       PetscCall(MatSetValue(J, i, i, 2.0 / hx - hx * PetscExpScalar(xx[i]), INSERT_VALUES));
1669566063dSJacob Faibussowitsch       PetscCall(MatSetValue(J, i, i + 1, -1.0 / hx, INSERT_VALUES));
167c4762a1bSJed Brown     }
168c4762a1bSJed Brown   }
1699566063dSJacob Faibussowitsch   PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
1709566063dSJacob Faibussowitsch   PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
1719566063dSJacob Faibussowitsch   PetscCall(DMDAVecRestoreArray(da, xlocal, &xx));
1729566063dSJacob Faibussowitsch   PetscCall(DMRestoreLocalVector(da, &xlocal));
173c4762a1bSJed Brown   return 0;
174c4762a1bSJed Brown }
175c4762a1bSJed Brown 
176c4762a1bSJed Brown /*TEST
177c4762a1bSJed Brown 
178c4762a1bSJed Brown    build:
179c4762a1bSJed Brown       requires: cuda
180c4762a1bSJed Brown 
181499ce956SJunchao Zhang    testset:
182499ce956SJunchao Zhang       args: -snes_monitor_short -dm_mat_type aijcusparse -dm_vec_type cuda
183499ce956SJunchao Zhang       output_file: output/ex47cu_1.out
184c4762a1bSJed Brown       test:
185499ce956SJunchao Zhang         suffix: 1
186499ce956SJunchao Zhang         nsize:  1
187499ce956SJunchao Zhang       test:
188499ce956SJunchao Zhang         suffix: 2
189499ce956SJunchao Zhang         nsize:  2
190c4762a1bSJed Brown 
191c4762a1bSJed Brown TEST*/
192