1c4762a1bSJed Brown static char help[] = "Solves -Laplacian u - exp(u) = 0, 0 < x < 1 using GPU\n\n"; 2c4762a1bSJed Brown /* 3c4762a1bSJed Brown Same as ex47.c except it also uses the GPU to evaluate the function 4c4762a1bSJed Brown */ 5c4762a1bSJed Brown 6c4762a1bSJed Brown #include <petscdm.h> 7c4762a1bSJed Brown #include <petscdmda.h> 8c4762a1bSJed Brown #include <petscsnes.h> 9c4762a1bSJed Brown 10c4762a1bSJed Brown #include <thrust/device_ptr.h> 11c4762a1bSJed Brown #include <thrust/for_each.h> 12c4762a1bSJed Brown #include <thrust/tuple.h> 13c4762a1bSJed Brown #include <thrust/iterator/constant_iterator.h> 14c4762a1bSJed Brown #include <thrust/iterator/counting_iterator.h> 15c4762a1bSJed Brown #include <thrust/iterator/zip_iterator.h> 16c4762a1bSJed Brown 17c4762a1bSJed Brown extern PetscErrorCode ComputeFunction(SNES, Vec, Vec, void *), ComputeJacobian(SNES, Vec, Mat, Mat, void *); 18c4762a1bSJed Brown PetscBool useCUDA = PETSC_FALSE; 19c4762a1bSJed Brown 20*9371c9d4SSatish Balay int main(int argc, char **argv) { 21c4762a1bSJed Brown SNES snes; 22c4762a1bSJed Brown Vec x, f; 23c4762a1bSJed Brown Mat J; 24c4762a1bSJed Brown DM da; 25c4762a1bSJed Brown char *tmp, typeName[256]; 26c4762a1bSJed Brown PetscBool flg; 27c4762a1bSJed Brown 28327415f7SBarry Smith PetscFunctionBeginUser; 299566063dSJacob Faibussowitsch PetscCall(PetscInitialize(&argc, &argv, (char *)0, help)); 309566063dSJacob Faibussowitsch PetscCall(PetscOptionsGetString(NULL, NULL, "-dm_vec_type", typeName, sizeof(typeName), &flg)); 31c4762a1bSJed Brown if (flg) { 329566063dSJacob Faibussowitsch PetscCall(PetscStrstr(typeName, "cuda", &tmp)); 33c4762a1bSJed Brown if (tmp) useCUDA = PETSC_TRUE; 34c4762a1bSJed Brown } 35c4762a1bSJed Brown 369566063dSJacob Faibussowitsch PetscCall(DMDACreate1d(PETSC_COMM_WORLD, DM_BOUNDARY_NONE, 8, 1, 1, NULL, &da)); 379566063dSJacob Faibussowitsch PetscCall(DMSetFromOptions(da)); 389566063dSJacob Faibussowitsch PetscCall(DMSetUp(da)); 39e09e07f2SBarry Smith PetscCall(DMCreateGlobalVector(da, &x)); 40e09e07f2SBarry Smith PetscCall(VecDuplicate(x, &f)); 419566063dSJacob Faibussowitsch PetscCall(DMCreateMatrix(da, &J)); 42c4762a1bSJed Brown 439566063dSJacob Faibussowitsch PetscCall(SNESCreate(PETSC_COMM_WORLD, &snes)); 449566063dSJacob Faibussowitsch PetscCall(SNESSetFunction(snes, f, ComputeFunction, da)); 459566063dSJacob Faibussowitsch PetscCall(SNESSetJacobian(snes, J, J, ComputeJacobian, da)); 469566063dSJacob Faibussowitsch PetscCall(SNESSetFromOptions(snes)); 479566063dSJacob Faibussowitsch PetscCall(SNESSolve(snes, NULL, x)); 48c4762a1bSJed Brown 499566063dSJacob Faibussowitsch PetscCall(MatDestroy(&J)); 509566063dSJacob Faibussowitsch PetscCall(VecDestroy(&x)); 519566063dSJacob Faibussowitsch PetscCall(VecDestroy(&f)); 529566063dSJacob Faibussowitsch PetscCall(SNESDestroy(&snes)); 539566063dSJacob Faibussowitsch PetscCall(DMDestroy(&da)); 54c4762a1bSJed Brown 559566063dSJacob Faibussowitsch PetscCall(PetscFinalize()); 56b122ec5aSJacob Faibussowitsch return 0; 57c4762a1bSJed Brown } 58c4762a1bSJed Brown 59*9371c9d4SSatish Balay struct ApplyStencil { 60c4762a1bSJed Brown template <typename Tuple> 61*9371c9d4SSatish Balay __host__ __device__ void operator()(Tuple t) { 62c4762a1bSJed Brown /* f = (2*x_i - x_(i+1) - x_(i-1))/h - h*exp(x_i) */ 63c4762a1bSJed Brown thrust::get<0>(t) = 1; 64c4762a1bSJed Brown if ((thrust::get<4>(t) > 0) && (thrust::get<4>(t) < thrust::get<5>(t) - 1)) { 6588e3df94SJunchao Zhang thrust::get<0>(t) = (((PetscScalar)2.0) * thrust::get<1>(t) - thrust::get<2>(t) - thrust::get<3>(t)) / (thrust::get<6>(t)) - (thrust::get<6>(t)) * exp(thrust::get<1>(t)); 66c4762a1bSJed Brown } else if (thrust::get<4>(t) == 0) { 67c4762a1bSJed Brown thrust::get<0>(t) = thrust::get<1>(t) / (thrust::get<6>(t)); 68c4762a1bSJed Brown } else if (thrust::get<4>(t) == thrust::get<5>(t) - 1) { 69c4762a1bSJed Brown thrust::get<0>(t) = thrust::get<1>(t) / (thrust::get<6>(t)); 70c4762a1bSJed Brown } 71c4762a1bSJed Brown } 72c4762a1bSJed Brown }; 73c4762a1bSJed Brown 74*9371c9d4SSatish Balay PetscErrorCode ComputeFunction(SNES snes, Vec x, Vec f, void *ctx) { 75c4762a1bSJed Brown PetscInt i, Mx, xs, xm, xstartshift, xendshift, fstart, lsize; 76c4762a1bSJed Brown PetscScalar *xx, *ff, hx; 77c4762a1bSJed Brown DM da = (DM)ctx; 78c4762a1bSJed Brown Vec xlocal; 79c4762a1bSJed Brown PetscMPIInt rank, size; 80c4762a1bSJed Brown MPI_Comm comm; 81c4762a1bSJed Brown PetscScalar const *xarray; 82c4762a1bSJed Brown PetscScalar *farray; 83c4762a1bSJed Brown 849566063dSJacob Faibussowitsch PetscCall(DMDAGetInfo(da, PETSC_IGNORE, &Mx, PETSC_IGNORE, PETSC_IGNORE, PETSC_IGNORE, PETSC_IGNORE, PETSC_IGNORE, PETSC_IGNORE, PETSC_IGNORE, PETSC_IGNORE, PETSC_IGNORE, PETSC_IGNORE, PETSC_IGNORE)); 85c4762a1bSJed Brown hx = 1.0 / (PetscReal)(Mx - 1); 869566063dSJacob Faibussowitsch PetscCall(DMGetLocalVector(da, &xlocal)); 879566063dSJacob Faibussowitsch PetscCall(DMGlobalToLocalBegin(da, x, INSERT_VALUES, xlocal)); 889566063dSJacob Faibussowitsch PetscCall(DMGlobalToLocalEnd(da, x, INSERT_VALUES, xlocal)); 89c4762a1bSJed Brown 90c4762a1bSJed Brown if (useCUDA) { 919566063dSJacob Faibussowitsch PetscCall(PetscObjectGetComm((PetscObject)da, &comm)); 929566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_size(comm, &size)); 939566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_rank(comm, &rank)); 9406cf5b03SBarry Smith PetscCall(VecCUDAGetArrayRead(xlocal, &xarray)); 9506cf5b03SBarry Smith PetscCall(VecCUDAGetArrayWrite(f, &farray)); 96c4762a1bSJed Brown if (rank) xstartshift = 1; 97c4762a1bSJed Brown else xstartshift = 0; 98c4762a1bSJed Brown if (rank != size - 1) xendshift = 1; 99c4762a1bSJed Brown else xendshift = 0; 1009566063dSJacob Faibussowitsch PetscCall(VecGetOwnershipRange(f, &fstart, NULL)); 1019566063dSJacob Faibussowitsch PetscCall(VecGetLocalSize(x, &lsize)); 10211486bccSBarry Smith // clang-format off 103c4762a1bSJed Brown try { 104c4762a1bSJed Brown thrust::for_each( 105c4762a1bSJed Brown thrust::make_zip_iterator( 106c4762a1bSJed Brown thrust::make_tuple( 107c4762a1bSJed Brown thrust::device_ptr<PetscScalar>(farray), 108c4762a1bSJed Brown thrust::device_ptr<const PetscScalar>(xarray + xstartshift), 109c4762a1bSJed Brown thrust::device_ptr<const PetscScalar>(xarray + xstartshift + 1), 110c4762a1bSJed Brown thrust::device_ptr<const PetscScalar>(xarray + xstartshift - 1), 111c4762a1bSJed Brown thrust::counting_iterator<int>(fstart), 112c4762a1bSJed Brown thrust::constant_iterator<int>(Mx), 113c4762a1bSJed Brown thrust::constant_iterator<PetscScalar>(hx))), 114c4762a1bSJed Brown thrust::make_zip_iterator( 115c4762a1bSJed Brown thrust::make_tuple( 116c4762a1bSJed Brown thrust::device_ptr<PetscScalar>(farray + lsize), 117c4762a1bSJed Brown thrust::device_ptr<const PetscScalar>(xarray + lsize - xendshift), 118c4762a1bSJed Brown thrust::device_ptr<const PetscScalar>(xarray + lsize - xendshift + 1), 119c4762a1bSJed Brown thrust::device_ptr<const PetscScalar>(xarray + lsize - xendshift - 1), 120c4762a1bSJed Brown thrust::counting_iterator<int>(fstart) + lsize, 121c4762a1bSJed Brown thrust::constant_iterator<int>(Mx), 122c4762a1bSJed Brown thrust::constant_iterator<PetscScalar>(hx))), 123c4762a1bSJed Brown ApplyStencil()); 124c4762a1bSJed Brown } 12511486bccSBarry Smith // clang-format on 126c4762a1bSJed Brown catch (char *all) { 1279566063dSJacob Faibussowitsch PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Thrust is not working\n")); 128c4762a1bSJed Brown } 1299566063dSJacob Faibussowitsch PetscCall(VecCUDARestoreArrayRead(xlocal, &xarray)); 1309566063dSJacob Faibussowitsch PetscCall(VecCUDARestoreArrayWrite(f, &farray)); 131c4762a1bSJed Brown } else { 1329566063dSJacob Faibussowitsch PetscCall(DMDAVecGetArray(da, xlocal, &xx)); 1339566063dSJacob Faibussowitsch PetscCall(DMDAVecGetArray(da, f, &ff)); 1349566063dSJacob Faibussowitsch PetscCall(DMDAGetCorners(da, &xs, NULL, NULL, &xm, NULL, NULL)); 135c4762a1bSJed Brown 136c4762a1bSJed Brown for (i = xs; i < xs + xm; i++) { 137c4762a1bSJed Brown if (i == 0 || i == Mx - 1) ff[i] = xx[i] / hx; 138c4762a1bSJed Brown else ff[i] = (2.0 * xx[i] - xx[i - 1] - xx[i + 1]) / hx - hx * PetscExpScalar(xx[i]); 139c4762a1bSJed Brown } 1409566063dSJacob Faibussowitsch PetscCall(DMDAVecRestoreArray(da, xlocal, &xx)); 1419566063dSJacob Faibussowitsch PetscCall(DMDAVecRestoreArray(da, f, &ff)); 142c4762a1bSJed Brown } 1439566063dSJacob Faibussowitsch PetscCall(DMRestoreLocalVector(da, &xlocal)); 144c4762a1bSJed Brown return 0; 145c4762a1bSJed Brown } 146*9371c9d4SSatish Balay PetscErrorCode ComputeJacobian(SNES snes, Vec x, Mat J, Mat B, void *ctx) { 147c4762a1bSJed Brown DM da = (DM)ctx; 148c4762a1bSJed Brown PetscInt i, Mx, xm, xs; 149c4762a1bSJed Brown PetscScalar hx, *xx; 150c4762a1bSJed Brown Vec xlocal; 151c4762a1bSJed Brown 1529566063dSJacob Faibussowitsch PetscCall(DMDAGetInfo(da, PETSC_IGNORE, &Mx, PETSC_IGNORE, PETSC_IGNORE, PETSC_IGNORE, PETSC_IGNORE, PETSC_IGNORE, PETSC_IGNORE, PETSC_IGNORE, PETSC_IGNORE, PETSC_IGNORE, PETSC_IGNORE, PETSC_IGNORE)); 153c4762a1bSJed Brown hx = 1.0 / (PetscReal)(Mx - 1); 1549566063dSJacob Faibussowitsch PetscCall(DMGetLocalVector(da, &xlocal)); 1559566063dSJacob Faibussowitsch PetscCall(DMGlobalToLocalBegin(da, x, INSERT_VALUES, xlocal)); 1569566063dSJacob Faibussowitsch PetscCall(DMGlobalToLocalEnd(da, x, INSERT_VALUES, xlocal)); 1579566063dSJacob Faibussowitsch PetscCall(DMDAVecGetArray(da, xlocal, &xx)); 1589566063dSJacob Faibussowitsch PetscCall(DMDAGetCorners(da, &xs, NULL, NULL, &xm, NULL, NULL)); 159c4762a1bSJed Brown 160c4762a1bSJed Brown for (i = xs; i < xs + xm; i++) { 161c4762a1bSJed Brown if (i == 0 || i == Mx - 1) { 1629566063dSJacob Faibussowitsch PetscCall(MatSetValue(J, i, i, 1.0 / hx, INSERT_VALUES)); 163c4762a1bSJed Brown } else { 1649566063dSJacob Faibussowitsch PetscCall(MatSetValue(J, i, i - 1, -1.0 / hx, INSERT_VALUES)); 1659566063dSJacob Faibussowitsch PetscCall(MatSetValue(J, i, i, 2.0 / hx - hx * PetscExpScalar(xx[i]), INSERT_VALUES)); 1669566063dSJacob Faibussowitsch PetscCall(MatSetValue(J, i, i + 1, -1.0 / hx, INSERT_VALUES)); 167c4762a1bSJed Brown } 168c4762a1bSJed Brown } 1699566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY)); 1709566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY)); 1719566063dSJacob Faibussowitsch PetscCall(DMDAVecRestoreArray(da, xlocal, &xx)); 1729566063dSJacob Faibussowitsch PetscCall(DMRestoreLocalVector(da, &xlocal)); 173c4762a1bSJed Brown return 0; 174c4762a1bSJed Brown } 175c4762a1bSJed Brown 176c4762a1bSJed Brown /*TEST 177c4762a1bSJed Brown 178c4762a1bSJed Brown build: 179c4762a1bSJed Brown requires: cuda 180c4762a1bSJed Brown 181499ce956SJunchao Zhang testset: 182499ce956SJunchao Zhang args: -snes_monitor_short -dm_mat_type aijcusparse -dm_vec_type cuda 183499ce956SJunchao Zhang output_file: output/ex47cu_1.out 184c4762a1bSJed Brown test: 185499ce956SJunchao Zhang suffix: 1 186499ce956SJunchao Zhang nsize: 1 187499ce956SJunchao Zhang test: 188499ce956SJunchao Zhang suffix: 2 189499ce956SJunchao Zhang nsize: 2 190c4762a1bSJed Brown 191c4762a1bSJed Brown TEST*/ 192