1c5566c22SJunchao Zhang #include <Kokkos_Core.hpp> 2c5566c22SJunchao Zhang #include <petscdmda_kokkos.hpp> 3c5566c22SJunchao Zhang 4c5566c22SJunchao Zhang #include <petscdm.h> 5c5566c22SJunchao Zhang #include <petscdmda.h> 6c5566c22SJunchao Zhang #include <petscsnes.h> 7c5566c22SJunchao Zhang #include "ex55.h" 8c5566c22SJunchao Zhang 9c5566c22SJunchao Zhang using DefaultMemorySpace = Kokkos::DefaultExecutionSpace::memory_space; 10c5566c22SJunchao Zhang using ConstPetscScalarKokkosOffsetView2D = Kokkos::Experimental::OffsetView<const PetscScalar **, Kokkos::LayoutRight, DefaultMemorySpace>; 11c5566c22SJunchao Zhang using PetscScalarKokkosOffsetView2D = Kokkos::Experimental::OffsetView<PetscScalar **, Kokkos::LayoutRight, DefaultMemorySpace>; 12c5566c22SJunchao Zhang 13c5566c22SJunchao Zhang using PetscCountKokkosView = Kokkos::View<PetscCount *, DefaultMemorySpace>; 14c5566c22SJunchao Zhang using PetscIntKokkosView = Kokkos::View<PetscInt *, DefaultMemorySpace>; 15c5566c22SJunchao Zhang using PetscCountKokkosViewHost = Kokkos::View<PetscCount *, Kokkos::HostSpace>; 16c5566c22SJunchao Zhang using PetscScalarKokkosView = Kokkos::View<PetscScalar *, DefaultMemorySpace>; 1733d966b4SJunchao Zhang using Kokkos::Iterate; 1833d966b4SJunchao Zhang using Kokkos::MDRangePolicy; 19*9371c9d4SSatish Balay using Kokkos::Rank; 20c5566c22SJunchao Zhang 21*9371c9d4SSatish Balay KOKKOS_INLINE_FUNCTION PetscErrorCode MMSSolution1(AppCtx *user, const DMDACoor2d *c, PetscScalar *u) { 22c5566c22SJunchao Zhang PetscReal x = PetscRealPart(c->x), y = PetscRealPart(c->y); 23c5566c22SJunchao Zhang u[0] = x * (1 - x) * y * (1 - y); 24c5566c22SJunchao Zhang return 0; 25c5566c22SJunchao Zhang } 26c5566c22SJunchao Zhang 27*9371c9d4SSatish Balay KOKKOS_INLINE_FUNCTION PetscErrorCode MMSForcing1(PetscReal user_param, const DMDACoor2d *c, PetscScalar *f) { 28c5566c22SJunchao Zhang PetscReal x = PetscRealPart(c->x), y = PetscRealPart(c->y); 29c5566c22SJunchao Zhang f[0] = 2 * x * (1 - x) + 2 * y * (1 - y) - user_param * PetscExpReal(x * (1 - x) * y * (1 - y)); 30c5566c22SJunchao Zhang return 0; 31c5566c22SJunchao Zhang } 32c5566c22SJunchao Zhang 33*9371c9d4SSatish Balay PetscErrorCode FormFunctionLocalVec(DMDALocalInfo *info, Vec x, Vec f, AppCtx *user) { 34c5566c22SJunchao Zhang PetscReal lambda, hx, hy, hxdhy, hydhx; 35c5566c22SJunchao Zhang PetscInt xs = info->xs, ys = info->ys, xm = info->xm, ym = info->ym, mx = info->mx, my = info->my; 36c5566c22SJunchao Zhang PetscReal user_param = user->param; 37c5566c22SJunchao Zhang 38c5566c22SJunchao Zhang ConstPetscScalarKokkosOffsetView2D xv; 39c5566c22SJunchao Zhang PetscScalarKokkosOffsetView2D fv; 40c5566c22SJunchao Zhang 41c5566c22SJunchao Zhang PetscFunctionBeginUser; 42c5566c22SJunchao Zhang lambda = user->param; 43c5566c22SJunchao Zhang hx = 1.0 / (PetscReal)(info->mx - 1); 44c5566c22SJunchao Zhang hy = 1.0 / (PetscReal)(info->my - 1); 45c5566c22SJunchao Zhang hxdhy = hx / hy; 46c5566c22SJunchao Zhang hydhx = hy / hx; 47c5566c22SJunchao Zhang /* 48c5566c22SJunchao Zhang Compute function over the locally owned part of the grid 49c5566c22SJunchao Zhang */ 50c5566c22SJunchao Zhang PetscCallCXX(DMDAVecGetKokkosOffsetView(info->da, x, &xv)); 51c5566c22SJunchao Zhang PetscCallCXX(DMDAVecGetKokkosOffsetViewWrite(info->da, f, &fv)); 52c5566c22SJunchao Zhang 53*9371c9d4SSatish Balay PetscCallCXX(Kokkos::parallel_for( 54*9371c9d4SSatish Balay "FormFunctionLocalVec", MDRangePolicy<Rank<2, Iterate::Right, Iterate::Right>>({ys, xs}, {ys + ym, xs + xm}), KOKKOS_LAMBDA(PetscInt j, PetscInt i) { 55c5566c22SJunchao Zhang DMDACoor2d c; 56c5566c22SJunchao Zhang PetscScalar u, ue, uw, un, us, uxx, uyy, mms_solution, mms_forcing; 57c5566c22SJunchao Zhang 58c5566c22SJunchao Zhang if (i == 0 || j == 0 || i == mx - 1 || j == my - 1) { 59*9371c9d4SSatish Balay c.x = i * hx; 60*9371c9d4SSatish Balay c.y = j * hy; 61c5566c22SJunchao Zhang MMSSolution1(user, &c, &mms_solution); 62c5566c22SJunchao Zhang fv(j, i) = 2.0 * (hydhx + hxdhy) * (xv(j, i) - mms_solution); 63c5566c22SJunchao Zhang } else { 64c5566c22SJunchao Zhang u = xv(j, i); 65c5566c22SJunchao Zhang uw = xv(j, i - 1); 66c5566c22SJunchao Zhang ue = xv(j, i + 1); 67c5566c22SJunchao Zhang un = xv(j - 1, i); 68c5566c22SJunchao Zhang us = xv(j + 1, i); 69c5566c22SJunchao Zhang 70c5566c22SJunchao Zhang /* Enforce boundary conditions at neighboring points -- setting these values causes the Jacobian to be symmetric. */ 71*9371c9d4SSatish Balay if (i - 1 == 0) { 72*9371c9d4SSatish Balay c.x = (i - 1) * hx; 73*9371c9d4SSatish Balay c.y = j * hy; 74*9371c9d4SSatish Balay MMSSolution1(user, &c, &uw); 75*9371c9d4SSatish Balay } 76*9371c9d4SSatish Balay if (i + 1 == mx - 1) { 77*9371c9d4SSatish Balay c.x = (i + 1) * hx; 78*9371c9d4SSatish Balay c.y = j * hy; 79*9371c9d4SSatish Balay MMSSolution1(user, &c, &ue); 80*9371c9d4SSatish Balay } 81*9371c9d4SSatish Balay if (j - 1 == 0) { 82*9371c9d4SSatish Balay c.x = i * hx; 83*9371c9d4SSatish Balay c.y = (j - 1) * hy; 84*9371c9d4SSatish Balay MMSSolution1(user, &c, &un); 85*9371c9d4SSatish Balay } 86*9371c9d4SSatish Balay if (j + 1 == my - 1) { 87*9371c9d4SSatish Balay c.x = i * hx; 88*9371c9d4SSatish Balay c.y = (j + 1) * hy; 89*9371c9d4SSatish Balay MMSSolution1(user, &c, &us); 90*9371c9d4SSatish Balay } 91c5566c22SJunchao Zhang 92c5566c22SJunchao Zhang uxx = (2.0 * u - uw - ue) * hydhx; 93c5566c22SJunchao Zhang uyy = (2.0 * u - un - us) * hxdhy; 94c5566c22SJunchao Zhang mms_forcing = 0; 95*9371c9d4SSatish Balay c.x = i * hx; 96*9371c9d4SSatish Balay c.y = j * hy; 97c5566c22SJunchao Zhang MMSForcing1(user_param, &c, &mms_forcing); 98c5566c22SJunchao Zhang fv(j, i) = uxx + uyy - hx * hy * (lambda * PetscExpScalar(u) + mms_forcing); 99c5566c22SJunchao Zhang } 100c5566c22SJunchao Zhang })); 101c5566c22SJunchao Zhang 102c5566c22SJunchao Zhang PetscCallCXX(DMDAVecRestoreKokkosOffsetView(info->da, x, &xv)); 103c5566c22SJunchao Zhang PetscCallCXX(DMDAVecRestoreKokkosOffsetViewWrite(info->da, f, &fv)); 104c5566c22SJunchao Zhang 105c5566c22SJunchao Zhang PetscCall(PetscLogFlops(11.0 * info->ym * info->xm)); 106c5566c22SJunchao Zhang PetscFunctionReturn(0); 107c5566c22SJunchao Zhang } 108c5566c22SJunchao Zhang 109*9371c9d4SSatish Balay PetscErrorCode FormObjectiveLocalVec(DMDALocalInfo *info, Vec x, PetscReal *obj, AppCtx *user) { 110c5566c22SJunchao Zhang PetscInt xs = info->xs, ys = info->ys, xm = info->xm, ym = info->ym, mx = info->mx, my = info->my; 111c5566c22SJunchao Zhang PetscReal lambda, hx, hy, hxdhy, hydhx, sc, lobj = 0; 112c5566c22SJunchao Zhang MPI_Comm comm; 113c5566c22SJunchao Zhang 114c5566c22SJunchao Zhang ConstPetscScalarKokkosOffsetView2D xv; 115c5566c22SJunchao Zhang 116c5566c22SJunchao Zhang PetscFunctionBeginUser; 117c5566c22SJunchao Zhang *obj = 0; 118c5566c22SJunchao Zhang PetscCall(PetscObjectGetComm((PetscObject)info->da, &comm)); 119c5566c22SJunchao Zhang lambda = user->param; 120c5566c22SJunchao Zhang hx = 1.0 / (PetscReal)(mx - 1); 121c5566c22SJunchao Zhang hy = 1.0 / (PetscReal)(my - 1); 122c5566c22SJunchao Zhang sc = hx * hy * lambda; 123c5566c22SJunchao Zhang hxdhy = hx / hy; 124c5566c22SJunchao Zhang hydhx = hy / hx; 125c5566c22SJunchao Zhang /* 126c5566c22SJunchao Zhang Compute function over the locally owned part of the grid 127c5566c22SJunchao Zhang */ 128c5566c22SJunchao Zhang PetscCallCXX(DMDAVecGetKokkosOffsetView(info->da, x, &xv)); 129c5566c22SJunchao Zhang 130*9371c9d4SSatish Balay PetscCallCXX(Kokkos::parallel_reduce( 131*9371c9d4SSatish Balay "FormObjectiveLocalVec", MDRangePolicy<Rank<2, Iterate::Right, Iterate::Right>>({ys, xs}, {ys + ym, xs + xm}), 132*9371c9d4SSatish Balay KOKKOS_LAMBDA(PetscInt j, PetscInt i, PetscReal & update) { 133c5566c22SJunchao Zhang PetscScalar u, ue, uw, un, us, uxux, uyuy; 134c5566c22SJunchao Zhang if (i == 0 || j == 0 || i == mx - 1 || j == my - 1) { 135c5566c22SJunchao Zhang update += PetscRealPart((hydhx + hxdhy) * xv(j, i) * xv(j, i)); 136c5566c22SJunchao Zhang } else { 137c5566c22SJunchao Zhang u = xv(j, i); 138c5566c22SJunchao Zhang uw = xv(j, i - 1); 139c5566c22SJunchao Zhang ue = xv(j, i + 1); 140c5566c22SJunchao Zhang un = xv(j - 1, i); 141c5566c22SJunchao Zhang us = xv(j + 1, i); 142c5566c22SJunchao Zhang 143c5566c22SJunchao Zhang if (i - 1 == 0) uw = 0.; 144c5566c22SJunchao Zhang if (i + 1 == mx - 1) ue = 0.; 145c5566c22SJunchao Zhang if (j - 1 == 0) un = 0.; 146c5566c22SJunchao Zhang if (j + 1 == my - 1) us = 0.; 147c5566c22SJunchao Zhang 148c5566c22SJunchao Zhang /* F[u] = 1/2\int_{\omega}\nabla^2u(x)*u(x)*dx */ 149c5566c22SJunchao Zhang 150c5566c22SJunchao Zhang uxux = u * (2. * u - ue - uw) * hydhx; 151c5566c22SJunchao Zhang uyuy = u * (2. * u - un - us) * hxdhy; 152c5566c22SJunchao Zhang 153c5566c22SJunchao Zhang update += PetscRealPart(0.5 * (uxux + uyuy) - sc * PetscExpScalar(u)); 154c5566c22SJunchao Zhang } 155*9371c9d4SSatish Balay }, 156*9371c9d4SSatish Balay lobj)); 157c5566c22SJunchao Zhang 158c5566c22SJunchao Zhang PetscCallCXX(DMDAVecRestoreKokkosOffsetView(info->da, x, &xv)); 159c5566c22SJunchao Zhang PetscCall(PetscLogFlops(12.0 * info->ym * info->xm)); 160c5566c22SJunchao Zhang PetscCallMPI(MPI_Allreduce(&lobj, obj, 1, MPIU_REAL, MPIU_SUM, comm)); 161c5566c22SJunchao Zhang PetscFunctionReturn(0); 162c5566c22SJunchao Zhang } 163c5566c22SJunchao Zhang 164*9371c9d4SSatish Balay PetscErrorCode FormJacobianLocalVec(DMDALocalInfo *info, Vec x, Mat jac, Mat jacpre, AppCtx *user) { 165c5566c22SJunchao Zhang PetscInt i, j; 166c5566c22SJunchao Zhang PetscInt xs = info->xs, ys = info->ys, xm = info->xm, ym = info->ym, mx = info->mx, my = info->my; 167c5566c22SJunchao Zhang MatStencil col[5], row; 168c5566c22SJunchao Zhang PetscScalar lambda, hx, hy, hxdhy, hydhx, sc; 169c5566c22SJunchao Zhang DM coordDA; 170c5566c22SJunchao Zhang Vec coordinates; 171c5566c22SJunchao Zhang DMDACoor2d **coords; 172c5566c22SJunchao Zhang 173c5566c22SJunchao Zhang PetscFunctionBeginUser; 174c5566c22SJunchao Zhang lambda = user->param; 175c5566c22SJunchao Zhang /* Extract coordinates */ 176c5566c22SJunchao Zhang PetscCall(DMGetCoordinateDM(info->da, &coordDA)); 177c5566c22SJunchao Zhang PetscCall(DMGetCoordinates(info->da, &coordinates)); 178c5566c22SJunchao Zhang 179c5566c22SJunchao Zhang PetscCall(DMDAVecGetArray(coordDA, coordinates, &coords)); 180c5566c22SJunchao Zhang hx = xm > 1 ? PetscRealPart(coords[ys][xs + 1].x) - PetscRealPart(coords[ys][xs].x) : 1.0; 181c5566c22SJunchao Zhang hy = ym > 1 ? PetscRealPart(coords[ys + 1][xs].y) - PetscRealPart(coords[ys][xs].y) : 1.0; 182c5566c22SJunchao Zhang PetscCall(DMDAVecRestoreArray(coordDA, coordinates, &coords)); 183c5566c22SJunchao Zhang 184c5566c22SJunchao Zhang hxdhy = hx / hy; 185c5566c22SJunchao Zhang hydhx = hy / hx; 186c5566c22SJunchao Zhang sc = hx * hy * lambda; 187c5566c22SJunchao Zhang 188c5566c22SJunchao Zhang /* ----------------------------------------- */ 189c5566c22SJunchao Zhang /* MatSetPreallocationCOO() */ 190c5566c22SJunchao Zhang /* ----------------------------------------- */ 191c5566c22SJunchao Zhang PetscCount ncoo = ((PetscCount)xm) * ((PetscCount)ym) * 5; 192c5566c22SJunchao Zhang PetscInt *coo_i, *coo_j, *ip, *jp; 193c5566c22SJunchao Zhang PetscCall(PetscMalloc2(ncoo, &coo_i, ncoo, &coo_j)); /* 5-point stencil such that each row has at most 5 nonzeros */ 194c5566c22SJunchao Zhang 195c5566c22SJunchao Zhang ip = coo_i; 196c5566c22SJunchao Zhang jp = coo_j; 197c5566c22SJunchao Zhang for (j = ys; j < ys + ym; j++) { 198c5566c22SJunchao Zhang for (i = xs; i < xs + xm; i++) { 199*9371c9d4SSatish Balay row.j = j; 200*9371c9d4SSatish Balay row.i = i; 201c5566c22SJunchao Zhang /* Initialize neighbors with negative indices */ 202c5566c22SJunchao Zhang col[0].j = col[1].j = col[3].j = col[4].j = -1; 203c5566c22SJunchao Zhang /* boundary points */ 204c5566c22SJunchao Zhang if (i == 0 || j == 0 || i == mx - 1 || j == my - 1) { 205c5566c22SJunchao Zhang col[2].j = row.j; 206c5566c22SJunchao Zhang col[2].i = row.i; 207c5566c22SJunchao Zhang } else { 208c5566c22SJunchao Zhang /* interior grid points */ 209c5566c22SJunchao Zhang if (j - 1 != 0) { 210c5566c22SJunchao Zhang col[0].j = j - 1; 211c5566c22SJunchao Zhang col[0].i = i; 212c5566c22SJunchao Zhang } 213c5566c22SJunchao Zhang 214c5566c22SJunchao Zhang if (i - 1 != 0) { 215c5566c22SJunchao Zhang col[1].j = j; 216c5566c22SJunchao Zhang col[1].i = i - 1; 217c5566c22SJunchao Zhang } 218c5566c22SJunchao Zhang 219c5566c22SJunchao Zhang col[2].j = row.j; 220c5566c22SJunchao Zhang col[2].i = row.i; 221c5566c22SJunchao Zhang 222c5566c22SJunchao Zhang if (i + 1 != mx - 1) { 223c5566c22SJunchao Zhang col[3].j = j; 224c5566c22SJunchao Zhang col[3].i = i + 1; 225c5566c22SJunchao Zhang } 226c5566c22SJunchao Zhang 227c5566c22SJunchao Zhang if (j + 1 != mx - 1) { 228c5566c22SJunchao Zhang col[4].j = j + 1; 229c5566c22SJunchao Zhang col[4].i = i; 230c5566c22SJunchao Zhang } 231c5566c22SJunchao Zhang } 232c5566c22SJunchao Zhang PetscCall(DMDAMapMatStencilToGlobal(info->da, 5, col, jp)); 233c5566c22SJunchao Zhang for (PetscInt k = 0; k < 5; k++) ip[k] = jp[2]; 234c5566c22SJunchao Zhang ip += 5; 235c5566c22SJunchao Zhang jp += 5; 236c5566c22SJunchao Zhang } 237c5566c22SJunchao Zhang } 238c5566c22SJunchao Zhang 239c5566c22SJunchao Zhang PetscCall(MatSetPreallocationCOO(jacpre, ncoo, coo_i, coo_j)); 240c5566c22SJunchao Zhang PetscCall(PetscFree2(coo_i, coo_j)); 241c5566c22SJunchao Zhang 242c5566c22SJunchao Zhang /* ----------------------------------------- */ 243c5566c22SJunchao Zhang /* MatSetValuesCOO() */ 244c5566c22SJunchao Zhang /* ----------------------------------------- */ 245c5566c22SJunchao Zhang PetscScalarKokkosView coo_v("coo_v", ncoo); 246c5566c22SJunchao Zhang ConstPetscScalarKokkosOffsetView2D xv; 247c5566c22SJunchao Zhang 248c5566c22SJunchao Zhang PetscCallCXX(DMDAVecGetKokkosOffsetView(info->da, x, &xv)); 249c5566c22SJunchao Zhang 250*9371c9d4SSatish Balay PetscCallCXX(Kokkos::parallel_for( 251*9371c9d4SSatish Balay "FormFunctionLocalVec", MDRangePolicy<Rank<2, Iterate::Right, Iterate::Right>>({ys, xs}, {ys + ym, xs + xm}), KOKKOS_LAMBDA(PetscCount j, PetscCount i) { 252c5566c22SJunchao Zhang PetscInt p = ((j - ys) * xm + (i - xs)) * 5; 253c5566c22SJunchao Zhang /* boundary points */ 254c5566c22SJunchao Zhang if (i == 0 || j == 0 || i == mx - 1 || j == my - 1) { 255c5566c22SJunchao Zhang coo_v(p + 2) = 2.0 * (hydhx + hxdhy); 256c5566c22SJunchao Zhang } else { 257c5566c22SJunchao Zhang /* interior grid points */ 258*9371c9d4SSatish Balay if (j - 1 != 0) { coo_v(p + 0) = -hxdhy; } 259*9371c9d4SSatish Balay if (i - 1 != 0) { coo_v(p + 1) = -hydhx; } 260c5566c22SJunchao Zhang 261c5566c22SJunchao Zhang coo_v(p + 2) = 2.0 * (hydhx + hxdhy) - sc * PetscExpScalar(xv(j, i)); 262c5566c22SJunchao Zhang 263*9371c9d4SSatish Balay if (i + 1 != mx - 1) { coo_v(p + 3) = -hydhx; } 264*9371c9d4SSatish Balay if (j + 1 != mx - 1) { coo_v(p + 4) = -hxdhy; } 265c5566c22SJunchao Zhang } 266c5566c22SJunchao Zhang })); 267c5566c22SJunchao Zhang PetscCall(MatSetValuesCOO(jacpre, coo_v.data(), INSERT_VALUES)); 268c5566c22SJunchao Zhang PetscCallCXX(DMDAVecRestoreKokkosOffsetView(info->da, x, &xv)); 269c5566c22SJunchao Zhang PetscFunctionReturn(0); 270c5566c22SJunchao Zhang } 271