1*c0c276a7Ssdargavi static char help[] = "Testing MatCreateSeqAIJKokkosWithKokkosViews() and building Mat on the device.\n\n"; 2*c0c276a7Ssdargavi 3*c0c276a7Ssdargavi #include <petscvec_kokkos.hpp> 4*c0c276a7Ssdargavi #include <petscdevice.h> 5*c0c276a7Ssdargavi #include <petscmat.h> 6*c0c276a7Ssdargavi #include <petscmat_kokkos.hpp> 7*c0c276a7Ssdargavi #include <Kokkos_Core.hpp> 8*c0c276a7Ssdargavi #include <Kokkos_DualView.hpp> 9*c0c276a7Ssdargavi 10*c0c276a7Ssdargavi using HostMirrorMemorySpace = Kokkos::DualView<PetscScalar *>::host_mirror_space::memory_space; 11*c0c276a7Ssdargavi 12*c0c276a7Ssdargavi int main(int argc, char **argv) 13*c0c276a7Ssdargavi { 14*c0c276a7Ssdargavi Mat A, B; 15*c0c276a7Ssdargavi PetscInt i, j, column, M, N, m, n, m_ab, n_ab; 16*c0c276a7Ssdargavi PetscInt *di, *dj, *oi, *oj, nd; 17*c0c276a7Ssdargavi const PetscInt *garray; 18*c0c276a7Ssdargavi PetscInt *garray_h; 19*c0c276a7Ssdargavi PetscScalar *oa, *da; 20*c0c276a7Ssdargavi PetscScalar value; 21*c0c276a7Ssdargavi PetscRandom rctx; 22*c0c276a7Ssdargavi PetscBool equal, done; 23*c0c276a7Ssdargavi Mat AA, AB; 24*c0c276a7Ssdargavi PetscMPIInt size, rank; 25*c0c276a7Ssdargavi 26*c0c276a7Ssdargavi // ~~~~~~~~~~~~~~~~~~~~~ 27*c0c276a7Ssdargavi // This test shows the routines needed to build a kokkos matrix without preallocation 28*c0c276a7Ssdargavi // on the host 29*c0c276a7Ssdargavi // ~~~~~~~~~~~~~~~~~~~~~ 30*c0c276a7Ssdargavi 31*c0c276a7Ssdargavi PetscFunctionBeginUser; 32*c0c276a7Ssdargavi PetscCall(PetscInitialize(&argc, &argv, NULL, help)); 33*c0c276a7Ssdargavi PetscCallMPI(MPI_Comm_size(PETSC_COMM_WORLD, &size)); 34*c0c276a7Ssdargavi PetscCheck(size > 1, PETSC_COMM_WORLD, PETSC_ERR_WRONG_MPI_SIZE, "Must run with 2 or more processes"); 35*c0c276a7Ssdargavi PetscCallMPI(MPI_Comm_rank(PETSC_COMM_WORLD, &rank)); 36*c0c276a7Ssdargavi 37*c0c276a7Ssdargavi /* Create a mpiaij matrix for checking */ 38*c0c276a7Ssdargavi PetscCall(MatCreateAIJ(PETSC_COMM_WORLD, 5, 5, PETSC_DECIDE, PETSC_DECIDE, 0, NULL, 0, NULL, &A)); 39*c0c276a7Ssdargavi PetscCall(MatSetFromOptions(A)); 40*c0c276a7Ssdargavi PetscCall(MatSetOption(A, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_FALSE)); 41*c0c276a7Ssdargavi PetscCall(MatSetUp(A)); 42*c0c276a7Ssdargavi PetscCall(PetscRandomCreate(PETSC_COMM_WORLD, &rctx)); 43*c0c276a7Ssdargavi PetscCall(PetscRandomSetFromOptions(rctx)); 44*c0c276a7Ssdargavi 45*c0c276a7Ssdargavi for (i = 5 * rank; i < 5 * rank + 5; i++) { 46*c0c276a7Ssdargavi for (j = 0; j < 5 * size; j++) { 47*c0c276a7Ssdargavi PetscCall(PetscRandomGetValue(rctx, &value)); 48*c0c276a7Ssdargavi column = (PetscInt)(5 * size * PetscRealPart(value)); 49*c0c276a7Ssdargavi PetscCall(PetscRandomGetValue(rctx, &value)); 50*c0c276a7Ssdargavi PetscCall(MatSetValues(A, 1, &i, 1, &column, &value, INSERT_VALUES)); 51*c0c276a7Ssdargavi } 52*c0c276a7Ssdargavi } 53*c0c276a7Ssdargavi PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY)); 54*c0c276a7Ssdargavi PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY)); 55*c0c276a7Ssdargavi 56*c0c276a7Ssdargavi PetscCall(MatGetSize(A, &M, &N)); 57*c0c276a7Ssdargavi PetscCall(MatGetLocalSize(A, &m, &n)); 58*c0c276a7Ssdargavi PetscCall(MatMPIAIJGetSeqAIJ(A, &AA, &AB, &garray)); 59*c0c276a7Ssdargavi PetscCall(MatGetRowIJ(AA, 0, PETSC_FALSE, PETSC_FALSE, &nd, (const PetscInt **)&di, (const PetscInt **)&dj, &done)); 60*c0c276a7Ssdargavi PetscCall(MatSeqAIJGetArray(AA, &da)); 61*c0c276a7Ssdargavi PetscCall(MatGetRowIJ(AB, 0, PETSC_FALSE, PETSC_FALSE, &nd, (const PetscInt **)&oi, (const PetscInt **)&oj, &done)); 62*c0c276a7Ssdargavi PetscCall(MatSeqAIJGetArray(AB, &oa)); 63*c0c276a7Ssdargavi PetscCall(MatGetSize(AB, &m_ab, &n_ab)); 64*c0c276a7Ssdargavi 65*c0c276a7Ssdargavi Mat output_mat_local, output_mat_nonlocal; 66*c0c276a7Ssdargavi // Be careful about scope given the kokkos memory reference counts 67*c0c276a7Ssdargavi { 68*c0c276a7Ssdargavi // Local 69*c0c276a7Ssdargavi Kokkos::View<PetscScalar *> a_local_d; 70*c0c276a7Ssdargavi Kokkos::View<PetscInt *> i_local_d; 71*c0c276a7Ssdargavi Kokkos::View<PetscInt *> j_local_d; 72*c0c276a7Ssdargavi 73*c0c276a7Ssdargavi // Nonlocal 74*c0c276a7Ssdargavi Kokkos::View<PetscScalar *> a_nonlocal_d; 75*c0c276a7Ssdargavi Kokkos::View<PetscInt *> i_nonlocal_d; 76*c0c276a7Ssdargavi Kokkos::View<PetscInt *> j_nonlocal_d; 77*c0c276a7Ssdargavi 78*c0c276a7Ssdargavi // Create device memory 79*c0c276a7Ssdargavi PetscCallCXX(a_local_d = Kokkos::View<PetscScalar *>("a_local_d", di[5])); 80*c0c276a7Ssdargavi PetscCallCXX(i_local_d = Kokkos::View<PetscInt *>("i_local_d", m + 1)); 81*c0c276a7Ssdargavi PetscCallCXX(j_local_d = Kokkos::View<PetscInt *>("j_local_d", di[5])); 82*c0c276a7Ssdargavi 83*c0c276a7Ssdargavi // Create non-local device memory 84*c0c276a7Ssdargavi PetscCallCXX(a_nonlocal_d = Kokkos::View<PetscScalar *>("a_nonlocal_d", oi[5])); 85*c0c276a7Ssdargavi PetscCallCXX(i_nonlocal_d = Kokkos::View<PetscInt *>("i_nonlocal_d", m + 1)); 86*c0c276a7Ssdargavi PetscCallCXX(j_nonlocal_d = Kokkos::View<PetscInt *>("j_nonlocal_d", oi[5])); 87*c0c276a7Ssdargavi 88*c0c276a7Ssdargavi // ~~~~~~~~~~~~~~~~~~~~~ 89*c0c276a7Ssdargavi // Could fill the aij on the device - we're just going to test 90*c0c276a7Ssdargavi // by copying in the existing host values 91*c0c276a7Ssdargavi // ~~~~~~~~~~~~~~~~~~~~~ 92*c0c276a7Ssdargavi Kokkos::View<PetscScalar *, HostMirrorMemorySpace> a_local_h; 93*c0c276a7Ssdargavi Kokkos::View<PetscInt *, HostMirrorMemorySpace> i_local_h; 94*c0c276a7Ssdargavi Kokkos::View<PetscInt *, HostMirrorMemorySpace> j_local_h; 95*c0c276a7Ssdargavi Kokkos::View<PetscScalar *, HostMirrorMemorySpace> a_nonlocal_h; 96*c0c276a7Ssdargavi Kokkos::View<PetscInt *, HostMirrorMemorySpace> i_nonlocal_h; 97*c0c276a7Ssdargavi Kokkos::View<PetscInt *, HostMirrorMemorySpace> j_nonlocal_h; 98*c0c276a7Ssdargavi 99*c0c276a7Ssdargavi PetscCallCXX(a_local_h = Kokkos::View<PetscScalar *, HostMirrorMemorySpace>(da, di[5])); 100*c0c276a7Ssdargavi PetscCallCXX(i_local_h = Kokkos::View<PetscInt *, HostMirrorMemorySpace>(di, m + 1)); 101*c0c276a7Ssdargavi PetscCallCXX(j_local_h = Kokkos::View<PetscInt *, HostMirrorMemorySpace>(dj, di[5])); 102*c0c276a7Ssdargavi PetscCallCXX(a_nonlocal_h = Kokkos::View<PetscScalar *, HostMirrorMemorySpace>(oa, oi[5])); 103*c0c276a7Ssdargavi PetscCallCXX(i_nonlocal_h = Kokkos::View<PetscInt *, HostMirrorMemorySpace>(oi, m + 1)); 104*c0c276a7Ssdargavi PetscCallCXX(j_nonlocal_h = Kokkos::View<PetscInt *, HostMirrorMemorySpace>(oj, oi[5])); 105*c0c276a7Ssdargavi 106*c0c276a7Ssdargavi // Haven't specified an exec space so these will all be synchronous 107*c0c276a7Ssdargavi // and finish without a need to call fence after 108*c0c276a7Ssdargavi PetscCallCXX(Kokkos::deep_copy(a_local_d, a_local_h)); 109*c0c276a7Ssdargavi PetscCallCXX(Kokkos::deep_copy(i_local_d, i_local_h)); 110*c0c276a7Ssdargavi PetscCallCXX(Kokkos::deep_copy(j_local_d, j_local_h)); 111*c0c276a7Ssdargavi PetscCallCXX(Kokkos::deep_copy(a_nonlocal_d, a_nonlocal_h)); 112*c0c276a7Ssdargavi PetscCallCXX(Kokkos::deep_copy(i_nonlocal_d, i_nonlocal_h)); 113*c0c276a7Ssdargavi PetscCallCXX(Kokkos::deep_copy(j_nonlocal_d, j_nonlocal_h)); 114*c0c276a7Ssdargavi 115*c0c276a7Ssdargavi // The garray passed in has to be on the host, but it can be created 116*c0c276a7Ssdargavi // on device and copied to the host 117*c0c276a7Ssdargavi // We're just going to copy the existing host values here 118*c0c276a7Ssdargavi PetscCall(PetscMalloc1(n_ab, &garray_h)); 119*c0c276a7Ssdargavi for (int i = 0; i < n_ab; i++) { garray_h[i] = garray[i]; } 120*c0c276a7Ssdargavi 121*c0c276a7Ssdargavi // ~~~~~~~~~~~~~~~~~~~~~ 122*c0c276a7Ssdargavi 123*c0c276a7Ssdargavi // ~~~~~~~~~~~~~~~~~ 124*c0c276a7Ssdargavi // Test MatCreateSeqAIJKokkosWithKokkosViews 125*c0c276a7Ssdargavi // ~~~~~~~~~~~~~~~~~ 126*c0c276a7Ssdargavi 127*c0c276a7Ssdargavi // We can create our local diagonal block matrix directly on the device 128*c0c276a7Ssdargavi PetscCall(MatCreateSeqAIJKokkosWithKokkosViews(PETSC_COMM_SELF, m, n, i_local_d, j_local_d, a_local_d, &output_mat_local)); 129*c0c276a7Ssdargavi 130*c0c276a7Ssdargavi // We can create our nonlocal diagonal block matrix directly on the device 131*c0c276a7Ssdargavi PetscCall(MatCreateSeqAIJKokkosWithKokkosViews(PETSC_COMM_SELF, m, n_ab, i_nonlocal_d, j_nonlocal_d, a_nonlocal_d, &output_mat_nonlocal)); 132*c0c276a7Ssdargavi 133*c0c276a7Ssdargavi // Build our MPI matrix 134*c0c276a7Ssdargavi // If we provide garray and output_mat_nonlocal with local indices and the compactified size 135*c0c276a7Ssdargavi // almost nothing happens on the host 136*c0c276a7Ssdargavi PetscCall(MatCreateMPIAIJWithSeqAIJ(PETSC_COMM_WORLD, M, N, output_mat_local, output_mat_nonlocal, garray_h, &B)); 137*c0c276a7Ssdargavi 138*c0c276a7Ssdargavi PetscCall(MatEqual(A, B, &equal)); 139*c0c276a7Ssdargavi PetscCall(MatRestoreRowIJ(AA, 0, PETSC_FALSE, PETSC_FALSE, &nd, (const PetscInt **)&di, (const PetscInt **)&dj, &done)); 140*c0c276a7Ssdargavi PetscCall(MatSeqAIJRestoreArray(AA, &da)); 141*c0c276a7Ssdargavi PetscCall(MatRestoreRowIJ(AB, 0, PETSC_FALSE, PETSC_FALSE, &nd, (const PetscInt **)&oi, (const PetscInt **)&oj, &done)); 142*c0c276a7Ssdargavi PetscCall(MatSeqAIJRestoreArray(AB, &oa)); 143*c0c276a7Ssdargavi 144*c0c276a7Ssdargavi PetscCheck(equal, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Likely a bug in MatCreateSeqAIJKokkosWithKokkosViews()"); 145*c0c276a7Ssdargavi PetscCall(MatDestroy(&B)); 146*c0c276a7Ssdargavi 147*c0c276a7Ssdargavi // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 148*c0c276a7Ssdargavi 149*c0c276a7Ssdargavi /* Free spaces */ 150*c0c276a7Ssdargavi PetscCall(PetscRandomDestroy(&rctx)); 151*c0c276a7Ssdargavi PetscCall(MatDestroy(&A)); 152*c0c276a7Ssdargavi } 153*c0c276a7Ssdargavi PetscCall(PetscFinalize()); 154*c0c276a7Ssdargavi return 0; 155*c0c276a7Ssdargavi } 156*c0c276a7Ssdargavi 157*c0c276a7Ssdargavi /*TEST 158*c0c276a7Ssdargavi build: 159*c0c276a7Ssdargavi requires: kokkos_kernels 160*c0c276a7Ssdargavi 161*c0c276a7Ssdargavi test: 162*c0c276a7Ssdargavi nsize: 2 163*c0c276a7Ssdargavi args: -mat_type aijkokkos 164*c0c276a7Ssdargavi requires: kokkos_kernels 165*c0c276a7Ssdargavi output_file: output/empty.out 166*c0c276a7Ssdargavi 167*c0c276a7Ssdargavi TEST*/ 168