1c0c276a7Ssdargavi static char help[] = "Testing MatCreateSeqAIJKokkosWithKokkosViews() and building Mat on the device.\n\n"; 2c0c276a7Ssdargavi 3c0c276a7Ssdargavi #include <petscvec_kokkos.hpp> 4c0c276a7Ssdargavi #include <petscdevice.h> 5c0c276a7Ssdargavi #include <petscmat.h> 6c0c276a7Ssdargavi #include <petscmat_kokkos.hpp> 7c0c276a7Ssdargavi #include <Kokkos_Core.hpp> 8c0c276a7Ssdargavi #include <Kokkos_DualView.hpp> 9c0c276a7Ssdargavi 10c0c276a7Ssdargavi using HostMirrorMemorySpace = Kokkos::DualView<PetscScalar *>::host_mirror_space::memory_space; 11c0c276a7Ssdargavi 12c0c276a7Ssdargavi int main(int argc, char **argv) 13c0c276a7Ssdargavi { 14ecd797f4SJunchao Zhang Mat A, B, BT; 15c0c276a7Ssdargavi PetscInt i, j, column, M, N, m, n, m_ab, n_ab; 16c0c276a7Ssdargavi PetscInt *di, *dj, *oi, *oj, nd; 17c0c276a7Ssdargavi const PetscInt *garray; 18c0c276a7Ssdargavi PetscInt *garray_h; 19c0c276a7Ssdargavi PetscScalar *oa, *da; 20c0c276a7Ssdargavi PetscScalar value; 21c0c276a7Ssdargavi PetscRandom rctx; 22c0c276a7Ssdargavi PetscBool equal, done; 23c0c276a7Ssdargavi Mat AA, AB; 24c0c276a7Ssdargavi PetscMPIInt size, rank; 25c0c276a7Ssdargavi 26c0c276a7Ssdargavi // ~~~~~~~~~~~~~~~~~~~~~ 27c0c276a7Ssdargavi // This test shows the routines needed to build a kokkos matrix without preallocation 28c0c276a7Ssdargavi // on the host 29c0c276a7Ssdargavi // ~~~~~~~~~~~~~~~~~~~~~ 30c0c276a7Ssdargavi 31c0c276a7Ssdargavi PetscFunctionBeginUser; 32c0c276a7Ssdargavi PetscCall(PetscInitialize(&argc, &argv, NULL, help)); 33c0c276a7Ssdargavi PetscCallMPI(MPI_Comm_size(PETSC_COMM_WORLD, &size)); 34c0c276a7Ssdargavi PetscCheck(size > 1, PETSC_COMM_WORLD, PETSC_ERR_WRONG_MPI_SIZE, "Must run with 2 or more processes"); 35c0c276a7Ssdargavi PetscCallMPI(MPI_Comm_rank(PETSC_COMM_WORLD, &rank)); 36c0c276a7Ssdargavi 37c0c276a7Ssdargavi /* Create a mpiaij matrix for checking */ 38c0c276a7Ssdargavi PetscCall(MatCreateAIJ(PETSC_COMM_WORLD, 5, 5, PETSC_DECIDE, PETSC_DECIDE, 0, NULL, 0, NULL, &A)); 39c0c276a7Ssdargavi PetscCall(MatSetFromOptions(A)); 40c0c276a7Ssdargavi PetscCall(MatSetOption(A, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_FALSE)); 41c0c276a7Ssdargavi PetscCall(MatSetUp(A)); 42c0c276a7Ssdargavi PetscCall(PetscRandomCreate(PETSC_COMM_WORLD, &rctx)); 43c0c276a7Ssdargavi PetscCall(PetscRandomSetFromOptions(rctx)); 44c0c276a7Ssdargavi 45c0c276a7Ssdargavi for (i = 5 * rank; i < 5 * rank + 5; i++) { 46c0c276a7Ssdargavi for (j = 0; j < 5 * size; j++) { 47c0c276a7Ssdargavi PetscCall(PetscRandomGetValue(rctx, &value)); 48c0c276a7Ssdargavi column = (PetscInt)(5 * size * PetscRealPart(value)); 49ecd797f4SJunchao Zhang 50ecd797f4SJunchao Zhang // rank 0 has no off-process entries 51ecd797f4SJunchao Zhang if (rank == 0 && (column < i || column >= i)) column = i; 52ecd797f4SJunchao Zhang 53c0c276a7Ssdargavi PetscCall(PetscRandomGetValue(rctx, &value)); 54c0c276a7Ssdargavi PetscCall(MatSetValues(A, 1, &i, 1, &column, &value, INSERT_VALUES)); 55c0c276a7Ssdargavi } 56c0c276a7Ssdargavi } 57c0c276a7Ssdargavi PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY)); 58c0c276a7Ssdargavi PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY)); 59c0c276a7Ssdargavi 60c0c276a7Ssdargavi PetscCall(MatGetSize(A, &M, &N)); 61c0c276a7Ssdargavi PetscCall(MatGetLocalSize(A, &m, &n)); 62c0c276a7Ssdargavi PetscCall(MatMPIAIJGetSeqAIJ(A, &AA, &AB, &garray)); 63c0c276a7Ssdargavi PetscCall(MatGetRowIJ(AA, 0, PETSC_FALSE, PETSC_FALSE, &nd, (const PetscInt **)&di, (const PetscInt **)&dj, &done)); 64c0c276a7Ssdargavi PetscCall(MatSeqAIJGetArray(AA, &da)); 65c0c276a7Ssdargavi PetscCall(MatGetRowIJ(AB, 0, PETSC_FALSE, PETSC_FALSE, &nd, (const PetscInt **)&oi, (const PetscInt **)&oj, &done)); 66c0c276a7Ssdargavi PetscCall(MatSeqAIJGetArray(AB, &oa)); 67c0c276a7Ssdargavi PetscCall(MatGetSize(AB, &m_ab, &n_ab)); 68c0c276a7Ssdargavi 69c0c276a7Ssdargavi Mat output_mat_local, output_mat_nonlocal; 70c0c276a7Ssdargavi // Be careful about scope given the kokkos memory reference counts 71c0c276a7Ssdargavi { 72c0c276a7Ssdargavi // Local 73c0c276a7Ssdargavi Kokkos::View<PetscScalar *> a_local_d; 74c0c276a7Ssdargavi Kokkos::View<PetscInt *> i_local_d; 75c0c276a7Ssdargavi Kokkos::View<PetscInt *> j_local_d; 76c0c276a7Ssdargavi 77c0c276a7Ssdargavi // Nonlocal 78c0c276a7Ssdargavi Kokkos::View<PetscScalar *> a_nonlocal_d; 79c0c276a7Ssdargavi Kokkos::View<PetscInt *> i_nonlocal_d; 80c0c276a7Ssdargavi Kokkos::View<PetscInt *> j_nonlocal_d; 81c0c276a7Ssdargavi 82c0c276a7Ssdargavi // Create device memory 83c0c276a7Ssdargavi PetscCallCXX(a_local_d = Kokkos::View<PetscScalar *>("a_local_d", di[5])); 84c0c276a7Ssdargavi PetscCallCXX(i_local_d = Kokkos::View<PetscInt *>("i_local_d", m + 1)); 85c0c276a7Ssdargavi PetscCallCXX(j_local_d = Kokkos::View<PetscInt *>("j_local_d", di[5])); 86c0c276a7Ssdargavi 87c0c276a7Ssdargavi // Create non-local device memory 88c0c276a7Ssdargavi PetscCallCXX(a_nonlocal_d = Kokkos::View<PetscScalar *>("a_nonlocal_d", oi[5])); 89c0c276a7Ssdargavi PetscCallCXX(i_nonlocal_d = Kokkos::View<PetscInt *>("i_nonlocal_d", m + 1)); 90c0c276a7Ssdargavi PetscCallCXX(j_nonlocal_d = Kokkos::View<PetscInt *>("j_nonlocal_d", oi[5])); 91c0c276a7Ssdargavi 92c0c276a7Ssdargavi // ~~~~~~~~~~~~~~~~~~~~~ 93c0c276a7Ssdargavi // Could fill the aij on the device - we're just going to test 94c0c276a7Ssdargavi // by copying in the existing host values 95c0c276a7Ssdargavi // ~~~~~~~~~~~~~~~~~~~~~ 96c0c276a7Ssdargavi Kokkos::View<PetscScalar *, HostMirrorMemorySpace> a_local_h; 97c0c276a7Ssdargavi Kokkos::View<PetscInt *, HostMirrorMemorySpace> i_local_h; 98c0c276a7Ssdargavi Kokkos::View<PetscInt *, HostMirrorMemorySpace> j_local_h; 99c0c276a7Ssdargavi Kokkos::View<PetscScalar *, HostMirrorMemorySpace> a_nonlocal_h; 100c0c276a7Ssdargavi Kokkos::View<PetscInt *, HostMirrorMemorySpace> i_nonlocal_h; 101c0c276a7Ssdargavi Kokkos::View<PetscInt *, HostMirrorMemorySpace> j_nonlocal_h; 102c0c276a7Ssdargavi 103c0c276a7Ssdargavi PetscCallCXX(a_local_h = Kokkos::View<PetscScalar *, HostMirrorMemorySpace>(da, di[5])); 104c0c276a7Ssdargavi PetscCallCXX(i_local_h = Kokkos::View<PetscInt *, HostMirrorMemorySpace>(di, m + 1)); 105c0c276a7Ssdargavi PetscCallCXX(j_local_h = Kokkos::View<PetscInt *, HostMirrorMemorySpace>(dj, di[5])); 106c0c276a7Ssdargavi PetscCallCXX(a_nonlocal_h = Kokkos::View<PetscScalar *, HostMirrorMemorySpace>(oa, oi[5])); 107c0c276a7Ssdargavi PetscCallCXX(i_nonlocal_h = Kokkos::View<PetscInt *, HostMirrorMemorySpace>(oi, m + 1)); 108c0c276a7Ssdargavi PetscCallCXX(j_nonlocal_h = Kokkos::View<PetscInt *, HostMirrorMemorySpace>(oj, oi[5])); 109c0c276a7Ssdargavi 110c0c276a7Ssdargavi // Haven't specified an exec space so these will all be synchronous 111c0c276a7Ssdargavi // and finish without a need to call fence after 112c0c276a7Ssdargavi PetscCallCXX(Kokkos::deep_copy(a_local_d, a_local_h)); 113c0c276a7Ssdargavi PetscCallCXX(Kokkos::deep_copy(i_local_d, i_local_h)); 114c0c276a7Ssdargavi PetscCallCXX(Kokkos::deep_copy(j_local_d, j_local_h)); 115c0c276a7Ssdargavi PetscCallCXX(Kokkos::deep_copy(a_nonlocal_d, a_nonlocal_h)); 116c0c276a7Ssdargavi PetscCallCXX(Kokkos::deep_copy(i_nonlocal_d, i_nonlocal_h)); 117c0c276a7Ssdargavi PetscCallCXX(Kokkos::deep_copy(j_nonlocal_d, j_nonlocal_h)); 118c0c276a7Ssdargavi 119c0c276a7Ssdargavi // The garray passed in has to be on the host, but it can be created 120c0c276a7Ssdargavi // on device and copied to the host 121c0c276a7Ssdargavi // We're just going to copy the existing host values here 122c0c276a7Ssdargavi PetscCall(PetscMalloc1(n_ab, &garray_h)); 123*ac530a7eSPierre Jolivet for (int i = 0; i < n_ab; i++) garray_h[i] = garray[i]; 124c0c276a7Ssdargavi 125c0c276a7Ssdargavi // ~~~~~~~~~~~~~~~~~~~~~ 126c0c276a7Ssdargavi 127c0c276a7Ssdargavi // ~~~~~~~~~~~~~~~~~ 128c0c276a7Ssdargavi // Test MatCreateSeqAIJKokkosWithKokkosViews 129c0c276a7Ssdargavi // ~~~~~~~~~~~~~~~~~ 130c0c276a7Ssdargavi 131c0c276a7Ssdargavi // We can create our local diagonal block matrix directly on the device 132c0c276a7Ssdargavi PetscCall(MatCreateSeqAIJKokkosWithKokkosViews(PETSC_COMM_SELF, m, n, i_local_d, j_local_d, a_local_d, &output_mat_local)); 133c0c276a7Ssdargavi 134c0c276a7Ssdargavi // We can create our nonlocal diagonal block matrix directly on the device 135c0c276a7Ssdargavi PetscCall(MatCreateSeqAIJKokkosWithKokkosViews(PETSC_COMM_SELF, m, n_ab, i_nonlocal_d, j_nonlocal_d, a_nonlocal_d, &output_mat_nonlocal)); 136c0c276a7Ssdargavi 137c0c276a7Ssdargavi // Build our MPI matrix 138c0c276a7Ssdargavi // If we provide garray and output_mat_nonlocal with local indices and the compactified size 139c0c276a7Ssdargavi // almost nothing happens on the host 140c0c276a7Ssdargavi PetscCall(MatCreateMPIAIJWithSeqAIJ(PETSC_COMM_WORLD, M, N, output_mat_local, output_mat_nonlocal, garray_h, &B)); 141c0c276a7Ssdargavi 142c0c276a7Ssdargavi PetscCall(MatEqual(A, B, &equal)); 143c0c276a7Ssdargavi PetscCall(MatRestoreRowIJ(AA, 0, PETSC_FALSE, PETSC_FALSE, &nd, (const PetscInt **)&di, (const PetscInt **)&dj, &done)); 144c0c276a7Ssdargavi PetscCall(MatSeqAIJRestoreArray(AA, &da)); 145c0c276a7Ssdargavi PetscCall(MatRestoreRowIJ(AB, 0, PETSC_FALSE, PETSC_FALSE, &nd, (const PetscInt **)&oi, (const PetscInt **)&oj, &done)); 146c0c276a7Ssdargavi PetscCall(MatSeqAIJRestoreArray(AB, &oa)); 147c0c276a7Ssdargavi PetscCheck(equal, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Likely a bug in MatCreateSeqAIJKokkosWithKokkosViews()"); 148ecd797f4SJunchao Zhang } 149c0c276a7Ssdargavi 150ecd797f4SJunchao Zhang PetscCall(MatTranspose(B, MAT_INITIAL_MATRIX, &BT)); 151c0c276a7Ssdargavi 152c0c276a7Ssdargavi /* Free spaces */ 153c0c276a7Ssdargavi PetscCall(PetscRandomDestroy(&rctx)); 154c0c276a7Ssdargavi PetscCall(MatDestroy(&A)); 155ecd797f4SJunchao Zhang PetscCall(MatDestroy(&B)); 156ecd797f4SJunchao Zhang PetscCall(MatDestroy(&BT)); 157c0c276a7Ssdargavi PetscCall(PetscFinalize()); 158c0c276a7Ssdargavi return 0; 159c0c276a7Ssdargavi } 160c0c276a7Ssdargavi 161c0c276a7Ssdargavi /*TEST 162c0c276a7Ssdargavi build: 163c0c276a7Ssdargavi requires: kokkos_kernels 164c0c276a7Ssdargavi 165c0c276a7Ssdargavi test: 166c0c276a7Ssdargavi nsize: 2 167c0c276a7Ssdargavi args: -mat_type aijkokkos 168c0c276a7Ssdargavi requires: kokkos_kernels 169c0c276a7Ssdargavi output_file: output/empty.out 170c0c276a7Ssdargavi 171c0c276a7Ssdargavi TEST*/ 172